In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM 
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint  

In [None]:
# Load the raw sales records. The path is relative, so the CSV is looked up in
# the notebook's working directory. Later cells read the 'timestamp' and
# 'sales' columns from this frame.
df = pd.read_csv("sales_data.csv")

In [None]:
# Parse the 'timestamp' column into pandas datetimes so that the `.dt`
# accessor can be used on it afterwards.
df['timestamp'] = pd.to_datetime(df['timestamp'])

In [None]:
# Bucket every record into its calendar month (monthly Period) and total the
# numeric columns per month.
# NOTE: this overwrites df['timestamp'] with Period values, so the cell cannot
# be re-run without first re-running the datetime conversion above.
df['timestamp'] = df['timestamp'].dt.to_period('M')
# numeric_only=True restricts the sum to numeric columns. Recent pandas no
# longer drops non-numeric columns implicitly, so text columns would otherwise
# be concatenated (or raise) and leak into the model inputs built later.
monthly_sales = df.groupby('timestamp').sum(numeric_only=True).reset_index()

In [None]:
# Print a summary of the raw frame (columns, dtypes, non-null counts).
# By this point 'timestamp' has already been converted to monthly periods.
df.info()

In [None]:
# Turn the monthly Period values back into regular timestamps
# (to_timestamp with its default arguments), then preview the result.
month_periods = monthly_sales['timestamp']
monthly_sales['timestamp'] = month_periods.dt.to_timestamp()

monthly_sales.head()

In [None]:
# Visualize total sales per month as a line chart.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(monthly_sales['timestamp'], monthly_sales['sales'])
ax.set_xlabel("Date")
ax.set_ylabel("Sales")
ax.set_title("Monthly Customer Sales")
plt.show()

In [None]:
# First-order differencing: 'sales_diff' is the change in sales versus the
# previous row (month). The first row has no predecessor, so its difference is
# NaN; dropna() removes it along with any other row containing missing values.
monthly_sales = monthly_sales.assign(
    sales_diff=monthly_sales['sales'].diff()
).dropna()
monthly_sales.head()

In [None]:
# Plot the month-over-month difference series. The previous version plotted
# the raw 'sales' column here, which merely repeated the earlier chart.
plt.figure(figsize=(15, 5))
plt.plot(monthly_sales['timestamp'], monthly_sales['sales_diff'])
plt.xlabel('Date')
plt.ylabel('Sales')
plt.title('Monthly Customer Sales Differance')
plt.show()

In [None]:
# Drop the 'timestamp' and 'sales' columns; what remains (including
# 'sales_diff') is the basis for the supervised-learning frame.
sd = monthly_sales.drop(columns=['timestamp', 'sales'])

In [None]:
# Prepare the supervised data: for every row, add the 12 preceding monthly
# differences as feature columns month_1 ... month_12 (month_k = value k rows
# earlier).
for lag in range(1, 13):
    # The label must be one string. Writing `'month_', str(lag)` would build a
    # tuple and give the frame mixed str/tuple column labels.
    col_name = 'month_' + str(lag)
    sd[col_name] = sd['sales_diff'].shift(lag)

# The first 12 rows lack a full lag history (NaN after shifting); discard them
# and renumber the remaining rows from 0.
sd = sd.dropna().reset_index(drop=True)
sd.head()

In [None]:
# Split the data chronologically: the last 12 rows (months) are held out for
# testing and everything before them is used for training.
# Positional row slicing needs `.iloc[:-12]` / `.iloc[-12:]`; the tuple-style
# `sd[:, -12]` is not valid DataFrame indexing and raises.
train_data = sd.iloc[:-12]
test_data = sd.iloc[-12:]
print("Train data", train_data.shape)
print("Test data", test_data.shape)

In [None]:
# Scale every column into [-1, 1]. The scaler is fitted on the training rows
# only and then applied unchanged to both splits.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train_data)
train_data, test_data = scaler.transform(train_data), scaler.transform(test_data)

In [None]:
# Column 0 of the scaled arrays is the target; the remaining columns are the
# features. Targets are flattened to 1-D.
x_train = train_data[:, 1:]
x_test = test_data[:, 1:]
y_train = train_data[:, 0:1].ravel()
y_test = test_data[:, 0:1].ravel()

In [None]:
# Start the results frame with the dates of the last 12 months (re-indexed
# from 0); model predictions are attached to it later.
sales_dates = monthly_sales['timestamp'].iloc[-12:].reset_index(drop=True)
predict_df = sales_dates.to_frame()

In [None]:
# Actual sales for the last 13 months as a plain Python list.
actual_sales = monthly_sales['sales'].tail(13).tolist()
print(actual_sales)

In [None]:
# Fit an ordinary least-squares linear regression on the scaled training data
# and predict the scaled target for the test rows.
lr = LinearRegression().fit(x_train, y_train)
prediction = lr.predict(x_test)

In [None]:
# Undo the scaling on the predictions.
# `prediction` (the array returned by lr.predict) is what must be reshaped;
# the estimator `lr` itself has no reshape method. It becomes a single column
# so it can sit in front of the feature columns.
prediction = prediction.reshape(-1, 1)
# The scaler was fitted on rows laid out as [target, features...], so rebuild
# rows with that layout before inverting the transform.
lr_test = np.concatenate([prediction, x_test], axis=1)
lr_test = scaler.inverse_transform(lr_test)

In [None]:
# Turn predicted month-over-month differences back into sales levels:
# predicted sales = predicted difference (column 0 of lr_test) + the actual
# sales value at the same position in actual_sales. actual_sales holds 13
# values for 12 predictions, so position k is the month before test month k;
# zip() stops after the last prediction.
results = [row[0] + previous for row, previous in zip(lr_test, actual_sales)]

lr_series = pd.Series(results, name='LinearPrediction')
# Attach the predictions to the date frame by row position (both are indexed
# from 0). This is a DataFrame.merge on the indexes; the result is bound to
# `predict`, which the following cells read.
predict = predict_df.merge(lr_series, left_index=True, right_index=True)
print(predict)

In [None]:
# Evaluate the linear-regression forecast on the last 12 months.
# scikit-learn metrics take (y_true, y_pred) in that order; the order matters
# for r2_score.
lr_actual = monthly_sales['sales'][-12:]
lr_predicted = predict['LinearPrediction']

# `mse` keeps its original name but holds the ROOT mean squared error (note
# the np.sqrt), so it is reported as rmse.
mse = np.sqrt(mean_squared_error(lr_actual, lr_predicted))
mae = mean_absolute_error(lr_actual, lr_predicted)
# Call r2_score; do not assign to it, or the imported function is overwritten.
r2 = r2_score(lr_actual, lr_predicted)
print("rmse:", mse)
print("mae:", mae)
print("r2_score", r2)

In [None]:
# Visualization: actual monthly sales versus the linear-regression forecast.
# Dates go on the x-axis and sales on the y-axis, matching the axis labels.
plt.figure(figsize=(15, 5))
plt.plot(monthly_sales['timestamp'], monthly_sales['sales'])
# Columns are selected with square brackets; a DataFrame is not callable.
plt.plot(predict['timestamp'], predict['LinearPrediction'])
plt.title("Customer sales forecast using LR model")
plt.xlabel("Date")
plt.ylabel("Sales")
plt.legend(['Actual Sales', 'Predicted_Sales'])
plt.show()