# **Packages' Import**

In [None]:
import numpy             as np                                   # Library for numerical arrays (conversion/reshaping of the LSTM inputs)
import pandas            as pd                                   # Library for Read/Write IO File (CSV, Excel, TXT, JSON, etc.)
import matplotlib.pyplot as plt                                  # Library for plotting and figures
from tensorflow.keras.models import Sequential                   # Importation of sequantial Model for LSTM
from tensorflow.keras.layers import LSTM,Dense,Dropout           # Importation of LSTM Units
from sklearn.metrics        import mean_squared_error            # Mean Squared Error for forecasting performance evaluation
from sklearn.metrics        import mean_absolute_error           # Mean Absolute Error for forecasting performance evaluation
from sklearn.metrics        import mean_absolute_percentage_error# MAPE for evaluation the percentage of error
from sklearn.preprocessing  import MinMaxScaler

# **Step 1 - Business & Data Understanding**

The goal of this step is to:
* identify the number of samples;
* identify the type of forecasting;
* study the stationarity of your time series.


In [None]:
# Load the retail sales CSV into a DataFrame (the path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="Retail Sales Dataset - MiniProjet.csv")
# Preview the first five rows to check how the columns were parsed.
df.head(n=5)

In [None]:
# Print a concise summary of the DataFrame: columns, non-null counts and dtypes.
df.info()

This dataset includes:
* 937 samples (about 2.5 years of data);
* daily sampling, i.e. a short-term forecasting problem.


In [None]:
# Line plot of the "Sales" column against the row index, drawn on the current axes.
axes = plt.gca()
axes.plot(df["Sales"])
axes.set_xlabel("Day")
axes.set_ylabel("Number of Sales per day")
axes.set_title("Distribution of the Sales over 2.5 years")

# **Step 2 - Data preparation**

The goal of this step is to:
* Construct X and y, where X denotes the feature matrix and y is the label vector;
* Split dataset into training set and testing set: X_train,X_test,y_train,y_test.

## **2.1. Data Loading**

In [None]:
# Scale the sales values into the [0, 1] range (MinMaxScaler's default feature_range).
# The previous version passed an undefined name `data` to fit_transform, which raises
# NameError on a fresh kernel; the "Sales" column of `df` is used as the input instead.
# NOTE(review): the scaled array `data` is not consumed by the following cells
# (load_data is called on `df`), and the scaler is fitted on the whole series before the
# train/test split — confirm whether scaling should be applied downstream and fitted on
# the training part only.
scaler = MinMaxScaler()
data = scaler.fit_transform(df[["Sales"]])   # 2-D input: one column, one row per sample

In [None]:
def load_data(data,seq_len):
  """Build sliding-window samples from the column at position 1 of `data`.

  For every row position i in [seq_len, len(data)), the window is made of the
  `seq_len` rows that precede i and the label is the value found at row i.

  Parameters:
    data: object indexed through `.iloc[rows, 1]` (assumed to be a pandas
      DataFrame whose second column holds the series to forecast — confirm
      against the caller).
    seq_len: number of consecutive rows in each window.

  Returns:
    (X, y): two lists of equal length; X holds the windows (the `.iloc`
    slices, in order) and y holds the matching next-step values. Both are
    empty when `data` has no more than `seq_len` rows.
  """
  window_ends = range(seq_len, len(data))
  X = [data.iloc[end - seq_len:end, 1] for end in window_ends]
  y = [data.iloc[end, 1] for end in window_ends]
  return X, y
# Build the sliding-window samples with a look-back of 80 rows, then report how many
# windows and labels were produced (one count per line).
X, y = load_data(df, seq_len=80)
print(len(X), len(y), sep="\n")

## **2.2. Data Split**

We split the dataset into 80% for training and 20% for testing.

In [None]:
# Size of an 80% share of the samples. The product is a float, so it is displayed for
# inspection only and cannot be used directly as a slice index.
train_size = 0.8 * len(y)
print(train_size)

In [None]:
# Number of training samples: 80% of the available windows, truncated to an integer so it
# can be used as a slice index. Derived from len(y) rather than hard-coded, so the split
# stays correct if the dataset size or the window length changes.
train_size = int(len(y) * 0.8)

In [None]:
# Ordered split: the first `train_size` windows/labels form the training set and the
# remaining ones form the testing set (no shuffling is applied).
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [None]:
# Show the number of labels in the training part, then in the testing part.
print(len(y_train), len(y_test), sep="\n")

In [None]:
# Convert the Python lists produced by the split into NumPy arrays.
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)
# Give the inputs a trailing axis of size 1 so they become 3-D
# (samples, time steps, 1). The sample count comes from the arrays themselves and the
# time-step count is inferred (-1), instead of the former hard-coded (685,80,1) and
# (172,80,1), which only matched one dataset size / window length / split ratio.
# reshape keeps the cell safe to re-run: an already 3-D array is left unchanged.
X_train = X_train.reshape(len(X_train), -1, 1)
X_test = X_test.reshape(len(X_test), -1, 1)

# **Step 3- Deep Learning**

In [None]:
# Network definition: an LSTM layer with 80 units declared for inputs of 80 time steps
# with one feature each, followed by a 100-unit dense layer and a single-unit output.
model = Sequential([
    LSTM(80, input_shape=(80, 1)),
    Dense(100),
    Dense(1),
])

In [None]:
# Train by minimising the mean squared error with the Adam optimizer.
model.compile(optimizer='adam', loss='mean_squared_error')
# NOTE(review): the test split is also passed as validation data, so the validation loss
# is measured on the same samples that are used for the final evaluation.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=2,
    epochs=100,
    verbose=2,
    validation_data=(X_test, y_test),
)


# **Step 4- Performance Evaluation**

In [None]:
# Evaluate the trained model on the test windows and print the three error metrics.
print(" **** Performance of LSTM *****")
y_pred_lstm = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred_lstm)
mae = mean_absolute_error(y_test, y_pred_lstm)
# scikit-learn returns MAPE as a fraction (it is not multiplied by 100).
mape = mean_absolute_percentage_error(y_test, y_pred_lstm)
print("MSE= ", mse)
print("MAE= ", mae)
print("MAPE= ", mape)   # label corrected: it was misspelled "MAPEE"


In [None]:
# Compare the real test values with the LSTM predictions on a single figure.
# The axis labels and title now describe the daily retail sales data; the previous text
# ("Month", "Number of Passenger", "Airline Passengers") came from a different study.
plt.xlabel("Day (test set index)")
plt.ylabel("Number of Sales per day")
plt.plot(y_test, color='red', label='Real')
plt.plot(y_pred_lstm, color='blue', label='LSTM Regression')
plt.title("LSTM Forecasting of Daily Retail Sales (Test Set)")
plt.legend()