In [4]:
#Load and read the Walmart sales dataset
import pandas as pd
import os

# Default to the original location, but let other machines point at their own copy by
# setting the WALMART_CSV environment variable instead of editing the notebook.
DATA_PATH = os.environ.get("WALMART_CSV", "C://Users//halde//Videos//games//Walmart.csv")
sales_data = pd.read_csv(DATA_PATH)
# Bare last expression -> rich (truncated) DataFrame display
sales_data

Unnamed: 0,Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment
0,1,05-02-2010,1643690.90,0,42.31,2.572,211.096358,8.106
1,1,12-02-2010,1641957.44,1,38.51,2.548,211.242170,8.106
2,1,19-02-2010,1611968.17,0,39.93,2.514,211.289143,8.106
3,1,26-02-2010,1409727.59,0,46.63,2.561,211.319643,8.106
4,1,05-03-2010,1554806.68,0,46.50,2.625,211.350143,8.106
...,...,...,...,...,...,...,...,...
6430,45,28-09-2012,713173.95,0,64.88,3.997,192.013558,8.684
6431,45,05-10-2012,733455.07,0,64.89,3.985,192.170412,8.667
6432,45,12-10-2012,734464.36,0,54.47,4.000,192.327265,8.667
6433,45,19-10-2012,718125.53,0,56.47,3.969,192.330854,8.667


In [5]:
#Count the missing (NaN/None) entries in every column of the dataset
missing_values = sales_data.isna().sum()
print("Missing Values:\n", missing_values)

Missing Values:
 Store           0
Date            0
Weekly_Sales    0
Holiday_Flag    0
Temperature     0
Fuel_Price      0
CPI             0
Unemployment    0
dtype: int64


In [17]:
#Positional 80/20 split: the first 80% of rows become train, the remainder test
# NOTE(review): the frame displayed above appears to be ordered by Store and then Date,
# so this split would hold out the last stores rather than the last weeks -- confirm
# that this is the intended evaluation setup.
train_size = int(0.8 * len(sales_data))
train = sales_data.iloc[:train_size]
test = sales_data.iloc[train_size:]
for subset in (train, test):
    print(subset.shape)

(5148, 8)
(1287, 8)


In [18]:
#Scale the data
from sklearn.preprocessing import MinMaxScaler
# Pull the target column out as (n_rows, 1) arrays, the 2-D layout the scaler works on
train_sales = train[['Weekly_Sales']].to_numpy()
test_sales = test[['Weekly_Sales']].to_numpy()

# Fit the scaling range on the training rows only, then reuse it for the test rows
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_sales)
test_scaled = scaler.transform(test_sales)

In [19]:
#Prepare the data for LSTM
import numpy as np
def prepare_data(data, time_steps=1):
    """Build sliding-window samples from the first column of ``data``.

    Each sample is ``time_steps`` consecutive values; its target is the value
    that immediately follows the window.

    Parameters
    ----------
    data : array-like
        2-D array of shape (n_rows, n_columns); only column 0 is used.
        A 1-D sequence is also accepted and treated as a single column.
    time_steps : int, default 1
        Number of consecutive values in each input window.

    Returns
    -------
    X : np.ndarray of shape (n_windows, time_steps)
    y : np.ndarray of shape (n_windows,)
        ``n_windows`` is ``max(n_rows - time_steps, 0)``. When the input is too
        short, ``X`` is still 2-D with shape (0, time_steps) so downstream
        shape handling keeps working.

    Raises
    ------
    ValueError
        If ``time_steps`` is negative.
    """
    if time_steps < 0:
        raise ValueError("time_steps must be non-negative")

    values = np.asarray(data)
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    series = values[:, 0]

    n_windows = max(len(series) - time_steps, 0)
    # Row i of the index grid is [i, i+1, ..., i+time_steps-1]
    window_idx = np.arange(n_windows)[:, None] + np.arange(time_steps)[None, :]
    X = series[window_idx]
    # Copy so the targets never alias the caller's array
    y = series[time_steps:time_steps + n_windows].copy()
    return X, y
# Number of previous observations fed to the model for each prediction
time_steps = 5
X_train, y_train = prepare_data(train_scaled, time_steps)
X_test, y_test = prepare_data(test_scaled, time_steps)

# The LSTM is declared with input_shape=(time_steps, 1); add the trailing feature axis
# explicitly instead of relying on the framework to adjust the input rank implicitly.
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [20]:
#Build the LSTM model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
# Extra names needed by this cell only (Sequential, LSTM and Dense are imported above)
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and the backend so weight initialisation and batch shuffling
# are repeatable across Restart & Run All
set_random_seed(42)

# Single LSTM layer over windows of `time_steps` values with one feature each,
# followed by a one-unit dense head that outputs the next (scaled) value.
# An explicit Input layer replaces the `input_shape` argument on the LSTM layer.
model = Sequential([
    Input(shape=(time_steps, 1)),
    LSTM(units=50, activation='relu'),
    Dense(units=1),
])

# This is a regression problem: classification 'accuracy' is meaningless here,
# so track mean absolute error alongside the MSE loss instead.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

  super().__init__(**kwargs)


In [21]:
#Fit the network on the training windows: 10 epochs, mini-batches of 16, no progress output
fit_options = dict(epochs=10, batch_size=16, verbose=0)
model.fit(X_train, y_train, **fit_options)

<keras.src.callbacks.history.History at 0x246583b2d30>

In [22]:
#Make predictions
# Run the trained model on the test windows. The outputs are still in the scaled
# space produced by `scaler`; they are converted back to sales units in a later cell.
test_predictions = model.predict(X_test)

[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


In [23]:
#Transform `predicted_sales` from the scaled representation back to the original units of the data
# Uses the same scaler that was fitted on the training Weekly_Sales values.
predicted_sales = scaler.inverse_transform(test_predictions)
print(predicted_sales)

[[517440.38]
 [515110.7 ]
 [484154.22]
 ...
 [724776.94]
 [732425.8 ]
 [724494.1 ]]


In [24]:
#Compute accuracy metrics
from sklearn.metrics import mean_squared_error
# The first `time_steps` test rows only seed the first window, so they have no
# prediction; drop them to line the ground truth up with the model output.
actual_values = test['Weekly_Sales'][time_steps:].values
forecast_values = predicted_sales[:, 0]

# Root mean squared error, in the same units as Weekly_Sales
test_rmse = np.sqrt(mean_squared_error(actual_values, forecast_values))

# Mean absolute percentage error, expressed in percent
abs_pct_errors = np.abs((actual_values - forecast_values) / actual_values)
test_mape = np.mean(abs_pct_errors) * 100

print(f'Test RMSE: {test_rmse:.2f}')
print(f'Test MAPE: {test_mape:.2f}%')
# "Accuracy" here is simply 100 minus MAPE
print(f'AVerage Accuracy: {100-test_mape:.2f}%')

Test RMSE: 111127.62
Test MAPE: 6.70%
AVerage Accuracy: 93.30%


In [25]:
#Plot actual vs predicted values for the test set
import matplotlib
import matplotlib.pyplot as plt

# No matplotlib.use('TkAgg') here: forcing the Tk GUI backend opens the figure in an
# external window (and fails on machines without a display) instead of rendering it
# inside the notebook. The notebook's default backend is used instead.

# Test rows that have a prediction (the first `time_steps` rows only seed the first window)
actual_series = test['Weekly_Sales'].iloc[time_steps:]

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(actual_series.index, actual_series.to_numpy(), label='Actual')
ax.plot(actual_series.index, predicted_sales[:, 0], label='Predicted')
ax.set_title('Weekly sales on the test set: actual vs predicted')
ax.set_xlabel('Row index in sales_data')
ax.set_ylabel('Weekly sales')
ax.legend()
plt.show()

In [26]:
#Save the model
# Writes the trained model to "lstm_model.h5", a relative path (resolved against the
# current working directory).
# NOTE(review): the ".h5" suffix presumably selects the legacy HDF5 format -- confirm
# whether the native ".keras" format is preferred for this project.
# NOTE(review): the fitted `scaler` is not persisted here; anything that reloads this
# model will need the same scaling to interpret its inputs and outputs.
model.save("lstm_model.h5")

