In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import math
import os




In [None]:
# Create the output folders if missing.
# The notebook saves to '../models/...' and '../results/...', i.e. one level
# above the working directory, so those are the directories that must exist
# before the model and the figure are written.
os.makedirs("../results", exist_ok=True)
os.makedirs("../models", exist_ok=True)



In [None]:
# Load dataset: read the CSV (parsing 'date' as datetimes), collapse the rows
# to one summed 'sales' value per date, and order the result by date.
df = (
    pd.read_csv('../data/sales.csv', parse_dates=['date'])
    .groupby('date')['sales']
    .sum()
    .reset_index()
    .sort_values('date')
)



In [None]:
# EDA: preview the first rows, then chart the per-date sales totals.
print(df.head())

fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df, x='date', y='sales', ax=ax)
# Override the column-name labels seaborn applies by default.
ax.set_title('Daily Sales Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
plt.show()



In [None]:
# Normalize data
# Fit the scaler on the leading 80% of the series only, then apply it to every
# row. Fitting on the full series would let the min/max of the held-out tail
# leak into the features the model is trained on. The 0.8 mirrors the train
# fraction used by the train/test split; keep the two in sync.
# Rows outside the fitted range may scale to values outside [0, 1].
sales_values = df['sales'].values.reshape(-1, 1)
scaler_fit_rows = max(1, int(len(sales_values) * 0.8))

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(sales_values[:scaler_fit_rows])
scaled_data = scaler.transform(sales_values)



In [None]:
# Create time-series sequences
def create_sequences(data, time_step=30):
    """Slice column 0 of a 2-D series into overlapping input windows and targets.

    Args:
        data: 2-D array-like of shape (n_rows, n_columns); only column 0 is read.
        time_step: Number of consecutive rows in each input window. Must be >= 1.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, time_step)`` with ``X[i] == data[i:i + time_step, 0]``;
        ``y`` has shape ``(n_windows,)`` with ``y[i] == data[i + time_step, 0]``.
        ``n_windows`` is ``max(len(data) - time_step, 0)``. When there are no
        windows, ``X`` still has shape ``(0, time_step)`` so callers can read
        ``X.shape[1]`` safely.

    Raises:
        ValueError: If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError(f"time_step must be >= 1, got {time_step}")

    values = np.asarray(data)
    n_windows = len(values) - time_step
    if n_windows <= 0:
        # Too few rows for even one (window, target) pair: return correctly
        # shaped empties instead of a 1-D empty array.
        return (
            np.empty((0, time_step), dtype=values.dtype),
            np.empty((0,), dtype=values.dtype),
        )

    series = values[:, 0]
    # sliding_window_view yields len(series) - time_step + 1 windows; the last
    # one has no following row to serve as its target, so it is dropped.
    windows = np.lib.stride_tricks.sliding_window_view(series, time_step)
    # Copy so the results are independent, writable arrays rather than views.
    return windows[:n_windows].copy(), series[time_step:].copy()

# Window length fed to the model: each sample is `time_step` consecutive rows.
time_step = 30
X, y = create_sequences(data=scaled_data, time_step=time_step)

# Append a trailing feature axis of size 1: (samples, steps) -> (samples, steps, 1).
n_samples, n_steps = X.shape[0], X.shape[1]
X = X.reshape((n_samples, n_steps, 1))



In [None]:
# Split dataset: the first 80% of the windows (in their existing order) are
# used for training, the remainder for testing. No shuffling is applied.
train_size = int(X.shape[0] * 0.8)
X_train, X_test = np.split(X, [train_size])
y_train, y_test = np.split(y, [train_size])



In [None]:
# Build LSTM Model: two stacked LSTM layers with dropout in between, followed
# by a single-unit dense output.
model = Sequential()
# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(128, activation='relu', return_sequences=True, input_shape=(time_step, 1)))
model.add(Dropout(0.2))
model.add(LSTM(64, activation='relu'))
model.add(Dense(1))

model.compile(loss='mse', optimizer='adam')



In [None]:
# Train model. X_test / y_test are passed as validation data, so the
# per-epoch validation loss is measured on the held-out split.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1,
)



In [None]:
# Save model: write the trained network to a single HDF5 file.
# The target directory must already exist.
model_path = '../models/trained_lstm_model.h5'
model.save(model_path)



In [None]:
# Predict on the held-out windows, then undo the scaling on both the
# predictions and the targets so they are comparable in original units.
# Note: despite its name, y_test_scaled holds the inverse-transformed targets.
predicted_sales = scaler.inverse_transform(model.predict(X_test))
y_test_scaled = scaler.inverse_transform(y_test.reshape((-1, 1)))



In [None]:
# Evaluation: root-mean-squared error and mean absolute error between the
# inverse-transformed targets and predictions.
mse = mean_squared_error(y_test_scaled, predicted_sales)
rmse = math.sqrt(mse)
mae = mean_absolute_error(y_test_scaled, predicted_sales)
print(f"RMSE: {rmse:.2f}, MAE: {mae:.2f}")



In [None]:
# Plot predictions: overlay actual and predicted values for the test windows,
# save the figure to disk, then display it.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test_scaled, label='Actual Sales')
ax.plot(predicted_sales, label='Predicted Sales')
ax.set_title('Sales Forecasting using LSTM')
ax.set_xlabel('Time')
ax.set_ylabel('Sales')
ax.legend()
# Save before show() so the written file contains the rendered figure.
fig.savefig('../results/predictions.png')
plt.show()