In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt

In [None]:
# Dataset Description:
# The dataset contains historical sales data for a retail store.
# It includes columns such as 'date', 'store', 'sales', and other features related to store performance.

# Step 1: Load the Dataset
# Change 'retail_sales.csv' below if the downloaded dataset lives at a different path.
# The 'date' column is parsed as datetimes and used as the index.
df = pd.read_csv('retail_sales.csv', parse_dates=['date'], index_col='date')
# Display basic info and first few rows.
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line after the report).
df.info()
print(df.head())

In [None]:
# Step 2: Visualize the Data
# Draw the 'sales' column against the frame's index on one wide axes.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df, x=df.index, y='sales', ax=ax)
ax.set_title('Sales Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
plt.show()

In [None]:
# Step 3: Check Stationarity
from statsmodels.tsa.stattools import adfuller

def check_stationarity(series, alpha=0.05):
    """Run the Augmented Dickey-Fuller test on ``series`` and print the verdict.

    Prints the ADF test statistic, the p-value, and a one-line conclusion.

    Args:
        series: Observations to test. They are handed to ``adfuller``
            unchanged; missing values are not removed here.
        alpha: Significance level. The series is reported as stationary when
            the p-value is less than or equal to ``alpha``. Defaults to 0.05.

    Returns:
        None. The results are only printed.
    """
    # The first two entries of adfuller's result are the test statistic and the p-value.
    adf_stat, p_value = adfuller(series)[:2]
    print(f'ADF Statistic: {adf_stat}')
    print(f'p-value: {p_value}')
    if p_value <= alpha:
        print("The time series is stationary.")
    else:
        print("The time series is non-stationary.")

check_stationarity(df['sales'])

# Apply first-order differencing and re-test. Note this runs unconditionally,
# whatever verdict was printed for the raw series above.
# Assigning a Series to a DataFrame column re-aligns it on the index, so the
# leading NaN produced by diff() is always present in the stored column; a
# dropna() before the assignment would have no effect. Drop it only when
# passing the series to the test.
df['sales_diff'] = df['sales'].diff()
check_stationarity(df['sales_diff'].dropna())

In [None]:
# Step 4: Train an ARIMA Model
# Positional 80/20 split with no shuffling: the leading rows are used for
# fitting and the trailing rows are held out.
# NOTE(review): this assumes the rows are already in date order - confirm.
train_size = int(len(df) * 0.8)
train = df['sales'].iloc[:train_size]
test = df['sales'].iloc[train_size:]

# order is (p, d, q); adjust as needed.
model = ARIMA(train, order=(5, 1, 0))
model_fit = model.fit()
print(model_fit.summary())

In [None]:
# Step 5: Forecast Sales
# Request one forecast step per held-out observation in `test`.
forecast = model_fit.forecast(steps=test.shape[0])

In [None]:
# Step 6: Evaluate the Model
# Compare the forecast with the held-out values: mean absolute error,
# mean squared error, and its square root (RMSE).
mse = mean_squared_error(test, forecast)
rmse = sqrt(mse)
mae = mean_absolute_error(test, forecast)
print(f'MAE: {mae}, MSE: {mse}, RMSE: {rmse}')

In [None]:
# Step 7: Plot Predictions vs Actual Sales
# Both lines share the held-out period's index on the x-axis; the forecast is
# drawn dashed to set it apart from the actual values.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test.index, test, label='Actual Sales')
ax.plot(test.index, forecast, label='Predicted Sales', linestyle='dashed')
ax.set_title('Sales Forecasting using ARIMA')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
plt.show()