<a href="https://colab.research.google.com/github/arsh-datascience/Data-Science-and-Machine-Learning-Projects/blob/main/Retail_Analytics_%26_AI_Powered_Sales_Forecasting_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error

# --- Data loading and feature engineering ---------------------------------
# Read the raw retail transactions and convert 'Date' to datetime so the
# .dt accessor can be used to derive calendar features below.
df = pd.read_csv("Retail_Sales_Data_Unlox.csv")
df['Date'] = pd.to_datetime(df['Date'])

# Remove rows that are exact duplicates of an earlier row (done after the
# date conversion, so duplicates are judged on the parsed dates).
df.drop_duplicates(inplace=True)

# Add the calendar breakdown of 'Date' and a numeric promotion indicator in
# one step. Any 'Promotion_Applied' value other than 'Yes'/'No' maps to NaN.
df = df.assign(
    Year=df['Date'].dt.year,
    Month=df['Date'].dt.month,
    Quarter=df['Date'].dt.quarter,
    Day=df['Date'].dt.day,
    Weekday=df['Date'].dt.day_name(),
    Promotion_Flag=df['Promotion_Applied'].map({'Yes': 1, 'No': 0}),
)

# --- Exploratory revenue summaries -----------------------------------------
# Headline figure: revenue summed over every row.
total_revenue = df['Revenue'].sum()
print("Total Revenue:", total_revenue)

# Revenue summed per month-end bin of 'Date'.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME' -- confirm the target pandas version before switching.
month_bins = pd.Grouper(key='Date', freq='M')
monthly_sales = df.groupby(month_bins)['Revenue'].sum()

plt.figure()
monthly_sales.plot.line(title="Monthly Revenue Trend")
plt.show()

# Revenue summed per product category, drawn as horizontal bars in
# ascending order of revenue.
revenue_by_category = df.groupby('Product_Category')['Revenue']
category_sales = revenue_by_category.sum()

plt.figure()
category_sales.sort_values().plot.barh(title="Revenue by Category")
plt.show()

# Stores ranked by total revenue, highest first; show the top rows.
revenue_by_store = df.groupby('Store_ID')['Revenue']
store_sales = revenue_by_store.sum().sort_values(ascending=False)
print(store_sales.head())

# Mean revenue per row, split by the raw 'Promotion_Applied' label.
revenue_by_promo = df.groupby('Promotion_Applied')['Revenue']
promo_impact = revenue_by_promo.mean()
print(promo_impact)

# --- Store segmentation with K-Means ---------------------------------------
# Build one row per store: mean revenue, mean units sold, mean rating and
# the sample standard deviation of revenue (used as a volatility measure).
store_features = df.groupby('Store_ID').agg(
    Avg_Revenue=('Revenue', 'mean'),
    Avg_Units_Sold=('Units_Sold', 'mean'),
    Avg_Rating=('Store_Rating', 'mean'),
    Revenue_Volatility=('Revenue', 'std')
).reset_index()

# The sample standard deviation is NaN for a store with a single row, and
# KMeans refuses NaN input. Treat such stores as having zero observed
# volatility so they can still be clustered.
store_features['Revenue_Volatility'] = (
    store_features['Revenue_Volatility'].fillna(0.0)
)

# Standardise the features so that no single one dominates the Euclidean
# distances K-Means works with purely because of its scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(
    store_features[['Avg_Revenue', 'Avg_Units_Sold', 'Avg_Rating', 'Revenue_Volatility']]
)

# n_init is set explicitly: its default changed between scikit-learn
# releases (10 -> 'auto'), which would otherwise make the cluster
# assignments depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
store_features['Cluster'] = kmeans.fit_predict(X_scaled)

print(store_features.head())

# --- Revenue forecasting with Prophet --------------------------------------
# Collapse the data to one revenue total per distinct 'Date' value and name
# the columns 'ds' (timestamp) and 'y' (value), as used by Prophet.
revenue_per_date = df.groupby('Date')['Revenue'].sum()
ts = revenue_per_date.rename_axis('ds').reset_index(name='y')

# Yearly and weekly seasonal components are enabled; the daily one is off.
seasonality_options = {
    'yearly_seasonality': True,
    'weekly_seasonality': True,
    'daily_seasonality': False,
}
model = Prophet(**seasonality_options)
model.fit(ts)

# Extend the history by 180 periods and predict over history + extension.
future = model.make_future_dataframe(periods=180)
forecast = model.predict(future)

model.plot(forecast)
plt.show()


# --- Fit evaluation on the last 90 observed dates ---------------------------
# `forecast` holds the historical dates followed by the 180 future periods
# requested from make_future_dataframe, so its final 90 rows are future dates
# with no ground truth. Join on 'ds' so every prediction is paired with the
# actual value for the same date before taking the last 90 rows.
evaluation = ts.merge(forecast[['ds', 'yhat']], on='ds', how='inner').tail(90)
actual = evaluation['y']
predicted = evaluation['yhat']

# NOTE: the model was fitted on these same rows, so the numbers below measure
# in-sample fit, not out-of-sample forecast accuracy.
print("MAE:", mean_absolute_error(actual, predicted))
print("RMSE:", np.sqrt(mean_squared_error(actual, predicted)))



FileNotFoundError: [Errno 2] No such file or directory: 'Retail_Sales_Data_Unlox.csv'