# Retail Sales Analysis

This notebook analyzes retail sales data to identify trends and top-selling product categories and to forecast future sales. It includes exploratory data analysis (EDA), visualizations, and ARIMA forecasting.

In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA

warnings.filterwarnings('ignore')

# Set plot style
sns.set(style='whitegrid')

# Load data. The path is kept in a single variable so the error message
# below always reports the location that was actually tried.
DATA_PATH = 'C:/Users/user/Documents/retail-sales-analysis/data/retail_sales.csv'
try:
    # Only the read itself is guarded; the success output lives in `else`.
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    print(f'Error: retail_sales.csv not found at {DATA_PATH}')
    raise  # re-raise so the failure is not silently swallowed
else:
    print('Data loaded successfully')
    print(df.head())

# Convert Date to datetime so the .dt accessor can be used on the column
df['Date'] = pd.to_datetime(df['Date'])
print('Date converted to datetime')

In [None]:
# EDA: Basic statistics
print('Dataset Info:')
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print('\nSummary Statistics:')
print(df.describe())

# Check missing values (number of nulls per column)
print('\nMissing Values:')
print(df.isnull().sum())

# Sales by category: sum of 'Total Amount' per 'Product Category', largest first
category_sales = df.groupby('Product Category')['Total Amount'].sum().sort_values(ascending=False)
print('\nSales by Category:')
print(category_sales)

In [None]:
# Visualize sales trend over time
# Sum 'Total Amount' per calendar month; the result is indexed by monthly periods.
monthly_sales = df.groupby(df['Date'].dt.to_period('M'))['Total Amount'].sum()
plt.figure(figsize=(10, 6))
monthly_sales.plot()
plt.title('Monthly Sales Trend')
plt.xlabel('Month')
plt.ylabel('Total Sales ($)')
plt.xticks(rotation=45)
plt.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the image.
trend_plot_path = 'C:/Users/user/Documents/retail-sales-analysis/figures/sales_trend.png'
os.makedirs(os.path.dirname(trend_plot_path), exist_ok=True)
plt.savefig(trend_plot_path)
plt.show()
print('Sales trend plot saved')

In [None]:
# Visualize top products by sales
# Total 'Total Amount' per 'Product Category', keeping the ten largest.
top_products = df.groupby('Product Category')['Total Amount'].sum().sort_values(ascending=False).head(10)
plt.figure(figsize=(10, 6))
# Horizontal bars: totals on the x axis, category names on the y axis.
sns.barplot(x=top_products.values, y=top_products.index)
plt.title('Top Product Categories by Sales')
plt.xlabel('Total Sales ($)')
plt.ylabel('Product Category')
plt.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the image.
top_products_plot_path = 'C:/Users/user/Documents/retail-sales-analysis/figures/top_products.png'
os.makedirs(os.path.dirname(top_products_plot_path), exist_ok=True)
plt.savefig(top_products_plot_path)
plt.show()
print('Top products plot saved')

In [None]:
# Sales forecasting with ARIMA
# Convert the monthly periods to month-start timestamps and pin an explicit
# month-start frequency. to_timestamp() alone only ends up with a frequency
# when the months are contiguous; with a gap the index has none, statsmodels
# then ignores the dates (the warning is suppressed above) and the forecast
# comes back with an integer index that does not line up with the date axis.
# Months that had no rows become NaN after asfreq.
# NOTE(review): confirm NaN (missing) rather than 0 is the intended treatment
# for months without any sales rows.
monthly_sales_ts = monthly_sales.to_timestamp().asfreq('MS')
model = ARIMA(monthly_sales_ts, order=(1, 1, 1))
results = model.fit()
# Predict the 12 periods that follow the last observed month.
forecast = results.forecast(steps=12)

plt.figure(figsize=(10, 6))
plt.plot(monthly_sales_ts, label='Historical Sales')
plt.plot(forecast, label='Forecast', color='red')
plt.title('Sales Forecast (12 Months)')
plt.xlabel('Date')
plt.ylabel('Total Sales ($)')
plt.legend()
plt.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the image.
forecast_plot_path = 'C:/Users/user/Documents/retail-sales-analysis/figures/sales_forecast.png'
os.makedirs(os.path.dirname(forecast_plot_path), exist_ok=True)
plt.savefig(forecast_plot_path)
plt.show()
print('Sales forecast plot saved')

# Save processed data (the loaded frame with 'Date' converted to datetime)
df.to_csv('C:/Users/user/Documents/retail-sales-analysis/data/processed_sales.csv', index=False)
print('Processed data saved')