# 🛍️ Sales Trend Analysis for an E-commerce Brand

*Author: Olubiyi Blossom*

**Description:** This notebook performs end-to-end sales trend analysis on the `data.csv` dataset (Online Retail). It includes cleaning, exploratory analysis, visualization, and a forecasting section that predicts the next 6 months of sales.

---

In [None]:
# 1. Imports
import warnings
# NOTE(review): this silences every warning for the rest of the session, so any
# deprecation or chained-assignment warnings raised by later cells stay hidden.
warnings.filterwarnings('ignore')

# Data handling and plotting libraries used throughout the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

# Apply the matplotlib style sheet named 'seaborn-v0_8' to subsequent figures.
plt.style.use('seaborn-v0_8')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline


In [None]:
# 2. Load dataset (extracted from data.csv.zip into data_files/)
# The file is decoded as ISO-8859-1 instead of pandas' default UTF-8.
import os

csv_path = 'data_files/data.csv'
print(f'loading: {csv_path}')

df = pd.read_csv(csv_path, encoding='ISO-8859-1')

# Quick sanity check: dimensions plus the first few rows.
print(f'Shape: {df.shape}')
display(df.head())


In [None]:
# 3. Initial info
# Column names first, then one titled section per per-column summary.
print('Columns:', list(df.columns))

for heading, summary in (
    ('\nMissing values per column:', df.isna().sum()),
    ('\nData types:', df.dtypes),
):
    print(heading)
    print(summary)


In [None]:
# 4. Data cleaning
# Convert InvoiceDate to datetime. The dtype check makes the cell safe to re-run;
# values that cannot be parsed become NaT and are removed by the dropna below.
if not pd.api.types.is_datetime64_any_dtype(df['InvoiceDate']):
    df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'], errors='coerce')

# Drop rows with missing CustomerID or InvoiceDate
df = df.dropna(subset=['CustomerID', 'InvoiceDate'])

# Remove non-positive quantities.
# .copy() turns the filtered result into an independent frame, so the column
# assignment below is a plain write rather than a chained assignment on a slice
# (the pattern that triggers pandas' SettingWithCopyWarning).
df = df[df['Quantity'] > 0].copy()

# Compute TotalSales (line revenue = units sold x unit price)
df['TotalSales'] = df['Quantity'] * df['UnitPrice']

print('Cleaned shape:', df.shape)
display(df.head())


In [None]:
# 5. Feature engineering
# Derive calendar fields from the invoice timestamp through a single .dt accessor.
invoice_dt = df['InvoiceDate'].dt

df['Date'] = invoice_dt.date             # calendar day (python date objects)
df['Month'] = invoice_dt.to_period('M')  # monthly Period, used for grouping
df['Year'] = invoice_dt.year

print('Sample Date fields:')
date_columns = ['InvoiceDate', 'Date', 'Month', 'Year']
display(df[date_columns].head())


In [None]:
# 6. Monthly Sales Trend
# Revenue per month, with a string label column for plotting, ordered by that label.
monthly_sales = (
    df.groupby('Month')['TotalSales']
    .sum()
    .reset_index()
    .assign(Month_str=lambda frame: frame['Month'].astype(str))
    .sort_values('Month_str')
)

fig, ax = plt.subplots(figsize=(12, 5))
sns.lineplot(data=monthly_sales, x='Month_str', y='TotalSales', marker='o', ax=ax)
plt.xticks(rotation=45)
ax.set_title('Monthly Sales Trend')
ax.set_xlabel('Month')
ax.set_ylabel('Total Revenue (£)')
plt.tight_layout()
plt.show()

monthly_sales.head()


In [None]:
# 7. Top Countries and Products
# Each ranking: total revenue per group, sorted descending, first ten kept.

# Top countries
revenue_by_country = df.groupby('Country')['TotalSales'].sum()
top_countries = revenue_by_country.sort_values(ascending=False).iloc[:10]

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=top_countries.values, y=top_countries.index, palette='Blues_d', ax=ax)
ax.set_title('Top 10 Countries by Total Sales')
ax.set_xlabel('Total Revenue (£)')
ax.set_ylabel('Country')
plt.show()

# Top products
revenue_by_product = df.groupby('Description')['TotalSales'].sum()
product_sales = revenue_by_product.sort_values(ascending=False).iloc[:10]

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=product_sales.values, y=product_sales.index, palette='Oranges_d', ax=ax)
ax.set_title('Top 10 Products by Revenue')
ax.set_xlabel('Total Revenue (£)')
ax.set_ylabel('Product')
plt.show()


In [None]:
# 8. Daily Sales Trend
# Sum revenue per calendar day, then flatten to a two-column frame for seaborn.
daily_totals = df.groupby('Date')['TotalSales'].sum()
daily_sales = daily_totals.reset_index()

fig, ax = plt.subplots(figsize=(12, 5))
sns.lineplot(data=daily_sales, x='Date', y='TotalSales', ax=ax)
ax.set_title('Daily Sales Trend')
ax.set_xlabel('Date')
ax.set_ylabel('Revenue (£)')
plt.tight_layout()
plt.show()


## 9. Key Insights & Recommendations

- The **top product** and **top country** by revenue are computed and printed in the code cell below.
- Recommendations: focus marketing on high-performing products and regions; consider targeted promotions during peak months; investigate causes for any sudden drops.


In [None]:
# Identify top product and country
# Both series are already sorted by revenue (descending), so the first index label
# is the best performer. An empty series yields None instead of raising.
top_product = product_sales.index[0] if len(product_sales) else None
top_country = top_countries.index[0] if len(top_countries) else None

print('Top product:', top_product)
print('Top country:', top_country)

# Save some visuals
os.makedirs('images', exist_ok=True)

# Monthly trend line chart.
fig, ax = plt.subplots(figsize=(8, 4))
sns.lineplot(data=monthly_sales, x='Month_str', y='TotalSales', marker='o', ax=ax)
# Rotate the labels matplotlib already placed instead of overwriting them with
# set_xticklabels, which can mislabel the axis when ticks and labels get out of step.
ax.tick_params(axis='x', labelrotation=45)
ax.set_title('Monthly Sales Trend')
ax.set_xlabel('Month')
ax.set_ylabel('Total Revenue (£)')
fig.tight_layout()
fig.savefig('images/monthly_sales_trend.png', dpi=150)
plt.close(fig)  # closed without showing: these figures are only written to disk

# Horizontal bar charts: (series, palette, title, y-axis label, output path).
bar_chart_specs = (
    (product_sales, 'Oranges_d', 'Top 10 Products by Revenue', 'Product', 'images/top_products.png'),
    (top_countries, 'Blues_d', 'Top 10 Countries by Total Sales', 'Country', 'images/top_countries.png'),
)
for series, palette, title, ylabel, out_path in bar_chart_specs:
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.barplot(x=series.values, y=series.index, palette=palette, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Total Revenue (£)')
    ax.set_ylabel(ylabel)
    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
    plt.close(fig)

print('Saved sample images to images/ directory')


## 10. Forecasting: Predict next 6 months of sales

This section forecasts monthly sales with Prophet when it is installed; otherwise it falls back to SARIMAX from statsmodels. The code handles both cases and prints installation instructions if neither library is available.


In [None]:
# Forecasting next 6 months
from datetime import datetime

# Prepare monthly series in the two-column layout Prophet expects:
# 'ds' = first day of each month, 'y' = revenue for that month.
monthly_ts = monthly_sales.copy()
monthly_ts['ds'] = pd.to_datetime(monthly_ts['Month_str'] + '-01')
monthly_ts = (
    monthly_ts.rename(columns={'TotalSales': 'y'})[['ds', 'y']]
    .sort_values('ds')
    .reset_index(drop=True)  # clean 0..n-1 index for the SARIMAX fallback
)

print('Time range:', monthly_ts['ds'].min(), 'to', monthly_ts['ds'].max())

# Try Prophet
model_used = None
try:
    from prophet import Prophet
    m = Prophet()
    m.fit(monthly_ts)
    # History is stamped at month start, so the future frame must use month-start
    # ('MS') steps too. With 'M' (month end) the first "future" date is the end of
    # the last observed month, which misaligns the forecast with the history.
    future = m.make_future_dataframe(periods=6, freq='MS')
    forecast = m.predict(future)
    model_used = 'Prophet'
    print('Used Prophet for forecasting')
    fig = m.plot(forecast)
    fig.set_size_inches(12,5)
except Exception as e:
    # Covers both a missing prophet installation and a failure while fitting.
    print('Prophet not available or failed with error:', e)
    print('Falling back to SARIMAX (statsmodels)')
    try:
        from statsmodels.tsa.statespace.sarimax import SARIMAX
        # A 12-month seasonal term with seasonal differencing consumes a full year
        # of observations, so only use it when at least two years are available;
        # otherwise fit the non-seasonal ARIMA(1,1,1) part alone.
        seasonal_order = (1, 1, 1, 12) if len(monthly_ts) >= 24 else (0, 0, 0, 0)
        sar = SARIMAX(
            monthly_ts['y'],
            order=(1, 1, 1),
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False,
        )
        res = sar.fit(disp=False)
        pred = res.get_forecast(steps=6)
        pred_ci = pred.conf_int()
        model_used = 'SARIMAX'
        # Plot observed history, then the forecast on the six month-start dates
        # that follow the last observed month, with its confidence band.
        ax = monthly_ts.plot(x='ds', y='y', figsize=(12,5), label='Observed')
        future_index = pd.date_range(start=monthly_ts['ds'].max()+pd.offsets.MonthBegin(1), periods=6, freq='MS')
        ax.plot(future_index, pred.predicted_mean, label='Forecast')
        ax.fill_between(future_index, pred_ci.iloc[:,0], pred_ci.iloc[:,1], color='k', alpha=0.15)
        ax.set_title('Sales Forecast (SARIMAX)')
        ax.set_ylabel('TotalSales')
        ax.legend()
    except Exception as e2:
        print('SARIMAX also failed:', e2)
        print('Please install prophet (pip install prophet) or statsmodels')

print('Model used:', model_used)


In [None]:
# 11. Save cleaned data
# Write the cleaned, feature-enriched frame to disk without the index column.
cleaned_csv = 'cleaned_sales_data.csv'
df.to_csv(cleaned_csv, index=False)
print('Cleaned data saved to', cleaned_csv)


---

## Conclusion

This notebook provides a complete analysis pipeline: data loading, cleaning, EDA, visualization, and forecasting. Save the notebook and images, push to GitHub, and include the README.md in the repository for context.

---

**Author:** Olubiyi Blossom
