In [None]:
# Bring the dataset into the Colab runtime through Colab's upload helper.
# NOTE(review): a later cell reads 'Superstore.csv' from the working directory,
# so the uploaded file presumably has to carry that exact name - confirm.
from google.colab import files

uploads = files.upload()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Load the Superstore export; the file is read with latin1 encoding.
df = pd.read_csv('Superstore.csv', encoding='latin1')

print(f"Dataset shape: {df.shape}")
print("\nColumns: ")
print(df.columns.to_list())

# Parse the order date so it can be sorted and grouped by calendar month.
# NOTE(review): the date format is inferred by pandas; confirm the CSV uses a
# single, unambiguous layout (day-first vs month-first) before trusting it.
df['Order Date'] = pd.to_datetime(df['Order Date'])

# Sanity-check the time span covered by the data.
print(f"\nDate range: {df['Order Date'].min()} to {df['Order Date'].max()}")

# Put the orders in chronological order.
df = df.sort_values('Order Date')
print("\nFirst few rows: ")
print(df[['Order Date', 'Sales', 'Category', 'Sub-Category']].head(10))

# Aggregate order-level sales into one total per calendar month.
monthly_sales = df.groupby(df['Order Date'].dt.to_period('M'))['Sales'].sum()

# Period index -> timestamp index (needed for plotting and by later cells).
monthly_sales.index = monthly_sales.index.to_timestamp()

print("\nMonthly sales:")
print(monthly_sales)
print(f"\nTotal months: {len(monthly_sales)}")
print(f"Data range:{monthly_sales.index.min()} to {monthly_sales.index.max()}")

# Summary statistics. describe() renders as several lines, so it is printed
# under the heading instead of being interpolated into the heading itself.
print("\nSales statistics:")
print(monthly_sales.describe())

# Line chart of the monthly totals.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(monthly_sales.index, monthly_sales.values, marker='o', linewidth=2)
ax.set_title("Monthly Sales Over Time", fontsize=16)
ax.set_xlabel("Date")
ax.set_ylabel("Sales ($)")
ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()

from statsmodels.tsa.seasonal import seasonal_decompose

# Decompose the monthly series with an additive model and a period of 12
# observations (one year of monthly data).
decomposition = seasonal_decompose(monthly_sales, model='additive', period=12)

# One entry per stacked panel: (series, title, y-axis label, line colour).
# Each component gets its own colour so the panels are easy to tell apart.
panels = [
    (monthly_sales, 'Original Monthly Sales', 'Sales ($)', 'blue'),
    (decomposition.trend, 'Trend Component', 'Trend', 'green'),
    (decomposition.seasonal, 'Seasonal Component', 'Seasonality', 'orange'),
    (decomposition.resid, 'Residual Component', 'Residual', 'red'),
]

# Four stacked panels need vertical room; sharex keeps the date axes aligned
# even if a component has missing values at its ends (see statsmodels docs).
fig, axes = plt.subplots(len(panels), 1, figsize=(14, 12), sharex=True)

for ax, (series, title, ylabel, color) in zip(axes, panels):
    ax.plot(series.index, series.values, color=color)
    ax.set_title(title, fontsize=14)
    ax.set_ylabel(ylabel)
    ax.grid(alpha=0.3)

# Only the bottom panel carries the x-axis label.
axes[-1].set_xlabel('Date')

plt.tight_layout()
plt.show()

print("Time series decomposition complete")
print("\nWhat we learned:")
print("- Trend: Overall direction of sales")
print("- Seasonal: Repeating 12-month patterns")
print("- Residual: Random fluctuations")



In [None]:
#create lagged features
def create_time_series_features(df, target_col='Sales', lags=(1, 2, 3, 12),
                                rolling_windows=(3, 6)):
    """Add lag, rolling-mean and calendar features to a time-indexed frame.

    The input frame is not modified; a copy with the extra columns is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``target_col``. Its index must expose ``.month``,
        ``.quarter`` and ``.year`` (e.g. a ``DatetimeIndex``). Rows are taken
        to be consecutive periods in chronological order, since lags are
        positional shifts.
    target_col : str, default 'Sales'
        Column the lag and rolling features are derived from.
    lags : iterable of int, default (1, 2, 3, 12)
        Each value ``k`` adds a column ``lag_k`` holding the target shifted
        down by ``k`` rows.
    rolling_windows : iterable of int, default (3, 6)
        Each value ``w`` adds a column ``rolling_mean_w``: the mean of the
        ``w`` rows preceding the current one.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with, in order: the lag columns, the rolling-mean
        columns, ``month``, ``quarter``, ``year``, ``month_sin``, ``month_cos``.
        Leading rows contain NaN wherever a lag or window reaches before the
        start of the data.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of ``df``.
    AttributeError
        If the index has no ``month``/``quarter``/``year`` attributes.
    """
    out = df.copy()
    target = out[target_col]

    # Lag features: the value observed k rows earlier.
    for lag in lags:
        out[f'lag_{lag}'] = target.shift(lag)

    # Rolling statistics are computed on the series shifted by one row so the
    # window only ever covers earlier rows, never the row being described.
    previous = target.shift(1)
    for window in rolling_windows:
        out[f'rolling_mean_{window}'] = previous.rolling(window=window).mean()

    # Calendar features read straight from the index.
    out['month'] = out.index.month
    out['quarter'] = out.index.quarter
    out['year'] = out.index.year

    # Cyclical month encoding: maps the month onto a circle so that December
    # and January end up adjacent instead of 11 units apart.
    out['month_sin'] = np.sin(2 * np.pi * out['month'] / 12)
    out['month_cos'] = np.cos(2 * np.pi * out['month'] / 12)

    return out

# Wrap the monthly series in a one-column frame named 'Sales'.
df_ts = monthly_sales.to_frame(name='Sales')

# Derive the features, then discard the leading rows that are incomplete
# because a lag or rolling window reaches before the first observation.
df_features = create_time_series_features(df_ts).dropna()

print(f"Data points after feature engineering: {len(df_features)}")
print("\nColumns:")
print(df_features.columns.tolist())



In [None]:
!pip install prophet -q
from prophet import Prophet

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Prepare data for Prophet: two columns, 'ds' (date) and 'y' (value).
df_prophet = monthly_sales.reset_index()
df_prophet.columns = ['ds', 'y']

print("Data prepared for Prophet:")
print(df_prophet.head())
print(f"\nTotal data points: {len(df_prophet)}")

# Chronological split: the first 80% of months train, the remainder tests.
train_size = int(len(df_prophet) * 0.8)
train_prophet = df_prophet[:train_size]
test_prophet = df_prophet[train_size:]

print(f"\nTrain: {len(train_prophet)} months")
print(f"Test: {len(test_prophet)} months")

model_prophet = Prophet(
    seasonality_mode='multiplicative',  # Sales multiply in holiday season
    yearly_seasonality=True,            # Capture yearly patterns
    weekly_seasonality=False,           # No weekly data
    daily_seasonality=False             # No daily data
)

print("\nTraining Prophet model...")
model_prophet.fit(train_prophet)
print("\nModel trained")

# Predict exactly the held-out dates so predictions line up with the actuals.
future = test_prophet[['ds']]
forecast = model_prophet.predict(future)

# Evaluate on the held-out months.
y_true = test_prophet['y'].values
y_pred = forecast['yhat'].values

print("\nPROPHET MODEL PERFORMANCE")
print("-"*30)
print(f"Test MAE: ${mean_absolute_error(y_true, y_pred):,.2f}")
print(f"Test RMSE: ${np.sqrt(mean_squared_error(y_true, y_pred)):,.2f}")
print(f"Test R²: {r2_score(y_true, y_pred):.4f}")

# Visualization: training history, held-out actuals and the predictions.
plt.figure(figsize=(14, 6))
plt.plot(train_prophet['ds'], train_prophet['y'], label='Train Actual', marker='o')
plt.plot(test_prophet['ds'], test_prophet['y'], label='Test Actual', marker='o', color='orange')
plt.plot(test_prophet['ds'], y_pred, label='Test Predictions (Prophet)', marker='x', color='red', linestyle='--')
# Vertical marker at the last training month.
plt.axvline(x=train_prophet['ds'].iloc[-1], color='gray', linestyle='--', alpha=0.5, label='Train/Test Split')
plt.title('Prophet: Actual vs Predicted Sales')
plt.xlabel('Date')
plt.ylabel('Sales ($)')
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Retrain on ALL data (train + test) and forecast beyond the last observation.

# Forecast horizon in months; used for the future frame, the slice of
# predictions that is reported, and the headings/titles below.
FORECAST_MONTHS = 6

model_final = Prophet(
    seasonality_mode='multiplicative',
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False
)

model_final.fit(df_prophet)

# NOTE(review): Prophet's uncertainty bounds are understood to come from
# simulation driven by NumPy's global RNG; seeding it here is meant to keep
# yhat_lower / yhat_upper stable across reruns - confirm against Prophet docs.
np.random.seed(42)

# Extend the history by FORECAST_MONTHS month-start dates and predict.
future_dates = model_final.make_future_dataframe(periods=FORECAST_MONTHS, freq='MS')  # MS = Month Start
forecast_future = model_final.predict(future_dates)

# Keep only the rows that lie beyond the observed history.
future_predictions = forecast_future[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(FORECAST_MONTHS)

print(f"FUTURE SALES FORECAST (Next {FORECAST_MONTHS} Months)")
print("="*60)
print(future_predictions.to_string(index=False))

# Visualize history, fitted values and the forecast with its interval.
fig, ax = plt.subplots(figsize=(14, 6))
model_final.plot(forecast_future, ax=ax)
ax.set_title(f'Sales Forecast: Historical + Future {FORECAST_MONTHS} Months')
ax.set_xlabel('Date')
ax.set_ylabel('Sales ($)')
ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()