In [3]:
import pandas as pd 
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Notebook-wide display configuration.
# Render every column of a DataFrame instead of eliding the middle ones.
pd.set_option("display.max_columns", None)
# `sns.set` is only an alias of `sns.set_theme`; seaborn documents
# `set_theme` as the preferred interface and notes the alias may be removed.
sns.set_theme(style="whitegrid")

In [6]:
# Read the raw sales extract with pandas' default parsing (no explicit
# dtypes or date parsing), then preview the first five rows.
df = pd.read_csv(filepath_or_buffer="../data/Iphone_Sales.csv")
df.head(n=5)

Unnamed: 0,SEMAINE,BASIC_MODEL_NAME,DATE_LANCEMENT,NOMBRE_CMD,ANCIENNETE_MODELE,PRIX_DE_DETAIL,RABAIS,NOMBRE_CLIENT_FIN_CONTRAT,CROIS_BYOD,SEMAINE_NUM,Fct_CROIS
0,2022-11-12T00:00:00Z,IPHONE 12,2020-10-16T00:00:00Z,85,2,882.0,0,115.0,0.2,1,
1,2022-11-12T00:00:00Z,IPHONE 13,2021-09-17T00:00:00Z,3704,1,1071.0,1,83.0,0.2,1,
2,2022-11-12T00:00:00Z,IPHONE 14,2022-09-16T00:00:00Z,289,0,1173.0,0,125.0,0.2,1,
3,2022-11-19T00:00:00Z,IPHONE 12,2020-10-16T00:00:00Z,58,2,882.0,0,140.0,0.2,2,
4,2022-11-19T00:00:00Z,IPHONE 13,2021-09-17T00:00:00Z,6593,1,1071.0,1,50.0,0.2,2,


In [7]:
# Sanity-check the size and schema of the loaded frame:
# (rows, columns) first, then the column labels as a plain Python list.
print("Shape:", df.shape)
print("\nColumns:", list(df.columns))

Shape: (375, 11)

Columns: ['SEMAINE', 'BASIC_MODEL_NAME', 'DATE_LANCEMENT', 'NOMBRE_CMD', 'ANCIENNETE_MODELE', 'PRIX_DE_DETAIL', 'RABAIS', 'NOMBRE_CLIENT_FIN_CONTRAT', 'CROIS_BYOD', 'SEMAINE_NUM', 'Fct_CROIS']


In [8]:
# Inspect the dtype pandas inferred for each column. The date-like columns
# (SEMAINE, DATE_LANCEMENT) come back as `object`, i.e. unparsed strings.
df.dtypes

SEMAINE                       object
BASIC_MODEL_NAME              object
DATE_LANCEMENT                object
NOMBRE_CMD                     int64
ANCIENNETE_MODELE              int64
PRIX_DE_DETAIL               float64
RABAIS                         int64
NOMBRE_CLIENT_FIN_CONTRAT    float64
CROIS_BYOD                   float64
SEMAINE_NUM                    int64
Fct_CROIS                    float64
dtype: object

In [9]:
# Frequency of each distinct Fct_CROIS value. `value_counts()` excludes NaN
# by default, so rows with a missing Fct_CROIS are not represented here.
df["Fct_CROIS"].value_counts()

Fct_CROIS
0.053097    2
2.955017    1
3.275862    1
0.060215    1
0.221933    1
           ..
0.378286    1
1.027027    1
0.660754    1
1.064403    1
0.121760    1
Name: count, Length: 268, dtype: int64

## Forecasting Setup

- **Target**: Weekly sales volume (`NOMBRE_CMD`)
- **Granularity**: Weekly, per iPhone model (`BASIC_MODEL_NAME`)
- **Forecast Horizon**: 8 weeks
- **Frequency**: Weekly
- **Features to engineer**:
  - Lagged sales (t-1, t-2, t-7)
  - Rolling averages (last 3–4 weeks)
  - Seasonality (week number, month)
  - Promotion flags
  - Time since product launch (`ANCIENNETE_MODELE`)

---

## Metrics

We'll evaluate using:
- **MAPE** (Mean Absolute Percentage Error)
- **WAPE** (Weighted Absolute Percentage Error)
- **RMSE** (Root Mean Square Error)

We will report metrics:
- Globally (all models)
- Per iPhone model
- For short (1–2 week) and long (8 week) horizons

In [10]:
# Give the columns used throughout the analysis short English names:
#   NOMBRE_CMD       -> sales    (the forecasting target)
#   BASIC_MODEL_NAME -> model
#   SEMAINE          -> week_id
# Reassign instead of using `inplace=True`: the result is the same frame
# contents under the same name, without mutating the loaded object in place.
# Re-running this cell is safe because `rename` ignores keys that are no
# longer present.
df = df.rename(
    columns={
        "NOMBRE_CMD": "sales",
        "BASIC_MODEL_NAME": "model",
        "SEMAINE": "week_id",
    }
)

In [11]:
# How many distinct iPhone models are present, and how many rows each of
# the most frequent ones contributes (top five by row count).
print("Number of unique iPhone models:", df["model"].nunique())
df["model"].value_counts().head(n=5)

Number of unique iPhone models: 5


model
IPHONE 14    119
IPHONE 13    106
IPHONE 15     76
IPHONE 12     50
IPHONE 16     24
Name: count, dtype: int64

In [13]:
# Peek at the SEMAINE_NUM column (pandas truncates the display of long Series).
df["SEMAINE_NUM"]

0        1
1        1
2        1
3        2
4        2
      ... 
370    118
371    118
372    119
373    119
374    119
Name: SEMAINE_NUM, Length: 375, dtype: int64