In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import warnings

warnings.filterwarnings('ignore')

In [18]:
sales_data = pd.read_csv(r'C:\Users\amank\OneDrive\Desktop\Car_Forcasting\data\processed_data\sales_data_clean.csv')

In [3]:
sales_data = sales_data[['INVOICEDAT', 'Model_new']]

In [4]:
sales_data

Unnamed: 0,INVOICEDAT,Model_new
0,2021-05-21,Ford Transit Variants
1,2021-06-28,Ford Focus Variants
2,2021-06-03,ecosport
3,2021-07-07,Ford Fiesta Variants
4,2021-05-14,Ford Focus Variants
...,...,...
33404,2024-01-31,Ford Transit Variants
33405,2024-01-31,Ford Puma Variants
33406,2024-01-31,Ford Puma Variants
33407,2024-01-12,Ford Puma Variants


In [11]:
sales_data.Model_new.value_counts().head()

Model_new
Ford Fiesta Variants    5399
Ford Focus Variants     3837
Ford Puma Variants      3785
Other                   3758
Ford Kuga Variants      3215
Name: count, dtype: int64

In [14]:
import pandas as pd
from prophet import Prophet
import numpy as np

# Convert the date column to datetime format if not already
sales_data['INVOICEDAT'] = pd.to_datetime(sales_data['INVOICEDAT'])

# Forecasting settings
forecast_period = 365  # Forecast for 90 days (approximately 3 months)
monthly_forecasts = {}

# Iterate over each unique model in the 'Model_new' column
for model_name in sales_data['Model_new'].unique():
    # Filter and aggregate data for the current model to daily frequency
    model_data = sales_data[sales_data['Model_new'] == model_name]
    daily_data = model_data.groupby('INVOICEDAT').size().reset_index(name='y')
    daily_data = daily_data.rename(columns={'INVOICEDAT': 'ds'})

    # Check if there's enough data to fit the Prophet model
    if len(daily_data) < 2:
        print(f"Skipping '{model_name}' due to insufficient data.")
        continue

    # Initialize the Prophet model with additional parameters
    prophet_model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
        seasonality_mode='multiplicative',
        changepoint_prior_scale=0.1,  # Adjust as necessary
        n_changepoints=30
    )
    
    # Add custom 6-month seasonality (biannual)
    prophet_model.add_seasonality(name='biannual', period=182.5, fourier_order=5)
    
    prophet_model.fit(daily_data)

    # Create future dates and generate forecast
    future_dates = prophet_model.make_future_dataframe(periods=forecast_period)
    forecast = prophet_model.predict(future_dates)

    # Filter forecast results to the next three months only
    forecast['month'] = forecast['ds'].dt.to_period('M')
    last_date = daily_data['ds'].max()
    forecast_next_three_months = forecast[forecast['ds'] > last_date].groupby('month')['yhat'].sum().head(12)

    # Store forecasted monthly results for the current model
    monthly_forecasts[model_name] = forecast_next_three_months.values

# Create a DataFrame from the forecast dictionary
forecast_model_df = pd.DataFrame.from_dict(
    monthly_forecasts, 
    orient='index', 
    columns=forecast_next_three_months.reset_index()['month']
).reset_index().rename(columns={'index': 'Model'})

forecast_model_df

15:28:29 - cmdstanpy - INFO - Chain [1] start processing
15:28:29 - cmdstanpy - INFO - Chain [1] done processing
15:28:29 - cmdstanpy - INFO - Chain [1] start processing
15:28:30 - cmdstanpy - INFO - Chain [1] done processing
15:28:30 - cmdstanpy - INFO - Chain [1] start processing
15:28:30 - cmdstanpy - INFO - Chain [1] done processing
15:28:31 - cmdstanpy - INFO - Chain [1] start processing
15:28:31 - cmdstanpy - INFO - Chain [1] done processing
15:28:32 - cmdstanpy - INFO - Chain [1] start processing
15:28:32 - cmdstanpy - INFO - Chain [1] done processing
15:28:33 - cmdstanpy - INFO - Chain [1] start processing
15:28:33 - cmdstanpy - INFO - Chain [1] done processing
15:28:33 - cmdstanpy - INFO - Chain [1] start processing
15:28:33 - cmdstanpy - INFO - Chain [1] done processing
15:28:34 - cmdstanpy - INFO - Chain [1] start processing
15:28:34 - cmdstanpy - INFO - Chain [1] done processing
15:28:35 - cmdstanpy - INFO - Chain [1] start processing
15:28:35 - cmdstanpy - INFO - Chain [1]

Skipping 'nan' due to insufficient data.


15:28:45 - cmdstanpy - INFO - Chain [1] done processing
15:28:46 - cmdstanpy - INFO - Chain [1] start processing
15:28:46 - cmdstanpy - INFO - Chain [1] done processing
15:28:46 - cmdstanpy - INFO - Chain [1] start processing
15:28:46 - cmdstanpy - INFO - Chain [1] done processing
15:28:47 - cmdstanpy - INFO - Chain [1] start processing
15:28:48 - cmdstanpy - INFO - Chain [1] done processing
15:28:48 - cmdstanpy - INFO - Chain [1] start processing
15:28:49 - cmdstanpy - INFO - Chain [1] done processing
15:28:49 - cmdstanpy - INFO - Chain [1] start processing
15:28:49 - cmdstanpy - INFO - Chain [1] done processing
15:28:50 - cmdstanpy - INFO - Chain [1] start processing
15:28:51 - cmdstanpy - INFO - Chain [1] done processing
15:28:51 - cmdstanpy - INFO - Chain [1] start processing
15:28:51 - cmdstanpy - INFO - Chain [1] done processing
15:28:51 - cmdstanpy - INFO - Chain [1] start processing
15:28:52 - cmdstanpy - INFO - Chain [1] done processing
15:28:52 - cmdstanpy - INFO - Chain [1] 

month,Model,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11,2024-12,2025-01
0,Ford Transit Variants,112.18816,151.324423,128.538258,129.196785,111.06658,121.701577,110.224454,146.051472,147.193507,116.116876,87.14296,96.744916
1,Ford Focus Variants,163.126443,164.448689,134.538546,140.0315,128.640953,126.228614,138.660742,144.60561,152.617024,133.376598,110.240568,130.785681
2,ecosport,4.019359,38.013956,48.611323,46.655312,34.37375,32.892003,32.779725,29.781347,31.423313,30.91885,24.710211,21.963546
3,Ford Fiesta Variants,201.814121,238.416195,191.650702,179.282738,211.704768,194.14823,217.14465,248.102021,211.701246,157.201729,125.435946,162.002284
4,Ford Kuga Variants,149.910154,190.039588,149.537169,161.807129,152.753956,151.583448,136.258344,169.351284,140.87193,137.00379,127.351648,147.741837
5,s-max,50.554196,34.475922,36.951915,40.446771,44.981013,39.799948,39.241177,41.118027,38.148052,38.943297,41.564285,42.833946
6,Ford Ranger Variants,82.978191,104.499594,78.094509,84.713124,95.327196,75.971345,64.13007,87.725689,77.474639,75.361997,88.865133,99.186956
7,Other,246.084313,281.465994,238.721553,270.812848,256.676726,289.727643,285.835845,292.385346,287.195548,286.904229,280.521259,373.040366
8,c-max,1.206895,32.775467,36.627055,33.395748,35.736191,41.288757,35.55464,31.739202,35.779039,40.0164,38.29929,35.720352
9,Ford Mondeo Variants,36.582723,51.890542,40.94189,46.873617,38.045008,40.97275,46.090711,34.360619,42.820317,40.198533,37.311745,42.45507


In [40]:
#forecast_model_df = forecast_model_df.set_index('Model').astype(int).reset_index()

# Merging the DataFrames
df = forecast_model_df.merge(
    sales_data[['Make_new', 'Model_new']],
    left_on='Model',
    right_on='Model_new',
    how='left'
).drop_duplicates().drop('Model_new', axis=1).reset_index(drop=True)

# Reorder columns
new_column_order = ['Make_new', 'Model'] + [col for col in df.columns if col not in ['Make_new', 'Model']]
df = df[new_column_order].reset_index(drop=True)

df.loc[df['Model'] == 'Other', 'Make_new'] = 'Other' 

df.drop_duplicates(inplace=True)
df.dropna(inplace = True)
df = df.reset_index(drop=True)
df.iloc[17,0] = 'Other'
df.drop_duplicates(inplace=True)

# Display the result
df.head(50)

Unnamed: 0,Make_new,Model,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11,2024-12,2025-01,Make
0,ford,Ford Transit Variants,112,151,128,129,111,121,110,146,147,116,87,96,ford
1,ford,Ford Focus Variants,163,164,134,140,128,126,138,144,152,133,110,130,ford
2,ford,ecosport,4,38,48,46,34,32,32,29,31,30,24,21,ford
3,ford,Ford Fiesta Variants,201,238,191,179,211,194,217,248,211,157,125,162,ford
4,ford,Ford Kuga Variants,149,190,149,161,152,151,136,169,140,137,127,147,ford
5,ford,s-max,50,34,36,40,44,39,39,41,38,38,41,42,ford
6,ford,Ford Ranger Variants,82,104,78,84,95,75,64,87,77,75,88,99,ford
7,Other,Other,246,281,238,270,256,289,285,292,287,286,280,373,ford
8,ford,c-max,1,32,36,33,35,41,35,31,35,40,38,35,ford
9,ford,Ford Mondeo Variants,36,51,40,46,38,40,46,34,42,40,37,42,ford
