In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

 

# Load the training table, the test table and the sample submission
# from the Kaggle input directory.
TRAIN_CSV = '/kaggle/input/merged-new/merged_train-2.csv'
TEST_CSV = '/kaggle/input/merged-new/merged_test-3.csv'
SAMPLE_SUBMISSION_CSV = '/kaggle/input/store-sales-time-series-forecasting/sample_submission.csv'

merged = pd.read_csv(TRAIN_CSV)
df_test = pd.read_csv(TEST_CSV)
sampl_subm = pd.read_csv(SAMPLE_SUBMISSION_CSV)

In [None]:
# Keep only the columns of interest. Train and test share the same base
# columns; train additionally keeps the target ('sales') at the end and
# test keeps the row identifier ('id') at the front.
shared_cols = [
    'date', 'day_of_week', 'store_nbr', 'oil',
    'hol_Nat', 'hol_Reg', 'hol_Loc',
    'hol_type_Additional', 'hol_type_Bridge',
    'hol_type_Event', 'hol_type_Holiday', 'hol_type_Transfer',
    'hol_type_Work Day', 'family', 'onpromotion',
]

merged = merged[shared_cols + ['sales']]
df_test = df_test[['id'] + shared_cols]

In [None]:
# Names of the columns that are registered as additional regressors.
# The `hol_*` columns are listed separately only to keep the final list readable.
hol_cols = [
    'hol_Nat', 'hol_Reg', 'hol_Loc',
    'hol_type_Additional', 'hol_type_Bridge',
    'hol_type_Event', 'hol_type_Holiday', 'hol_type_Transfer',
    'hol_type_Work Day',
]
regressors = ['day_of_week', 'oil', *hol_cols, 'onpromotion']

### Prophet

In [None]:
import logging
import cmdstanpy
from itertools import product
from tqdm import tqdm
from prophet import Prophet

# Suppress cmdstanpy logs (only warnings and above are shown)
logging.getLogger("cmdstanpy").setLevel(logging.WARNING)

# Ensure 'date' is in datetime format; values that cannot be parsed become NaT
merged['date'] = pd.to_datetime(merged['date'], errors='coerce')

# Prophet expects the timestamp column to be called 'ds' and the target 'y'
train_new = merged.rename(columns={'date': 'ds', 'sales': 'y'})

# Fitted models, keyed by (family, store_nbr)
models = {}

# Unique families and store numbers; the prediction cell reuses both names
families = train_new['family'].unique()
store_nbrs = train_new['store_nbr'].unique()

# Row positions of every (family, store_nbr) group, computed in a single pass.
# This replaces a full boolean scan of the training frame per combination.
group_positions = train_new.groupby(['family', 'store_nbr'], sort=False).indices

# Initialize the progress bar over every family/store combination
progress_bar = tqdm(product(families, store_nbrs), total=len(families) * len(store_nbrs), leave=False)

for fam, str_nbr in progress_bar:
    # Update the progress bar with the current family and store
    progress_bar.set_description(f"Processing family {fam}, store {str_nbr}")

    # Rows of this combination (an empty frame when the combination never occurs)
    df_group = train_new.iloc[group_positions.get((fam, str_nbr), [])]

    # Drop rows without a target, and rows whose date was coerced to NaT above:
    # Prophet refuses to fit when 'ds' contains missing values.
    df_group = df_group.dropna(subset=['ds', 'y'])

    # Prophet needs at least two usable rows
    if df_group.shape[0] > 1:
        # One model per combination, with every regressor registered before fitting
        model = Prophet()
        for reg in regressors:
            model.add_regressor(reg)

        # Fit the model to the family/store specific data
        model.fit(df_group)

        # Store the fitted model under its (family, store_nbr) key
        models[(fam, str_nbr)] = model
    else:
        progress_bar.write(f"Skipping {fam} and store {str_nbr} due to insufficient data.")

In [None]:
# Rename the test timestamp column to Prophet's 'ds' and make sure it is datetime.
# The rename is guarded so that re-running this cell does not fail on a
# 'date' column that has already been renamed.
if 'date' in df_test.columns:
    df_test = df_test.rename(columns={'date': 'ds'})
df_test['ds'] = pd.to_datetime(df_test['ds'], errors='coerce')

predictions = []     # one frame of predictions per (family, store_nbr) group
seen_groups = set()  # combinations that occur in the test data

# Iterate over the combinations that actually occur in the test data, so that
# every test row receives a prediction, including combinations that never
# appeared in training.
for (fam, str_nbr), df_group_test in df_test.groupby(['family', 'store_nbr'], sort=False):
    seen_groups.add((fam, str_nbr))

    # The forecast is attached positionally below. Prophet returns its forecast
    # ordered by 'ds', so the input rows are put in that order first
    # (stable sort keeps the relative order of equal dates).
    df_group_test = df_group_test.sort_values('ds', kind='mergesort').reset_index(drop=True)

    model = models.get((fam, str_nbr))
    if model is None:
        # No model was fitted for this combination: fall back to zero sales
        # instead of dropping the rows, so the submission stays complete.
        print(f"No model found for family: {fam} and store number: {str_nbr}")
        group_sales = 0.0
    else:
        forecast = model.predict(df_group_test)
        group_sales = forecast['yhat'].to_numpy()

    predictions.append(
        df_group_test.assign(sales=group_sales)[['id','ds', 'sales', 'family', 'store_nbr']]
    )

# Report training combinations that have no rows in the test data
for fam, str_nbr in product(families, store_nbrs):
    if (fam, str_nbr) not in seen_groups:
        print(f"No test data found for family: {fam} and store number: {str_nbr}")

# Combine predictions into a single DataFrame
predictions_df = pd.concat(predictions, ignore_index=True)

# Display the predictions
print(predictions_df)

In [None]:
# Sales cannot be negative: replace every negative forecast with 0
is_negative = predictions_df['sales'].lt(0)
predictions_df.loc[is_negative, 'sales'] = 0

In [None]:
# Force the forecast to 0 for store/family combinations whose sales summed
# to 0 over the last month of the training data.

# Parse the dates locally so this cell does not rely on an earlier cell
# having already converted merged['date'] to datetime.
train_dates = pd.to_datetime(merged['date'], errors='coerce')
last_month = train_dates.max() - pd.DateOffset(months=1)

# Step 1: Identify store-family combinations with no sales in the last month
recent_sales = merged.loc[train_dates >= last_month].groupby(['store_nbr', 'family'])['sales'].sum()
no_sales_combinations = recent_sales[recent_sales == 0].reset_index()[['store_nbr', 'family']]

# Step 2: Set sales to 0 in predictions_df for the identified combinations.
# A single left merge flags the matching rows; it keeps the row order and row
# count of predictions_df because each combination occurs at most once on the
# right-hand side (enforced by `validate`).
matched = predictions_df[['store_nbr', 'family']].merge(
    no_sales_combinations,
    on=['store_nbr', 'family'],
    how='left',
    indicator=True,
    validate='many_to_one',
)
is_inactive = (matched['_merge'] == 'both').to_numpy()
predictions_df.loc[is_inactive, 'sales'] = 0

In [None]:
# Keep only the row id and the predicted sales, then display the result
submission = predictions_df.loc[:, ['id', 'sales']]
submission

In [None]:
# Write the submission as CSV, without the DataFrame index
SUBMISSION_CSV = '/kaggle/working/submission.csv'
submission.to_csv(SUBMISSION_CSV, index=False)