# In [52]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Read the raw sales records from disk.
data = pd.read_csv('sales_data.csv')

# Parse the date column, then order rows by item and, within an item, by date.
data['date'] = pd.to_datetime(data['date'])
data = data.sort_values(by=['Item Id', 'date'])

# Calendar features derived from the parsed date.
date_parts = data['date'].dt
data['day_of_week'] = date_parts.dayofweek
data['month'] = date_parts.month
data['year'] = date_parts.year

# Discard rows that have no ad_spend value.
data = data.dropna(subset=['ad_spend'])

# function to create lag features
def create_lag_features(df, lags, column):
    """Add shifted copies of ``column`` to ``df`` and return ``df``.

    For every value ``k`` in ``lags`` a new column named
    ``f'{column}_lag_{k}'`` is written, holding ``df[column].shift(k)``.

    Args:
        df: DataFrame to extend. It is modified in place.
        lags: Iterable of shift amounts, one new column per entry.
        column: Name of the column to shift.

    Returns:
        The same ``df`` object that was passed in, with the lag columns added.
    """
    for periods_back in lags:
        shifted = df[column].shift(periods_back)
        target_name = f'{column}_lag_{periods_back}'
        df[target_name] = shifted
    return df

# Add lag-1/2/3 copies of 'units', shifted within each 'Item Id' so one item's
# history never spills into the first rows of the next item.
#
# A grouped column shift is used instead of groupby.apply() with a function
# that writes new columns into the group: pandas documents functions that
# mutate the object passed to apply() as unsupported, and recent pandas
# releases stop passing the grouping column to the applied function, which
# would drop 'Item Id' from the result and break the later per-item loop.
grouped = data.groupby('Item Id')
units_by_item = grouped['units']
for lag in (1, 2, 3):
    data[f'units_lag_{lag}'] = units_by_item.shift(lag)

# Renumber the rows, then drop every row that still has a missing value in
# any column (this removes the first rows of each item, whose lags are NaN).
data = data.reset_index(drop=True)
data = data.dropna()

# function to build and train the SARIMAX model
def train_sarimax(train, order, seasonal_order):
    """Fit a SARIMAX model of ``units`` on one training frame.

    Args:
        train: DataFrame providing the ``units`` target and the ``ad_spend``,
            ``units_lag_1``, ``units_lag_2`` and ``units_lag_3`` columns,
            which are passed to the model as exogenous regressors.
        order: Forwarded to ``SARIMAX`` as ``order``.
        seasonal_order: Forwarded to ``SARIMAX`` as ``seasonal_order``.

    Returns:
        Whatever ``SARIMAX(...).fit(disp=False)`` returns for this data.
    """
    regressors = train[['ad_spend', 'units_lag_1', 'units_lag_2', 'units_lag_3']]
    target = train['units']
    unfitted = SARIMAX(
        target,
        exog=regressors,
        order=order,
        seasonal_order=seasonal_order,
    )
    return unfitted.fit(disp=False)



# In [None]:
# SARIMAX settings shared by every per-item model; they are passed straight
# through to train_sarimax() as `order` and `seasonal_order`.
order = (1, 1, 1)
seasonal_order = (1, 1, 1, 12)

# Rows dated before this value are used for fitting; rows on or after it are
# forecast. Kept as a string so the comparison behaves exactly as before.
split_date = '2023-01-01'

# Regressor columns handed to forecast(); train_sarimax() fits on these same
# four columns, so the two lists must stay in sync.
exog_columns = ['ad_spend', 'units_lag_1', 'units_lag_2', 'units_lag_3']

# Collects one [ID, TARGET] row per forecast observation.
submission_list = []

# Fit and forecast each item independently.
grouped = data.groupby('Item Id')
for item_id, group in grouped:
    train = group[group['date'] < split_date]
    test = group[group['date'] >= split_date]

    # Skip items with fewer than 10 training rows or with nothing to predict.
    if len(train) < 10 or len(test) < 1:
        continue

    model_fit = train_sarimax(train, order, seasonal_order)

    exog_test = test[exog_columns]

    # Forecasting the number of units sold, one step per test row.
    predictions = model_fit.forecast(steps=len(test), exog=exog_test)

    # The previous version appended an undefined name `ID`, which raised
    # NameError on the first forecast row. Use the dataset's own 'ID' column
    # when it has one.
    if 'ID' in test.columns:
        row_ids = test['ID']
    else:
        # NOTE(review): assumes the expected identifier is
        # '<YYYY-MM-DD>_<Item Id>' -- confirm against the submission spec.
        row_ids = (
            test['date'].dt.strftime('%Y-%m-%d')
            + '_'
            + test['Item Id'].astype(str)
        )

    # Pair identifiers with forecasts by position and round to whole units.
    submission_list.extend(
        [row_id, int(round(prediction))]
        for row_id, prediction in zip(row_ids, predictions)
    )

# Converting submission list to DataFrame
submission_df = pd.DataFrame(submission_list, columns=['ID', 'TARGET'])

# Saving the submission file
submission_df.to_csv('result.csv', index=False)
