<a href="https://colab.research.google.com/github/jaidatta71/ML---Berkeley/blob/main/WALMART_Sales_forecast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Walmart Sales Forecasting

### 1. Import Libraries

In [84]:
# Data handling
import pandas as pd
import numpy as np
# Viz
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
# Sklearn
from sklearn import model_selection, metrics

# Feature selection
!pip install eli5
import eli5
from eli5.sklearn import PermutationImportance
# Models
import xgboost as xgb
!pip install catboost
import catboost as cb
import lightgbm as lgb
from sklearn import linear_model, ensemble
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor

# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# imported above — consider narrowing the filter once the notebook is stable.
import warnings
warnings.filterwarnings('ignore')



### Load Data files

In [85]:
# Root folder of the Walmart dataset (a Google Drive path as seen from Colab).
# Every CSV below is resolved relative to this single constant, so pointing the
# notebook at another copy of the data only requires changing this one line.
DATA_DIR = '/content/drive/MyDrive/walmart'

# One DataFrame per source file; the sub-folder layout mirrors the Drive copy.
features = pd.read_csv(f'{DATA_DIR}/features/features.csv')
train = pd.read_csv(f'{DATA_DIR}/train/train.csv')
stores = pd.read_csv(f'{DATA_DIR}/stores.csv')
test = pd.read_csv(f'{DATA_DIR}/test/test.csv')
sample_submission = pd.read_csv(f'{DATA_DIR}/sampleSubmission/sampleSubmission.csv')

In [86]:
# Preview the first five rows of the raw features table
features.head(n=5)

Unnamed: 0,Store,Date,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,IsHoliday
0,1,2/5/2010,42.31,2.572,,,,,,211.096358,8.106,False
1,1,2/12/2010,38.51,2.548,,,,,,211.24217,8.106,True
2,1,2/19/2010,39.93,2.514,,,,,,211.289143,8.106,False
3,1,2/26/2010,46.63,2.561,,,,,,211.319643,8.106,False
4,1,3/5/2010,46.5,2.625,,,,,,211.350143,8.106,False


In [87]:
# Join the columns of `stores` onto every `features` row through the shared "Store" key
feature_store = pd.merge(features, stores, on="Store", how='inner')

# The train/test merges against `feature_store` are performed further down,
# after the Date columns have been converted to datetime.

### Convert date columns to datetime and derive calendar features

In [94]:
# Parse the Date column of all three frames into datetime64 values
for frame in (feature_store, train, test):
    frame['Date'] = pd.to_datetime(frame['Date'])

# Derive calendar features on the feature table from the parsed dates;
# Week is the ISO week number, the others are plain calendar fields
dates = feature_store['Date'].dt
feature_store['Day'] = dates.day
feature_store['Week'] = dates.isocalendar().week
feature_store['Month'] = dates.month
feature_store['Year'] = dates.year

In [89]:
def _with_store_features(sales):
    """Inner-join `sales` with `feature_store` and return it in a stable order.

    Rows are matched on Store, Date and IsHoliday, then sorted by
    Store, Dept and Date with a fresh 0..n-1 index.
    """
    merged = sales.merge(feature_store, how='inner', on=['Store', 'Date', 'IsHoliday'])
    ordered = merged.sort_values(by=['Store', 'Dept', 'Date'])
    return ordered.reset_index(drop=True)


# Same enrichment for both splits
train_df = _with_store_features(train)
test_df = _with_store_features(test)

## EDA

In [None]:
# Summary statistics, transposed so that each column of train_df becomes one row
train_df.describe().transpose()

## Sales Data Analysis

### 1. Total sales by week of the year (all years combined)
#### Sales remain relatively stable throughout the year, except for a dip around week 42 followed by a resurgence during the holiday season


In [124]:

# Total every numeric column per ISO week number, pooling all years together.
# The result is indexed by Week and is reused by the markdown plot below.
df_weeks = train_df.groupby('Week').sum(numeric_only=True)

# Line chart of total weekly sales; the template is applied here at creation time
fig = px.line(data_frame=df_weeks, x=df_weeks.index, y='Weekly_Sales',
              template='simple_white',
              labels={'Weekly_Sales': 'Total Sales', 'x': 'Weeks'})

# NOTE(review): this call used to pass `template=template`, but no `template`
# variable is defined before this cell, so a fresh Restart & Run All failed with
# NameError. The figure already carries 'simple_white' from px.line above; if a
# different template was intended, set it explicitly here.
fig.update_layout(
    title={'text': '<b>Sales over the year across every week</b>', 'x': 0.075},
    xaxis=dict(tickmode='linear', showline=True),
    yaxis=dict(showline=True))

### Relationship between markdowns and sales
#### Markdowns (MDs) play a significant role in boosting sales at the beginning and end of the year

In [131]:
# Columns drawn as separate lines: the five markdown totals plus total sales.
# `df_weeks` is the per-week aggregate built in the previous cell.
markdown_and_sales = ['MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5', 'Weekly_Sales']

fig = px.line(df_weeks, x=df_weeks.index, y=markdown_and_sales,
              template='simple_white',
              labels={'value': 'Total Sales', 'x': 'Weeks'})

# NOTE(review): this call used to pass `template=template`, but no `template`
# variable is defined before this cell, so a fresh Restart & Run All failed with
# NameError. The figure already carries 'simple_white' from px.line above; if a
# different template was intended, set it explicitly here.
fig.update_layout(
    title={'text': '<b>Impact of mark downs in Sales over the year across every week</b>', 'x': 0.075},
    xaxis=dict(tickmode='linear', showline=True),
    yaxis=dict(showline=True))