<a href="https://colab.research.google.com/github/jeffheaton/present/blob/master/WUSTL/CABI-Demand/demand_eda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Washington University [Olin School of Business](https://olin.wustl.edu/EN-US/Pages/default.aspx)
[Center for Analytics and Business Insights](https://olin.wustl.edu/EN-US/Faculty-Research/research-centers/center-analytics-business-insights/Pages/default.aspx) (CABI)  
[Deep Learning for Demand Forecasting](https://github.com/jeffheaton/present/tree/master/WUSTL/CABI-Demand)  
Copyright 2022 by [Jeff Heaton](https://www.youtube.com/c/HeatonResearch), Released under [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) 

# Exploratory Data Analysis (EDA)

In [None]:
# Detect whether the notebook is running inside Google Colab and record the
# result in the COLAB flag.
#
# Only the import is guarded: an ImportError means the google.colab package is
# not available, i.e. this is not a Colab runtime. A failure while mounting
# Drive (or a user interrupt) is no longer swallowed and misreported as
# "not using Google CoLab"; it surfaces as the real error instead.
try:
    from google.colab import drive
except ImportError:
    print("Note: not using Google CoLab")
    COLAB = False
else:
    # Mount Google Drive at /content/drive; force_remount=True re-mounts even
    # if a previous mount is still present.
    drive.mount('/content/drive', force_remount=True)
    COLAB = True
    print("Note: using Google CoLab")

In [None]:
import pandas as pd
import os

# Directory that holds the CSV inputs. The default is the Google Drive folder
# used when running in Colab; set the DEMAND_DATA_PATH environment variable to
# read the same files from another location.
PATH = os.environ.get("DEMAND_DATA_PATH", "/content/drive/MyDrive/projects/demand/")

df_sales = pd.read_csv(os.path.join(PATH,"sales_train.csv"))
df_items = pd.read_csv(os.path.join(PATH,"items.csv"))
# The "resturant(s)" spelling is kept as-is: it is the file name this cell
# reads and the variable name other cells may refer to.
df_resturant = pd.read_csv(os.path.join(PATH,"resturants.csv"))

# Parse the date column; errors='coerce' turns unparseable values into NaT.
df_sales['date'] = pd.to_datetime(df_sales['date'], errors='coerce')

# Fail fast with a clear message: a NaT date would otherwise only surface
# below as an integer-cast error while computing the week index.
n_bad_dates = int(df_sales['date'].isna().sum())
if n_bad_dates:
    raise ValueError(
        f"{n_bad_dates} row(s) in sales_train.csv have a missing or unparseable date"
    )

# Name of the weekday ("Monday", "Tuesday", ...) for each sale.
df_sales['weekday'] = df_sales['date'].dt.day_name()

# Zero-based week index counted in whole 7-day spans from the earliest date in
# the data (not calendar weeks).
start_date = df_sales['date'].min()
df_sales['week'] = ((df_sales['date'] - start_date).dt.days // 7).astype(int)

In [None]:
import pandas as pd

# Total item_count per date over the whole sales frame, drawn as a wide line chart.
df_plot = (
    df_sales
    .groupby('date')['item_count']
    .sum()
    .reset_index()
)
ax = df_plot.plot.line(x="date", figsize=(20, 4))

In [None]:
# Weekday labels in calendar order; groupby sorts the names alphabetically, so
# the rows are re-ordered with this list before plotting.
day_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Total item_count per weekday name across all sales.
df_plot = df_sales[['weekday','item_count']].groupby(['weekday']).sum().reset_index()
print(df_plot)

# reindex (rather than .loc) so a weekday with no sales at all shows up as an
# empty slot instead of raising KeyError.
ax = df_plot.set_index("weekday").reindex(day_order).plot(
    kind="bar", legend=False, title="Total items sold by weekday"
)

In [None]:
# Keep only the sales dated in 2020; the item-level plotting cells reuse
# df_sales_single_year.
df_sales_single_year = df_sales.loc[df_sales.date.dt.year == 2020]

# Total item_count per date within that year, one full-width bar per date on a
# very large canvas.
df_plot = (
    df_sales_single_year
    .groupby('date')['item_count']
    .sum()
    .reset_index()
)
ax = df_plot.plot.bar(x="date", figsize=(200, 40), width=1.0)

In [None]:
#from google.colab import files
#files.download("test.csv")

# Most Popular Items

In [None]:
# Distinct item ids that occur in the sales records.
df_sales['item_id'].unique()

In [None]:
# Total item_count per item, largest first.
df2 = (
    df_sales
    .groupby('item_id')['item_count']
    .sum()
    .reset_index()
    .sort_values('item_count', ascending=False)
)

# Item id/name lookup with the key renamed to match the sales frame.
df_items2 = df_items[['id', 'name']].rename(columns={'id': 'item_id'})

# Attach the names to the ranked totals and print the whole table.
df_popular = df2.merge(df_items2, on='item_id')
print(df_popular.to_string())

# Individual Item Plots

In [None]:
# Wide table for the single-year subset: one row per date, one column per
# item_id, each value the summed item_count (NaN where an item has no rows for
# that date).
df_plot = (
    df_sales_single_year
    .groupby(['date', 'item_id'])['item_count']
    .sum()
    .reset_index()
    .pivot(index='date', columns='item_id', values='item_count')
    .rename_axis(None, axis="columns")  # drop the leftover "item_id" columns label
    .reset_index()
)

# One stacked bar per date, segmented by item.
df_plot.plot.bar(x="date", figsize=(200, 40), width=1.0, stacked=True)

In [None]:
# Wide table for the single-year subset: one row per week index, one column per
# item_id, each value the summed item_count (NaN where an item has no rows for
# that week).
df_plot = (
    df_sales_single_year
    .groupby(['week', 'item_id'])['item_count']
    .sum()
    .reset_index()
    .pivot(index='week', columns='item_id', values='item_count')
    .rename_axis(None, axis="columns")  # drop the leftover "item_id" columns label
    .reset_index()
)

# One stacked bar per week, segmented by item.
df_plot.plot.bar(x="week", figsize=(100, 40), width=1.0, stacked=True)

In [None]:
# Weekday labels in calendar order; repeated here so this cell does not depend
# on the earlier weekday cell having been run.
day_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Summed item_count per (weekday, item_id) within the single-year subset,
# pivoted to one row per weekday and one column per item_id.
df_plot = df_sales_single_year[['weekday','item_id','item_count']].groupby(['weekday','item_id']).sum().reset_index()
df_plot = df_plot.pivot(index=['weekday'],columns='item_id',values='item_count').reset_index()
df_plot.columns.name = None  # drop the leftover "item_id" columns label

# reindex (rather than .loc) so a weekday with no sales in the subset becomes an
# empty bar instead of raising KeyError.
ax = df_plot.set_index('weekday').reindex(day_order).plot(
    kind="bar", width=1.0, stacked=True, legend=False,
    title="Items sold by weekday, stacked by item"
)

# Scratch Area

Reference for building stacked bar plots from multiple columns of one DataFrame: https://stackoverflow.com/questions/69724325/python-stacked-bar-plot-based-on-values-from-multiple-columns-within-the-same-d

In [None]:
# Scratch: display whatever frame df_plot currently refers to (the name is
# rebound by each plotting cell above, so the content depends on which ran last).
df_plot