# Store Sales Time Series Forecasting

We are given the historical sales data of Corporación Favorita, a large grocery retailer based in Ecuador. The task is to predict unit sales for thousands of items sold at different Favorita stores.

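In this notebook, we explore the given data: total sales per store, the best-selling item categories, the effect of holidays on sales, store types and locations, and how oil prices relate to sales.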

In [None]:
! pip install ipynbname
import sys
sys.path.append("../input/chartart") 
from plot import Figure, Group, Post
import numpy as np 
import pandas as pd
from geopy.geocoders import Nominatim
from sklearn import preprocessing

# What are the aggregate sales of each store?

In [None]:
# Section header figure for the store-level sales part of the notebook.
md1 = Figure('md1')
md1.markdown('# What is the aggregate sales of Each Store?')

# Load the training data and preview its first rows.
train_csv_path = '/kaggle/input/store-sales-time-series-forecasting/train.csv'
trainDf = pd.read_csv(train_csv_path)
trainDf.head()

In [None]:
# Sum the `sales` column per store over the whole training set.
salesByStore = trainDf.groupby('store_nbr')['sales'].sum()
store_numbers = salesByStore.index.to_list()
store_totals = salesByStore.to_list()

# Bar chart: one bar per store number.
sb = Figure('salesByStore',"Total Sales by Store")
sb.bar(store_numbers, store_totals, c='#21a698')
sb.set_x_label('Store Number')
sb.set_y_label('Total Sales')
sb.show()

# Which are the most sold item categories?

In [None]:
# Section header figure for the item-category part of the notebook.
md2 = Figure('md2')
md2.markdown('# Which are the most sold item categories?')

# Sum the `sales` column per product family.
salesByItem = trainDf.groupby('family')['sales'].sum()
family_totals = salesByItem.to_list()
family_names = salesByItem.index.to_list()

# Pie chart: values first, then their labels.
ib = Figure('itemBar',"Total Sales by Item")
ib.pie(family_totals, family_names)
ib.show()

# Do holidays have an impact on sales?

In [None]:
# Section header figure for the holiday part of the notebook.
md3 = Figure('md3')
md3.markdown('# Does Holiday have an impact on sales?')

# Load every holiday/event row, then keep only those whose locale is 'National'.
holidays_all = pd.read_csv('/kaggle/input/store-sales-time-series-forecasting/holidays_events.csv')
is_national = holidays_all['locale'] == 'National'
holidayDf = holidays_all.loc[is_national]
holidayDf.head()

In [None]:
# Sample series: rows for store 1 in the 'GROCERY I' family.
in_store_1 = trainDf['store_nbr'] == 1
is_grocery_1 = trainDf['family'] == 'GROCERY I'
sampleTS1 = trainDf.loc[in_store_1 & is_grocery_1]

# Subset of that series whose date appears in the national holiday table.
holiday_dates = holidayDf['date'].to_list()
sampleTS2 = sampleTS1.loc[sampleTS1['date'].isin(holiday_dates)]

# Line for the full series, with holiday dates overlaid as scatter points.
tsPlot = Figure('timeseries1',"Store 1 ; Grocery I")
tsPlot.line('date','sales',sampleTS1,c='#7e943b',labels='Store 1 Sales')
tsPlot.scatter('date','sales',sampleTS2,labels='Holidays')
tsPlot.show()

# Let us explore the various store categories and locations.

In [None]:
# Section header figure for the store categories/locations part of the
# notebook. `md4` is listed among the figures in the post-assembly cell at the
# end of the notebook.
md4 = Figure('md4')
md4.markdown('# Let us explore the various store categories and location.')

def get_coordinates(row, geolocator=None):
    """Geocode a store's city and state into ``[latitude, longitude]``.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row['city']`` and ``row['state']`` with string
        values (a DataFrame row or a plain dict).
    geolocator : object, optional
        Object exposing ``geocode(query)``. When omitted, a new
        ``Nominatim(user_agent="testapp")`` client is created for this call.
        Passing one in lets callers reuse a single client across many rows.

    Returns
    -------
    list
        ``[latitude, longitude]`` taken from the geocoder's result.

    Raises
    ------
    ValueError
        If the geocoder returns ``None`` for the place, i.e. no match was
        found. Previously this surfaced as an opaque ``AttributeError``.
    """
    place = row['city']+' , '+row['state']
    if geolocator is None:
        geolocator = Nominatim(user_agent="testapp")
    location = geolocator.geocode(place)
    if location is None:
        # Fail with the offending query in the message instead of crashing on
        # `None.latitude`, so the bad city/state is easy to identify.
        raise ValueError("Could not geocode place: " + repr(place))
    return [location.latitude, location.longitude]

storesDf = pd.read_csv('/kaggle/input/store-sales-time-series-forecasting/stores.csv')

# Each get_coordinates() call is a network request to the geocoding service.
# Stores that share a city/state would otherwise trigger the same lookup
# repeatedly, so geocode each distinct (city, state) pair once and reuse it.
unique_places = storesDf[['city', 'state']].drop_duplicates()
place_coordinates = {
    (place['city'], place['state']): get_coordinates(place)
    for _, place in unique_places.iterrows()
}

# One [latitude, longitude] list per store; copied so rows do not share the
# same list object.
storesDf['coordinates'] = [
    list(place_coordinates[key])
    for key in zip(storesDf['city'], storesDf['state'])
]
storesDf.head()

In [None]:
# Count how many stores fall under each value of the `type` column.
storeType = storesDf['type'].value_counts()
type_counts = storeType.to_list()
type_names = storeType.index.to_list()

# Pie chart: counts first, then their labels.
storeTypePie = Figure('storeTypePie',"Store Types")
storeTypePie.pie(type_counts, type_names)
storeTypePie.show()

In [None]:
# One map marker per store, positioned at its geocoded coordinates and using
# the city name as the tooltip.
marker = [
    {
        "latitude": coords[0],
        "longitude": coords[1],
        "color": "#75165a",
        "toolTip": city,
        "icon": "location",
        "width": 10.0,
        "height": 10.0,
    }
    for coords, city in zip(storesDf['coordinates'], storesDf['city'])
]

# The same label is used as the data value and as the colour-mapper key.
store_count_label = 'Total Stores: '+str(len(storesDf))
data = [{"Stores": store_count_label, "Country": "Ecuador"}]
shapeColorMapper = [{"value": store_count_label, "color": "#b1e3d2"}]

brc = Figure(chart_id='storesMap', title='Store Locations\n')
brc.map(
    'world',
    data,
    'Country',
    zoomLevel=100.0,
    focalLatitude=-0.2201641,
    focalLongitude=-78.5123274,
    marker=marker,
    shapeColorValueMapper='Stores',
    shapeColorMapper=shapeColorMapper,
)
brc.show()

# Let us explore whether oil prices affect store sales.

In [None]:
# Section header figure for the oil-price part of the notebook.
md5 = Figure('md5')
md5.markdown('# Let us explore whether oil prices affect the store prices?')

# Load the oil price table and drop rows that contain missing values.
oil_raw = pd.read_csv('/kaggle/input/store-sales-time-series-forecasting/oil.csv')
oilDf = oil_raw.dropna()

# Line chart of the `dcoilwtico` column against `date`.
oilLp = Figure('oilLinePlt','Oil Prices')
oilLp.line('date','dcoilwtico',oilDf,c='#2b3c66',labels='Oil Prices')
oilLp.set_y_label('Price')
oilLp.set_x_label('Date')
oilLp.show()

In [None]:
def normalize(lst):
    """Return the z-scores of ``lst`` multiplied by 100.

    Each value has the mean subtracted, is divided by the population standard
    deviation (``np.std`` default, ``ddof=0``), and is then scaled by 100.

    Parameters
    ----------
    lst : list or array-like of numbers
        The values to standardise.

    Returns
    -------
    numpy.ndarray (for list input)
        The scaled z-scores. When every value is identical (standard
        deviation of zero) the result is all zeros rather than the NaNs that
        a division by zero would produce.
    """
    centered = lst - np.mean(lst)
    spread = np.std(lst)
    if spread == 0:
        # Zero variance: every deviation is already 0, so skip the 0/0
        # division that would yield NaN and a RuntimeWarning.
        return centered * 100
    return (centered / spread) * 100

def _daily_family_sales(family):
    """Total `sales` per `date` for the rows of `trainDf` in one product family."""
    family_rows = trainDf.loc[trainDf['family'] == family]
    return family_rows.groupby('date')['sales'].sum()


# Reload the oil prices (rows with missing values dropped) for this cell.
oil_raw = pd.read_csv('/kaggle/input/store-sales-time-series-forecasting/oil.csv')
oilDf = oil_raw.dropna()

autoDf = _daily_family_sales('AUTOMOTIVE')
grocery1Df = _daily_family_sales('GROCERY I')
grocery2Df = _daily_family_sales('GROCERY II')

oilLp1 = Figure('oilLinePlt1','Normalized Prices')

# One line per product family, each passed through normalize().
for label, daily in (
    ('Automotive', autoDf),
    ('Grocery I', grocery1Df),
    ('Grocery II', grocery2Df),
):
    oilLp1.line(daily.index.tolist(), normalize(daily.values.tolist()), labels=label)

# The oil price series is plotted straight from the data frame (not normalized).
oilLp1.line('date','dcoilwtico',oilDf,c='#2b3c66',labels='Oil Prices')
oilLp1.set_y_label('Price')
oilLp1.set_x_label('Date')
oilLp1.show()

# Creating ChartArt Post

In [None]:
# Set to True to collect every figure built above into a ChartArt post and
# preview it. Off by default, matching the previously disabled state of this
# cell. A flag is used instead of wrapping the code in a string literal, which
# was echoed as cell output and never syntax-checked.
PREVIEW_POST = False

if PREVIEW_POST:
    post = Post('kagglePost',"Store Sales Time Series Forecasting")
    # Figures are added in the order they appear in the notebook.
    for fig in [md1,sb,md2,ib,md3,tsPlot,md4,storeTypePie,brc,md5,oilLp,oilLp1]:
        post.add(fig)
    post.preview()