In [None]:
# Global Imports

# data io and normalization
import pandas as pd
import numpy as np

# modelling
import tensorflow as tf
import sklearn

# plotting and visualizations
import seaborn as sns
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt


# system
import sys
import os

In [None]:
# Global Variables
# Notebook-wide configuration constants.

# Not referenced in the cells shown here; presumably the training mini-batch size — TODO confirm.
BATCH_SIZE = 64
# Not referenced in the cells shown here; presumably the number of training epochs — TODO confirm.
TRAIN_EPOCHS = 100
# Directory prefix of the input CSVs. File names are appended by plain string
# concatenation, so the trailing slash is required.
DATA_PATH = '../input/m5-forecasting-accuracy/'
# Empty placeholder; not referenced in the cells shown here.
TRAIN_PATH = ''
# Fraction of the day columns assigned to training; the rest go to validation.
TRAIN_RATIO = 0.9

In [None]:
# Utility Functions

# Form Weekly, Monthly And Yearly Batches
def form_batch(data, period):
    """Average a 1-D series over consecutive windows of ``period`` entries.

    The series is cut into back-to-back chunks of length ``period`` and the
    mean of each chunk is returned. A final, incomplete chunk is padded with
    zeros up to ``period`` before averaging, so its mean is taken over
    ``period`` values rather than over the entries actually present.

    Args:
        data: 1-D sequence of numbers (list, tuple, NumPy array or pandas
            Series). It is copied and never modified.
        period: Positive integer window length (e.g. 7 for weekly means).

    Returns:
        1-D float ``np.ndarray`` holding ``ceil(len(data) / period)`` means;
        empty when ``data`` is empty.

    Raises:
        ValueError: If ``period`` is not positive.
    """
    if period <= 0:
        raise ValueError("period must be a positive integer")

    values = np.asarray(data, dtype=float)

    # Ceiling division: one window per full chunk plus one for a partial
    # remainder. ``len // period + 1`` would append a spurious all-zero window
    # whenever the length is an exact multiple of ``period``.
    n_windows = -(-len(values) // period)

    # Copy into a zero-initialised buffer: this zero-pads the tail of a
    # partial last window and leaves the caller's data untouched.
    padded = np.zeros(n_windows * period, dtype=float)
    padded[:len(values)] = values
    return padded.reshape(n_windows, period).mean(axis=1)

# seeing trends in sales of items
def plot_data(data):
    """Plot a sales series next to its 7-, 30- and 365-entry window means.

    Draws four panels side by side: the raw series, followed by its averages
    over windows of 7, 30 and 365 entries as computed by ``form_batch``.

    Args:
        data: 1-D sequence of sales values (assumed to be one value per day,
            given the weekly/monthly/yearly window sizes — TODO confirm).

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # (panel title, series) pairs in left-to-right drawing order.
    panels = [
        ('Daily', data),
        ('Weekly mean (7)', form_batch(data, 7)),
        ('Monthly mean (30)', form_batch(data, 30)),
        ('Yearly mean (365)', form_batch(data, 365)),
    ]

    fig, axes = plt.subplots(1, len(panels))
    fig.set_figwidth(25)

    for ax, (title, series) in zip(axes, panels):
        # The raw panel plots the ``data`` argument itself, so the function
        # does not depend on any notebook-level variable being defined.
        ax.plot(series)
        ax.set_title(title)

    # Lay out after drawing so the titles are included in the spacing.
    fig.tight_layout(pad=3.0)
    plt.show()

In [None]:
# Loading Data

# Names of everything found in the input directory.
all_files = os.listdir(DATA_PATH)

# Read the four input tables; each path is DATA_PATH followed by the file name.
cal = pd.read_csv(f'{DATA_PATH}calendar.csv')
sp = pd.read_csv(f'{DATA_PATH}sell_prices.csv')
stv = pd.read_csv(f'{DATA_PATH}sales_train_validation.csv')
submission = pd.read_csv(f'{DATA_PATH}sample_submission.csv')

In [None]:
# Defining Data And Getting Useful Feature Columns

# The event columns of the calendar are ignored for now because 92% of the
# data doesn't contain event information; their effect will be compared later.
calendar_columns = ['date', 'd', 'wm_yr_wk', 'snap_CA', 'snap_TX', 'snap_WI']
sell_price_columns = ['store_id', 'item_id', 'wm_yr_wk', 'sell_price']
item_columns = ['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']

# Day columns are the ones whose name starts with 'd_'. Matching on the prefix
# (rather than a substring test) keeps out any other column that merely
# contains 'd_' somewhere in its name.
day_ids = [col for col in stv.columns if col.startswith('d_')]

# Split the day columns in column order: the first TRAIN_RATIO share is used
# for training and everything after it for validation.
train_days = day_ids[:int(len(day_ids) * TRAIN_RATIO)]
valid_days = day_ids[len(train_days):]

In [None]:
# Creating A Single Dataframe With All The Information For Training And Validation

# Only the sell-price columns needed to look prices up per item and store.
sale_prices = sp[sell_price_columns]

# Training frame: item descriptors plus, per row, the list of sales over the
# training days. `.copy()` makes it an independent frame so adding columns
# does not write into a slice of `stv`.
train_data = stv[item_columns].copy()
train_data['sales'] = stv[train_days].values.tolist()

# Attach, per (item_id, store_id), the list of all recorded sell prices in the
# row order of the sell-prices table. The lists are built once with a groupby
# and joined, instead of rescanning the whole price table for every item.
# A left join keeps every training row in its original order; pairs without
# any price record get NaN. `validate` guards against duplicated keys on the
# price side silently multiplying rows.
# NOTE(review): the price lists cover every week present in the price table,
# not only the weeks of the training days — confirm that is intended.
train_data = train_data.merge(
    sale_prices
    .groupby(['item_id', 'store_id'], sort=False)['sell_price']
    .agg(list)
    .rename('prices')
    .reset_index(),
    on=['item_id', 'store_id'],
    how='left',
    validate='many_to_one',
)

# Validation frame: same descriptors with the sales of the validation days;
# prices are left unset here.
valid_data = stv[item_columns].copy()
valid_data['sales'] = stv[valid_days].values.tolist()
valid_data['prices'] = None

In [None]:
# Display the first rows of the training frame as the cell output.
train_data.head()

In [None]:
# Getting Information
# Select the row at integer position 2 of train_data as a sample item.
item = train_data.iloc[2]

In [None]:
# EDA