# Predict Future Sales

Problem: Build a predictive model of future sales that achieves the lowest possible loss.

Reference: [Predict Future Sales](https://www.kaggle.com/c/competitive-data-science-predict-future-sales)


College: FASAM

Course: Postgraduate in Big Data and Machine Learning

Subject: Neural Network

Professor: Lucas Assis

Team:


*   Andryev Silva Lemes
*   Francisco Rosa Santana


# Libraries

In [1]:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from datetime import datetime

from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dropout, Activation, Dense
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.optimizers import RMSprop

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


# Dataset

In [0]:
def _load_csv(url):
    """Fetch the CSV at `url` and parse it as UTF-8 into a DataFrame."""
    return pd.read_csv(url, encoding='UTF-8')


# Each source file lives behind its own Google Drive download link.
shops = _load_csv('https://drive.google.com/uc?authuser=0&id=1MaCeRxC1J1kJQqN9CdLmd36jjKuB7Tb7&export=download')
items = _load_csv('https://drive.google.com/uc?authuser=0&id=1fqcSlFw8UT2bDh7jwYblqYQi0eNl4Kgc&export=download')
itemsCategoria = _load_csv('https://drive.google.com/uc?authuser=0&id=1BnYi85_1DVquA89-rZfPE-XVZXaFjFWm&export=download')
salesTrain = _load_csv('https://drive.google.com/uc?authuser=0&id=1m11i2F6J1xxaN81YtRth0sARbyZeE_oZ&export=download')
test = _load_csv('https://drive.google.com/uc?authuser=0&id=1imHd3PoLtkOwYiV6teqXgZRfyCcH2bBd&export=download')

# Data cleaning

In [0]:
# Exclusive upper bounds applied to the daily records: rows whose daily count
# or price reaches these values are discarded before aggregation.
MAX_ITEM_CNT_DAY = 800
MAX_ITEM_PRICE = 48000

# Keep only daily rows whose count and price are strictly positive and below
# the bounds above. A single mask replaces four successive re-filterings.
valid_rows = (
    (salesTrain.item_cnt_day > 0.0)
    & (salesTrain.item_price > 0.0)
    & (salesTrain.item_cnt_day < MAX_ITEM_CNT_DAY)
    & (salesTrain.item_price < MAX_ITEM_PRICE)
)

# Sum the daily counts into one row per (shop, month block, item).
dataset = (
    salesTrain.loc[valid_rows, ['date_block_num', 'shop_id', 'item_id', 'item_cnt_day']]
    .groupby(['shop_id', 'date_block_num', 'item_id'], as_index=False)['item_cnt_day']
    .sum()
    .rename(columns={'item_cnt_day': 'item_cnt_month'})
)

# Cap the monthly totals to the [0, 20] range.
dataset['item_cnt_month'] = dataset['item_cnt_month'].clip(0, 20)

# Features are the raw shop and item identifiers; the target is the capped
# monthly count (any missing value is replaced by 0).
x_data = dataset[['shop_id', 'item_id']]
y_data = dataset[['item_cnt_month']].fillna(0)

# Random 75/25 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.25, random_state=42)


# Neural network model

In [4]:
# Training hyperparameters; batch_size and nb_epoch are read again by the
# training cell when it calls model.fit.
# NOTE(review): a batch of 2 gives BatchNormalization very noisy batch
# statistics and makes each epoch slow — consider a larger value.
batch_size = 2
nb_classes = 1  # width of the output layer (one predicted value per sample)
nb_epoch = 15

# create model
# Feed-forward regressor: Dense(64) -> BatchNorm -> Dropout -> Dense(32) ->
# Dropout -> Dense(nb_classes) with a linear output.
model = Sequential()
model.add(Dense(64, input_shape=X_train.shape[1:], activation='relu', kernel_initializer='normal'))
model.add(BatchNormalization())
model.add(Dropout(0.2))

model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dropout(0.2))

# Linear activation leaves the output unbounded, as needed for regression.
model.add(Dense(nb_classes))
model.add(Activation('linear'))

rms = RMSprop(lr=0.005)
# The target is a continuous quantity, so classification accuracy carries no
# information here; track mean absolute error next to the MSE loss instead.
# A metric is kept so that model.evaluate still returns [loss, metric].
model.compile(optimizer=rms, loss='mean_squared_error', metrics=['mean_absolute_error'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 64)                192       
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)                256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
activation_1 (Activation)    (None, 32)                0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
__________

# Neural network training

In [None]:
# Write the model to "model.h5" only when the monitored validation loss improves.
checkpointer = ModelCheckpoint(
    filepath="model.h5",
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=0,
)

# Fit on the training split, using the held-out split as validation data.
fit_result = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    shuffle=True,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=[checkpointer],
)

# Keep only the recorded training history, not the History callback object.
history = fit_result.history

# Results

In [None]:
# Evaluate silently on the held-out split; the first entry of the result is the loss.
score = model.evaluate(x=X_test, y=y_test, verbose=0)
test_loss = score[0]

print('Loss:', test_loss)