# Program 2 SALES FORECASTS

##### PURPOSE:  
This program implements an embedding-based neural network in TensorFlow to perform a partner-by-partner
sales forecast.  This is not an inferential program, although it measures accuracy against a test set as mean 
absolute percentage error.  

This is a demonstration/training program and is not production grade.

##### INPUT: 
Original Data And features developed in the prior programs.

##### OUTPUT: 
Sales forecasts and accuracy on a test set of known data 

In [85]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and re-import changed modules before each cell runs.
%reload_ext autoreload
%autoreload 2

Import standard python and sklearn libraries

In [86]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn_pandas import DataFrameMapper
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import os as os

Import tensorflow and tensorflow libraries

In [87]:
import tensorflow as tf
from tensorflow.keras import layers,callbacks,losses,optimizers,initializers,models,regularizers
from tensorflow.keras.layers import Dense,Dropout,BatchNormalization,Embedding,Flatten,concatenate,Input
from tensorflow.keras.callbacks import CSVLogger,ReduceLROnPlateau,ModelCheckpoint,EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.losses import mean_squared_error, mean_absolute_error,mean_absolute_percentage_error
from tensorflow.keras.optimizers import SGD,RMSprop,Adam,Adamax
from tensorflow.keras.initializers import RandomNormal,RandomUniform,TruncatedNormal
from tensorflow.keras.metrics import mean_absolute_percentage_error

Set seed for initializers

In [88]:
# Fix TensorFlow's graph-level random seed so weight initialisation is repeatable between runs.
# NOTE(review): tf.set_random_seed is the TF 1.x spelling (tf.random.set_seed in TF 2.x) -- confirm the installed version.
tf.set_random_seed(73)

Check for gpu and expect this output:

[
  name: "/cpu:0"device_type: "CPU",
  name: "/gpu:0"device_type: "GPU"
]


In [89]:
# Print every compute device TensorFlow can see, to check whether a GPU is available.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 3179301970092214904
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 5910947058565456372
physical_device_desc: "device: XLA_CPU device"
]


Read data file

In [90]:
# Load the feature table from 'Partner_Features.pkl' in the current working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files from a trusted source.
dt = pd.read_pickle(os.getcwd() + '/Partner_Features.pkl')
# Order rows by ID then Date and renumber the index from 0.
dt.sort_values(by=['ID','Date'],inplace=True)
dt.reset_index(drop=True,inplace=True)

Identify categorical, continuous, data and target columns

In [91]:
def label_vars(dt):
    """Select the modelling columns and name the two feature groups.

    Feature groups are chosen purely by column position in ``dt``:

    * categorical: the first 31 columns plus the third- and second-from-last
      columns;
    * continuous: columns 31 through 49 plus the seventh- and sixth-from-last
      columns.

    Returns a ``(frame, cat_vars, cont_vars)`` tuple.  ``frame`` is a new
    DataFrame holding the categorical, continuous, ``'Rev'`` and ``'Date'``
    columns (in that order), sorted by ``['ID', 'Date']`` with a fresh
    0..n-1 index.  The input frame is left untouched.
    """
    names = list(dt.columns)
    cat_vars = names[:31] + names[-3:-1]
    cont_vars = names[31:50] + names[-7:-5]
    keep = cat_vars + cont_vars + ['Rev', 'Date']
    frame = dt[keep].sort_values(by=['ID', 'Date']).reset_index(drop=True)
    return frame, cat_vars, cont_vars

In [92]:
# Build the modelling frame and collect the categorical / continuous column names.
df,cat_vars,cont_vars = label_vars(dt)

Create the per-feature category counts (stored in an embedding dict keyed by column name) and the categorical map, which pairs each categorical column with its own LabelEncoder.

In [93]:
def cat_data(df, cat_vars):
    """Describe the categorical features for embedding and encoding.

    Returns a ``(cat_vars_dict, cat_map)`` tuple:

    * ``cat_vars_dict`` maps each name in ``cat_vars`` to the number of
      distinct values found in that column of ``df``;
    * ``cat_map`` is a list of ``(column, LabelEncoder())`` pairs, one fresh
      (unfitted) encoder per categorical column.
    """
    cat_vars_dict = {col: len(df[col].unique()) for col in cat_vars}
    cat_map = [(col, LabelEncoder()) for col in cat_vars]
    return cat_vars_dict, cat_map

In [94]:
# Count the distinct values per categorical column and pair each column with a LabelEncoder.
cat_vars_dict,cat_map = cat_data(df,cat_vars)

Create the continuous data map function with MinMaxScaler; the feature range defaults to (0, 1).

In [95]:
def cont_data(cont_vars,mn=0,mx=1):
    """Pair every continuous column with its own MinMaxScaler.

    ``mn`` and ``mx`` are the lower and upper bounds of the scaler's
    ``feature_range``.  Each entry of the returned list is
    ``([column], scaler)``; the column name is wrapped in a list and the
    scaler is created with ``copy=False``.
    """
    cont_map = []
    for col in cont_vars:
        scaler = MinMaxScaler(feature_range=(mn, mx), copy=False)
        cont_map.append(([col], scaler))
    return cont_map

In [96]:
# Pair each continuous column with a MinMaxScaler using the default (0, 1) range.
cont_map = cont_data(cont_vars)

Fit the map functions to the continuous and categorical features, but do not apply the transform until after the data is split into train, validate and test.  This fits the labels and the scaled range to the entire data set rather than to train, validate and test separately.  DataFrameMapper from sklearn-pandas will only transform the features whose column labels are included in the categorical and continuous feature lists, cat_map and cont_map.

In [97]:
def vars_mapped(cat_map, cont_map, df):
    """Fit one DataFrameMapper per feature group on ``df``.

    The categorical mapper is built from ``cat_map`` and fitted first, then
    the continuous mapper is built from ``cont_map`` and fitted.  Only
    ``fit`` is called here; no data is transformed.

    Returns ``(cat_map_fit, cont_map_fit)``, the values returned by each
    mapper's ``fit`` call.
    """
    cat_map_fit = DataFrameMapper(cat_map).fit(df)
    cont_map_fit = DataFrameMapper(cont_map).fit(df)
    return cat_map_fit, cont_map_fit

In [98]:
# Fit the encoders and scalers on the full frame; the transform is applied later, after the split.
cat_map_fit,cont_map_fit = vars_mapped(cat_map,cont_map,df)

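Split the data into train and validation sets by date: the last `vstart` distinct dates (default 2) form the validation set and all earlier dates form the training set.  The separate test split is currently commented out in the code.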

In [99]:
def split_data(df,vstart=2,tstart=1):
    """Split ``df`` into training and validation rows by date.

    The distinct values of ``df.Date`` are sorted; the last ``vstart`` of
    them form the validation period and everything earlier forms the
    training period.

    Parameters
    ----------
    df : DataFrame with at least ``ID`` and ``Date`` columns.
    vstart : number of trailing distinct dates assigned to validation.
        ``0`` puts every row in the training set; a value larger than the
        number of distinct dates puts every row in the validation set.
    tstart : currently unused; kept so existing callers keep working.

    Returns
    -------
    (data_train, data_validate, data) where ``data`` is ``df`` sorted by
    ``['ID', 'Date']`` and the other two are its row subsets, in the same
    order.

    Raises
    ------
    ValueError if ``vstart`` is negative.
    """
    if vstart < 0:
        raise ValueError('vstart must be non-negative, got %r' % (vstart,))
    dates = sorted(df.Date.unique())
    # An explicit cut index avoids the dates[-0:] pitfall, where a zero-length
    # validation window would otherwise select every date.
    cut = max(len(dates) - vstart, 0)
    dates_train = dates[:cut]
    dates_validate = dates[cut:]
    data = df.sort_values(by=['ID','Date'])
    data_train = data.loc[data.Date.isin(dates_train)]
    data_validate = data.loc[data.Date.isin(dates_validate)]
    return data_train,data_validate,data

In [100]:
# Split by date with the default vstart=2: the last two distinct dates become the validation set.
data_train,data_validate,data=split_data(df)

Encode and scale data and reshape into array of vectors. 
___________________________________________________________________________________________________________
Since the input layer of the neural network is a horizontally concatenated layer of each categorical variable in its own embedding input shared with the continuous variables each in its own dense input the train, validate and test data needs to be reshaped into a list of vectors for each feature.  To keep the array in mixed dtypes (i.e., int and float), input data is a list of arrays with each element in the list being a vector for the shared input features.

In [101]:
def map_shape_data(data_train,data_validate,cat_map_fit,cont_map_fit):
    """Turn the train / validation frames into model targets and inputs.

    Targets are the natural log of the ``Rev`` column, reshaped to a single
    column.  Inputs are produced by ``cat_map_fit.transform`` (cast to
    int64) and ``cont_map_fit.transform`` (cast to float32); each resulting
    matrix is split column-wise so that every feature becomes its own
    one-column array, categorical features first, continuous after.

    Returns ``(y_tr, y_val, data_tr, data_val)`` where the ``data_*`` values
    are lists of arrays.
    """
    def _columns(matrix):
        # one single-column array per feature column
        return np.hsplit(matrix, matrix.shape[1])

    frames = (data_train, data_validate)
    # log-transformed targets as column vectors
    y_tr, y_val = [np.log(frame.Rev.values).reshape(-1, 1) for frame in frames]
    # categorical codes (train, then validate)
    cat_tr, cat_val = [cat_map_fit.transform(frame).astype(np.int64) for frame in frames]
    # scaled continuous values (train, then validate)
    cont_tr, cont_val = [cont_map_fit.transform(frame).astype(np.float32) for frame in frames]
    data_tr = _columns(cat_tr) + _columns(cont_tr)
    data_val = _columns(cat_val) + _columns(cont_val)
    return y_tr, y_val, data_tr, data_val

In [102]:
# Build the log-revenue targets and the per-feature input arrays for training and validation.
y_tr,y_val,data_tr,data_val = map_shape_data(data_train,data_validate,cat_map_fit,cont_map_fit)

Function to create single input vector (input_shape = 1) for categorical input layer

In [103]:
def cat_input(feat,cat_vars_dict):
    """Build the input branch for one categorical feature.

    ``feat`` is an entry of the fitted categorical mapper's ``features``
    list; ``feat[0]`` is the column name and ``feat[1]`` the fitted encoder
    (its ``classes_`` gives the vocabulary size).  ``cat_vars_dict`` maps
    the column name to its distinct-value count, which is clamped to the
    range 5..50 and used as the embedding width.

    Returns ``(inp, u)``: the int64 ``Input`` named ``<column>_in`` and the
    flattened embedding output (Flatten layer named ``<column>_flt``).
    """
    name = feat[0]
    vocab_size = len(feat[1].classes_)
    # embedding width: distinct-value count, no smaller than 5 and no larger than 50
    emb_width = min(max(cat_vars_dict[name], 5), 50)
    inp = Input(shape=(1,), dtype='int64', name=name + '_in')
    # each sample is a single category code, so the (1, emb_width) embedding
    # output is flattened to a plain vector of length emb_width
    flatten = Flatten(name=name + '_flt')
    embedded = Embedding(vocab_size, emb_width, input_length=1)(inp)
    u = flatten(embedded)
    return inp, u

Create list of Input,Flatten,and Embedding layers for the categorical features

In [104]:
# One (Input, flattened Embedding) pair per fitted categorical feature.
embs = [cat_input(feat,cat_vars_dict) for feat in cat_map_fit.features]

The deprecation warning is an incompatibility between keras and tensorflow.keras.  The error message is an outstanding bug in tensorflow and does not occur in keras.  Tensorflow has an open issue report regarding this error message.

Function to create Input and Dense layer for continuous features

In [105]:
def cont_input(feat):
    """Build the input branch for one continuous feature.

    ``feat`` is an entry of the fitted continuous mapper's ``features``
    list whose first element is a one-item list holding the column name.

    Returns ``(inp, d)``: a one-value ``Input`` named ``<column>_in`` and the
    output of a single-unit ``Dense`` layer named ``<column>_d`` applied to it.
    """
    column = feat[0][0]
    inp = Input(shape=(1,), name=column + '_in')
    dense = Dense(1, name=column + '_d')
    return inp, dense(inp)

Create list of Input and Dense layers for continuous features

In [106]:
# One (Input, Dense) pair per fitted continuous feature.
conts = [cont_input(feat) for feat in cont_map_fit.features]

Build a four-layer model (three hidden Dense layers of 128, 128 and 64 units followed by a single-unit output layer) using a shared input layer for the categorical and continuous variables.  The hidden layers have high node counts because the sample count in the input data is large. 

In [107]:
def embed_model(conts,embs):
    """Assemble and compile the forecasting network.

    ``embs`` and ``conts`` are lists of ``(input, output)`` pairs for the
    categorical and continuous branches.  The branch outputs are
    concatenated (embeddings first) and passed through three hidden stages
    of 128, 128 and 64 units, each stage being
    Dense(relu) -> Dropout(0.6) -> BatchNormalization, followed by a
    single-unit Dense(relu) output.

    The model takes the branch inputs in the same order (categorical, then
    continuous) and is compiled with the Adam optimizer, mean absolute error
    loss and the ``mape`` metric.
    """
    inputs = [inp for inp, _ in embs] + [inp for inp, _ in conts]
    branches = [emb for _, emb in embs] + [d for _, d in conts]
    # merge every per-feature branch into one wide feature vector
    x = concatenate(branches)
    for units in (128, 128, 64):
        x = Dense(units, activation='relu', kernel_initializer='uniform', bias_initializer='zeros')(x)
        # NOTE(review): a rate of 0.6 drops 60% of activations -- confirm this heavy regularisation is intended
        x = Dropout(rate=0.6)(x)
        # BatchNormalization standardises activations per batch
        x = BatchNormalization()(x)
    # NOTE(review): relu on the output restricts predictions to >= 0 -- confirm this suits the log-revenue target
    x = Dense(1, activation='relu', kernel_initializer='uniform', bias_initializer='zeros')(x)
    model = Model(inputs, x)
    model.compile(optimizer='Adam', loss='mean_absolute_error', metrics=['mape'])
    return model

Implement the CSV logger, reduce the learning rate when the validation loss stops improving, checkpoint the best model, and build the model.  (EarlyStopping is imported above but is not configured in this cell.)

In [108]:
# Write per-epoch training metrics to Partner_Error.csv.
csv_logger = CSVLogger('Partner_Error.csv')
# Multiply the learning rate by 0.1 after 5 epochs without val_loss improvement, never going below 0.0001.
rlr = ReduceLROnPlateau(monitor='val_loss',factor=0.1,patience=5,min_lr=0.0001)
# Save to 'Partner_Best_Model' only when the monitored quantity improves.
mc = ModelCheckpoint('Partner_Best_Model',save_best_only=True)
# Assemble and compile the network from the prepared continuous and categorical branches.
model = embed_model(conts,embs)

This next process is cpu/gpu intensive.  This code should be run on a gpu.

In [109]:
# Train for 25 epochs in batches of 64, evaluating on the validation data each epoch and running the three callbacks.
model.fit(data_tr,y_tr,batch_size=64,epochs=25,verbose=1,validation_data = (data_val,y_val),callbacks=[csv_logger,rlr,mc])

Train on 7000 samples, validate on 1000 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x7f9538ef7b38>

In [110]:
# Restore weights from the file written by the ModelCheckpoint callback (same 'Partner_Best_Model' path).
model.load_weights('Partner_Best_Model')

In [111]:
def prediction(model_data,model=model):
    """Return ``model.predict(model_data)``.

    ``model`` defaults to the notebook-level ``model`` object as it existed
    when this function was defined.
    """
    return model.predict(model_data)

In [112]:
# Score the training and validation inputs with the restored model.
pred_tr = prediction(data_tr)
pred_val = prediction(data_val)

In [113]:
def array_to_list(arr):
    """Flatten one level of nesting: return the items of every row of ``arr`` as a single list."""
    flat = []
    for row in arr:
        flat.extend(row)
    return flat

In [114]:
def results_to_dataframe(df,pred_tr,pred_val,y_tr,y_val,data_train=None,data_validate=None):
    """Save actual and predicted values, labelled by ID and Date, to a pickle.

    Predictions and actuals are flattened to one value per row and stacked
    training rows first, validation rows second.  The result, with columns
    ``['ID', 'Date', 'Actual', 'Predict']``, is written to
    ``'Scaled_Predictions_Qtr.pkl'`` in the current directory.

    Parameters
    ----------
    df : DataFrame with ``ID`` and ``Date`` columns.  Used for the row
        labels only when ``data_train`` / ``data_validate`` are omitted.
    pred_tr, pred_val : predictions for the training / validation rows.
    y_tr, y_val : actual target values for the training / validation rows.
    data_train, data_validate : optional frames (with ``ID`` and ``Date``)
        whose rows are in the same order as the training / validation
        predictions.  Pass them whenever ``df`` is not already ordered
        training-rows-then-validation-rows; otherwise the ID/Date labels
        taken from ``df`` will not line up with the stacked values.

    Raises
    ------
    ValueError if only one of ``data_train`` / ``data_validate`` is given,
    or if the labels, actuals and predictions differ in length.
    """
    if (data_train is None) != (data_validate is None):
        raise ValueError('data_train and data_validate must be supplied together')
    # ravel so (n, 1) arrays contribute scalars, not one-element arrays
    preds = np.concatenate([np.ravel(pred_tr), np.ravel(pred_val)])
    actuals = np.concatenate([np.ravel(y_tr), np.ravel(y_val)])
    if data_train is None:
        # legacy path: labels in df's own row order
        dr = df.loc[:, ['ID', 'Date']].copy()
    else:
        # labels stacked in the same train-then-validate order as the values
        dr = pd.concat(
            [data_train.loc[:, ['ID', 'Date']], data_validate.loc[:, ['ID', 'Date']]],
            ignore_index=True,
        )
    if not (len(dr) == len(actuals) == len(preds)):
        raise ValueError(
            'length mismatch: %d label rows, %d actuals, %d predictions'
            % (len(dr), len(actuals), len(preds))
        )
    dr['Actual'] = actuals
    dr['Predict'] = preds
    dr = dr.loc[:,['ID','Date','Actual','Predict']]
    dr.to_pickle('Scaled_Predictions_Qtr.pkl')
    return

In [115]:
# Save the predictions and actuals to Scaled_Predictions_Qtr.pkl.
# NOTE(review): df is ordered by (ID, Date) while the predictions are stacked train-then-validate -- confirm the ID/Date labels line up with the values.
results_to_dataframe(df,pred_tr,pred_val,y_tr,y_val)

In [1]:
def validation_accuracy(y_val,pred_val):
    """Compute error metrics between actual and predicted values.

    Both inputs are flattened before comparison, so an ``(n, 1)`` array and
    an ``(n,)`` array are compared element by element rather than broadcast
    against each other.  Metrics are computed on whatever scale the inputs
    are supplied in.

    Parameters
    ----------
    y_val : actual values.
    pred_val : predicted values, same number of elements as ``y_val``.

    Returns
    -------
    (mse, mae, mape) as floats: mean squared error, mean absolute error and
    mean absolute percentage error (in percent).  MAPE is infinite or NaN
    when ``y_val`` contains zeros.

    Raises
    ------
    ValueError if the inputs are empty or differ in size.
    """
    actual = np.ravel(np.asarray(y_val, dtype=float))
    predicted = np.ravel(np.asarray(pred_val, dtype=float))
    if actual.size == 0 or actual.size != predicted.size:
        raise ValueError(
            'y_val and pred_val must be non-empty and the same size '
            '(got %d and %d)' % (actual.size, predicted.size)
        )
    abs_err = np.abs(actual - predicted)
    mse_value = float(np.mean(abs_err ** 2))
    mae_value = float(np.mean(abs_err))
    mape_value = float(np.mean(abs_err / np.abs(actual)) * 100)
    return mse_value, mae_value, mape_value

##### End of code: Close this file using File 'Close and Halt' from dropdown menu