In [32]:
#This notebook is intended to experiment the model on one customer with 
# model parameters to get best optimised model
#Loading the libraries
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten    
from keras.layers import LSTM
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from sklearn.preprocessing import MinMaxScaler
from keras import optimizers

# Reading csv file
#customers timeseries updated has data for 100 customers
#df = pd.read_csv("customers_timeseries_updated.csv")
# part-00000-581ed144-c0f2-45b7-b743-a35fb688f431-c000.csv dataset has 
# data for 1000 customers
df = pd.read_csv("part-00000-581ed144-c0f2-45b7-b743-a35fb688f431-c000.csv")
#only using required columns for modelling
df = df[['client_debtor_number','dates','fv_cost']]
# number of unique customers in the data
len(df['client_debtor_number'].unique())

1000

In [33]:
# subsetting one customer data from the set
# 1015193 is customer from cluster 1 with 130 customers
#df_1015130 = df[df['client_debtor_number'] == 1015193]
# 816580 is customer 
df_cust = df[df['client_debtor_number'] == 8165580]
del df_cust['client_debtor_number']


In [34]:
# splitting train and test datasets 
#training 2010-2017 years data, testing on 2018 data\
train_x = df_cust[df_cust['dates'].str.contains('/2018') == False]
test_x = df_cust[df_cust['dates'].str.contains('/2018') == True]
train_x.reset_index(drop=True,inplace=True)
test_x.reset_index(drop=True,inplace=True)


In [59]:
# lag_feature function is used to get new column in the dataframe with 
#lagged data,number of lags can be given as function parameter 
def lag_feature(df, lag=1):
    if not type(df) == pd.DataFrame:
        df = pd.DataFrame(df, columns=['fv_cost'])
    
    def rename_lag(ser, j):
        ser.name = ser.name + f'_{j}'
        return ser
        
    # add a column lagged by `i` steps
    for i in range(1, lag + 1):
        df = df.join(df.fv_cost.shift(i).pipe(rename_lag, i))

    df.dropna(inplace=True)
    return df

# Prepare training data function is used to scale the fv_cost values 
# between -1 to 1 and calls lag_feature to create lag columns 
def prepare_training_data(series_data, lag):
    " Converts a series of data into a lagged, scaled sample."
    # scale training data
    scaler = MinMaxScaler(feature_range=(-1, 1))
    #cost_vals = scaler.fit_transform(series_data.values.reshape(-1, 1))
    cost_vals = scaler.fit_transform(series_data.values.reshape(-1, 1))
    # convert series to lagged features
    cost_lagged = lag_feature(cost_vals, lag=lag)

    # X, y format taking the first column (original time series) to be the y
    X = cost_lagged.drop('fv_cost', axis=1).values
    y = cost_lagged.fv_cost.values
    
    # keras expects 3 dimensional X
    X = X.reshape(X.shape[0], 1, X.shape[1])
    
    return X, y, scaler


#sorting train and test data based on dates column values
train_x['dates'] = pd.to_datetime(train_x['dates'],format= '%d/%m/%Y')
train_x.sort_values(by='dates',inplace=True)
test_x['dates'] = pd.to_datetime(test_x['dates'],format= '%d/%m/%Y')
test_x.sort_values(by='dates',inplace=True)

# create train and test datasets in a format required by the model
train_x_cust,train_y_cust, scalar_train = prepare_training_data(train_x['fv_cost'], 3)
test_x_cust,test_y_cust,scalar_test = prepare_training_data(test_x['fv_cost'], 3)
print(train_x_cust.shape)
print(train_y_cust.shape)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(1017, 1, 3)
(1017,)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [105]:
# Model description
# lag is number of lags used to prepare the data
lag =  3

# model parameters
num_neurons = 50 #number of neurons/nodes for the layer
# actually we can give data in batches to the model
# number of samples in the data should be divisible by batch size
# we are giving all data for a customer as one batch
batch_size = 1  
# input_shape as required by LSTM layer
batch_input_shape=(batch_size, 1, lag)
# dropout rate used in dropout layer
dropout_rate =0.2
# instantiate a sequential model
model = Sequential()

#add convolution layer
# input_shape=(3,1) 3 lags columns and 1 y value
# when strides>1 you cannot have dilation>1
# activation_func options ['softmax', 'softplus', 'softsign', 'relu', 
#                   'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
# in conv1D layer we can modify filters,kernel_size,strides and activation
# parameters
model.add(Conv1D(filters=num_neurons,batch_size=1, kernel_size=3,
                 strides=3, 
                 padding="same",activation='linear',dilation_rate=1, 
                 input_shape=(1, 3),data_format='channels_first'))
# maxpooling layers tries to reduce the features by taking maximum value for
# window/pool size selected,strides and pool_size can be changed
model.add(MaxPooling1D(pool_size=3,strides=3, padding="same"))
model.add(Dropout(dropout_rate))
# add LSTM layer - stateful MUST be true here in 
# order to learn the patterns within a series
model.add(LSTM(units=num_neurons, 
              batch_input_shape=batch_input_shape, return_sequences=False,# as we only want last hidden output 
              stateful=True))

# followed by a dense layer with a single output for regression
#model.add(Dense(16,activation='linear'))
#model.add(Dense(18,activation='exponential'))
model.add(Dense(1,activation='linear'))
# we can add dropoutlayer after dense as well again
#optimizer function options 
#['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
adam = optimizers.Adam(lr=0.01, decay=0.01) 
sgd = optimizers.SGD(lr=0.01, momentum=0.0, decay=0.01, nesterov=False)
adadelta = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
#compile the model
# give the name directly if you want to use function with default parameters
model.compile(loss='mean_squared_error', optimizer='RMSprop')
#if you want manual edited parameters you can use below code
#adam = optimizers.Adam(lr=0.01, decay=0.01)
#model.compile(loss='mean_squared_error', optimizer=adam)

In [106]:
# fit the model
model.fit(train_x_cust, train_y_cust, epochs=10, batch_size=batch_size, verbose=2, shuffle=False)


Epoch 1/10
 - 18s - loss: 0.0182
Epoch 2/10
 - 14s - loss: 0.0161
Epoch 3/10
 - 14s - loss: 0.0164
Epoch 4/10
 - 14s - loss: 0.0169
Epoch 5/10
 - 14s - loss: 0.0166
Epoch 6/10
 - 14s - loss: 0.0167
Epoch 7/10
 - 14s - loss: 0.0165
Epoch 8/10
 - 14s - loss: 0.0169
Epoch 9/10
 - 14s - loss: 0.0173
Epoch 10/10
 - 14s - loss: 0.0219


<keras.callbacks.History at 0x1a400d6ba8>

In [107]:
# Evaluate model performance
# default batch size is 32 so we need to give batch size explicitly 
# if we want different batch size or 
# input size should be divisible by batch size for stateful LSTM
trainScore = model.evaluate(train_x_cust,train_y_cust,batch_size=1,verbose=2)
print('Train Score: %.2f MSE (%.2f RMSE)' % 
      (trainScore, sqrt(trainScore)))
testScore = model.evaluate(test_x_cust,test_y_cust,batch_size=1,verbose=2)
print('Test Score: %.2f MSE (%.2f RMSE)' % 
      (testScore,sqrt(testScore)))

# Model Predicitions
trainPredict = model.predict(train_x_cust,batch_size=1)
testPredict = model.predict(test_x_cust,batch_size=1)
# use these values when difference lag values are used for model
#yhat_act_test = [scalar_test.inverse_transform(testPredict)[i-lag]+diff_test[i]
#                 for i in range(lag,len(diff_test))]

#yhat_act_train = [scalar_train.inverse_transform(trainPredict)[i-lag]+diff_train[i]
#                  for i in range(lag,len(diff_train))]

# Getting inverse scaled values for the predicted values 
yhat_act_test = scalar_test.inverse_transform(testPredict)
yhat_act_train = scalar_train.inverse_transform(trainPredict)
print("scaled test rmse ",sqrt(mean_squared_error(test_y_cust,testPredict)))
print("scaled train rmse ",sqrt(mean_squared_error(train_y_cust,trainPredict)))
print ("actual test rmse ",sqrt(mean_squared_error(test_x.fv_cost[-(len(yhat_act_test)):], yhat_act_test)))
print ("actual train rmse ",sqrt(mean_squared_error(train_x.fv_cost[-(len(yhat_act_train)):],yhat_act_train)))


Train Score: 0.04 MSE (0.19 RMSE)
Test Score: 0.19 MSE (0.44 RMSE)
scaled test rmse  0.438323752274294
scaled train rmse  0.19371055505121876
actual test rmse  0.219161876137147
actual train rmse  1.3858194970365958


Extra Code

In [None]:
#Extra code to use difference values
# create a differenced series
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        #cannot take log here because of NAN's errors
        value = (dataset[i] - dataset[i - interval])/(1+abs(dataset[i - interval]))
        diff.append(value)
    return diff
 
# invert differenced forecast
def inverse_difference(last_ob, value):
    return value + last_ob

#sorting data based on dates
train_x['dates'] = pd.to_datetime(train_x['dates'],format= '%d/%m/%Y')
train_x.sort_values(by='dates',inplace=True)
test_x['dates'] = pd.to_datetime(test_x['dates'],format= '%d/%m/%Y')
test_x.sort_values(by='dates',inplace=True)

#diff_train = np.array(difference(train_x['fv_cost']))
#diff_test = np.array(difference(test_x['fv_cost']))
#train_x_cust,train_y_cust, scalar_train = prepare_training_data(diff_train, 3)
#test_x_cust,test_y_cust,scalar_test = prepare_training_data(diff_test, 3)
train_x_cust,train_y_cust, scalar_train = prepare_training_data(train_x['fv_cost'], 3)
test_x_cust,test_y_cust,scalar_test = prepare_training_data(test_x['fv_cost'], 3)

print(train_x_cust.shape)
print(train_y_cust.shape)

Gridsearch CV experiement

In [17]:
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

# Function to create model, required for KerasRegressor
def create_model(optimizer='adam',strides_num =3,kernel_size =3,num_neurons=1,
                 activation_func ='relu',dropout_rate = 0.2):
    lag =  3
    num_neurons = 24
    batch_size = 1  # this forces the lstm to step through each time-step one at a time
    batch_input_shape=(batch_size, 1, lag)
    model = Sequential()
    model.add(Conv1D(filters=num_neurons,batch_size=1, kernel_size=kernel_size, strides=strides_num, 
                     padding="same",activation=activation_func,dilation_rate=1, input_shape=(1, 3),
                     data_format='channels_first'))
    model.add(MaxPooling1D(pool_size=3,strides=3, padding="same"))
    model.add(Dropout(dropout_rate))

    model.add(LSTM(units=num_neurons, 
              batch_input_shape=batch_input_shape, return_sequences=False,# as we only want last hidden output 
              stateful=True))

    # followed by a dense layer with a single output for regression
    model.add(Dense(1))
    # we can add dropoutlayer after dense as well again
    model.compile(loss='mean_absolute_error', optimizer=optimizer)
    return model


model = KerasRegressor(build_fn=create_model, verbose=2,batch_size = 1,epochs=5)


In [19]:
#optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
#activation_func = ['softmax', 'softplus', 'softsign', 'relu', 
#                   'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
activation_func = ['softmax', 'relu', 
                   'tanh', 'sigmoid', 'linear']
#param_grid = dict(optimizer=optimizer,activation_func = activation_func)
param_grid = dict(activation_func = activation_func)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(train_x_cust, train_y_cust)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

KeyboardInterrupt: 

#FORECAST BIAS
forecast_errors = [expected[i]-predictions[i] for i in range(len(expected))]
bias = sum(forecast_errors) * 1.0/len(expected)
print('Bias: %f' % bias)

In [12]:
# write predictions into csv
result_prediction = pd.DataFrame()#, original_fv : test_y_cust, prediction_fv :testPredict})
#result_prediction['dates'] = test_x['dates']
result_prediction['original_fv'] = test_y_cust
result_prediction['prediction_fv'] = testPredict

result_prediction.to_csv("predictions_for_client_1015193.csv")

In [None]:
# weekly average for 3 months
weekly_avg_pred = [(sum(pred_list[x:x+7]))/7 for x in range(0, len(pred_list), 7)]
weekly_avg_pred

https://stats.stackexchange.com/questions/305863/how-to-train-lstm-model-on-multiple-time-series-data

https://stats.stackexchange.com/questions/67362/shall-i-use-weekly-or-monthly-data-for-forecast

In [None]:
#weekly predicitons
#weekly data
week_data = df_1015130.set_index('dates').resample('1W').mean()
#week_data['dates'] = pd.to_datetime(week_data['dates'], errors='coerce')

week_data = week_data.resample('1W')


In [None]:
#week_data['dates'] = pd.to_datetime(week_data['dates'], unit='D',utc=True)
week_data = df_1015130
##week_data['New']=week_data.dates.map(week_data.set_index('dates').iloc[1:].resample('D').sum().rolling(7,min_periods =1).visit.mean()).shift()
week_data['dates'] = pd.to_datetime(week_data['dates'])


In [None]:
week_data.set_index('dates',inplace=True)
week_data_resample = week_data.resample('1W').mean()

In [None]:
week_data_resample.describe()

In [None]:
#c_index = week_data_resample.index("/2018")
#l2 = l[:c_index]
week_data_resample['dates'] = week_data_resample.index
week_data_resample['dates']=week_data_resample['dates'].astype(str)
week_data_resample = week_data_resample.fillna(0)

In [None]:
train_week = week_data_resample[week_data_resample['dates'].str.contains('2018') == False]
test_week = week_data_resample[week_data_resample['dates'].str.contains('2018') == True]

train_x_week,train_y_week, scalar_train = prepare_training_data(train_week['fv_cost'], 3)
test_x_week,test_y_week,scalar_test = prepare_training_data(test_week['fv_cost'], 3)
print(train_x_week.shape)
print(train_y_week.shape)


In [None]:
_12_weeks_ahead,all_pred = timestep_ahead_week(12,model,futureElement,x)
print ("scaled prediction value on 3rd month ahead ",_12_weeks_ahead)
print ("all predicitons ",all_pred)

In [None]:
# converting list of arrays to lists
#l = [arr.tolist() for arr in l]
#[l.tolist() for l in list1]

#weekly_pred = [arr.tolist() for arr in all_pred] #list object has no attribute tolist
weekly_pred =[np.array(arr).tolist() for arr in all_pred]
weekly_pred