In [276]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML
display(HTML('<style>.container { width:90% !important; }</style>'))
import random
import warnings
import datetime
import plotly.graph_objects as go

from tensorflow.keras.layers import Dense, Input, Conv1D, MaxPooling1D, Flatten, Dropout, GlobalMaxPooling1D, LSTM, BatchNormalization
from keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.initializers import Ones

### data access

In [2]:
weekly_income = pd.read_csv("transactions_per_users_daily.csv")
feuture_orders = pd.read_csv("frequency_prediction.csv")
weekly_income.head()

Unnamed: 0,user_id,transaction_value,transaction_count,days
0,user_356,5.956667,3,2018-01-01
1,user_375,5.333333,3,2018-01-02
2,user_46,16.17,3,2018-01-02
3,user_472,4.766,5,2018-01-02
4,user_657,18.666667,3,2018-01-02


### data manipulations

In [3]:
weekly_income['days'] = weekly_income['days'].apply(lambda x: datetime.datetime.strptime(str(x)[0:10], '%Y-%m-%d'))
weekly_income['weeks'] = weekly_income['days'].apply(lambda x: x + datetime.timedelta(days=-x.isoweekday() + 1))
weekly_income = weekly_income.groupby(["user_id", "weeks"]).agg({"transaction_value": "sum"}).reset_index()

In [4]:
weekly_income = weekly_income.sort_values(by=["user_id", "weeks"], ascending=True)

In [5]:
weekly_income['order_seq_num'] = weekly_income.sort_values(by=["user_id", "weeks"]).groupby(['user_id']).cumcount()+1
weekly_income = weekly_income.sort_values(by=["user_id", "weeks"])
weekly_income

Unnamed: 0,user_id,weeks,transaction_value,order_seq_num
0,user_0,2018-01-15,46.490714,1
1,user_0,2018-01-29,173.471000,2
2,user_0,2018-02-19,114.085000,3
3,user_0,2018-02-26,153.651333,4
4,user_0,2018-03-05,10.417778,5
...,...,...,...,...
315294,user_9999,2019-03-18,71.486667,15
315295,user_9999,2019-03-25,21.915000,16
315296,user_9999,2019-04-08,0.090000,17
315297,user_9999,2019-04-15,17.023333,18


#### * Number of previous order to check
   - For this case, we assume it is better for to use last 30 order for each user.

In [6]:
weekly_income = pd.merge(weekly_income, 
                          weekly_income.groupby('user_id'
                                            )['order_seq_num'].max().reset_index().rename(columns={"order_seq_num": "max_order"}), 
                          on='user_id', how='left')
weekly_income['prev_orders'] = weekly_income['max_order'] - 30
weekly_income

Unnamed: 0,user_id,weeks,transaction_value,order_seq_num,max_order,prev_orders
0,user_0,2018-01-15,46.490714,1,46,16
1,user_0,2018-01-29,173.471000,2,46,16
2,user_0,2018-02-19,114.085000,3,46,16
3,user_0,2018-02-26,153.651333,4,46,16
4,user_0,2018-03-05,10.417778,5,46,16
...,...,...,...,...,...,...
315294,user_9999,2019-03-18,71.486667,15,19,-11
315295,user_9999,2019-03-25,21.915000,16,19,-11
315296,user_9999,2019-04-08,0.090000,17,19,-11
315297,user_9999,2019-04-15,17.023333,18,19,-11


In [7]:
weekly_income = weekly_income.query("order_seq_num > prev_orders")

In [8]:
weekly_income['order_seq_num'] = weekly_income.sort_values(by=["user_id", "weeks"]).groupby(['user_id']).cumcount()+1
weekly_income['order_seq_num'] = weekly_income.apply(
    lambda row: row['order_seq_num'] + abs(row['prev_orders']) if row['prev_orders'] != 0 else row['order_seq_num'], axis=1)
weekly_income

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,user_id,weeks,transaction_value,order_seq_num,max_order,prev_orders
16,user_0,2018-07-09,13.886667,17,46,16
17,user_0,2018-07-16,164.162667,18,46,16
18,user_0,2018-07-23,27.870000,19,46,16
19,user_0,2018-07-30,238.601000,20,46,16
20,user_0,2018-08-06,36.420000,21,46,16
...,...,...,...,...,...,...
315294,user_9999,2019-03-18,71.486667,26,19,-11
315295,user_9999,2019-03-25,21.915000,27,19,-11
315296,user_9999,2019-04-08,0.090000,28,19,-11
315297,user_9999,2019-04-15,17.023333,29,19,-11


### Min - Max Normalization For Model Training

In [9]:
def min_max_norm(value, _min, _max):
    if abs(_max - _min) != 0:
        return (value - _min) / abs(_max - _min)
    else: return 0

In [10]:
weekly_income['user_max'], weekly_income['user_min'] = weekly_income['transaction_value'], weekly_income['transaction_value']
users_min_max = weekly_income.groupby("user_id").agg({"user_max": "max", "user_min": "min"}).reset_index()
display(users_min_max.head())
weekly_income = pd.merge(weekly_income.drop(["user_max", "user_min"], axis=1), users_min_max, on='user_id', how='left')
weekly_income.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,user_id,user_max,user_min
0,user_0,334.010004,1.59
1,user_1,475.0,0.8
2,user_10,1348.580833,15.897
3,user_100,1537.9,0.55
4,user_1000,1950.46,10.0


Unnamed: 0,user_id,weeks,transaction_value,order_seq_num,max_order,prev_orders,user_max,user_min
0,user_0,2018-07-09,13.886667,17,46,16,334.010004,1.59
1,user_0,2018-07-16,164.162667,18,46,16,334.010004,1.59
2,user_0,2018-07-23,27.87,19,46,16,334.010004,1.59
3,user_0,2018-07-30,238.601,20,46,16,334.010004,1.59
4,user_0,2018-08-06,36.42,21,46,16,334.010004,1.59


In [11]:
weekly_income['transaction_value_norm'] = weekly_income.apply(lambda row: min_max_norm(row['transaction_value'],   
                                                                                         row['user_min'], 
                                                                                         row['user_max']), axis=1)

In [12]:
max_order = max(weekly_income['max_order'])
max_order

71

### Features Set Aggrigations

In [310]:
weekly_income

Unnamed: 0,user_id,weeks,transaction_value,order_seq_num,max_order,prev_orders,user_max,user_min,transaction_value_norm
0,user_0,2018-07-09,13.886667,42,71,16,334.010004,1.59,0.036991
1,user_0,2018-07-16,164.162667,43,71,16,334.010004,1.59,0.489058
2,user_0,2018-07-23,27.870000,44,71,16,334.010004,1.59,0.079057
3,user_0,2018-07-30,238.601000,45,71,16,334.010004,1.59,0.712987
4,user_0,2018-08-06,36.420000,46,71,16,334.010004,1.59,0.104777
...,...,...,...,...,...,...,...,...,...
277874,user_9999,2019-03-18,71.486667,67,71,-11,71.486667,0.09,1.000000
277875,user_9999,2019-03-25,21.915000,68,71,-11,71.486667,0.09,0.305687
277876,user_9999,2019-04-08,0.090000,69,71,-11,71.486667,0.09,0.000000
277877,user_9999,2019-04-15,17.023333,70,71,-11,71.486667,0.09,0.237173


In [13]:
weekly_income['order_seq_num'] = weekly_income.sort_values(by=["user_id", "weeks"], ascending=False).groupby(['user_id']).cumcount()+0
weekly_income['max_order'] = max_order
weekly_income['order_seq_num'] = weekly_income['max_order'] - weekly_income['order_seq_num']
weekly_income.query("user_id == 'user_0'")

Unnamed: 0,user_id,weeks,transaction_value,order_seq_num,max_order,prev_orders,user_max,user_min,transaction_value_norm
0,user_0,2018-07-09,13.886667,42,71,16,334.010004,1.59,0.036991
1,user_0,2018-07-16,164.162667,43,71,16,334.010004,1.59,0.489058
2,user_0,2018-07-23,27.87,44,71,16,334.010004,1.59,0.079057
3,user_0,2018-07-30,238.601,45,71,16,334.010004,1.59,0.712987
4,user_0,2018-08-06,36.42,46,71,16,334.010004,1.59,0.104777
5,user_0,2018-09-03,73.7675,47,71,16,334.010004,1.59,0.217127
6,user_0,2018-10-08,63.997,48,71,16,334.010004,1.59,0.187735
7,user_0,2018-10-15,50.631667,49,71,16,334.010004,1.59,0.147529
8,user_0,2018-10-22,30.24,50,71,16,334.010004,1.59,0.086186
9,user_0,2018-10-29,13.3,51,71,16,334.010004,1.59,0.035227


In [None]:
monthly_income_per_user = pd.DataFrame(np.array(weekly_income.pivot_table(index="user_id", 
                                                                          columns="order_seq_num", 
                                                                          aggfunc={"transaction_value_norm": "first"}).reset_index()))
monthly_income_per_user = monthly_income_per_user.fillna(0)
monthly_income_per_user.head()

### CNN Model implementation

In [439]:
## tunned paramters:
params = {
    'activation': 'relu',
    'batch_size': 32,
    'epochs': 40,
    'l1': 0.0001,
    'l2': 0.0001,
    'lr': 0.001,
    'split_ratio': 0.8,
    'filters': 2,
    'kernel_size': 4,
    'max_pooling_unit': 2,
    'lstm_units': 32,
    'units': 8,
    'loss': 'mae',
    'drop_out_ratio': 0.1
}

### Train - Test Split

In [345]:
index = range(len(monthly_income_per_user))
train_index = random.sample(index, int(len(monthly_income_per_user) * params['split_ratio']))
test_index = list(set(index) - set(train_index))

In [346]:
len(monthly_income_per_user[list(monthly_income_per_user.columns)[1:-1]].iloc[train_index])

16889

In [347]:
train = monthly_income_per_user[list(monthly_income_per_user.columns)[1:-1]].iloc[train_index]
test = monthly_income_per_user[list(monthly_income_per_user.columns)[1:-1]].iloc[test_index]

In [348]:
train_x = train.values.reshape(len(train_index), 29, 1)
test_x = test.values.reshape(len(test_index), 29, 1)

In [349]:
train_x.shape[1], train_x.shape[2]

(29, 1)

### Train Model

In [453]:
_input = Input(shape=(train_x.shape[1], 1, ))
conv = Conv1D(filters=params['filters'], 
              kernel_size=params['kernel_size'], 
              padding='same', activation=params['activation'],
              # kernel_regularizer=l1_l2(l1=params['l1'], l2=params['l2']),
              # bias_regularizer=l2(params['l2']),
              # activity_regularizer=l2(params['l2'])
             )(_input)
conv = BatchNormalization()(conv)
conv = MaxPooling1D(params['max_pooling_unit'])(conv)
conv = Dropout(params['drop_out_ratio'])(conv)
conv = LSTM(params['lstm_units'], 
            use_bias=False, 
            activation=params['activation'],
            # kernel_regularizer=l1_l2(l1=params['l1'], l2=params['l2']),
            # bias_regularizer=l2(params['l2']),
            # activity_regularizer=l2(params['l2'])
           )(conv)
conv = BatchNormalization()(conv)
conv = Flatten()(conv)
conv = Dense(params['units'], activation=params['activation'],
             # kernel_regularizer=l1_l2(l1=params['l1'], l2=params['l2']),
             # bias_regularizer=l2(params['l2']),
             # activity_regularizer=l2(params['l2'])
            )(conv) 
conv = BatchNormalization()(conv)
output = Dense(1, activation=params['activation'], 
              kernel_regularizer=l1_l2(l1=params['l1'], l2=params['l2']),
              bias_regularizer=l2(params['l2']),
              activity_regularizer=l2(params['l2'])
              )(conv) 
model = Model(inputs=_input, outputs=output)
model.compile(loss=params['loss'], 
              optimizer=Adam(lr=params['lr']), 
              metrics=[params['loss']])
model.summary()

Model: "model_73"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_81 (InputLayer)        [(None, 29, 1)]           0         
_________________________________________________________________
conv1d_99 (Conv1D)           (None, 29, 2)             10        
_________________________________________________________________
batch_normalization_108 (Bat (None, 29, 2)             8         
_________________________________________________________________
max_pooling1d_98 (MaxPooling (None, 14, 2)             0         
_________________________________________________________________
dropout_98 (Dropout)         (None, 14, 2)             0         
_________________________________________________________________
lstm_79 (LSTM)               (None, 32)                4352      
_________________________________________________________________
batch_normalization_109 (Bat (None, 32)                128

In [454]:
train_x.shape

(16889, 29, 1)

In [455]:
history = model.fit(train_x,
                    monthly_income_per_user[list(monthly_income_per_user.columns)[-1]].iloc[train_index].values, 
                    batch_size=params['batch_size'],
                    epochs=params['epochs'],
                    verbose=1,
                    validation_split=1-params['split_ratio'],
                    shuffle=True)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


### Train And Validation Set MAE Epoch Loss 

In [456]:
_epochs = list(range(1, params['epochs'] + 1))
fig = go.Figure()
fig.add_trace(go.Scatter(x= _epochs, y= model.history.history['loss'],
                         mode='lines+markers',
                         name='Train'))
fig.add_trace(go.Scatter(x= _epochs, y= model.history.history['val_loss'],
                         mode='lines+markers',
                         name='Validation'))
fig.show()

- Validation and train sets of lost values are almost same for each epoch. This shows we are going correct way to calculate optimum weigh function with minimum Bais and Variance Trade off.

### Test Model With Test Data Set - Check the residuals of the Distribution (MAE)

In [457]:
prediction = [i[0] for i in model.predict(monthly_income_per_user[list(range(2, 31))].values.reshape(len(monthly_income_per_user), 29, 1)).tolist()]

In [458]:
residuals =  pd.DataFrame(zip(list(monthly_income_per_user['user_id']), 
                              list(monthly_income_per_user[30]), 
                                   prediction)).rename(columns={0: "user_id", 1: "actual", 2: "prediction"})
display(residuals.head())
residuals['residuals'] = residuals.apply(lambda row: abs(row['actual'] - row['prediction']), axis=1) 
import plotly.express as px
fig = px.histogram(residuals, x="residuals")
fig.show()

Unnamed: 0,user_id,actual,prediction
0,user_0,0.677449,0.164893
1,user_1,0.021826,0.111685
2,user_10,0.064432,0.08539
3,user_100,0.077222,0.060666
4,user_1000,0.0,0.240479


- Model still need to continue for training. 
- Needs to continue model improvements as we see on there are a significant number of users of test values have very high residuals which are almost 1.

## Prediction
 - First, let`s see the users total number of orders that we have predicted for next month.

In [459]:
feuture_orders['order_seq_num'] = feuture_orders.sort_values(by=["user_id", "created_date"]).groupby(['user_id']).cumcount()+1
num_of_future_orders = feuture_orders.groupby("user_id").agg({"order_seq_num": "max"}).reset_index()
num_of_future_orders.sort_values(by='order_seq_num')

Unnamed: 0,user_id,order_seq_num
1476,user_9994,1
591,user_1636,1
592,user_16369,1
1172,user_6196,1
1171,user_6168,1
...,...,...
471,user_14948,27
861,user_2574,30
731,user_18189,32
787,user_19089,49


In [460]:
monthly_income_per_user = monthly_income_per_user.rename(columns={0: "user_id"})

In [461]:
prediction_data

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,22,23,24,25,26,27,28,29,30,user_id
0,0.036991,0.489058,0.079057,0.712987,0.104777,0.217127,0.187735,0.147529,0.086186,0.035227,...,0.012574,0.319365,0.014364,0.006979,0.000000,0.344446,1.000000,0.136214,0.677449,user_0
1,0.000000,0.000000,0.040489,0.239213,0.105283,0.703729,0.265429,0.484029,0.208140,0.026550,...,0.094264,0.019401,0.029523,0.187453,0.000000,1.000000,0.072121,0.480223,0.021826,user_1
2,0.048114,0.080794,0.217862,0.601820,0.000000,0.034936,1.000000,0.095036,0.066250,0.187015,...,0.021275,0.257200,0.403631,0.119683,0.022831,0.005315,0.138356,0.084608,0.064432,user_10
3,0.264826,0.045426,0.005009,0.136179,0.688249,0.014252,0.000000,0.356258,1.000000,0.422344,...,0.357498,0.025642,0.168836,0.186765,0.006628,0.052636,0.086610,0.053512,0.077222,user_100
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.037964,0.818332,0.499443,0.502953,0.502515,1.000000,0.502001,0.503468,0.000000,user_1000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18761,0.206282,0.584965,0.435876,0.040057,0.128427,0.288363,0.487666,0.200875,0.034228,0.391209,...,0.110976,0.000000,1.000000,0.000000,0.412083,0.304416,0.081222,0.081638,0.190102,user_9995
18762,1.000000,0.279213,0.060469,0.001611,0.174307,0.007942,0.004247,0.182322,0.161888,0.100126,...,0.302229,0.240750,0.034221,0.161888,0.151874,0.206219,0.016223,0.134840,0.000000,user_9996
18763,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.354517,0.666835,0.716888,0.096778,0.000000,0.032344,0.564624,0.376425,0.913834,user_9997
18764,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.717770,0.000000,user_9998


In [462]:
model_num = model.input.shape[1]
users = list(monthly_income_per_user['user_id'])
columns = list(range(1,31)) 
prediction_data = monthly_income_per_user[columns + ["user_id"]]

In [463]:
print("number of features :", model_num)
print("number of users :", len(users))
print("data set columns :", columns)
print("prediction data :")
display(prediction_data.head())

number of features : 29
number of users : 18766
data set columns : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
prediction data :


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,22,23,24,25,26,27,28,29,30,user_id
0,0.036991,0.489058,0.079057,0.712987,0.104777,0.217127,0.187735,0.147529,0.086186,0.035227,...,0.012574,0.319365,0.014364,0.006979,0.0,0.344446,1.0,0.136214,0.677449,user_0
1,0.0,0.0,0.040489,0.239213,0.105283,0.703729,0.265429,0.484029,0.20814,0.02655,...,0.094264,0.019401,0.029523,0.187453,0.0,1.0,0.072121,0.480223,0.021826,user_1
2,0.048114,0.080794,0.217862,0.60182,0.0,0.034936,1.0,0.095036,0.06625,0.187015,...,0.021275,0.2572,0.403631,0.119683,0.022831,0.005315,0.138356,0.084608,0.064432,user_10
3,0.264826,0.045426,0.005009,0.136179,0.688249,0.014252,0.0,0.356258,1.0,0.422344,...,0.357498,0.025642,0.168836,0.186765,0.006628,0.052636,0.08661,0.053512,0.077222,user_100
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.037964,0.818332,0.499443,0.502953,0.502515,1.0,0.502001,0.503468,0.0,user_1000


In [470]:
def check_for_next_prediction(data, model_num):
    columns = sorted([int(col) for col in data.columns], reverse=False)[-model_num:]
    data_for_pred = pd.DataFrame([list(data[col])[0] for col in columns])
    data_for_pred = data_for_pred.values.reshape(1, model_num, 1)
    return data_for_pred


def add_predicted_values_as_column(data, pred):
    max_num = max(list(data.columns))
    data[max_num+1] = pred
    return data
    
    
def get_prediction(data, number, model_num, model):
    for num in range(0, number+1):
        _pred_data = check_for_next_prediction(data, model_num)
        _pred = model.predict(_pred_data)[0]
        data = add_predicted_values_as_column(data, _pred)
    return data


def get_predicted_data_readable_form(user, prediction, removing_columns, norm_data):
    removing_cols = list(range(removing_columns+1))
    predictions = [{"user_id": user, 
                    "user_min": list(norm_data['user_min'])[0], 
                    "user_max": list(norm_data['user_max'])[0], 
                    "pred_order_seq": col - removing_columns, 
                    "prediction": list(prediction[col])[0]} for col in prediction.columns if col not in removing_cols]
    predictions = pd.DataFrame(predictions)
    predictions['prediction_values'] = predictions.apply(lambda row: ((row['user_max'] - row['user_min']) * row['prediction']) + row['user_min'], axis=1)
    return predictions
    
    

### Test How it predicts
  - On each iteration, users of next orders of purchase amounts are predicted by the model

In [471]:
prediction = pd.DataFrame()
for u in num_of_future_orders.to_dict('results')[0:10]:
    _number, _user = u['order_seq_num'], u['user_id']
    _prediction_data = prediction_data.query("user_id == @_user").drop('user_id', axis=1)
    _prediction = get_prediction(_prediction_data, _number, model_num, model)
    display(_prediction.head())
    prediction = get_predicted_data_readable_form(_user, _prediction, model_num + 1, users_min_max.query("user_id == @_user"))
    display(prediction.head())

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,25,26,27,28,29,30,31,32,33,34
16,0.0,0.0,0.0,0.0,0.0,0.003171,0.928917,0.175571,0.02232,0.123904,...,0.324057,0.089246,0.02407,1.0,0.087998,0.0,0.19976,0.148823,0.152188,0.129857


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10011,3.5,580.559333,1,0.19976,118.773143
1,user_10011,3.5,580.559333,2,0.148823,89.379422
2,user_10011,3.5,580.559333,3,0.152188,91.321267
3,user_10011,3.5,580.559333,4,0.129857,78.435196


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,28,29,30,31,32,33,34,35,36,37
27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008499,...,0.015897,0.233193,0.324602,0.09502,0.075401,0.086735,0.048797,0.089221,0.046926,0.065493


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10021,0.54,835.5,1,0.09502,79.877628
1,user_10021,0.54,835.5,2,0.075401,63.496459
2,user_10021,0.54,835.5,3,0.086735,72.960158
3,user_10021,0.54,835.5,4,0.048797,41.283708
4,user_10021,0.54,835.5,5,0.089221,75.036023


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,24,25,26,27,28,29,30,31,32,33
32,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.507861,0.669825,0.210526,0.123206,0.785144,0.200957,0.086124,0.151582,0.184734,0.133455


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10026,4.0,213.0,1,0.151582,35.680679
1,user_10026,4.0,213.0,2,0.184734,42.609438
2,user_10026,4.0,213.0,3,0.133455,31.892022


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,23,24,25,26,27,28,29,30,31,32
36,0.209155,0.178885,0.107331,0.094307,0.110342,0.080162,0.094307,0.393424,0.079026,0.360241,...,0.253218,0.361879,0.188615,0.079541,1.0,0.0,0.029205,0.039685,0.122098,0.158718


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_1003,0.0,1804.845,1,0.122098,220.368285
1,user_1003,0.0,1804.845,2,0.158718,286.461774


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,26,27,28,29,30,31,32,33,34,35
39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006261,0.012778,...,0.053995,0.024966,0.009775,0.517167,0.152083,0.043208,0.022751,0.045786,0.043081,0.058418


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10032,3.83,1702.12,1,0.043208,77.209403
1,user_10032,3.83,1702.12,2,0.022751,42.467533
2,user_10032,3.83,1702.12,3,0.045786,81.587351
3,user_10032,3.83,1702.12,4,0.043081,76.993716
4,user_10032,3.83,1702.12,5,0.058418,103.04012


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,26,27,28,29,30,31,32,33,34,35
41,0.0,0.052515,0.26122,0.117115,0.170708,0.466308,0.0,1.0,0.045974,0.008354,...,0.05333,0.033487,0.059495,0.027965,0.033968,0.010572,0.0,0.0,0.0,0.0


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10034,0.99,191.315,1,0.010572,3.00216
1,user_10034,0.99,191.315,2,0.0,0.99
2,user_10034,0.99,191.315,3,0.0,0.99
3,user_10034,0.99,191.315,4,0.0,0.99
4,user_10034,0.99,191.315,5,0.0,0.99


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,23,24,25,26,27,28,29,30,31,32
52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.411565,0.807069,0.812421,1.0,0.358843,0.161551,0.532834,0.47747,0.255538,0.169486


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10044,5.2475,431.876667,1,0.255538,114.267598
1,user_10044,5.2475,431.876667,2,0.169486,77.555273


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,25,26,27,28,29,30,31,32,33,34
60,0.018338,0.424502,0.363226,0.003526,0.003526,0.227457,0.053955,0.141059,0.030426,0.183376,...,0.240336,0.858695,0.004676,0.021181,0.006171,0.238156,0.165366,0.128686,0.145685,0.11109


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10051,5.833333,478.45,1,0.165366,83.988171
1,user_10051,5.833333,478.45,2,0.128686,66.652395
2,user_10051,5.833333,478.45,3,0.145685,74.686381
3,user_10051,5.833333,478.45,4,0.11109,58.336282


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,23,24,25,26,27,28,29,30,31,32
64,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.395252,0.58966,0.088298,0.109906,0.094344,1.0,0.121508,0.399034,0.138262,0.153739


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10055,5.33,2493.143333,1,0.138262,349.299537
1,user_10055,5.33,2493.143333,2,0.153739,387.804763


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,24,25,26,27,28,29,30,31,32,33
66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.002767,9.4e-05,0.00447,0.02744,0.006098,0.015104,0.012768,0.021373,0.037788,0.015965


Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10057,0.73,1276.59,1,0.021373,27.998397
1,user_10057,0.73,1276.59,2,0.037788,48.941604
2,user_10057,0.73,1276.59,3,0.015965,21.099017


In [473]:
results = pd.DataFrame()
for u in num_of_future_orders.to_dict('results'):
    _number, _user = u['order_seq_num'], u['user_id']
    _prediction_data = prediction_data.query("user_id == @_user").drop('user_id', axis=1)
    _prediction = get_prediction(_prediction_data, _number, model_num, model)
    prediction = get_predicted_data_readable_form(_user, _prediction, model_num + 1, users_min_max.query("user_id == @_user"))
    results = pd.concat([results, prediction])


In [475]:
results.head()

Unnamed: 0,user_id,user_min,user_max,pred_order_seq,prediction,prediction_values
0,user_10011,3.5,580.559333,1,0.19976,118.773143
1,user_10011,3.5,580.559333,2,0.148823,89.379422
2,user_10011,3.5,580.559333,3,0.152188,91.321267
3,user_10011,3.5,580.559333,4,0.129857,78.435196
0,user_10021,0.54,835.5,1,0.09502,79.877628


In [476]:
sum(results['prediction_values'])

1014070871.3884847

In [474]:
results.to_csv("monetary_value_for_clv_calculation.csv", index=False)