# Dependencies

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

# Project imports

In [2]:
from propensity_prediction.tasks.converting_action_prediction.converting_action_prediction import NextAction_InSession_Task

# Experiments

### Load data

In [3]:
# Optional one-time setup (uncomment to run): install the kaggle package, create
# ~/.kaggle, copy kaggle.json into it (presumably the Kaggle API credentials
# file - confirm), and restrict that file to owner read/write (chmod 600).
# !pip install kaggle
# !mkdir -p ~/.kaggle
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Optional one-time download (uncomment to run): fetch 2019-Dec.csv from the
# mkechinov/ecommerce-events-history-in-cosmetics-shop Kaggle dataset, extract
# the downloaded zip archive into the working directory, then delete the archive.
# !kaggle datasets download mkechinov/ecommerce-events-history-in-cosmetics-shop -f 2019-Dec.csv

# import zipfile
# zip_ref = zipfile.ZipFile('426888%2F1015349%2Fcompressed%2F2019-Dec.csv.zip', 'r')
# zip_ref.extractall()
# zip_ref.close()
# !rm 426888%2F1015349%2Fcompressed%2F2019-Dec.csv.zip

In [5]:
# Load the first 10,000 rows of the event log with every column read as text
# (dtype=str), then turn `price` into a number and keep only rows whose price
# is >= 0. Rows with a missing price also fail that comparison and are dropped.
df = (
    pd.read_csv('./2019-Dec.csv', dtype=str, nrows=10000)
    .assign(price=lambda events: pd.to_numeric(events['price']))
    .loc[lambda events: events['price'] >= 0]
)
# Last expression of the cell: render the filtered frame as the cell output.
df

Unnamed: 0,event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session
0,2019-12-01 00:00:00 UTC,remove_from_cart,5712790,1487580005268456287,,f.o.x,6.27,576802932,51d85cb0-897f-48d2-918b-ad63965c12dc
1,2019-12-01 00:00:00 UTC,view,5764655,1487580005411062629,,cnd,29.05,412120092,8adff31e-2051-4894-9758-224bfa8aec18
2,2019-12-01 00:00:02 UTC,cart,4958,1487580009471148064,,runail,1.19,494077766,c99a50e8-2fac-4c4d-89ec-41c05f114554
3,2019-12-01 00:00:05 UTC,view,5848413,1487580007675986893,,freedecor,0.79,348405118,722ffea5-73c0-4924-8e8f-371ff8031af4
4,2019-12-01 00:00:07 UTC,view,5824148,1487580005511725929,,,5.56,576005683,28172809-7e4a-45ce-bab0-5efa90117cd5
...,...,...,...,...,...,...,...,...,...
9995,2019-12-01 05:57:58 UTC,cart,5695958,1487580007575323592,,,1.43,580067784,b2327b07-825d-456d-a229-18c5226277a9
9996,2019-12-01 05:58:00 UTC,cart,5809826,1487580009286598681,,,0.35,445669783,2af5e540-c558-4c39-8c18-dc56e614765f
9997,2019-12-01 05:58:00 UTC,cart,5886483,1487580007432717250,,runail,2.38,429084578,51bbe2ed-800f-4e08-8ef7-cb82aad3e677
9998,2019-12-01 05:58:01 UTC,view,5807067,1487580012574933146,,masura,6.02,429518973,aa940cf9-b0d9-4a64-b490-65c084a0f116


### Split data

In [6]:
# Hold out 20% of the event rows as the test set; the fixed random_state makes
# the shuffle repeatable across runs.
# NOTE(review): rows are split individually, so events from one user_session
# can end up in both train and test - confirm this is acceptable for an
# in-session prediction task.
data_train, data_test = train_test_split(
    df,
    test_size=0.2,
    random_state=0,
)

### Config

In [7]:
# Maps the roles the task expects (user, product, session, event) onto the
# column names of the loaded data, and lists the event transitions to model.
INPUT_CONFIG = {
    'user_id': 'user_id',
    'product_id': 'product_id',
    'user_session': 'user_session',
    'event': 'event_type',
    # Each pair is a source -> destination event transition.
    'order_actions': [
        {'source': source_event, 'des': destination_event}
        for source_event, destination_event in (
            ('view', 'cart'),
            ('cart', 'remove_from_cart'),
            ('cart', 'purchase'),
        )
    ],
    'other_features': [],
}

# Presumably feature-engineering options; left empty here - confirm semantics.
FE_CONFIG = []

# Names of the model variants requested from the task.
MODEL_CONFIG = ['BinaryClasses', 'MultiClass']

# Prediction settings; how 'gettop' and 'ntop' are interpreted is not visible
# in this notebook - confirm against the library.
PREDICT_CONFIG = {
    'method': 'gettop',
    'ntop': None,
}

# Single bundle holding the task name together with every config above.
PIPELINE_CONFIG = {
    'task': 'converting_action_prediction',
    'input_config': INPUT_CONFIG,
    'fe_config': FE_CONFIG,
    'model_config': MODEL_CONFIG,
    'predict_config': PREDICT_CONFIG,
}

### Apply model

In [8]:
# Display the assembled pipeline configuration as this cell's output.
PIPELINE_CONFIG

{'task': 'converting_action_prediction',
 'input_config': {'user_id': 'user_id',
  'product_id': 'product_id',
  'user_session': 'user_session',
  'event': 'event_type',
  'order_actions': [{'source': 'view', 'des': 'cart'},
   {'source': 'cart', 'des': 'remove_from_cart'},
   {'source': 'cart', 'des': 'purchase'}],
  'other_features': []},
 'fe_config': [],
 'model_config': ['BinaryClasses', 'MultiClass'],
 'predict_config': {'method': 'gettop', 'ntop': None}}

In [9]:
# Build the in-session next-action task from the input and model sections of
# the pipeline config; the 'fe_config' and 'predict_config' sections are not
# passed to the constructor here.
model = NextAction_InSession_Task(
    PIPELINE_CONFIG['input_config'],
    PIPELINE_CONFIG['model_config'],
)

In [10]:
# Fit the task on the training split. In the recorded output the call logs one
# training step per transition label for the binary-class ensemble, then the
# multi-class ensemble, and its return value is displayed as a list of two
# model objects.
model.train(data_train)

Train ensemble NextAction_BinaryClass_Ensemble
Training NextAction_BinaryClass_Ensemble with label view2cart
Training NextAction_BinaryClass_Ensemble with label cart2remove_from_cart
Training NextAction_BinaryClass_Ensemble with label cart2purchase
Train ensemble NextAction_MultiClass_Ensemble
Training NextAction_MultiClass with label view2cart


[<propensity_prediction.tasks.converting_action_prediction.base.nextaction_model.NextAction_BinaryClasses at 0x7f284836ed10>,
 <propensity_prediction.tasks.converting_action_prediction.base.nextaction_model.NextAction_MultiClass at 0x7f284836ec50>]

In [11]:
# Disabled debugging aid: presumably inspects the label names of one nested
# sub-model - confirm.
# NOTE(review): this cell contains only commented-out code; remove it or
# document why it is kept.
# model.list_models[0].list_models[2].context.get_label_names()

# Prediction and evaluation

In [12]:
# Predict on the held-out split. The recorded output is a nested dict whose
# top-level 'predict' list holds an entry per model, each with per-label
# entries carrying a 'predict' array of 0/1 values.
# NOTE(review): the full arrays are dumped into the cell output; consider
# assigning the result and displaying only a short summary.
model.predict(data_test)

{'predict': [{'model_name': 'NextAction_BinaryClass_Ensemble',
   'predict': [{'model_name': 'NextAction_BinaryClass_Ensemble',
     'label': 'view2cart',
     'predict': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [13]:
# Score the held-out split. The recorded output nests, per model and per
# label, a two-column 'probabilities' array.
# NOTE(review): in the recorded output the printed rows are identical for
# 'view2cart', 'cart2remove_from_cart' and 'cart2purchase' (the middle rows
# are elided) - verify that each label is scored by its own fitted model.
model.get_probabilities(data_test)

{'probabilities': [{'model_name': 'NextAction_BinaryClass_Ensemble',
   'probabilities': [{'model_name': 'NextAction_BinaryClass_Ensemble',
     'label': 'view2cart',
     'probabilities': array([[0.97532604, 0.02467396],
            [0.97532604, 0.02467396],
            [0.97308294, 0.02691706],
            ...,
            [0.97532604, 0.02467396],
            [0.97532604, 0.02467396],
            [0.97532604, 0.02467396]])},
    {'model_name': 'NextAction_BinaryClass_Ensemble',
     'label': 'cart2remove_from_cart',
     'probabilities': array([[0.97532604, 0.02467396],
            [0.97532604, 0.02467396],
            [0.97308294, 0.02691706],
            ...,
            [0.97532604, 0.02467396],
            [0.97532604, 0.02467396],
            [0.97532604, 0.02467396]])},
    {'model_name': 'NextAction_BinaryClass_Ensemble',
     'label': 'cart2purchase',
     'probabilities': array([[0.97532604, 0.02467396],
            [0.97532604, 0.02467396],
            [0.97308294, 0.02691

In [14]:
# Evaluate on the held-out split. The recorded output reports, per model and
# label, an AUC plus accuracy / precision / recall / trueneg_rate / f1_score
# for several threshold methods ('constant', 'baseline', 'kmeans').
# NOTE(review): the recorded output begins with echoed source lines (e.g. the
# recall division), presumably from divide-by-zero / invalid-value runtime
# warnings - confirm and handle the empty-denominator case.
# NOTE(review): the 'constant' result pairs precision 1.0 with
# trueneg_rate < 1.0, while 'baseline' pairs precision < 1.0 with
# trueneg_rate 1.0. Under the standard definitions precision 1.0 means no
# false positives, which forces a true-negative rate of 1.0, so the metric
# labels or the y_true / y_pred order inside the library may be swapped -
# verify.
model.evaluate(data_test)

  mean1 = np.cumsum(hist * bin_centers) / weight1
  crit = np.log(((P1_sq[:-1] * P2_sq[1:]) ** -1) *
  (P1[:-1] * (1.0 - P1[:-1])) ** 2)
  recall = 1.0*n_tp/(n_tp+n_fn)
  mean1 = np.cumsum(hist * bin_centers) / weight1
  crit = np.log(((P1_sq[:-1] * P2_sq[1:]) ** -1) *
  (P1[:-1] * (1.0 - P1[:-1])) ** 2)
  recall = 1.0*n_tp/(n_tp+n_fn)
  mean1 = np.cumsum(hist * bin_centers) / weight1
  crit = np.log(((P1_sq[:-1] * P2_sq[1:]) ** -1) *
  (P1[:-1] * (1.0 - P1[:-1])) ** 2)
  recall = 1.0*n_tp/(n_tp+n_fn)
  recall = 1.0*n_tp/(n_tp+n_fn)
  recall = 1.0*n_tp/(n_tp+n_fn)
  recall = 1.0*n_tp/(n_tp+n_fn)


{'evaluation': [{'model_name': 'NextAction_BinaryClass_Ensemble',
   'evaluation': [{'model_name': 'NextAction_BinaryClass_Ensemble',
     'label': 'view2cart',
     'evaluation': {'model_performance': {'auc': 0.9996002132196162,
       'predicting': [{'method': 'threshold',
         'results': [{'threshold_method': 'constant',
           'results': {'accuracy': 0.99822695035461,
            'precision': 1.0,
            'recall': 0.9655172413793104,
            'trueneg_rate': 0.9981343283582089,
            'f1_score': 0.9824561403508771}},
          {'threshold_method': 'baseline',
           'results': {'accuracy': 0.9893617021276596,
            'precision': 0.7857142857142857,
            'recall': 1.0,
            'trueneg_rate': 1.0,
            'f1_score': 0.88}},
          {'threshold_method': 'kmeans',
           'results': {'accuracy': 0.99822695035461,
            'precision': 1.0,
            'recall': 0.9655172413793104,
            'trueneg_rate': 0.9981343283582089,
  