<img src="https://github.com/elaynelemos/prediction-of-orders-dmc/blob/main/assets/img/univasf-logo.png?raw=1" height=100 width=100/>

<center>
<h3>
    UNIVERSIDADE FEDERAL DO VALE DO SÃO FRANCISCO
    <br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;COLEGIADO DE ENGENHARIA DE COMPUTAÇÃO
</h3>

<h3>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Orientador</h3>
<span>Prof. Dr. Rosalvo Ferreira de Oliveira Neto</span>

<h3>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Discentes</h3>
<span>Anísio Pereira Batista Filho
<br>Edjair Aguiar Gomes Filho
<br>Elayne Rute Lessa Lemos</span>
</center>
<br><br>

## Predição de pedidos com Redes Neurais e Random Forest

### Importação da base de dados

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the training transactions; the file is pipe-delimited.
transact_train_database = pd.read_csv('data/transact_train.txt', sep = '|')
# Preview three randomly chosen rows (no random_state is set, so the rows differ between runs).
transact_train_database.sample(3)

Unnamed: 0,sessionNo,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,onlineStatus,availability,customerNo,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder,order
412744,48189,17,7,3811.701,93,333.0,699.99,22883.65,5,399.99,499.0,2327.97,1,y,completely orderable,24081,2300,481,64,9,49,2,340,y
100860,12029,18,5,195.149,2,19.99,19.99,39.98,1,19.99,19.99,19.99,4,y,completely orderable,?,?,?,?,?,?,?,?,y
9558,1235,8,5,2465.82,63,9.99,69.99,1964.79,13,9.99,69.99,365.06,4,y,completely orderable,580,2600,483,221,30,38,2,61,y


In [3]:
# Load the transactions to be classified; the file is pipe-delimited.
transact_test_database = pd.read_csv('data/transact_class.txt', sep = '|')
# Preview three randomly chosen rows (no random_state is set, so the rows differ between runs).
transact_test_database.sample(3)

Unnamed: 0,sessionNo,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,onlineStatus,availability,customerNo,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder
44142,4927,3,1,125.177,2,349.99,349.99,699.98,1,349.99,349.99,349.99,?,?,?,27301,4000,499,135,8,73,2,14
38295,4105,23,7,57.421,2,399.99,399.99,799.98,1,399.99,399.99,399.99,2,y,completely orderable,?,?,?,?,?,?,?,?
34606,3679,22,7,426.969,5,37.99,64.99,252.95,1,49.99,49.99,49.99,1,y,completely orderable,26849,1900,477,47,17,44,1,36


<br>

### Alteração da granularidade da base de dados

In [4]:
# Collapse the transaction log to one row per session: among rows sharing a
# sessionNo, only the last one is kept.
session_train_database = transact_train_database.drop_duplicates(subset=['sessionNo'], keep='last')

# Separate the target from the predictors. The target column is selected by
# name rather than by position, so the split stays correct even if the column
# order of the input file changes.
session_train_X = session_train_database.drop(columns=['order'])

# Encode the target as integers (y -> 1, n -> 0). `map` is used instead of
# `replace` because `replace` on an object column only yields an integer dtype
# through implicit downcasting, which pandas has deprecated.
# Any value other than 'y' / 'n' becomes NaN.
session_train_y = session_train_database['order'].map({'y': 1, 'n': 0})

In [5]:
# Same granularity change for the test data: among rows sharing a sessionNo,
# only the last one is kept.
session_test_X = transact_test_database.drop_duplicates(subset=['sessionNo'], keep='last')
# NOTE: despite the `_y` suffix, this holds the session identifiers (the
# sessionNo column), not target labels.
session_test_y = session_test_X['sessionNo']

In [6]:
# Preview three random rows of the session-level test frame.
session_test_X.sample(3)

Unnamed: 0,sessionNo,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,onlineStatus,availability,customerNo,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder
32207,3396,22,7,5759.786,55,14.99,749.99,9660.93,1,149.99,149.99,149.99,2,y,completely orderable,?,?,?,?,?,?,?,?
1771,178,18,7,1968.652,28,59.0,75.95,311.95,1,59.0,59.0,59.0,5,y,completely orderable,25133,3700,492,223,20,55,2,5
5305,530,19,7,3650.573,99,12.95,39.99,1838.8,25,12.95,29.99,482.67,5,y,completely orderable,17080,5000,614,19,61,34,2,4


<br>

### Tratamento de valores ausentes

In [7]:
def replace_missing_value(df, value, features):
    """Replace a placeholder marker with NaN in the selected columns.

    Args:
        df: DataFrame to clean. It is modified in place.
        value: Marker that stands for a missing entry (e.g. '?').
        features: Names of the columns to clean.

    Returns:
        The same DataFrame object that was passed in.
    """
    for name in features:
        # Only exact matches of `value` are turned into NaN.
        df[name] = df[name].replace(value, np.nan)
    return df

In [8]:
# convert_float() is based on the solution proposed in the Case Study of the
# book "Ciência dos Dados pelo Processo de KDD" by Prof. Dr. Rosalvo Neto
# book: https://www.researchgate.net/publication/352749819_Ciencia_dos_Dados_pelo_Processo_de_KDD
# implementation: https://github.com/rosalvoneto/Livro

def convert_float(df, numeric_features):
    """Cast the given columns to float.

    Args:
        df: DataFrame to convert. It is modified in place.
        numeric_features: Names of the columns to cast.

    Returns:
        The same DataFrame object that was passed in.
    """
    as_float = {name: df[name].astype(float) for name in numeric_features}
    for name, converted in as_float.items():
        df[name] = converted
    return df

In [9]:
# replace_missing_by_mean() is based on the solution proposed in the Case Study
# of the book "Ciência dos Dados pelo Processo de KDD" by Prof. Dr. Rosalvo Neto
# book: https://www.researchgate.net/publication/352749819_Ciencia_dos_Dados_pelo_Processo_de_KDD
# implementation: https://github.com/rosalvoneto/Livro

def replace_missing_by_mean(df, numeric_features):
    """Fill missing values of each given column with that column's mean.

    Args:
        df: DataFrame to impute. It is modified in place.
        numeric_features: Names of the numeric columns to impute.

    Returns:
        The same DataFrame object that was passed in.
    """
    for column in numeric_features:
        average = df[column].mean()
        # Assign the filled column back instead of calling
        # `df[column].fillna(..., inplace=True)`: the inplace call acts on an
        # intermediate object, so under pandas Copy-on-Write it never reaches `df`.
        df[column] = df[column].fillna(average)

    return df

In [10]:
def replace_by_reference(df, reference_feature, reference_feature_value, features, replace_to):
    """Overwrite columns on the rows where a reference column has a given value.

    Args:
        df: DataFrame to update. It is modified in place.
        reference_feature: Name of the column that selects the rows.
        reference_feature_value: Rows whose reference column equals this value
            are updated.
        features: Names of the columns to overwrite on those rows.
        replace_to: Value written into the selected cells.

    Returns:
        The same DataFrame object that was passed in.
    """
    matches = df[reference_feature].eq(reference_feature_value)
    for name in features:
        df.loc[matches, name] = replace_to
    return df

In [11]:
def replace_missing_by_fixed_value(df, value, features):
    """Fill missing values of the given columns with a fixed value.

    Args:
        df: DataFrame to impute. It is modified in place.
        value: Value written wherever a selected column is missing.
        features: Names of the columns to impute.

    Returns:
        The same DataFrame object that was passed in.
    """
    for column in features:
        # Assign the filled column back instead of calling
        # `df[column].fillna(..., inplace=True)`: the inplace call acts on an
        # intermediate object, so under pandas Copy-on-Write it never reaches `df`.
        df[column] = df[column].fillna(value)

    return df

In [12]:
# Columns handled as numbers: '?' markers become NaN, the column is cast to
# float and missing entries are mean-imputed.
numeric_features = [
    'cMinPrice', 'cMaxPrice', 'cSumPrice',
    'bMinPrice', 'bMaxPrice', 'bSumPrice',
    'bStep',
    'maxVal', 'customerScore', 'accountLifetime', 'payments',
    'age', 'address', 'lastOrder',
]

# Columns handled as categories: '?' markers become NaN and are then replaced
# by a placeholder category.
string_features = ['availability', 'onlineStatus']

In [13]:
# Turn the '?' markers into NaN in both session-level frames, first for the
# numeric columns and then for the categorical ones. Each call works on a copy,
# so the frames the current ones were derived from stay untouched.
for feature_group in (numeric_features, string_features):
    session_train_X = replace_missing_value(session_train_X.copy(), '?', feature_group)
    session_test_X = replace_missing_value(session_test_X.copy(), '?', feature_group)

In [14]:
# Cast the numeric columns of both session-level frames to float.
session_train_X, session_test_X = (
    convert_float(frame, numeric_features)
    for frame in (session_train_X, session_test_X)
)

In [15]:
# Mean-impute the numeric columns. Each frame is imputed with its own column
# means. The helper returns the frame it was given, so rebinding the names
# keeps them pointing at the same objects.
session_train_X = replace_missing_by_mean(session_train_X, numeric_features)
session_test_X = replace_missing_by_mean(session_test_X, numeric_features)

# Display the imputed test frame.
session_test_X

Unnamed: 0,sessionNo,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,onlineStatus,availability,customerNo,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder
8,1,18,7,624.606,11,16.99000,39.990000,207.91000,5,16.990000,39.990000,112.950000,3.118534,y,completely orderable,25039,1300.000000,489.000000,188.000000,5.000000,49.000000,1.000000,65.000000
19,2,18,7,2804.705,16,34.99000,34.990000,174.95000,2,34.990000,34.990000,69.980000,3.118534,y,completely orderable,25040,1200.000000,543.000000,43.000000,5.000000,29.000000,2.000000,184.000000
42,3,18,7,7401.384,119,7.99000,59.950000,3263.57000,12,12.490000,39.950000,346.560000,3.118534,y,completely orderable,25041,600.000000,552.000000,17.000000,4.000000,37.000000,2.000000,107.000000
48,4,18,7,2853.550,152,3.99000,239.990000,5642.50000,4,9.990000,14.990000,44.960000,3.118534,,,25042,8500.000000,535.000000,226.000000,19.000000,49.000000,2.000000,17.000000
50,5,18,7,48.145,2,29.99000,29.990000,59.98000,1,29.990000,29.990000,29.990000,2.000000,y,completely orderable,25043,600.000000,543.000000,39.000000,2.000000,53.000000,2.000000,234.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45037,5107,5,1,519.017,11,9.00000,39.990000,311.93000,1,39.990000,39.990000,39.990000,3.118534,,,?,1929.004329,476.621409,120.714286,9.496308,43.525107,1.712398,89.313253
45039,5108,5,1,18.429,1,6.99000,6.990000,6.99000,1,6.990000,6.990000,6.990000,3.118534,y,completely orderable,47,300.000000,537.000000,5.000000,0.000000,53.000000,1.000000,34.000000
45054,5109,5,1,777.655,0,72.88035,171.776058,1267.25505,0,89.261148,122.470157,208.196835,5.000000,y,completely orderable,?,1929.004329,476.621409,120.714286,9.496308,43.525107,1.712398,89.313253
45060,5110,5,1,101.074,1,49.99000,49.990000,49.99000,1,49.990000,49.990000,49.990000,5.000000,y,completely orderable,?,1929.004329,476.621409,120.714286,9.496308,43.525107,1.712398,89.313253


In [16]:
# Missing categorical entries get the placeholder category 'ausente'. The
# helper returns the frame it was given, so rebinding the names keeps them
# pointing at the same objects.
session_train_X = replace_missing_by_fixed_value(session_train_X, 'ausente', string_features)
session_test_X = replace_missing_by_fixed_value(session_test_X, 'ausente', string_features)

# Display the imputed test frame.
session_test_X

Unnamed: 0,sessionNo,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,onlineStatus,availability,customerNo,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder
8,1,18,7,624.606,11,16.99000,39.990000,207.91000,5,16.990000,39.990000,112.950000,3.118534,y,completely orderable,25039,1300.000000,489.000000,188.000000,5.000000,49.000000,1.000000,65.000000
19,2,18,7,2804.705,16,34.99000,34.990000,174.95000,2,34.990000,34.990000,69.980000,3.118534,y,completely orderable,25040,1200.000000,543.000000,43.000000,5.000000,29.000000,2.000000,184.000000
42,3,18,7,7401.384,119,7.99000,59.950000,3263.57000,12,12.490000,39.950000,346.560000,3.118534,y,completely orderable,25041,600.000000,552.000000,17.000000,4.000000,37.000000,2.000000,107.000000
48,4,18,7,2853.550,152,3.99000,239.990000,5642.50000,4,9.990000,14.990000,44.960000,3.118534,ausente,ausente,25042,8500.000000,535.000000,226.000000,19.000000,49.000000,2.000000,17.000000
50,5,18,7,48.145,2,29.99000,29.990000,59.98000,1,29.990000,29.990000,29.990000,2.000000,y,completely orderable,25043,600.000000,543.000000,39.000000,2.000000,53.000000,2.000000,234.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45037,5107,5,1,519.017,11,9.00000,39.990000,311.93000,1,39.990000,39.990000,39.990000,3.118534,ausente,ausente,?,1929.004329,476.621409,120.714286,9.496308,43.525107,1.712398,89.313253
45039,5108,5,1,18.429,1,6.99000,6.990000,6.99000,1,6.990000,6.990000,6.990000,3.118534,y,completely orderable,47,300.000000,537.000000,5.000000,0.000000,53.000000,1.000000,34.000000
45054,5109,5,1,777.655,0,72.88035,171.776058,1267.25505,0,89.261148,122.470157,208.196835,5.000000,y,completely orderable,?,1929.004329,476.621409,120.714286,9.496308,43.525107,1.712398,89.313253
45060,5110,5,1,101.074,1,49.99000,49.990000,49.99000,1,49.990000,49.990000,49.990000,5.000000,y,completely orderable,?,1929.004329,476.621409,120.714286,9.496308,43.525107,1.712398,89.313253


In [17]:
# Rows whose customerNo is still the '?' marker get 0 in the columns listed
# below, overwriting the column means imputed for them earlier.
customer_features = ['maxVal', 'customerScore', 'accountLifetime', 'payments',
                     'age', 'address', 'lastOrder']

session_train_X = replace_by_reference(session_train_X, 'customerNo', '?', customer_features, 0)
session_test_X = replace_by_reference(session_test_X, 'customerNo', '?', customer_features, 0)

In [18]:
# Preview three random rows of the session-level training frame.
session_train_X.sample(3)

Unnamed: 0,sessionNo,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,onlineStatus,availability,customerNo,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder
93361,11175,17,5,5859.862,134,2.99,99.99,1010.63,21,3.0,12.99,145.77,5.0,y,completely orderable,5531,4000.0,557.0,96.0,9.0,42.0,2.0,154.0
375985,44232,14,7,918.174,22,19.99,59.99,654.78,3,24.99,39.99,94.97,3.158804,y,completely orderable,21934,2100.0,523.0,160.0,4.0,36.0,2.0,139.0
71612,8612,15,5,220.032,6,9.99,29.99,129.94,6,9.99,29.99,129.94,3.158804,y,completely orderable,4247,4000.0,494.0,62.0,5.0,50.0,2.0,46.0


In [19]:
# Apply the '?' -> NaN clean-up and the float conversion to the
# transaction-level frames as well; missing values are left as NaN here.
for feature_group in (numeric_features, string_features):
    transact_train_database = replace_missing_value(transact_train_database.copy(), '?', feature_group)
for feature_group in (numeric_features, string_features):
    transact_test_database = replace_missing_value(transact_test_database.copy(), '?', feature_group)

transact_train_database, transact_test_database = (
    convert_float(frame, numeric_features)
    for frame in (transact_train_database, transact_test_database)
)

In [20]:
# Preview three random rows of the cleaned transaction-level training frame.
transact_train_database.sample(3)

Unnamed: 0,sessionNo,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,onlineStatus,availability,customerNo,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder,order
82552,9923,16,5,128.76,6,19.99,19.99,119.94,2,19.99,19.99,39.98,,y,completely orderable,4886,4000.0,535.0,147.0,6.0,84.0,2.0,26.0,y
18909,2329,9,5,46.977,5,74.99,74.99,374.95,2,74.99,74.99,149.98,,,,1163,1400.0,571.0,74.0,7.0,35.0,2.0,18.0,y
222704,26426,15,6,7310.892,9,7.99,39.99,120.94,2,7.99,15.0,22.99,1.0,y,completely orderable,4953,15000.0,444.0,117.0,71.0,50.0,2.0,19.0,y


In [21]:
# Make sessionNo the row index of both session-level frames.
session_train_X = session_train_X.set_index('sessionNo')
session_test_X = session_test_X.set_index('sessionNo')

In [22]:
# Drop customerNo, which the authors treat as a useless variable.
session_train_X = session_train_X.drop(columns='customerNo')
session_test_X = session_test_X.drop(columns='customerNo')

In [23]:
# Preview three random rows after indexing by sessionNo and dropping customerNo.
session_train_X.sample(3)

Unnamed: 0_level_0,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,onlineStatus,availability,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder
sessionNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
30344,18,6,338.943,3,139.99,139.99,279.98,1,139.99,139.99,139.99,2.0,y,completely orderable,0.0,0.0,0.0,0.0,0.0,0.0,0.0
32653,20,6,2260.691,41,4.99,59.99,737.41,6,12.99,59.99,150.94,3.158804,y,completely orderable,2100.0,512.0,185.0,9.0,48.0,2.0,590.0
2158,9,5,10697.044,29,14.99,79.99,1079.71,6,14.99,79.99,174.94,5.0,y,completely orderable,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<br>

### Criação de variáveis

In [24]:
# Derive three per-session features from the transaction-level data, with the
# same code for the training and the test frames. Both session frames are
# indexed by sessionNo, so the per-session aggregates align on that index.
new_features = ['bMeanSumPriceOverTransacitions', 'meanInterationsDuration',
                'bMeanCountOverTransacitions']

for session_X, transactions in (
    (session_train_X, transact_train_database),
    (session_test_X, transact_test_database),
):
    per_session = transactions.groupby('sessionNo')

    # bMeanSumPriceOverTransacitions: mean basket value (bSumPrice) over the
    # rows of the session.
    session_X['bMeanSumPriceOverTransacitions'] = per_session['bSumPrice'].mean()

    # meanInterationsDuration: session duration divided by the number of rows
    # of the session that carry a duration value.
    session_X['meanInterationsDuration'] = session_X['duration'] / per_session['duration'].count()

    # bMeanCountOverTransacitions: mean number of basket items (bCount) over
    # the rows of the session.
    session_X['bMeanCountOverTransacitions'] = per_session['bCount'].mean()

    # Sessions for which an aggregate is undefined get 0. The filled column is
    # assigned back instead of using `fillna(..., inplace=True)` on the selected
    # column, which does not update the frame under pandas Copy-on-Write.
    for name in new_features:
        session_X[name] = session_X[name].fillna(0)

In [25]:
# Preview three random rows including the newly created features.
session_train_X.sample(3)

Unnamed: 0_level_0,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,onlineStatus,availability,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder,bMeanSumPriceOverTransacitions,meanInterationsDuration,bMeanCountOverTransacitions
sessionNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
14468,20,5,250.02,3,3.99,24.99,53.97,3,3.99,24.99,53.97,5.0,y,completely orderable,6500.0,537.0,358.0,13.0,55.0,1.0,65.0,49.84875,31.2525,2.625
25640,15,6,10263.516,36,9.25,29.99,643.91,9,9.99,29.99,169.91,3.158804,y,completely orderable,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108.276111,570.195333,5.722222
16605,21,5,853.878,33,3.0,19.99,247.45,3,9.99,19.99,48.98,1.0,y,completely orderable,4000.0,499.0,75.0,12.0,45.0,1.0,139.0,38.986,170.7756,2.4


<br>

### Normalização do conjunto de dados

In [26]:
# Create the dummy (one-hot) variables for the categorical columns.
session_train_X = pd.get_dummies(session_train_X, prefix_sep='_')
session_test_X = pd.get_dummies(session_test_X, prefix_sep='_')

# The two frames are encoded independently, so a category that occurs in only
# one of them would leave train and test with different columns. Align the test
# frame to the training layout: categories absent from the test data get an
# all-zero column, and columns unknown to the training data are dropped.
session_test_X = session_test_X.reindex(columns=session_train_X.columns, fill_value=0)

In [27]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

scaler = MinMaxScaler()

# The fitted scaler can only be applied to a frame with exactly the training
# columns, in the same order. Missing columns are added as zeros and extra
# ones are dropped.
session_test_X = session_test_X.reindex(columns=session_train_X.columns, fill_value=0)

# Learn each feature's min/max from the training data only, and apply that same
# mapping to the test data. Re-fitting on the test set would scale the two sets
# differently, so equal raw values would map to different scaled values.
# Test values outside the training range may therefore fall outside [0, 1].
X_train_norm = scaler.fit_transform(session_train_X)
X_test_norm = scaler.transform(session_test_X)

# Rebuild DataFrames with the original column names. The sessionNo index is not
# carried over: rows keep their order and get a fresh 0..n-1 index.
session_train_X = pd.DataFrame(X_train_norm, columns=session_train_X.columns)
session_test_X = pd.DataFrame(X_test_norm, columns=session_test_X.columns)

In [28]:
# Preview three random rows of the scaled training frame.
session_train_X.sample(3)

Unnamed: 0,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder,bMeanSumPriceOverTransacitions,meanInterationsDuration,bMeanCountOverTransacitions,onlineStatus_ausente,onlineStatus_n,onlineStatus_y,availability_ausente,availability_completely not determinable,availability_completely not orderable,availability_completely orderable,availability_mainly not determinable,availability_mainly not orderable,availability_mainly orderable,availability_mixed
40183,0.478261,1.0,0.015773,0.065,0.001665,0.004999,0.002764,0.037037,0.001427,0.004999,0.004108,0.539701,0.012,0.934169,0.021667,0.012673,0.686869,0.333333,0.112466,0.007461,0.002655,0.055043,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
46174,0.652174,1.0,0.027136,0.03,0.004165,0.00357,0.000648,0.009259,0.00357,0.00357,0.001081,0.539701,0.08,0.918495,0.223333,0.002304,0.474747,0.666667,0.181572,0.002381,0.003045,0.018348,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1674,0.391304,0.0,0.011418,0.05,0.004998,0.007856,0.003628,0.037037,0.004284,0.007856,0.007136,0.0,0.028,0.713166,0.3,0.012673,0.393939,0.666667,0.03523,0.011987,0.001922,0.055043,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


<br>

### Remoção de variáveis não significativas

In [29]:
# Drop the features whose variance in the training data does not exceed the
# threshold; with a threshold of 0 these are the constant columns.
threshold_var=0
l_var = [x for x in session_train_X.columns if session_train_X[x].var() <= threshold_var]

# Remove all selected columns in one call. For the test frame, columns that are
# not present are skipped instead of raising a KeyError.
session_train_X = session_train_X.drop(columns=l_var)
session_test_X = session_test_X.drop(columns=l_var, errors='ignore')

In [30]:
# Preview three random rows after the variance filter.
session_train_X.sample(3)

Unnamed: 0,startHour,startWeekday,duration,cCount,cMinPrice,cMaxPrice,cSumPrice,bCount,bMinPrice,bMaxPrice,bSumPrice,bStep,maxVal,customerScore,accountLifetime,payments,age,address,lastOrder,bMeanSumPriceOverTransacitions,meanInterationsDuration,bMeanCountOverTransacitions,onlineStatus_ausente,onlineStatus_n,onlineStatus_y,availability_ausente,availability_completely not determinable,availability_completely not orderable,availability_completely orderable,availability_mainly not determinable,availability_mainly not orderable,availability_mainly orderable,availability_mixed
7194,0.608696,0.0,0.006042,0.01,0.033332,0.02857,0.003456,0.009259,0.02857,0.02857,0.008651,1.0,0.08,0.785266,0.135,0.011521,0.626263,0.666667,0.056911,0.019055,0.00061,0.018348,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
33601,0.869565,0.5,0.051234,0.04,0.001165,0.001856,0.000691,0.009259,0.001427,0.001427,0.000432,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000952,0.025872,0.018348,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
6487,0.565217,0.0,0.028098,0.11,0.000498,0.003999,0.002046,0.027778,0.000856,0.000856,0.000777,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001284,0.007094,0.041282,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


<br>

### Exportação dos dados

In [31]:
# Write the prepared datasets to CSV without the index column.
exports = (
    (session_train_X, 'data/session_train_X.csv'),
    (session_train_y, 'data/session_train_y.csv'),
    (session_test_X, 'data/session_test_X.csv'),
    (session_test_y, 'data/session_test_y.csv'),
)
for dataset, destination in exports:
    dataset.to_csv(destination, index=False)