In [20]:
from fetch_data import fetch_events, clear_events, load_csv_and_create_dataframe, load_credentials
import numpy as np
import json
import pandas as pd

from pprint import PrettyPrinter
from matplotlib import pyplot as plt
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Embedding, LSTM, GlobalMaxPooling1D, SpatialDropout1D, Flatten, Activation
from sklearn.preprocessing import MinMaxScaler, normalize
from sklearn.metrics import mean_squared_error
plt.style.use('dark_background')
from sklearn.metrics import classification_report, confusion_matrix
import random
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 16138411449482822683
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 10336468590597931805
physical_device_desc: "device: XLA_CPU device"
]


### Parse position data (x, y, w, h) and splits it in separate data series for each dimention

In [21]:
def generate_x_data(data): 
    data['pos'] = data['pos'].apply(lambda x: np.array([list(map(float, value.replace('(','[').replace(')',']').replace('[','').replace(']','').split(', '))) for value in x]))
    data['len'] = data['pos'].apply(len)
    data['x'] = data['pos'].apply(lambda x: [data[0] for data in x])
    data['y'] = data['pos'].apply(lambda x: [data[1] for data in x])
    data['sizex'] = data['pos'].apply(lambda x: [data[2] for data in x])
    data['sizey'] = data['pos'].apply(lambda x: [data[3] for data in x])
    
    return data

### Data fetching

This function fetchs data from multiple days into one dataframe and downloads it's csv's

In [22]:
def fetch_multiple_days (credentials, *days, equalize=False, for_review=False):
    
    purchases, regular_events = clear_events(fetch_events(credentials['user'], credentials['pwd'], days[0]).json()[0], for_review=for_review)
    
    data = load_csv_and_create_dataframe(purchases, 1)
    if equalize:
        data = load_csv_and_create_dataframe(sorted(regular_events, key=lambda x:random.random())[:len(purchases)], 0, data)
    else:
        data = load_csv_and_create_dataframe(regular_events, 0, data)
        
    for day in days[1:]:
        purchases, regular_events = clear_events(fetch_events(credentials['user'], credentials['pwd'], day).json()[0], for_review=for_review)
        data = load_csv_and_create_dataframe(purchases, 1, data)
        if equalize:
            data = load_csv_and_create_dataframe(sorted(regular_events, key=lambda x:random.random())[:len(purchases)], 0, data)
        else:
            data = load_csv_and_create_dataframe(regular_events, 0, data)
    return data

### Data constrain

LSTM's requires setting a specific amount of timesteps, this function set the data to that size, and then formats it to the expected input of an LSTM. ex:

```json
t0 (a0, a1, ... an)
t1 (b0, b1, ... bn)
t2 (c0, c1, ... cn)
```

In [23]:
def constrain_data(data, max_phrase_len):
    X_x_train = pad_sequences(data['x'], maxlen = max_phrase_len) 
    X_y_train = pad_sequences(data['y'], maxlen = max_phrase_len)
    X_szx_train = pad_sequences(data['sizex'], maxlen = max_phrase_len)
    X_szy_train = pad_sequences(data['sizey'], maxlen = max_phrase_len)

    x_train = np.array(list(zip(X_x_train, X_y_train, X_szx_train, X_szy_train)))
    
    return x_train

## Fetching train and test data

In [24]:
load_credentials()
credentials = {}
with open('credentials_visio.json') as creds:
    credentials = json.load(creds)
train_data = fetch_multiple_days(credentials, '2020-02-03','2020-02-02','2020-02-01','2020-01-31', '2020-01-30', '2020-01-28', '2020-01-28', '2020-01-27', '2020-01-25', '2020-01-24', '2020-01-13', '2020-01-20')
test_data = fetch_multiple_days(credentials, '2020-01-30', equalize=False)

Login succed
Login succed
Login succed
Login succed
Login succed
Login succed
Login succed
Login succed
Login succed
Login succed
Login succed
Login succed
Login succed


In [25]:
train_Data = train_data.copy()
train_Data = generate_x_data(train_Data)
feature_dimention = 4

test_Data = test_data.copy()
test_Data = generate_x_data(test_Data)

max_phrase_len = min(test_Data['len'].max() if test_Data['len'].max() > test_Data['len'].max() else test_Data['len'].max(), 3000)
feature_dimention = 4

In [26]:
test_Data.describe()

Unnamed: 0,len
count,429.0
mean,649.121212
std,796.967465
min,92.0
25%,127.0
50%,294.0
75%,904.0
max,5312.0


In [27]:
def normalize(arr):
    norm = arr - arr.min()
    norm = norm / np.abs(norm).max()
    
    return norm

In [28]:
batch_size = 50
epochs = 1000

In [29]:
Y_train = train_Data['purchase']
y_train = to_categorical(Y_train)

X_x_train = pad_sequences(train_Data['x'], maxlen = max_phrase_len) 
X_y_train = pad_sequences(train_Data['y'], maxlen = max_phrase_len)
X_szx_train = pad_sequences(train_Data['sizex'], maxlen = max_phrase_len)
X_szy_train = pad_sequences(train_Data['sizey'], maxlen = max_phrase_len)

x_train = np.array(list(zip(X_x_train, X_y_train, X_szx_train, X_szy_train)))

In [30]:
Y_test = test_Data['purchase']
y_test = to_categorical(Y_test)

X_x_test = pad_sequences(test_Data['x'], maxlen = max_phrase_len) 
X_y_test = pad_sequences(test_Data['y'], maxlen = max_phrase_len)
X_szx_test = pad_sequences(test_Data['sizex'], maxlen = max_phrase_len)
X_szy_test = pad_sequences(test_Data['sizey'], maxlen = max_phrase_len)

x_test = np.array(list(zip(X_x_test, X_y_test, X_szx_test, X_szy_test)))

In [31]:
model_lstm = Sequential()
# model_lstm.add(Embedding(input_dim = 4, output_dim = 256, input_length = max_phrase_len))
# model_lstm.add(SpatialDropout1D(0.3))
model_lstm.add(LSTM(64, input_shape=(4, max_phrase_len), return_sequences=True))
model_lstm.add(Dropout(0.2))
model_lstm.add(LSTM(64, input_shape=(4, max_phrase_len), return_sequences=True))
model_lstm.add(Dropout(0.2))
model_lstm.add(LSTM(64, input_shape=(4, max_phrase_len)))
model_lstm.add(Dropout(0.2))
model_lstm.add(Dense(2, activation='softmax'))
#model_lstm.add(Flatten())
model_lstm.summary()

model_lstm.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['acc']
)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 4, 64)             784640    
_________________________________________________________________
dropout_1 (Dropout)          (None, 4, 64)             0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 64)             33024     
_________________________________________________________________
dropout_2 (Dropout)          (None, 4, 64)             0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                

In [32]:
history = model_lstm.fit(
    x_train,
    y_train,
    validation_split = 0.1,
    epochs = epochs,
    batch_size = batch_size,
    verbose=1
)

Train on 5326 samples, validate on 592 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1

Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epoch 145/1000
Epoch 146/1000
Epoch 147/1000
Epoch 148/1000
Epoch 149/1000
Epoch 150/1000
Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/

Epoch 235/1000
Epoch 236/1000
Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 256/1000
Epoch 257/1000
Epoch 258/1000
Epoch 259/1000
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000
Epoch 266/1000
Epoch 267/1000
Epoch 268/1000
Epoch 269/1000
Epoch 270/1000
Epoch 271/1000
Epoch 272/1000
Epoch 273/1000
Epoch 274/1000
Epoch 275/1000
Epoch 276/1000
Epoch 277/1000
Epoch 278/1000
Epoch 279/1000
Epoch 280/1000
Epoch 281/1000
Epoch 282/1000
Epoch 283/1000
Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/1000
Epoch 298/1000
Epoch 299/1000
Epoch 300/1000
Epoch 301/

Epoch 351/1000
Epoch 352/1000
Epoch 353/1000
Epoch 354/1000
Epoch 355/1000
Epoch 356/1000
Epoch 357/1000
Epoch 358/1000
Epoch 359/1000
Epoch 360/1000
Epoch 361/1000
Epoch 362/1000
Epoch 363/1000
Epoch 364/1000
Epoch 365/1000
Epoch 366/1000
Epoch 367/1000
Epoch 368/1000
Epoch 369/1000
Epoch 370/1000
Epoch 371/1000
Epoch 372/1000
Epoch 373/1000
Epoch 374/1000
Epoch 375/1000
Epoch 376/1000
Epoch 377/1000
Epoch 378/1000
Epoch 379/1000
Epoch 380/1000
Epoch 381/1000
Epoch 382/1000
Epoch 383/1000
Epoch 384/1000
Epoch 385/1000
Epoch 386/1000
Epoch 387/1000
Epoch 388/1000
Epoch 389/1000
Epoch 390/1000
Epoch 391/1000
Epoch 392/1000
Epoch 393/1000
Epoch 394/1000
Epoch 395/1000
Epoch 396/1000
Epoch 397/1000
Epoch 398/1000
Epoch 399/1000
Epoch 400/1000
Epoch 401/1000
Epoch 402/1000
Epoch 403/1000
Epoch 404/1000
Epoch 405/1000
Epoch 406/1000
Epoch 407/1000
Epoch 408/1000
Epoch 409/1000
Epoch 410/1000
Epoch 411/1000
Epoch 412/1000
Epoch 413/1000
Epoch 414/1000
Epoch 415/1000
Epoch 416/1000
Epoch 417/

Epoch 467/1000
Epoch 468/1000
Epoch 469/1000
Epoch 470/1000
Epoch 471/1000
Epoch 472/1000
Epoch 473/1000
Epoch 474/1000
Epoch 475/1000
Epoch 476/1000
Epoch 477/1000
Epoch 478/1000
Epoch 479/1000
Epoch 480/1000
Epoch 481/1000
Epoch 482/1000
Epoch 483/1000
Epoch 484/1000
Epoch 485/1000
Epoch 486/1000
Epoch 487/1000
Epoch 488/1000
Epoch 489/1000
Epoch 490/1000
Epoch 491/1000
Epoch 492/1000
Epoch 493/1000
Epoch 494/1000
Epoch 495/1000
Epoch 496/1000
Epoch 497/1000
Epoch 498/1000
Epoch 499/1000
Epoch 500/1000
Epoch 501/1000
Epoch 502/1000
Epoch 503/1000
Epoch 504/1000
Epoch 505/1000
Epoch 506/1000
Epoch 507/1000
Epoch 508/1000
Epoch 509/1000
Epoch 510/1000
Epoch 511/1000
Epoch 512/1000
Epoch 513/1000
Epoch 514/1000
Epoch 515/1000
Epoch 516/1000
Epoch 517/1000
Epoch 518/1000
Epoch 519/1000
Epoch 520/1000
Epoch 521/1000
Epoch 522/1000
Epoch 523/1000
Epoch 524/1000
Epoch 525/1000
Epoch 526/1000
Epoch 527/1000
Epoch 528/1000
Epoch 529/1000
Epoch 530/1000
Epoch 531/1000
Epoch 532/1000
Epoch 533/

Epoch 583/1000
Epoch 584/1000
Epoch 585/1000
Epoch 586/1000
Epoch 587/1000
Epoch 588/1000
Epoch 589/1000
Epoch 590/1000
Epoch 591/1000
Epoch 592/1000
Epoch 593/1000
Epoch 594/1000
Epoch 595/1000
Epoch 596/1000
Epoch 597/1000
Epoch 598/1000
Epoch 599/1000
Epoch 600/1000
Epoch 601/1000
Epoch 602/1000
Epoch 603/1000
Epoch 604/1000
Epoch 605/1000
Epoch 606/1000
Epoch 607/1000
Epoch 608/1000
Epoch 609/1000
Epoch 610/1000
Epoch 611/1000
Epoch 612/1000
Epoch 613/1000
Epoch 614/1000
Epoch 615/1000
Epoch 616/1000
Epoch 617/1000
Epoch 618/1000
Epoch 619/1000
Epoch 620/1000
Epoch 621/1000
Epoch 622/1000
Epoch 623/1000
Epoch 624/1000
Epoch 625/1000
Epoch 626/1000
Epoch 627/1000
Epoch 628/1000
Epoch 629/1000
Epoch 630/1000
Epoch 631/1000
Epoch 632/1000
Epoch 633/1000
Epoch 634/1000
Epoch 635/1000
Epoch 636/1000
Epoch 637/1000
Epoch 638/1000
Epoch 639/1000
Epoch 640/1000
Epoch 641/1000
Epoch 642/1000
Epoch 643/1000
Epoch 644/1000
Epoch 645/1000
Epoch 646/1000
Epoch 647/1000
Epoch 648/1000
Epoch 649/

Epoch 699/1000
Epoch 700/1000
Epoch 701/1000
Epoch 702/1000
Epoch 703/1000
Epoch 704/1000
Epoch 705/1000
Epoch 706/1000
Epoch 707/1000
Epoch 708/1000
Epoch 709/1000
Epoch 710/1000
Epoch 711/1000
Epoch 712/1000
Epoch 713/1000
Epoch 714/1000
Epoch 715/1000
Epoch 716/1000
Epoch 717/1000
Epoch 718/1000
Epoch 719/1000
Epoch 720/1000
Epoch 721/1000
Epoch 722/1000
Epoch 723/1000
Epoch 724/1000
Epoch 725/1000
Epoch 726/1000
Epoch 727/1000
Epoch 728/1000
Epoch 729/1000
Epoch 730/1000
Epoch 731/1000
Epoch 732/1000
Epoch 733/1000
Epoch 734/1000
Epoch 735/1000
Epoch 736/1000
Epoch 737/1000
Epoch 738/1000
Epoch 739/1000
Epoch 740/1000
Epoch 741/1000
Epoch 742/1000
Epoch 743/1000
Epoch 744/1000
Epoch 745/1000
Epoch 746/1000
Epoch 747/1000
Epoch 748/1000
Epoch 749/1000
Epoch 750/1000
Epoch 751/1000
Epoch 752/1000
Epoch 753/1000
Epoch 754/1000
Epoch 755/1000
Epoch 756/1000
Epoch 757/1000
Epoch 758/1000
Epoch 759/1000
Epoch 760/1000
Epoch 761/1000
Epoch 762/1000
Epoch 763/1000
Epoch 764/1000
Epoch 765/

Epoch 815/1000
Epoch 816/1000
Epoch 817/1000
Epoch 818/1000
Epoch 819/1000
Epoch 820/1000
Epoch 821/1000
Epoch 822/1000
Epoch 823/1000
Epoch 824/1000
Epoch 825/1000
Epoch 826/1000
Epoch 827/1000
Epoch 828/1000
Epoch 829/1000
Epoch 830/1000
Epoch 831/1000
Epoch 832/1000
Epoch 833/1000
Epoch 834/1000
Epoch 835/1000
Epoch 836/1000
Epoch 837/1000
Epoch 838/1000
Epoch 839/1000
Epoch 840/1000
Epoch 841/1000
Epoch 842/1000
Epoch 843/1000
Epoch 844/1000
Epoch 845/1000
Epoch 846/1000
Epoch 847/1000
Epoch 848/1000
Epoch 849/1000
Epoch 850/1000
Epoch 851/1000
Epoch 852/1000
Epoch 853/1000
Epoch 854/1000
Epoch 855/1000
Epoch 856/1000
Epoch 857/1000
Epoch 858/1000
Epoch 859/1000
Epoch 860/1000
Epoch 861/1000
Epoch 862/1000
Epoch 863/1000
Epoch 864/1000
Epoch 865/1000
Epoch 866/1000
Epoch 867/1000
Epoch 868/1000
Epoch 869/1000
Epoch 870/1000
Epoch 871/1000
Epoch 872/1000
Epoch 873/1000
Epoch 874/1000
Epoch 875/1000
Epoch 876/1000
Epoch 877/1000
Epoch 878/1000
Epoch 879/1000
Epoch 880/1000
Epoch 881/

Epoch 931/1000
Epoch 932/1000
Epoch 933/1000
Epoch 934/1000
Epoch 935/1000
Epoch 936/1000
Epoch 937/1000
Epoch 938/1000
Epoch 939/1000
Epoch 940/1000
Epoch 941/1000
Epoch 942/1000
Epoch 943/1000
Epoch 944/1000
Epoch 945/1000
Epoch 946/1000
Epoch 947/1000
Epoch 948/1000
Epoch 949/1000
Epoch 950/1000
Epoch 951/1000
Epoch 952/1000
Epoch 953/1000
Epoch 954/1000
Epoch 955/1000
Epoch 956/1000
Epoch 957/1000
Epoch 958/1000
Epoch 959/1000
Epoch 960/1000
Epoch 961/1000
Epoch 962/1000
Epoch 963/1000
Epoch 964/1000
Epoch 965/1000
Epoch 966/1000
Epoch 967/1000
Epoch 968/1000
Epoch 969/1000
Epoch 970/1000
Epoch 971/1000
Epoch 972/1000
Epoch 973/1000
Epoch 974/1000
Epoch 975/1000
Epoch 976/1000
Epoch 977/1000
Epoch 978/1000
Epoch 979/1000
Epoch 980/1000
Epoch 981/1000
Epoch 982/1000
Epoch 983/1000
Epoch 984/1000
Epoch 985/1000
Epoch 986/1000
Epoch 987/1000
Epoch 988/1000
Epoch 989/1000
Epoch 990/1000
Epoch 991/1000
Epoch 992/1000
Epoch 993/1000
Epoch 994/1000
Epoch 995/1000
Epoch 996/1000
Epoch 997/

In [33]:
score = model_lstm.evaluate(x_test, y_test, verbose=1)
model_lstm.save('purchase_classification_20200205.h5')
print (score)

[0.23039306592552256, 0.8951048851013184]


In [34]:
y_pred = model_lstm.predict(x_test)
purchases = []
for result,_id, compra in zip(y_pred, test_data['id'], test_data['purchase']):
    print (f"{_id} {'Nada' if result[0] > 0.95 else 'Compr' if result[1] > 0.8 else 'Sece'} {'Compr' if compra == 1 else 'Nada'} {result[0]}")

1580407200_022 Sece Compr 0.2664857506752014
1580407200_056 Sece Compr 0.5325844883918762
1580392800_045 Sece Compr 0.3057483732700348
1580395200_029 Sece Compr 0.21140056848526
1580395200_038 Sece Compr 0.2699207663536072
1580395200_040 Compr Compr 0.15642638504505157
1580395200_074 Sece Compr 0.30861037969589233
1580403600_017 Sece Compr 0.7738438844680786
1580404800_000 Sece Compr 0.38707664608955383
1580404800_001 Sece Compr 0.6824271082878113
1580404800_073 Sece Compr 0.47753235697746277
1580406000_072 Sece Compr 0.5504408478736877
1580408400_030 Sece Compr 0.3541993200778961
1580408400_049 Sece Compr 0.20033667981624603
1580408400_053 Sece Compr 0.48832955956459045
1580408400_061 Sece Compr 0.22162388265132904
1580408400_069 Sece Compr 0.9153889417648315
1580412000_104 Sece Compr 0.2736496329307556
1580413200_002 Sece Compr 0.25770294666290283
1580413200_046 Sece Compr 0.5442635416984558
1580413200_071 Nada Compr 0.971476674079895
1580414400_037 Sece Compr 0.25460201501846313
158

In [35]:
matrix = confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))
[['tp', 'fp'],['fn', 'tn']]
print(matrix)

[[314  23]
 [ 22  70]]
