In [10]:
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Bidirectional

In [8]:
# Load the session log of the mini training set
training_set = pd.read_csv("data/mini_data/data/training_set/log_mini.csv")

# Store the three string-valued columns as pandas categoricals
training_set = training_set.astype({
    'hist_user_behavior_reason_start': 'category',
    'hist_user_behavior_reason_end': 'category',
    'context_type': 'category',
})

# Parse the whole column with one vectorized call instead of once per row
training_set['date'] = pd.to_datetime(training_set['date'])

# Load in the track features
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files
track_data = pd.read_pickle("data/pickles/track_features.pkl")

# Binary target: 1 when skip_1 or skip_2 is set for the row, otherwise 0.
# NOTE(review): an earlier comment called this "skip_2 as the ground truth";
# the two agree only if skip_1 always implies skip_2 -- confirm against the data.
training_set['skipped'] = (training_set.skip_2 | training_set.skip_1).astype('int32')
training_set = training_set.drop(columns=['skip_1','skip_2','skip_3','not_skipped'])

# Cast every column after the first one (track_id) to float32.
# astype returns new columns, so the dtype really changes; assigning through
# .iloc[:, 1:] writes into the existing columns on recent pandas and can leave
# them as float64.
track_data = track_data.astype(dict.fromkeys(track_data.columns[1:], 'float32'))

training_set.head()

Unnamed: 0,session_id,session_position,session_length,track_id_clean,context_switch,no_pause_before_play,short_pause_before_play,long_pause_before_play,hist_user_behavior_n_seekfwd,hist_user_behavior_n_seekback,hist_user_behavior_is_shuffle,hour_of_day,date,premium,context_type,hist_user_behavior_reason_start,hist_user_behavior_reason_end,skipped
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,1,20,t_0479f24c-27d2-46d6-a00c-7ec928f2b539,0,0,0,0,0,0,True,16,2018-07-15,True,editorial_playlist,trackdone,trackdone,0
1,0_00006f66-33e5-4de7-a324-2d18e439fc1e,2,20,t_9099cd7b-c238-47b7-9381-f23f2c1d1043,0,1,0,0,0,0,True,16,2018-07-15,True,editorial_playlist,trackdone,trackdone,0
2,0_00006f66-33e5-4de7-a324-2d18e439fc1e,3,20,t_fc5df5ba-5396-49a7-8b29-35d0d28249e0,0,1,0,0,0,0,True,16,2018-07-15,True,editorial_playlist,trackdone,trackdone,0
3,0_00006f66-33e5-4de7-a324-2d18e439fc1e,4,20,t_23cff8d6-d874-4b20-83dc-94e450e8aa20,0,1,0,0,0,0,True,16,2018-07-15,True,editorial_playlist,trackdone,trackdone,0
4,0_00006f66-33e5-4de7-a324-2d18e439fc1e,5,20,t_64f3743c-f624-46bb-a579-0f3f9a07a123,0,1,0,0,0,0,True,16,2018-07-15,True,editorial_playlist,trackdone,trackdone,0


In [9]:
# Preview the first rows of the track-feature table
track_data.head()

Unnamed: 0,track_id,duration,release_year,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,flatness,instrumentalness,key,liveness,loudness,mechanism,mode,organism,speechiness,tempo,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7
0,t_a540e552-16d4-42f8-a185-232bd650ea7d,109.706673,1950.0,99.975418,0.45804,0.519497,0.504949,0.399767,7.51188,0.817709,0.903753,3.254327e-06,0.0,0.132124,-11.238,0.3861,1.0,0.541606,0.079985,166.287003,4.0,0.935512,-0.033284,-0.411896,-0.02858,0.349438,0.832467,-0.213871,-0.299464,-0.675907
1,t_67965da0-132b-4b1e-8a69-0ef99b32287c,187.693329,1950.0,99.969429,0.916272,0.419223,0.54553,0.491235,9.098376,0.154258,1.037952,8.344854e-12,0.0,0.163281,-13.706,0.125,1.0,0.895874,0.083877,95.261002,3.0,0.359675,0.145703,-0.850372,0.12386,0.746904,0.371803,-0.420558,-0.21312,-0.525795
2,t_0614ecd3-a7d5-40a1-816e-156d5872a467,160.839996,1951.0,99.602547,0.812884,0.42589,0.50828,0.491625,8.36867,0.358813,1.004268,2.927475e-10,0.0,0.090115,-10.522,0.200669,0.0,0.806136,0.038777,105.185997,4.0,0.726769,0.02172,-0.743634,0.333247,0.568447,0.411094,-0.187749,-0.387599,-0.433496
3,t_070a63a0-744a-434e-9913-a97b02926a29,175.399994,1951.0,99.665016,0.396854,0.400934,0.35999,0.552227,5.967346,0.514585,0.975138,3.981341e-11,0.0,0.360924,-11.032,0.427152,1.0,0.492772,0.038337,119.441002,4.0,0.859075,0.039143,-0.267555,-0.051825,0.106173,0.614825,-0.111419,-0.265953,-0.542753
4,t_d6990e17-9c31-4b01-8559-47d9ce476df1,369.600006,1951.0,99.99176,0.728831,0.371328,0.335115,0.483044,5.802681,0.721442,0.976866,2.541884e-10,0.0,0.189162,-6.836,0.28125,1.0,0.723808,0.032043,95.261002,4.0,0.562343,0.131931,-0.292523,-0.174819,-0.034422,0.717229,-0.016239,-0.392694,-0.455496


In [27]:
# Attach each track's features to the session-log rows, then drop the two
# track-id key columns. merge() defaults to an inner join, so log rows whose
# track has no row in track_data are discarded.
full_data = training_set.merge(
    track_data,
    left_on='track_id_clean',
    right_on='track_id'
).drop(
    columns=["track_id_clean","track_id"]
)

# One-hot encode the categorical columns; drop_first removes one level per
# column so the indicator columns are not perfectly collinear.
category_columns = [
    'context_type',
    'hist_user_behavior_reason_start',
    'hist_user_behavior_reason_end'
]
full_data = pd.get_dummies(full_data,columns=category_columns,drop_first=True)

# Datetime -> integer nanoseconds since the epoch. 'int64' is spelled out
# because plain 'int' maps to the platform C long, which is 32-bit on some
# platforms and cannot hold these values.
full_data['date'] = full_data['date'].astype('int64')

# Cast every column except the session_id string to float32. astype (rather
# than assigning through .iloc) guarantees the dtypes actually change.
# NOTE(review): float32 cannot represent nanosecond timestamps exactly, so
# `date` is rounded here and is also on a vastly larger scale than the other
# features -- consider rescaling it before training.
full_data = full_data.astype(
    dict.fromkeys(full_data.columns.drop('session_id'), 'float32')
)

full_data.head()

Unnamed: 0,session_id,session_position,session_length,context_switch,no_pause_before_play,short_pause_before_play,long_pause_before_play,hist_user_behavior_n_seekfwd,hist_user_behavior_n_seekback,hist_user_behavior_is_shuffle,hour_of_day,date,premium,skipped,duration,release_year,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,flatness,instrumentalness,key,liveness,loudness,mechanism,mode,organism,speechiness,tempo,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7,context_type_charts,context_type_editorial_playlist,context_type_personalized_playlist,context_type_radio,context_type_user_collection,hist_user_behavior_reason_start_backbtn,hist_user_behavior_reason_start_clickrow,hist_user_behavior_reason_start_endplay,hist_user_behavior_reason_start_fwdbtn,hist_user_behavior_reason_start_playbtn,hist_user_behavior_reason_start_remote,hist_user_behavior_reason_start_trackdone,hist_user_behavior_reason_start_trackerror,hist_user_behavior_reason_end_clickrow,hist_user_behavior_reason_end_endplay,hist_user_behavior_reason_end_fwdbtn,hist_user_behavior_reason_end_logout,hist_user_behavior_reason_end_remote,hist_user_behavior_reason_end_trackdone
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,1.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0_00079a23-1600-486a-91bd-5208be0c745a,7.0,12.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,17.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0_012b0fb4-0cc3-429f-9a78-cc6e622153fb,6.0,20.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,21.0,1.531526e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0_013cc010-c476-4ad2-8972-73449e0b2ef4,9.0,13.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,18.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0_01a5f0dc-9938-48c9-92f1-c7e51f34d290,7.0,12.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,19.0,1.531526e+18,0.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [28]:
# Summarise the column dtypes and non-null counts of the merged table
full_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 167880 entries, 0 to 167879
Data columns (total 62 columns):
session_id                                    167880 non-null object
session_position                              167880 non-null float32
session_length                                167880 non-null float32
context_switch                                167880 non-null float32
no_pause_before_play                          167880 non-null float32
short_pause_before_play                       167880 non-null float32
long_pause_before_play                        167880 non-null float32
hist_user_behavior_n_seekfwd                  167880 non-null float32
hist_user_behavior_n_seekback                 167880 non-null float32
hist_user_behavior_is_shuffle                 167880 non-null float32
hour_of_day                                   167880 non-null float32
date                                          167880 non-null float32
premium                                       1678

In [33]:
# Count the distinct session ids present in the merged table
print("Number of unique sessions: %d" % len(full_data.session_id.unique()))

Number of unique sessions: 10000


In [41]:
# How many sessions reach each maximum session_position
(
    full_data
    .groupby('session_id')['session_position']
    .max()
    .value_counts()
)

20.0    5066
10.0     819
11.0     752
12.0     646
13.0     547
14.0     459
15.0     435
16.0     339
17.0     320
19.0     314
18.0     303
Name: session_position, dtype: int64

In [38]:
# Highest session_position observed for each session_id
session_lengths = full_data.groupby('session_id').session_position.max()
# Per-session flag: True when the session reaches position 20
sessions_full_length = session_lengths == 20
# Keep only the rows that belong to a flagged session. Each row's session_id
# is looked up in the per-session flags to build a row-aligned boolean mask
# (the previous `full_data[[]]` selected zero columns and produced an empty frame).
full_data_s20 = full_data[full_data.session_id.map(sessions_full_length)]

20.0    5066
10.0     819
11.0     752
12.0     646
13.0     547
14.0     459
15.0     435
16.0     339
17.0     320
19.0     314
18.0     303
Name: session_position, dtype: int64

In [61]:
# Highest position reached per session, as a (session_id, session_position) frame
session_lengths = (
    full_data
    .groupby('session_id')['session_position']
    .max()
    .reset_index()
)
# Array of the session ids whose highest position is 20
full_length_session_ids = session_lengths.loc[
    session_lengths['session_position'] == 20, 'session_id'
].values
# Spot check: is this particular session among them?
"0_00006f66-33e5-4de7-a324-2d18e439fc1e" in full_length_session_ids

True

In [66]:
# Keep only the rows whose session is in full_length_session_ids.
# Series.isin does a hashed membership test and returns a mask that carries
# full_data's own index, so it stays correct even when that index is not the
# default 0..n-1 range (a hand-built pd.Series mask would be aligned by label).
full_data_subset = full_data[
    full_data.session_id.isin(full_length_session_ids)
].reset_index(drop=True)
full_data_subset.head(10)

Unnamed: 0,session_id,session_position,session_length,context_switch,no_pause_before_play,short_pause_before_play,long_pause_before_play,hist_user_behavior_n_seekfwd,hist_user_behavior_n_seekback,hist_user_behavior_is_shuffle,hour_of_day,date,premium,skipped,duration,release_year,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,flatness,instrumentalness,key,liveness,loudness,mechanism,mode,organism,speechiness,tempo,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7,context_type_charts,context_type_editorial_playlist,context_type_personalized_playlist,context_type_radio,context_type_user_collection,hist_user_behavior_reason_start_backbtn,hist_user_behavior_reason_start_clickrow,hist_user_behavior_reason_start_endplay,hist_user_behavior_reason_start_fwdbtn,hist_user_behavior_reason_start_playbtn,hist_user_behavior_reason_start_remote,hist_user_behavior_reason_start_trackdone,hist_user_behavior_reason_start_trackerror,hist_user_behavior_reason_end_clickrow,hist_user_behavior_reason_end_endplay,hist_user_behavior_reason_end_fwdbtn,hist_user_behavior_reason_end_logout,hist_user_behavior_reason_end_remote,hist_user_behavior_reason_end_trackdone
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,1.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0_012b0fb4-0cc3-429f-9a78-cc6e622153fb,6.0,20.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,21.0,1.531526e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0_01f86af2-cf48-42a6-9bf8-adb91f072909,7.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,11.0,1.531613e+18,0.0,1.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0_02340c3b-c370-4c98-b759-87be85e11aa3,8.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,15.0,1.531613e+18,1.0,1.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,0_02a8ebbd-5a12-4343-a7da-2ea912102f90,2.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,15.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
5,0_04ab5699-1f6a-40ef-b8a6-9ebad09e85f6,16.0,20.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,9.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,0_05219b57-68f2-4eac-a139-85cef6f3809b,14.0,20.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
7,0_05b0cc6b-6615-44bd-ba6d-d6737cd5324a,16.0,20.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
8,0_06a28c80-2dca-434d-8404-3a86f77a0a10,8.0,20.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
9,0_0792cde4-90a0-45e9-bf9e-b4f78114f1bf,19.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,22.0,1.531526e+18,1.0,1.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [87]:
# Order rows by session, then by position inside the session, and renumber
# the index from zero
full_data_subset = (
    full_data_subset
    .sort_values(by=["session_id", 'session_position'])
    .reset_index(drop=True)
)
full_data_subset.head(n=30)

Unnamed: 0,session_id,session_position,session_length,context_switch,no_pause_before_play,short_pause_before_play,long_pause_before_play,hist_user_behavior_n_seekfwd,hist_user_behavior_n_seekback,hist_user_behavior_is_shuffle,hour_of_day,date,premium,skipped,duration,release_year,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,flatness,instrumentalness,key,liveness,loudness,mechanism,mode,organism,speechiness,tempo,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7,context_type_charts,context_type_editorial_playlist,context_type_personalized_playlist,context_type_radio,context_type_user_collection,hist_user_behavior_reason_start_backbtn,hist_user_behavior_reason_start_clickrow,hist_user_behavior_reason_start_endplay,hist_user_behavior_reason_start_fwdbtn,hist_user_behavior_reason_start_playbtn,hist_user_behavior_reason_start_remote,hist_user_behavior_reason_start_trackdone,hist_user_behavior_reason_start_trackerror,hist_user_behavior_reason_end_clickrow,hist_user_behavior_reason_end_endplay,hist_user_behavior_reason_end_fwdbtn,hist_user_behavior_reason_end_logout,hist_user_behavior_reason_end_remote,hist_user_behavior_reason_end_trackdone
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,1.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,0.0,180.066666,2018.0,99.968132,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484468,1.0,0.678553,-6.577,0.546784,1.0,0.320668,0.069717,134.024994,4.0,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0_00006f66-33e5-4de7-a324-2d18e439fc1e,2.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,0.0,236.796371,2018.0,99.896729,0.061811,0.654804,0.735661,0.877393,11.30875,0.726828,1.025636,1.031315e-07,7.0,0.104322,-5.319,0.824766,0.0,0.131391,0.061158,130.037994,4.0,0.337152,-0.713646,0.363718,0.310315,-0.042222,-0.383164,0.066357,-0.365308,0.15792,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0_00006f66-33e5-4de7-a324-2d18e439fc1e,3.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,0.0,231.266663,2018.0,99.999977,0.354116,0.532155,0.540411,0.679719,8.065802,0.563009,1.029465,2.659035e-08,10.0,0.135776,-5.843,0.774327,1.0,0.296923,0.045354,145.028,4.0,0.373862,-0.742541,0.375599,0.25266,-0.049007,-0.299745,0.063341,-0.486689,0.181604,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0_00006f66-33e5-4de7-a324-2d18e439fc1e,4.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,0.0,169.826675,2018.0,99.995041,0.769225,0.641756,0.729224,0.864881,11.287586,0.529484,0.99352,6.598388e-06,1.0,0.103722,-7.756,0.630996,1.0,0.603271,0.229936,111.982002,4.0,0.64942,-0.705116,0.317562,0.289141,-0.03892,-0.393358,0.092719,-0.364418,0.285603,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0_00006f66-33e5-4de7-a324-2d18e439fc1e,5.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,0.0,210.545258,2018.0,99.998497,0.006602,0.732428,0.794881,0.857778,12.181586,0.650057,1.000571,2.066649e-06,8.0,0.120842,-4.919,0.759465,1.0,0.170148,0.24098,147.031006,4.0,0.652921,-0.868489,0.33128,0.210478,0.08474,-0.333287,-0.025706,-0.51035,0.182315,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,0_00006f66-33e5-4de7-a324-2d18e439fc1e,6.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,0.0,216.689484,2018.0,99.997604,0.098337,0.548632,0.658323,0.59648,10.416269,0.557129,1.02732,7.479557e-08,1.0,0.056493,-7.522,0.385027,0.0,0.440376,0.133586,133.246994,5.0,0.661081,-0.817504,0.283297,0.387589,0.279636,-0.280334,0.117993,0.106159,0.311233,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,0_00006f66-33e5-4de7-a324-2d18e439fc1e,7.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,1.0,186.453323,2018.0,99.861565,0.449629,0.487708,0.608612,0.55752,9.834414,0.46935,1.033029,2.062497e-08,6.0,0.111306,-7.808,0.157576,1.0,0.67522,0.409848,104.466003,4.0,0.10942,-0.748412,0.321976,0.237488,0.00348,-0.315287,0.032431,-0.464694,0.200836,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
7,0_00006f66-33e5-4de7-a324-2d18e439fc1e,8.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,1.0,214.047348,2018.0,97.24382,0.017628,0.649367,0.68261,0.489405,9.900208,0.715155,1.050186,2.530034e-05,1.0,0.114787,-6.96,0.354167,1.0,0.456843,0.103687,83.036003,4.0,0.389913,-0.921928,0.35974,0.293674,0.115302,-0.274987,0.043193,-0.444351,0.211909,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
8,0_00006f66-33e5-4de7-a324-2d18e439fc1e,9.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,1.0,218.260818,2018.0,99.998451,0.030127,0.818798,0.879947,0.964383,15.037571,0.55528,1.046868,0.0005667919,5.0,0.128244,-6.245,0.857855,0.0,0.102744,0.049853,130.016998,4.0,0.338321,-0.744412,0.3087,0.230126,0.066493,-0.242549,0.02537,-0.40321,0.15935,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
9,0_00006f66-33e5-4de7-a324-2d18e439fc1e,10.0,20.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,1.531613e+18,1.0,1.0,60.0,2018.0,99.957436,0.332467,0.7515,0.843354,0.914082,14.457762,0.239318,1.040802,0.0004132156,4.0,0.119036,-8.597,0.622222,0.0,0.355844,0.154609,100.237,4.0,0.257672,-0.647221,0.316101,0.251329,-0.041532,-0.252359,0.059971,-0.313696,0.126421,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [81]:
# (rows, columns) of the full-length-session subset
full_data_subset.shape

(101320, 62)

In [91]:
# Number of consecutive rows that form one session in the reshaped arrays
SESSION_LENGTH = 20

# The reshape below simply cuts the table into consecutive 20-row chunks, so
# it is only valid if every chunk is a single session with positions 1..20 in
# order. Check that explicitly instead of silently mixing sessions.
assert len(full_data_subset) % SESSION_LENGTH == 0, \
    "row count is not a multiple of the session length"
session_id_grid = full_data_subset.session_id.values.reshape((-1, SESSION_LENGTH))
position_grid = full_data_subset.session_position.values.reshape((-1, SESSION_LENGTH))
assert (session_id_grid == session_id_grid[:, :1]).all(), \
    "a 20-row chunk spans more than one session"
assert (position_grid == np.arange(1, SESSION_LENGTH + 1)).all(), \
    "rows are not ordered by session_position 1..20 within each session"

# X: (sessions, SESSION_LENGTH, features) -- every column except the session
# id and the target. y: (sessions, SESSION_LENGTH, 1) -- the skipped flag.
feature_frame = full_data_subset.drop(columns=['session_id','skipped'])
X = feature_frame.values.reshape((-1, SESSION_LENGTH, feature_frame.shape[1]))
y = full_data_subset.skipped.values.reshape((-1, SESSION_LENGTH, 1))
X.shape, y.shape

((5066, 20, 60), (5066, 20, 1))

In [95]:
# Feature rows of the first session in X
X[0]

array([[ 1., 20.,  0., ...,  0.,  0.,  1.],
       [ 2., 20.,  0., ...,  0.,  0.,  1.],
       [ 3., 20.,  0., ...,  0.,  0.,  1.],
       ...,
       [18., 20.,  0., ...,  0.,  0.,  0.],
       [19., 20.,  0., ...,  0.,  0.,  0.],
       [20., 20.,  0., ...,  0.,  0.,  0.]], dtype=float32)

In [113]:
# Target values of the first session in y
y[0]

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]], dtype=float32)

In [114]:
# Row count of the subset multiplied by 20
# NOTE(review): purpose is not stated -- presumably a size estimate for the
# windowed arrays built afterwards; confirm
full_data_subset.shape[0] * 20

2026400

In [115]:
# Expand every session into one sample per window length k = 1..n_steps.
# Sample k of a session holds that session's LAST k time steps in rows 0..k-1,
# followed by all-zero rows up to n_steps. Samples are ordered session by
# session, shortest window first.
# NOTE(review): the windows are suffixes (last k steps), not prefixes --
# confirm this is the intended augmentation.
n_sessions, n_steps, n_features = X.shape

# Preallocate with the source dtypes: np.zeros defaults to float64, which
# would silently upcast the float32 inputs and double the memory footprint.
X2 = np.zeros((n_sessions * n_steps, n_steps, n_features), dtype=X.dtype)
y2 = np.zeros((n_sessions * n_steps, n_steps, y.shape[-1]), dtype=y.dtype)

for k in range(1, n_steps + 1):
    # Rows k-1, k-1+n_steps, ... are the length-k sample of sessions 0, 1, ...
    X2[k - 1::n_steps, :k, :] = X[:, n_steps - k:, :]
    y2[k - 1::n_steps, :k, :] = y[:, n_steps - k:, :]

X2.shape, y2.shape

((101320, 20, 60), (101320, 20, 1))

In [123]:
# Feature rows of the first session in X, displayed again after building X2
X[0]

array([[ 1., 20.,  0., ...,  0.,  0.,  1.],
       [ 2., 20.,  0., ...,  0.,  0.,  1.],
       [ 3., 20.,  0., ...,  0.,  0.,  1.],
       ...,
       [18., 20.,  0., ...,  0.,  0.,  0.],
       [19., 20.,  0., ...,  0.,  0.,  0.],
       [20., 20.,  0., ...,  0.,  0.,  0.]], dtype=float32)

***

Now let's create the model that we'll be training

In [None]:
def build_model():
    m = Sequential()