# Predicción de crímenes
### Trabajo Fin de Máster para el Máster Universitario en Ciencia de Datos
### Universitat Oberta de Catalunya
### Realizado por Álvaro Pavón Díaz

## Parte 2: Modelado y evaluación de los datos
Durante este Jupyter Notebook nos dedicaremos a realizar el entrenamiento de los conjunto de datos sobre diferentes métodos.

Para ello lo primero se va a proceder a importar las diferentes clases que se necesitarán a lo largo de este notebook y la importación de los archivos que se va a utilizar. Además de mostrar que los proceso realizados a través de TensorFlow van a ser acelerados mediante GPU (GeForce RTX 2070 SUPER)

In [1]:
import pandas as pd
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import time_function
import tensorflow as tf
from tensorflow.python.client import device_lib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import os
#os.environ["CUDA_VISIBLE_DEVICES"]="-1" 

print(device_lib.list_local_devices())
print(tf.version)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14663795888489606148
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 13305875057602867155
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 5175569564501812678
physical_device_desc: "device: XLA_GPU device"
]
<module 'tensorflow._api.v2.version' from 'C:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow\\_api\\v2\\version\\__init__.py'>


In [2]:
crimes = pd.read_csv ('crimes_clean.csv', sep='""', delimiter=',', engine='python')
crimes.head(5)

Unnamed: 0,OFFENSE_CODE_GROUP,DISTRICT,SHOOTING,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,IS_NIGHT,DAY_OF_MONTH
0,Disorderly Conduct,E18,0.0,2018,10,Wednesday,20,Part Two,ARLINGTON ST,42.262608,-71.121186,1,3
1,Property Lost,D14,0.0,2018,8,Thursday,20,Part Three,ALLSTON ST,42.352111,-71.135311,1,30
2,Other,B2,0.0,2018,10,Wednesday,19,Part Two,DEVON ST,42.308126,-71.07693,1,3
3,Aggravated Assault,A1,0.0,2018,10,Wednesday,20,Part One,CAMBRIDGE ST,42.359454,-71.059648,1,3
4,Aircraft,A7,0.0,2018,10,Wednesday,20,Part Three,PRESCOTT ST,42.375258,-71.024663,1,3


In [3]:
crimes_time = pd.read_csv ('crimes_clean_time.csv', sep='""', delimiter=',', engine='python')
crimes_time.head(5)

Unnamed: 0,OFFENSE_CODE_GROUP,DISTRICT,SHOOTING,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,IS_NIGHT,DAY_OF_MONTH,TIME_X,TIME_Y,Lat_M,Long_M
0,Disorderly Conduct,E18,0.0,2018,10,Wednesday,20,Part Two,ARLINGTON ST,42.262608,-71.121186,1,3,0.518198,0.000331,0.185665,0.267386
1,Property Lost,D14,0.0,2018,8,Thursday,20,Part Three,ALLSTON ST,42.352111,-71.135311,1,30,0.239106,0.073462,0.736023,0.201687
2,Other,B2,0.0,2018,10,Wednesday,19,Part Two,DEVON ST,42.308126,-71.07693,1,3,0.51784,0.000318,0.465558,0.473233
3,Aggravated Assault,A1,0.0,2018,10,Wednesday,20,Part One,CAMBRIDGE ST,42.359454,-71.059648,1,3,0.518198,0.000331,0.78117,0.553614
4,Aircraft,A7,0.0,2018,10,Wednesday,20,Part Three,PRESCOTT ST,42.375258,-71.024663,1,3,0.518198,0.000331,0.87835,0.716335


In [4]:
crimes_time=crimes_time[['OFFENSE_CODE_GROUP', 'SHOOTING','UCR_PART','YEAR','MONTH','DAY_OF_MONTH','DAY_OF_WEEK','HOUR','IS_NIGHT','Lat', 'Lat_M', 'Long','Long_M', 'TIME_X', 'TIME_Y', 'DISTRICT']]
crimes_time.head(5)

Unnamed: 0,OFFENSE_CODE_GROUP,SHOOTING,UCR_PART,YEAR,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,HOUR,IS_NIGHT,Lat,Lat_M,Long,Long_M,TIME_X,TIME_Y,DISTRICT
0,Disorderly Conduct,0.0,Part Two,2018,10,3,Wednesday,20,1,42.262608,0.185665,-71.121186,0.267386,0.518198,0.000331,E18
1,Property Lost,0.0,Part Three,2018,8,30,Thursday,20,1,42.352111,0.736023,-71.135311,0.201687,0.239106,0.073462,D14
2,Other,0.0,Part Two,2018,10,3,Wednesday,19,1,42.308126,0.465558,-71.07693,0.473233,0.51784,0.000318,B2
3,Aggravated Assault,0.0,Part One,2018,10,3,Wednesday,20,1,42.359454,0.78117,-71.059648,0.553614,0.518198,0.000331,A1
4,Aircraft,0.0,Part Three,2018,10,3,Wednesday,20,1,42.375258,0.87835,-71.024663,0.716335,0.518198,0.000331,A7


In [5]:
crimes_times = crimes_time.sort_values(['YEAR', 'MONTH', 'DAY_OF_MONTH', 'HOUR'], ascending=[True, True, True, True])
crimes_times.head(5)

Unnamed: 0,OFFENSE_CODE_GROUP,SHOOTING,UCR_PART,YEAR,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,HOUR,IS_NIGHT,Lat,Lat_M,Long,Long_M,TIME_X,TIME_Y,DISTRICT
127095,Harassment,0.0,Part Two,2015,6,15,Monday,0,1,42.291093,0.36082,-71.065945,0.524324,0.022469,0.648202,C11
241630,Property Lost,0.0,Part Three,2015,6,15,Monday,0,1,42.283634,0.314958,-71.082813,0.445868,0.022469,0.648202,B3
288821,Fraud,0.0,Part Two,2015,6,15,Monday,0,1,42.360205,0.785788,-71.056208,0.569614,0.022469,0.648202,A1
295842,Other,0.0,Part Two,2015,6,15,Monday,0,1,42.293606,0.376273,-71.071887,0.49669,0.022469,0.648202,C11
300018,Confidence Games,0.0,Part Two,2015,6,15,Monday,0,1,42.300217,0.416923,-71.080979,0.454397,0.022469,0.648202,B3


In [6]:
crimes_time_one = crimes_time
le_dw = LabelEncoder()
dw_labels = le_dw.fit_transform(crimes_time_one['DAY_OF_WEEK'])
crimes_time_one['DAY_OF_WEEK_label'] = dw_labels

one_dw = OneHotEncoder()
days_of_week = one_dw.fit_transform(crimes_time_one[['DAY_OF_WEEK_label']]).toarray()
days_of_week_labels = list(le_dw.classes_)
days_of_week_df = pd.DataFrame(days_of_week, columns=days_of_week_labels)



In [7]:
le_ocg = LabelEncoder()
ocg_labels = le_ocg.fit_transform(crimes_time_one['OFFENSE_CODE_GROUP'])
crimes_time_one['OFFENSE_CODE_label'] = ocg_labels

one_ocg = OneHotEncoder()
ocg = one_ocg.fit_transform(crimes_time_one[['OFFENSE_CODE_label']]).toarray()
ocg_labels = list(le_ocg.classes_)
ocg_df = pd.DataFrame(ocg, columns=ocg_labels)

In [8]:
#le_year = LabelEncoder()
#year_labels = le_year.fit_transform(crimes_time_one['YEAR'])
#crimes_time_one['year_label'] = year_labels

#one_year = OneHotEncoder()
#year = one_year.fit_transform(crimes_time_one[['year_label']]).toarray()
#year_labels = list(le_year.classes_)
#year_df = pd.DataFrame(year, columns=year_labels)

In [9]:
le_d = LabelEncoder()
d_labels = le_d.fit_transform(crimes_time_one['DISTRICT'])
crimes_time_one['DISTRICT_labels'] = d_labels

In [10]:
le_ucr = LabelEncoder()
ucr_labels = le_ucr.fit_transform(crimes_time_one['UCR_PART'])
crimes_time_one['UCR_PART_label'] = ucr_labels

one_ucr = OneHotEncoder()
ucr = one_ucr.fit_transform(crimes_time_one[['UCR_PART_label']]).toarray()
ucr_labels = list(le_ucr.classes_)
ucr_df = pd.DataFrame(ucr, columns=ucr_labels)

In [11]:
crimes_time_one = pd.concat([ocg_df,ucr_df,days_of_week_df, crimes_time_one], axis=1)

crimes_time_one.drop('DAY_OF_WEEK_label', 1, inplace= True)
crimes_time_one.drop('OFFENSE_CODE_label', 1, inplace= True)
crimes_time_one.drop('DAY_OF_WEEK', 1, inplace= True)
crimes_time_one.drop('OFFENSE_CODE_GROUP', 1, inplace= True)
#crimes_time_one.drop('year_label', 1, inplace= True)
crimes_time_one.drop('DISTRICT', 1, inplace= True)
crimes_time_one.drop('Lat', 1, inplace= True)
crimes_time_one.drop('Long', 1, inplace= True)
crimes_time_one.drop('Lat_M', 1, inplace= True)
crimes_time_one.drop('Long_M', 1, inplace= True)
crimes_time_one.drop('UCR_PART', 1, inplace= True)

In [12]:
crimes_time_one = crimes_time_one.sort_values(['YEAR', 'MONTH', 'DAY_OF_MONTH', 'HOUR'], ascending=[True, True, True, True])
crimes_time_train_test = crimes_time_one.head(int(crimes_time_one.shape[0] * 0.8))
crimes_time_validation = crimes_time_one.tail(crimes_time_one.shape[0] - int(crimes_times.shape[0] * 0.8))
crimes_time_train_test.drop('YEAR', 1, inplace= True)
crimes_time_validation.drop('YEAR', 1, inplace= True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [13]:
crimes_time_train_test.head(5)

Unnamed: 0,Aggravated Assault,Aircraft,Assembly or Gathering Violations,Auto Theft,Ballistics,Biological Threat,Bomb Hoax,Commercial Burglary,Confidence Games,Counterfeiting,...,Wednesday,SHOOTING,MONTH,DAY_OF_MONTH,HOUR,IS_NIGHT,TIME_X,TIME_Y,DISTRICT_labels,UCR_PART_label
127095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6,15,0,1,0.022469,0.648202,5,2
241630,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6,15,0,1,0.022469,0.648202,4,1
288821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6,15,0,1,0.022469,0.648202,0,2
295842,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6,15,0,1,0.022469,0.648202,5,2
300018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,6,15,0,1,0.022469,0.648202,4,2


In [14]:
crimes_time_validation.head(5)

Unnamed: 0,Aggravated Assault,Aircraft,Assembly or Gathering Violations,Auto Theft,Ballistics,Biological Threat,Bomb Hoax,Commercial Burglary,Confidence Games,Counterfeiting,...,Wednesday,SHOOTING,MONTH,DAY_OF_MONTH,HOUR,IS_NIGHT,TIME_X,TIME_Y,DISTRICT_labels,UCR_PART_label
61407,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,3,14,0,0.91873,0.773249,4,1
61408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,3,14,0,0.91873,0.773249,7,0
61411,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,3,14,0,0.91873,0.773249,8,2
61412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,3,14,0,0.91873,0.773249,8,2
61413,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,3,14,0,0.91873,0.773249,8,2


In [15]:
crimes_time_validation.shape

(60846, 78)

In [16]:
crimes_time_train_test.shape

(243383, 78)

In [17]:
xTrain, xTest, yTrain, yTest = train_test_split(crimes_time_train_test.loc[:,'Aggravated Assault':'TIME_Y'], crimes_time_train_test.loc[:,'DISTRICT_labels'], test_size = 0.25, random_state = 0)
#xTrain, xTest, yTrain, yTest = train_test_split(crimes_time_train_test.loc[:,'OFFENSE_CODE_GROUP':'TIME_Y'], crimes_time_train_test.loc[:,'DISTRICT'], test_size = 0.25, random_state = 0)

In [18]:
#xTrain.drop([ 'TIME_X', 'TIME_Y'], 1, inplace= True)
#xTest.drop([ 'TIME_X', 'TIME_Y'], 1, inplace= True)
xTrain.drop([ 'MONTH', 'DAY_OF_MONTH', 'HOUR'], 1, inplace= True)
xTest.drop([ 'MONTH', 'DAY_OF_MONTH', 'HOUR'], 1, inplace= True)

In [19]:
xTrain.head(15)

Unnamed: 0,Aggravated Assault,Aircraft,Assembly or Gathering Violations,Auto Theft,Ballistics,Biological Threat,Bomb Hoax,Commercial Burglary,Confidence Games,Counterfeiting,...,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,SHOOTING,IS_NIGHT,TIME_X,TIME_Y
231328,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0.489538,0.999891
302995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.01267,0.611847
195984,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0,0.157651,0.135587
88225,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1,0.605984,0.011362
289842,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.108869,0.188526
82894,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.232153,0.077794
212207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0,0.017742,0.632013
102925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,0.168547,0.125649
196485,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.121758,0.172994
290356,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0.097312,0.203618


In [20]:
len(list(le_ocg.classes_))

59

In [21]:
xValidation = crimes_time_validation.loc[:,'Aggravated Assault':'TIME_Y']
yValidation =  crimes_time_validation.loc[:,'DISTRICT_labels']
xValidation.drop([ 'MONTH', 'DAY_OF_MONTH', 'HOUR'], 1, inplace= True)
def create_model (type_layer= 'LSTM', n_layers=1, n_embeding = 32,n_epochs = 20,batch_size=512, top_k=3):
    
    layer_r = tf.keras.layers.LSTM(n_embeding, return_sequences=True)
    layer_e = tf.keras.layers.LSTM(n_embeding)
    if type_layer == 'GRU':
        layer_r = tf.keras.layers.GRU(n_embeding, return_sequences=True)
        layer_e = tf.keras.layers.GRU(n_embeding)
        
    model_i = tf.keras.models.Sequential()
    model_i.add(tf.keras.layers.Embedding(30, n_embeding, input_length=73))
    if n_layers != 0:
        for i in range(n_layers):
            model_i.add(layer_r)
    
    model_i.add(layer_e)
    
    model_i.add(tf.keras.layers.Dense(12, activation='softmax'))
    optimizer = tf.keras.optimizers.Adam(lr=0.01)
    metric= tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="sparse_top_k_categorical_accuracy", dtype=None)
    model_i.compile (optimizer=optimizer,loss='sparse_categorical_crossentropy', metrics=[metric] )
    
    model_i.fit(xTrain, yTrain, epochs = n_epochs,batch_size=batch_size, validation_data=(xTest, yTest))
    return model_i.evaluate(xValidation, yValidation)


In [22]:
create_model (type_layer= 'LSTM', n_layers=0, n_embeding = 128,n_epochs = 20,batch_size=512 )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[2.3472208976745605, 0.42494165897369385]

In [23]:

def grid_search(n_layers=[0,1,2,3], type_layers= ['GRU','LSTM'], n_embedings=[64,128], n_epochs = 20, top_ks=[3,4]):
    i=0
    lista = {}
    for n_layer in n_layers:
        for type_l in type_layers:
            for n_embeding in n_embedings:
                for top_k in top_ks:
                    print("************ NUEVA PRUEBA *************")
                    print (type_l)
                    print (n_layer)
                    print (n_embeding)
                    print (top_k)
                    try:
                        fit = create_model (type_layer= type_l, n_layers=n_layer, n_embeding = n_embeding, n_epochs=n_epochs)
                        print(fit)
                        print(i)
                        lista[str(i)] = "type_l=" + type_l + ",n_layer="+ str(n_layer) + ",n_embeding" + str(n_embeding) + ",top_k=" + str(top_k)
                        i=i+1
                    except:
                        print("Imposible entrenar el modelos con los parametros siguientes")
    return lista



In [24]:
resultados0 = grid_search (n_layers=[0],type_layers= ['GRU','LSTM'], n_embedings=[64,128], top_ks=[3,4])


************ NUEVA PRUEBA *************
GRU
0
64
3
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[2.354361057281494, 0.419156551361084]
0
************ NUEVA PRUEBA *************
GRU
0
64
4
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[2.334188938140869, 0.44665220379829407]
1
************ NUEVA PRUEBA *************
GRU
0
128
3
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[2.3704614639282227, 0.40027281641960144]
2
************ NUEVA PRUEBA *************
GRU
0
128
4
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[2.360860586166382, 0.41549158096313477]
3
************ NUEVA PRUEBA *************
LSTM
0
64
3
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[2.319502592086792, 0.45084312558174133]
4
************ NUEVA PRUEBA *************
LSTM
0
64
4
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[2.326326847076416, 0.4476054310798645]
5
************ NUEVA PRUEBA *************
LSTM
0
128
3
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[2.3270952701568604, 0.4454689025878906]
6
************ NUEVA PRUEBA *************
LSTM
0
128
4
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[2.3215808868408203, 0.4530946910381317]
7


In [25]:
resultados1 = grid_search (n_layers=[1],type_layers= ['LSTM'], n_embedings=[64,128], top_ks=[3,4])

************ NUEVA PRUEBA *************
LSTM
1
64
3
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[2.3183233737945557, 0.45232224464416504]
0
************ NUEVA PRUEBA *************
LSTM
1
64
4
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[2.3268938064575195, 0.44377610087394714]
1
************ NUEVA PRUEBA *************
LSTM
1
128
3
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
************ NUEVA PRUEBA *************
LSTM
1
128
4
Epoch 1/20
  7/357 [..............................] - ETA: 4:21 - loss: 2.5208 - sparse_top_k_categorical_accuracy: 0.3610Imposible entrenar el modelos con los parametros siguientes


In [28]:
n_embeding=20

model_wo_D = tf.keras.models.Sequential()
model_wo_D.add(tf.keras.layers.Dense(500, activation='relu', input_dim=73))
model_wo_D.add(tf.keras.layers.Dense(250, activation='relu'))
model_wo_D.add(tf.keras.layers.Dense(100, activation='relu'))
model_wo_D.add(tf.keras.layers.Dense(50, activation='relu'))
model_wo_D.add(tf.keras.layers.Dense(12, activation='softmax'))
metric_wo_D= tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="sparse_top_k_categorical_accuracy", dtype=None)
model_wo_D.compile (optimizer='adam',loss='sparse_categorical_crossentropy', metrics=[metric_wo_D] )
model_fitted_wo_D = model_wo_D.fit(xTrain, yTrain, epochs = 100,batch_size=128, validation_data=(xTest, yTest))
results_wo_D = model_wo_D.evaluate(xValidation, yValidation)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100


Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [34]:
print(results_wo_D)

[2.5692942142486572, 0.4270288944244385]


In [29]:
n_embeding=20

model_wD = tf.keras.models.Sequential()
model_wD.add(tf.keras.layers.Dense(500, activation='relu', input_dim=73))
model_wD.add(tf.keras.layers.Dense(250, activation='relu'))
model_wD.add(tf.keras.layers.Dropout(0.25))
model_wD.add(tf.keras.layers.Dense(100, activation='relu'))
model_wD.add(tf.keras.layers.Dense(50, activation='relu'))
model_wD.add(tf.keras.layers.Dense(12, activation='softmax'))
metric_wD= tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="sparse_top_k_categorical_accuracy", dtype=None)
model_wD.compile (optimizer='adam',loss='sparse_categorical_crossentropy', metrics=[metric_wD] )
model_fitted_wD = model_wD.fit(xTrain, yTrain, epochs = 100,batch_size=128, validation_data=(xTest, yTest))
results_wD = model_wD.evaluate(xValidation, yValidation)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100


Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [30]:
n_embeding=20

model_wDD = tf.keras.models.Sequential()
model_wDD.add(tf.keras.layers.Dense(512, activation='relu', input_dim=73))
model_wDD.add(tf.keras.layers.Dense(256, activation='relu'))
model_wDD.add(tf.keras.layers.Dropout(0.25))
model_wDD.add(tf.keras.layers.Dense(128, activation='relu'))
model_wDD.add(tf.keras.layers.Dense(128, activation='relu'))
model_wDD.add(tf.keras.layers.Dropout(0.25))
model_wDD.add(tf.keras.layers.Dense(64, activation='relu'))
model_wDD.add(tf.keras.layers.Dense(12, activation='softmax'))
metric_wDD= tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="sparse_top_k_categorical_accuracy", dtype=None)
model_wDD.compile (optimizer='adam',loss='sparse_categorical_crossentropy', metrics=[metric_wDD] )
model_fitted_wDD = model_wDD.fit(xTrain, yTrain, epochs = 100,batch_size=128, validation_data=(xTest, yTest))
results_wDD = model_wDD.evaluate(xValidation, yValidation)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100


Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [35]:
print(results_wDD)

[2.3696868419647217, 0.4494297206401825]


In [31]:
n_embeding=20

model_wo_D4 = tf.keras.models.Sequential()
model_wo_D4.add(tf.keras.layers.Dense(500, activation='relu', input_dim=73))
model_wo_D4.add(tf.keras.layers.Dense(250, activation='relu'))
model_wo_D4.add(tf.keras.layers.Dense(100, activation='relu'))
model_wo_D4.add(tf.keras.layers.Dense(50, activation='relu'))
model_wo_D4.add(tf.keras.layers.Dense(12, activation='softmax'))
metric_wo_D4= tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="sparse_top_k_categorical_accuracy", dtype=None)
model_wo_D4.compile (optimizer='adam',loss='sparse_categorical_crossentropy', metrics=[metric_wo_D4] )
model_fitted_wo_D4 = model_wo_D4.fit(xTrain, yTrain, epochs = 100,batch_size=128, validation_data=(xTest, yTest))
results_wo_D4 = model_wo_D4.evaluate(xValidation, yValidation)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100


Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [32]:
n_embeding=20

model_wD4 = tf.keras.models.Sequential()
model_wD4.add(tf.keras.layers.Dense(500, activation='relu', input_dim=73))
model_wD4.add(tf.keras.layers.Dense(250, activation='relu'))
model_wD4.add(tf.keras.layers.Dropout(0.25))
model_wD4.add(tf.keras.layers.Dense(100, activation='relu'))
model_wD4.add(tf.keras.layers.Dense(50, activation='relu'))
model_wD4.add(tf.keras.layers.Dense(12, activation='softmax'))
metric_wD4= tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="sparse_top_k_categorical_accuracy", dtype=None)
model_wD4.compile (optimizer='adam',loss='sparse_categorical_crossentropy', metrics=[metric_wD4] )
model_fitted_wD4 = model_wD4.fit(xTrain, yTrain, epochs = 100,batch_size=128, validation_data=(xTest, yTest))
results_wD4 = model_wD4.evaluate(xValidation, yValidation)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100


Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [33]:
n_embeding=20

model_wDD4 = tf.keras.models.Sequential()
model_wDD4.add(tf.keras.layers.Dense(512, activation='relu', input_dim=73))
model_wDD4.add(tf.keras.layers.Dense(256, activation='relu'))
model_wDD4.add(tf.keras.layers.Dropout(0.25))
model_wDD4.add(tf.keras.layers.Dense(128, activation='relu'))
model_wDD4.add(tf.keras.layers.Dense(128, activation='relu'))
model_wDD4.add(tf.keras.layers.Dropout(0.25))
model_wDD4.add(tf.keras.layers.Dense(64, activation='relu'))
model_wDD4.add(tf.keras.layers.Dense(12, activation='softmax'))
metric_wDD4= tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="sparse_top_k_categorical_accuracy", dtype=None)
model_wDD4.compile (optimizer='adam',loss='sparse_categorical_crossentropy', metrics=[metric_wDD4] )
model_fitted_wDD4= model_wDD4.fit(xTrain, yTrain, epochs = 100,batch_size=128, validation_data=(xTest, yTest))
results_wDD4 = model_wDD4.evaluate(xValidation, yValidation)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100


Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [55]:
print(len(xValidation))
mapping = dict(zip(le_d.classes_, range(len(le_d.classes_))))
print(mapping)
predict = model_wDD.predict_classes(xValidation)
unique, counts = np.unique(predict, return_counts=True)
print (np.asarray((unique, counts)).T)
unique, counts = np.unique(yValidation, return_counts=True)

print (np.asarray((unique, counts)).T)

60846
{'A1': 0, 'A15': 1, 'A7': 2, 'B2': 3, 'B3': 4, 'C11': 5, 'C6': 6, 'D14': 7, 'D4': 8, 'E13': 9, 'E18': 10, 'E5': 11}
[[    0  5622]
 [    1   101]
 [    2   115]
 [    3 22831]
 [    4  3933]
 [    5 12201]
 [    6   538]
 [    7   982]
 [    8 14177]
 [    9   119]
 [   10   103]
 [   11   124]]
[[   0 6637]
 [   1 1199]
 [   2 2446]
 [   3 9265]
 [   4 6963]
 [   5 8124]
 [   6 4831]
 [   7 3846]
 [   8 7939]
 [   9 3412]
 [  10 3583]
 [  11 2601]]


In [56]:
print(len(xValidation))
mapping = dict(zip(le_d.classes_, range(len(le_d.classes_))))
print(mapping)
predict = model_wD.predict_classes(xValidation)
unique, counts = np.unique(predict, return_counts=True)
print (np.asarray((unique, counts)).T)
unique, counts = np.unique(yValidation, return_counts=True)

print (np.asarray((unique, counts)).T)

60846
{'A1': 0, 'A15': 1, 'A7': 2, 'B2': 3, 'B3': 4, 'C11': 5, 'C6': 6, 'D14': 7, 'D4': 8, 'E13': 9, 'E18': 10, 'E5': 11}
[[    0  6267]
 [    1    96]
 [    2   192]
 [    3 19531]
 [    4  5422]
 [    5 14182]
 [    6   755]
 [    7  1144]
 [    8 12484]
 [    9   244]
 [   10   317]
 [   11   212]]
[[   0 6637]
 [   1 1199]
 [   2 2446]
 [   3 9265]
 [   4 6963]
 [   5 8124]
 [   6 4831]
 [   7 3846]
 [   8 7939]
 [   9 3412]
 [  10 3583]
 [  11 2601]]
