# Taller Dataminig – Deep Learning

Basado en la siguiente estructura de datos de un archivo .csv, realizar los siguientes ejercicios de DeepLearning en el lenguaje python y librerias como Scikit Learn, Keras, Shap, Pytorch: 

CASE#,DATE OF OCCURRENCE,BLOCK, IUCR, PRIMARY DESCRIPTION, SECONDARY DESCRIPTION, LOCATION DESCRIPTION,ARREST, DOMESTIC,BEAT,WARD,FBI CD,X COORDINATE,Y COORDINATE,LATITUDE,LONGITUDE,LOCATION
JG406115,08/31/2023 07:00:00 PM,042XX W MARQUETTE RD,0498,BATTERY,"AGG. DOMESTIC BATTERY - HANDS, FISTS, FEET, SERIOUS INJURY",APARTMENT,Y,Y,833,23,04B,1149062,1859830,41.771296232,-87.729149311,"(41.771296232, -87.729149311)".

Archivo de datos en repo ucc-datamining: ucc-data-mining/taller-dl/data.csv

## 1. Predicción de Clasificación Temporal con Redes Neuronales Recurrentes (RNN)

* Este ejercicio implica predecir la ocurrencia de un cierto tipo de crimen en función de la fecha y hora.

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Cargar los datos
data = pd.read_csv('/home/jovyan/work/data.csv')
# Preprocesamiento de datos
data['DATE OF OCCURRENCE'] = pd.to_datetime(data['DATE OF OCCURRENCE'])
data['HOUR'] = data['DATE OF OCCURRENCE'].dt.hour
data['DAY_OF_WEEK'] = data['DATE OF OCCURRENCE'].dt.dayofweek
data['MONTH'] = data['DATE OF OCCURRENCE'].dt.month
# Seleccionar características y etiquetas
X = data[['HOUR', 'DAY_OF_WEEK', 'MONTH']].values
y = data['PRIMARY DESCRIPTION']
# Codificar las etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Construir la red neuronal recurrente (LSTM)
model = Sequential([
 LSTM(64, input_shape=(X_train.shape[1], 1), activation='relu', return_sequences=True),
 LSTM(32, activation='relu'),
 Dense(len(label_encoder.classes_), activation='softmax')
])
# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Ajustar el modelo
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

  data['DATE OF OCCURRENCE'] = pd.to_datetime(data['DATE OF OCCURRENCE'])
  super().__init__(**kwargs)


Epoch 1/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 10ms/step - accuracy: 0.2199 - loss: 2.4236 - val_accuracy: 0.2278 - val_loss: 2.3427
Epoch 2/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 15ms/step - accuracy: 0.2274 - loss: 2.3414 - val_accuracy: 0.2279 - val_loss: 2.3362
Epoch 3/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 11ms/step - accuracy: 0.2268 - loss: 2.3399 - val_accuracy: 0.2291 - val_loss: 2.3305
Epoch 4/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 18ms/step - accuracy: 0.2289 - loss: 2.3330 - val_accuracy: 0.2299 - val_loss: 2.3293
Epoch 5/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 19ms/step - accuracy: 0.2290 - loss: 2.3293 - val_accuracy: 0.2300 - val_loss: 2.3255
Epoch 6/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 11ms/step - accuracy: 0.2282 - loss: 2.3292 - val_accuracy: 0.2308 - val_loss: 2.3250
Ep

<keras.src.callbacks.history.History at 0x7f863f615fd0>