# Taller Dataminig – Deep LearningBasado en la siguiente estructura de datos de un archivo .csv, realizar los siguientes ejercicios de DeepLearning 
en el lenguaje python y librerias como Scikit Learn, Keras, Shap, Pytorch:
 
CASE#,DATE OF OCCURRENCE,BLOCK, IUCR, PRIMARY DESCRIPTION, SECONDARY DESCRIPTION, LOCATIO 
DESCRIPTION,ARREST,DOMESTIC,BEAT,WARD,FBI CD,X COORDINATE,Y COORDINATE,LATITUDE,LONGITUDE,LOCAT
ION
JG406115,08/31/2023 07:00:00 PM,042XX W MARQUETTE RD,0498,BATTERY,"AGG. DOMESTIC BATTERY - HANDS, FISTS, FEET, SER US 
INJURY",APARTMENT,Y,Y,833,23,04B,1149062,1859830,41.771296232,-87.729149311,"(41.771296232, -87.72914931

Archivo de datos en repo ucc-datamining: ucc-data-mining/taller-dl/data.csv
1)".


## 1. Predicción de Clasificación Temporal con Redes Neuronales Recurrentes (RNN)*  Este ejercicio implica predecir la ocurrencia de un cierto tipo de crimen en
función de la fecha y hora.

In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Cargar los datos
data = pd.read_csv('data.csv')
# Preprocesamiento de datos

data['DATE OF OCCURRENCE'] = pd.to_datetime(data['DATE OF OCCURRENCE'], format="%m/%d/%Y %I:%M:%S %p")
# data['DATE OF OCCURRENCE'] = pd.to_datetime(data['DATE OF OCCURRENCE'])

data['HOUR'] = data['DATE OF OCCURRENCE'].dt.hour
data['DAY_OF_WEEK'] = data['DATE OF OCCURRENCE'].dt.dayofweek
data['MONTH'] = data['DATE OF OCCURRENCE'].dt.month
# Seleccionar características y etiquetas
X = data[['HOUR', 'DAY_OF_WEEK', 'MONTH']].values
y = data['PRIMARY DESCRIPTION']
# Codificar las etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Construir la red neuronal recurrente (LSTM)
model = Sequential([
 LSTM(64, input_shape=(X_train.shape[1], 1), activation='relu', return_sequences=True),
 LSTM(32, activation='relu'),
 Dense(len(label_encoder.classes_), activation='softmax')
])
# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Ajustar el modelo
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20


  super().__init__(**kwargs)


[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 3ms/step - accuracy: 0.2151 - loss: 2.4176 - val_accuracy: 0.2221 - val_loss: 2.3429
Epoch 2/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.2255 - loss: 2.3422 - val_accuracy: 0.2278 - val_loss: 2.3372
Epoch 3/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - accuracy: 0.2255 - loss: 2.3405 - val_accuracy: 0.2293 - val_loss: 2.3349
Epoch 4/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - accuracy: 0.2278 - loss: 2.3379 - val_accuracy: 0.2289 - val_loss: 2.3323
Epoch 5/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - accuracy: 0.2286 - loss: 2.3330 - val_accuracy: 0.2293 - val_loss: 2.3266
Epoch 6/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.2286 - loss: 2.3306 - val_accuracy: 0.2251 - val_loss: 2.3310
Epoch 7/20
[1m6469/6

<keras.src.callbacks.history.History at 0x18fb25922d0>

## 2. Predicción de Series Temporales con Redes Neuronales Convolucionales (CNN)*  Este ejercicio implica predecir la ocurrencia de un tipo de crimen utilizando datos
de series temporales como entrada a una CNN.

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# Cargar los datos
data = pd.read_csv('data.csv')

# Preprocesamiento de datos
# Asegúrate de que los datos estén ordenados cronológicamente
data['DATE OF OCCURRENCE'] = pd.to_datetime(data['DATE OF OCCURRENCE'], format="%m/%d/%Y %I:%M:%S %p")
# data['DATE OF OCCURRENCE'] = pd.to_datetime(data['DATE OF OCCURRENCE'])
data = data.sort_values(by='DATE OF OCCURRENCE')

# Seleccionar características y etiquetas
X = data[['DATE OF OCCURRENCE']].values
y = data['PRIMARY DESCRIPTION']

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Construir el modelo
model = Sequential([
    Dense(64, activation='relu'),
    Dense(50, activation='relu'),
    Dense(len(y.unique()), activation='softmax')
])

# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Ajustar etiquetas:
# Convertir las etiquetas a números enteros
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Ajustar el modelo
# model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))
model.fit(X_train, y_train_encoded, epochs=20, batch_size=32, validation_data=(X_test, y_test_encoded))

Epoch 1/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1ms/step - accuracy: 0.2184 - loss: 2.4204 - val_accuracy: 0.2205 - val_loss: 2.3793
Epoch 2/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - accuracy: 0.2227 - loss: 2.3685 - val_accuracy: 0.2205 - val_loss: 2.3818
Epoch 3/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - accuracy: 0.2221 - loss: 2.3663 - val_accuracy: 0.2205 - val_loss: 2.3794
Epoch 4/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - accuracy: 0.2244 - loss: 2.3623 - val_accuracy: 0.2205 - val_loss: 2.3803
Epoch 5/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - accuracy: 0.2211 - loss: 2.3680 - val_accuracy: 0.2205 - val_loss: 2.3824
Epoch 6/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - accuracy: 0.2222 - loss: 2.3670 - val_accuracy: 0.2205 - val_loss: 2.3750
Epoch 7/20
[1m

<keras.src.callbacks.history.History at 0x18fb9315ed0>

## 3. Predicción de Series Temporales con Redes Neuronales Auto-Recurrentes (ARNN)*  Este ejercicio implica predecir la ocurrencia de un tipo de crimen utilizando datos
de series temporales y una red neuronal auto-recurrent.

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder


# Cargar los datos
data = pd.read_csv('data.csv')
# Preprocesamiento de datos
# Asegúrate de que los datos estén ordenados cronológicamente
data['DATE OF OCCURRENCE'] = pd.to_datetime(data['DATE OF OCCURRENCE'], format="%m/%d/%Y %I:%M:%S %p")
# data['DATE OF OCCURRENCE'] = pd.to_datetime(data['DATE OF OCCURRENCE'])
data = data.sort_values(by='DATE OF OCCURRENCE')

# Seleccionar características y etiquetas
X = data[['DATE OF OCCURRENCE']].values
y = data['PRIMARY DESCRIPTION']
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Ajustar las dimensiones para la entrada de ARNN (reshape)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
# Construir la red neuronal auto-recurrente (ARNN)
model = Sequential([
 SimpleRNN(64, activation='relu', return_sequences=True),
 SimpleRNN(32, activation='relu'),
 Dense(len(y.unique()), activation='softmax')
])
# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Convertir etiquetas de texto a valores numéricos
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Ajustar el modelo con las etiquetas numéricas
model.fit(X_train, y_train_encoded, epochs=20, batch_size=32, validation_data=(X_test, y_test_encoded))

# Ajustar el modelo
# model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 2ms/step - accuracy: 0.2136 - loss: 2.4299 - val_accuracy: 0.2205 - val_loss: 2.3914
Epoch 2/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.2225 - loss: 2.3653 - val_accuracy: 0.2205 - val_loss: 2.3800
Epoch 3/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.2196 - loss: 2.3699 - val_accuracy: 0.2205 - val_loss: 2.3878
Epoch 4/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.2226 - loss: 2.3665 - val_accuracy: 0.2205 - val_loss: 2.3785
Epoch 5/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.2214 - loss: 2.3668 - val_accuracy: 0.2205 - val_loss: 2.3903
Epoch 6/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.2202 - loss: 2.3664 - val_accuracy: 0.2205 - val_loss: 2.3825
Epoch 7/20

<keras.src.callbacks.history.History at 0x18fb0dd4610>

## 4. Predicción de Valores Continuos con Redes Neuronales Profundas (DNN)*  Este ejercicio implica predecir la latitud y longitud de la ubicación de un crimen
utilizando una red neuronal profunda.

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
# Cargar los datos
data = pd.read_csv('data.csv')
# Seleccionar características y etiquetas
X = data[['X COORDINATE', 'Y COORDINATE']].values
y_latitude = data['LATITUDE'].values
y_longitude = data['LONGITUDE'].values
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_lat_train, y_lat_test, y_long_train, y_long_test = train_test_split( X, y_latitude, y_longitude, test_size=0.2, random_state=42)
# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Construir la red neuronal profunda (DNN)
# Construir la red neuronal profunda (DNN)
model_lat = Sequential([
 Dense(64, activation='relu'),
 Dense(32, activation='relu'),
 Dense(1)
])

model_long = Sequential([
 Dense(64, activation='relu'),
 Dense(32, activation='relu'),
 Dense(1)
])
# Compilar el modelo
model_lat.compile(optimizer='adam', loss='mse')
model_long.compile(optimizer='adam', loss='mse')
# Ajustar el modelo
model_lat.fit(X_train, y_lat_train, epochs=20, batch_size=32, validation_data=(X_test, y_lat_test))
model_long.fit(X_train, y_long_train, epochs=20, batch_size=32, validation_data=(X_test, y_long_test))

Epoch 1/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1ms/step - loss: nan - val_loss: nan
Epoch 2/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - loss: nan - val_loss: nan
Epoch 3/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - loss: nan - val_loss: nan
Epoch 4/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - loss: nan - val_loss: nan
Epoch 5/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - loss: nan - val_loss: nan
Epoch 6/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - loss: nan - val_loss: nan
Epoch 7/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - loss: nan - val_loss: nan
Epoch 8/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - loss: nan - val_loss: nan
Epoch 9/20
[1m6469/6469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

<keras.src.callbacks.history.History at 0x18fb944e4d0>

## 5. Predicción de Texto usando Redes Neuronales Recurrentes (RNN)*  Este ejercicio implica predecir la descripción de un crimen basándose en su título
y ubicación.

In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Cargar los datos
data = pd.read_csv('data.csv')

# Eliminar filas con valores nan
data = data.dropna()

# Preprocesamiento de datos
X_title = data['PRIMARY DESCRIPTION'].values
X_location = data['LOCATION'].apply(lambda x: [float(coordinate) for coordinate in x[1:-1].split(', ')])  # Convertir coordenadas en lista de flotantes
y = data['SECONDARY DESCRIPTION'].values

# Codificar etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Separar datos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_location, y, test_size=0.2, random_state=42)

# Normalizar coordenadas geográficas
def calculate_distance(coord):
    # Supongamos que utilizamos el origen como punto de referencia
    reference_point = [0.0, 0.0]
    return np.linalg.norm(np.array(coord) - np.array(reference_point))

X_train_normalized = np.array([calculate_distance(coord) for coord in X_train]).reshape(-1, 1)
X_test_normalized = np.array([calculate_distance(coord) for coord in X_test]).reshape(-1, 1)

# Modelo de texto
model = Sequential([
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Ajustar el modelo
model.fit(X_train_normalized, y_train, epochs=10, batch_size=32, validation_data=(X_test_normalized, y_test))

Epoch 1/10
[1m6436/6436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1ms/step - accuracy: 0.1079 - loss: 3.7886 - val_accuracy: 0.1179 - val_loss: 3.6651
Epoch 2/10
[1m6436/6436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - accuracy: 0.1177 - loss: 3.6664 - val_accuracy: 0.1179 - val_loss: 3.6586
Epoch 3/10
[1m6436/6436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - accuracy: 0.1181 - loss: 3.6606 - val_accuracy: 0.1179 - val_loss: 3.6536
Epoch 4/10
[1m6436/6436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1ms/step - accuracy: 0.1175 - loss: 3.6608 - val_accuracy: 0.1179 - val_loss: 3.6558
Epoch 5/10
[1m6436/6436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - accuracy: 0.1179 - loss: 3.6630 - val_accuracy: 0.1179 - val_loss: 3.6543
Epoch 6/10
[1m6436/6436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1ms/step - accuracy: 0.1173 - loss: 3.6543 - val_accuracy: 0.1179 - val_loss: 3.6531
Epoch 7/10
[

<keras.src.callbacks.history.History at 0x18fb64fc310>