In [1]:
# Importar librerías necesarias
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE, RandomOverSampler
from xgboost import XGBClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import plotly.express as px
import plotly.graph_objs as go

In [None]:
# Cargar el dataset
df = pd.read_csv('New_BPD.csv')

In [3]:
# Limpieza de datos
df.drop_duplicates(inplace=True)
df.dropna(inplace=True)


In [4]:
# Selección de características y etiquetas
X = df.drop('OffenseCategory', axis=1)
y = df['OffenseCategory']

# Convertir características categóricas en variables dummy
X = pd.get_dummies(X, columns=['Sex', 'Race', 'District'])

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [5]:
# Entrenar y evaluar modelo con RandomForestClassifier
def train_evaluate_model(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
train_evaluate_model(model_rf, X_train, y_train, X_test, y_test)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                          precision    recall  f1-score   support

          Administrative       0.57      0.06      0.11       128
      Alcohol Violations       0.50      0.03      0.05        38
           Animal Crimes       0.00      0.00      0.00         2
                   Arson       0.00      0.00      0.00         3
             Child Abuse       0.00      0.00      0.00        17
         Domestic Crimes       0.43      0.04      0.07        84
           Drug Offenses       0.58      0.79      0.67      3916
Environmental Violations       0.00      0.00      0.00         3
          Fire Incidents       0.00      0.00      0.00         3
                   Fraud       1.00      0.12      0.22         8
          Investigations       0.69      0.21      0.33       443
     Medical Emergencies       0.00      0.00      0.00         7
           Mental Health       0.00      0.00      0.00         7
  Obstruction of Justice       0.00      0.00      0.00        15
         

In [6]:
# # Balanceo de datos con SMOTE
# smote = SMOTE(random_state=42)
# X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
# model_rf_smote = RandomForestClassifier(n_estimators=100, random_state=42)
# train_evaluate_model(model_rf_smote, X_train_smote, y_train_smote, X_test, y_test)

In [7]:
# Balanceo de datos con RandomOverSampler
ros = RandomOverSampler(random_state=42)
X_train_ros, y_train_ros = ros.fit_resample(X_train, y_train)
model_rf_ros = RandomForestClassifier(n_estimators=100, random_state=42)
train_evaluate_model(model_rf_ros, X_train_ros, y_train_ros, X_test, y_test)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                          precision    recall  f1-score   support

          Administrative       0.46      0.10      0.17       128
      Alcohol Violations       1.00      0.03      0.05        38
           Animal Crimes       0.00      0.00      0.00         2
                   Arson       1.00      0.33      0.50         3
             Child Abuse       0.00      0.00      0.00        17
         Domestic Crimes       0.27      0.05      0.08        84
           Drug Offenses       0.56      0.82      0.66      3916
Environmental Violations       0.00      0.00      0.00         3
          Fire Incidents       0.00      0.00      0.00         3
                   Fraud       1.00      0.12      0.22         8
          Investigations       0.65      0.29      0.40       443
     Medical Emergencies       0.00      0.00      0.00         7
           Mental Health       0.33      0.14      0.20         7
  Obstruction of Justice       1.00      0.13      0.24        15
         

In [8]:
# XGBoost
label_encoder = LabelEncoder()
y_train_ros_encoded = label_encoder.fit_transform(y_train_ros)
y_test_encoded = label_encoder.transform(y_test)

xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
xgb_model.fit(X_train_ros, y_train_ros_encoded)
y_pred_xgb_encoded = xgb_model.predict(X_test)
y_pred_xgb = label_encoder.inverse_transform(y_pred_xgb_encoded)
print(classification_report(y_test, y_pred_xgb))
print(confusion_matrix(y_test, y_pred_xgb))

Parameters: { "use_label_encoder" } are not used.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                          precision    recall  f1-score   support

          Administrative       0.11      0.18      0.14       128
      Alcohol Violations       0.08      0.08      0.08        38
           Animal Crimes       0.00      0.00      0.00         2
                   Arson       0.00      0.00      0.00         3
             Child Abuse       0.33      0.06      0.10        17
         Domestic Crimes       0.07      0.12      0.08        84
           Drug Offenses       0.59      0.63      0.61      3916
Environmental Violations       0.00      0.00      0.00         3
          Fire Incidents       0.00      0.00      0.00         3
                   Fraud       0.25      0.12      0.17         8
       Human Trafficking       0.00      0.00      0.00         0
          Investigations       0.21      0.55      0.30       443
     Medical Emergencies       0.50      0.14      0.22         7
           Mental Health       0.40      0.29      0.33         7
  Obstruc

In [9]:
# K-Nearest Neighbors
from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier()
knn_model.fit(X_train_ros, y_train_ros)
y_pred_knn = knn_model.predict(X_test)
print(classification_report(y_test, y_pred_knn))
print(confusion_matrix(y_test, y_pred_knn))

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                          precision    recall  f1-score   support

          Administrative       0.05      0.09      0.07       128
      Alcohol Violations       0.02      0.03      0.02        38
           Animal Crimes       0.00      0.00      0.00         2
                   Arson       0.00      0.00      0.00         3
             Child Abuse       0.00      0.00      0.00        17
         Domestic Crimes       0.04      0.07      0.05        84
           Drug Offenses       0.37      0.54      0.44      3916
Environmental Violations       0.00      0.00      0.00         3
          Fire Incidents       0.00      0.00      0.00         3
                   Fraud       0.00      0.00      0.00         8
          Investigations       0.10      0.17      0.13       443
     Medical Emergencies       0.00      0.00      0.00         7
           Mental Health       0.00      0.00      0.00         7
  Obstruction of Justice       0.00      0.00      0.00        15
         

In [10]:
# Deep Learning
y_train_ros_encoded = label_encoder.fit_transform(y_train_ros)
y_test_encoded = label_encoder.transform(y_test)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_ros)
X_test_scaled = scaler.transform(X_test)

# Definir el modelo de red neuronal
nn_model = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(set(y_train_ros_encoded)), activation='softmax')
])

nn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
nn_model.fit(X_train_scaled, y_train_ros_encoded, epochs=10, batch_size=32, validation_split=0.2)

y_pred_nn_encoded = nn_model.predict(X_test_scaled)
y_pred_nn = y_pred_nn_encoded.argmax(axis=1)
y_pred_nn_decoded = label_encoder.inverse_transform(y_pred_nn)
print(classification_report(y_test, y_pred_nn_decoded))
print(confusion_matrix(y_test, y_pred_nn_decoded))

NameError: name 'Input' is not defined

In [3]:
# Visualización de resultados
probabilities = nn_model.predict(X_test_scaled)
prob_df = pd.DataFrame(probabilities, columns=label_encoder.classes_)
prob_df['Actual'] = y_test.values

fig = px.bar(prob_df.melt(id_vars='Actual'), x='variable', y='value', color='Actual', barmode='group')
fig.show()

NameError: name 'nn_model' is not defined