In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Paso 1: Cargar los datos
data = pd.read_csv('data.csv')

# Paso 2: Preprocesamiento de datos
# Eliminar columnas innecesarias o que no sean útiles para la predicción
data = data.drop(columns=['CASE#', 'FBI CD', 'X COORDINATE', 'Y COORDINATE', 'LATITUDE',
                         'LONGITUDE', 'LOCATION'])

# Obtener las columnas únicas de "PRIMARY DESCRIPTION"
unique_descriptions = data['PRIMARY DESCRIPTION'].unique()

# Dividir los datos en lotes más pequeños
batch_size = 10000
num_batches = len(data) // batch_size
for batch_idx in range(num_batches):
    batch_data = data.iloc[batch_idx * batch_size:(batch_idx + 1) * batch_size]
    
    # Guardar la columna "PRIMARY DESCRIPTION"
    y_batch = batch_data['PRIMARY DESCRIPTION']
    
    # Aplicar pd.get_dummies() a batch_data
    batch_data = pd.get_dummies(batch_data)
    
    # Concatenar la columna "PRIMARY DESCRIPTION" con el DataFrame resultante de pd.get_dummies()
    batch_data = pd.concat([batch_data, y_batch], axis=1)
    
    # Dividir los datos en características (X) y etiquetas (y) para cada lote
    X_batch = batch_data.drop(columns=['PRIMARY DESCRIPTION'])
    y_batch = batch_data['PRIMARY DESCRIPTION']
    
    # Entrenar y evaluar el modelo en cada lote
    X_train, X_test, y_train, y_test = train_test_split(X_batch, y_batch, test_size=0.2, random_state=42)
    model = RandomForestClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy for batch {batch_idx}: {accuracy}")

# Guardar el modelo entrenado
joblib.dump(model, 'crime_prediction_model.pkl')


Accuracy for batch 0: 1.0
Accuracy for batch 1: 0.9995
Accuracy for batch 2: 0.999
Accuracy for batch 3: 0.9995
Accuracy for batch 4: 1.0
Accuracy for batch 5: 0.998
Accuracy for batch 6: 0.998
Accuracy for batch 7: 0.9985
Accuracy for batch 8: 0.9995
Accuracy for batch 9: 0.999
Accuracy for batch 10: 0.9995
Accuracy for batch 11: 0.999
Accuracy for batch 12: 1.0
Accuracy for batch 13: 0.9995
Accuracy for batch 14: 0.998
Accuracy for batch 15: 0.999
Accuracy for batch 16: 0.999
Accuracy for batch 17: 0.9995
Accuracy for batch 18: 0.9995
Accuracy for batch 19: 0.999
Accuracy for batch 20: 0.999
Accuracy for batch 21: 0.9985
Accuracy for batch 22: 0.9995
Accuracy for batch 23: 0.9995
Accuracy for batch 24: 0.9985


['crime_prediction_model.pkl']

In [40]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import pandas as pd

# Paso 1: Cargar el modelo entrenado
model = joblib.load('crime_prediction_model.pkl')

# Paso 2: Definir los modelos de datos para la entrada y salida de la API
class CrimePredictionInput(BaseModel):
    DATE_OF_OCCURRENCE: str
    BLOCK: str
    IUCR: str
    LOCATION_DESCRIPTION: str
    ARREST: str
    DOMESTIC: str
    BEAT: int
    WARD: int

class CrimePredictionOutput(BaseModel):
    PRIMARY_DESCRIPTION: str

# Paso 3: Inicializar la aplicación FastAPI
app = FastAPI()

# Paso 4: Definir el endpoint para hacer predicciones de crimen
@app.post('/predict/')
async def predict_crime(data: CrimePredictionInput):
    # Convertir los datos de entrada en un DataFrame
    input_data = pd.DataFrame([data.dict()])

    # Preprocesar los datos de entrada
    input_data = pd.get_dummies(input_data)

    # Asegurarse de que todas las columnas necesarias estén presentes
    required_columns = set(X.columns)
    missing_columns = required_columns - set(input_data.columns)
    for column in missing_columns:
        input_data[column] = 0

    # Hacer la predicción
    prediction = model.predict(input_data)

    # Obtener la descripción primaria del crimen predicho
    primary_description = prediction[0]

    return CrimePredictionOutput(PRIMARY_DESCRIPTION=primary_description)



In [None]:
# Terminal 1: Ejecutar el script de entrenamiento del modelo
python train_model.py
# Terminal 2: Ejecutar el script de la API
uvicorn api:app --reload


In [27]:
import requests

In [28]:
import json

In [41]:
import requests
from requests.exceptions import JSONDecodeError

# Definir los datos de entrada
data = {
    "DATE_OF_OCCURRENCE": "2023-05-03",
    "BLOCK": "050XX S ASHLAND AVE",
    "IUCR": "0486",
    "LOCATION_DESCRIPTION": "STREET",
    "ARREST": "TRUE",
    "DOMESTIC": "TRUE",
    "BEAT": 911,
    "WARD": 20
}

# Enviar la solicitud POST a la API
response = requests.post("http://localhost:8000/predict/", json=data)

try:
    print(response.json())
except JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
    print(f"Response content: {response.text}")


ConnectionError: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /predict/ (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f31d743d390>: Failed to establish a new connection: [Errno 111] Connection refused'))