<a href="https://colab.research.google.com/github/cristiangodoyangel/Analisis-Inicial-y-Seleccion-de-Problema/blob/main/Sismos_en_Chile.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import plotly.express as px
import geopandas as gpd
from shapely.geometry import Point
from geopandas.tools import sjoin

# Cargar archivo CSV
path = ('/content/drive/MyDrive/0002 Python - Data Science/Datasets/seismic_data.csv')
df = pd.read_csv(path)


In [3]:
df.head()

Unnamed: 0,Date(UTC),Latitude,Longitude,Depth,Magnitude
0,2025-05-26 03:50:27,-19.63,-69.49,97,5.6
1,2025-05-13 00:47:58,-51.25,-72.28,28,5.1
2,2025-05-05 09:46:48,-29.49,-71.84,48,5.0
3,2025-05-05 02:17:48,-31.89,-70.88,88,5.1
4,2025-05-02 15:23:49,-27.52,-72.48,30,5.8


In [4]:
# Convertimos la fecha original a datetime
df['Fecha'] = pd.to_datetime(df['Date(UTC)'])

# Convertimos a horario chileno
df['Fecha_Chile'] = df['Fecha'].dt.tz_localize('UTC').dt.tz_convert('America/Santiago')

# Extraemos hora y mes
df['Hora'] = df['Fecha_Chile'].dt.hour
df['Mes'] = df['Fecha_Chile'].dt.month

# Diccionario de meses en español
meses = {
    1: "Enero", 2: "Febrero", 3: "Marzo", 4: "Abril",
    5: "Mayo", 6: "Junio", 7: "Julio", 8: "Agosto",
    9: "Septiembre", 10: "Octubre", 11: "Noviembre", 12: "Diciembre"
}
df['Mes_Nombre'] = df['Mes'].map(meses)
df.describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
Latitude,4018.0,-27.837778,-62.35,-31.82,-29.595,-21.5125,-11.07,5.961963
Longitude,4018.0,-70.851127,-75.73,-71.59,-71.18,-70.03,-58.39,1.293387
Depth,4018.0,61.524639,2.0,36.0,50.0,86.0,624.0,38.27661
Magnitude,4018.0,4.428945,2.3,4.0,4.4,4.8,8.4,0.688666
Fecha,4018.0,2019-04-08 11:26:32.820308992,2012-03-03 11:01:47,2016-07-14 17:58:09,2019-06-01 21:22:07,2021-09-02 22:38:11,2025-05-26 03:50:27,
Hora,4018.0,11.336237,0.0,5.0,11.0,17.0,23.0,6.905795
Mes,4018.0,6.574664,1.0,4.0,7.0,9.0,12.0,3.419187


In [5]:
df.head()

Unnamed: 0,Date(UTC),Latitude,Longitude,Depth,Magnitude,Fecha,Fecha_Chile,Hora,Mes,Mes_Nombre
0,2025-05-26 03:50:27,-19.63,-69.49,97,5.6,2025-05-26 03:50:27,2025-05-25 23:50:27-04:00,23,5,Mayo
1,2025-05-13 00:47:58,-51.25,-72.28,28,5.1,2025-05-13 00:47:58,2025-05-12 20:47:58-04:00,20,5,Mayo
2,2025-05-05 09:46:48,-29.49,-71.84,48,5.0,2025-05-05 09:46:48,2025-05-05 05:46:48-04:00,5,5,Mayo
3,2025-05-05 02:17:48,-31.89,-70.88,88,5.1,2025-05-05 02:17:48,2025-05-04 22:17:48-04:00,22,5,Mayo
4,2025-05-02 15:23:49,-27.52,-72.48,30,5.8,2025-05-02 15:23:49,2025-05-02 11:23:49-04:00,11,5,Mayo


In [19]:
# Función para detectar outliers con la Regla de IQR
def detectar_outliers_iqr(serie):
    Q1 = serie.quantile(0.25)
    Q3 = serie.quantile(0.75)
    IQR = Q3 - Q1
    outliers = serie[(serie < Q1 - 1.5 * IQR) | (serie > Q3 + 1.5 * IQR)]
    return outliers

# Detectar outliers en Depth y Magnitude
outliers_depth = detectar_outliers_iqr(df['Depth'])
outliers_magnitude = detectar_outliers_iqr(df['Magnitude'])

print(f"Outliers en Depth: {len(outliers_depth)}")
print(f"Outliers en Magnitude: {len(outliers_magnitude)}")


Outliers en Depth: 55
Outliers en Magnitude: 108


### Justificación de los Outliers

Se aplicó la regla de IQR (Interquartile Range) para identificar posibles outliers en las columnas **Depth** y **Magnitude**.

Sin embargo, **no se eliminaron estos valores**, ya que representan eventos sísmicos reales extremos (como sismos muy profundos o de gran magnitud).

Eliminar estos datos podría sesgar el análisis o subestimar la severidad de los eventos. Por tanto, se decidió **mantener todos los registros**, respetando la naturaleza del fenómeno estudiado.


In [6]:


# Clasificación de zona geográfica según latitud
def clasificar_zona(lat):
    if lat >= -27:
        return 'Norte'
    elif lat >= -36:
        return 'Centro'
    else:
        return 'Sur'

df['Zona'] = df['Latitude'].apply(clasificar_zona)


In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


# Crear variable objetivo: 1 si magnitud ≥ 6.0
df['Sismo_Fuerte'] = (df['Magnitude'] >= 6.0).astype(int)

# Seleccionar variables para el modelo
X = df[['Depth', 'Mes', 'Hora', 'Zona']]
y = df['Sismo_Fuerte']

# Definir columnas numéricas y categóricas
num_cols = ['Depth', 'Hora']
cat_cols = ['Mes', 'Zona']

# Crear el preprocesador
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(drop='first'), cat_cols)
])

# Pipeline final
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor)
])

# Transformar datos
X_final = pipeline.fit_transform(X)

# Mostrar forma final de los datos
print("X original:", X.shape)
print("X transformado:", X_final.shape)
print("y clases:", y.value_counts())


X original: (4018, 4)
X transformado: (4018, 15)
y clases: Sismo_Fuerte
0    3897
1     121
Name: count, dtype: int64


In [20]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

# Definir los modelos a evaluar
modelos = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Evaluar modelos usando F1-score
resultados = {}

for nombre, modelo in modelos.items():
    score = cross_val_score(modelo, X_final, y, cv=5, scoring='f1')
    resultados[nombre] = score.mean()

# Mostrar resultados ordenados
import pandas as pd
df_resultados = pd.DataFrame.from_dict(resultados, orient='index', columns=['F1-Score']).sort_values(by='F1-Score', ascending=False)
df_resultados


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Unnamed: 0,F1-Score
KNN,0.148311
XGBoost,0.129892
Random Forest,0.088863
Logistic Regression,0.016


### Comparación de Modelos Iniciales

Se entrenaron cuatro modelos usando validación cruzada con 5 folds y métrica F1:

- Random Forest
- K-Nearest Neighbors (KNN)
- Regresión Logística
- XGBoost

A continuación se muestran los resultados promedio del F1-score. El modelo con mejor rendimiento será seleccionado para la etapa de optimización.


In [21]:
from sklearn.metrics import (
    f1_score, accuracy_score, precision_score,
    recall_score, roc_auc_score
)
from sklearn.model_selection import train_test_split

# Dividir datos en entrenamiento y prueba (si aún no lo hiciste)
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y, test_size=0.2, stratify=y, random_state=42
)

# Inicializar y entrenar modelos
modelos_test = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Evaluar cada modelo
metricas = {}

for nombre, modelo in modelos_test.items():
    modelo.fit(X_train, y_train)
    y_pred = modelo.predict(X_test)
    metricas[nombre] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1-Score": f1_score(y_test, y_pred),
        "ROC-AUC": roc_auc_score(y_test, y_pred)
    }

# Mostrar tabla comparativa
df_metricas = pd.DataFrame(metricas).T.sort_values(by="F1-Score", ascending=False)
df_metricas


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Unnamed: 0,Accuracy,Precision,Recall,F1-Score,ROC-AUC
KNN,0.971393,0.666667,0.083333,0.148148,0.541026
Random Forest,0.968905,0.4,0.083333,0.137931,0.539744
Logistic Regression,0.971393,1.0,0.041667,0.08,0.520833
XGBoost,0.970149,0.5,0.041667,0.076923,0.520192


### Métricas Comparativas en el Conjunto de Prueba

Se evaluaron todos los modelos usando el conjunto de prueba reservado. Las métricas consideradas fueron:

- **Accuracy**: Proporción de predicciones correctas.
- **Precision**: Proporción de verdaderos positivos entre los predichos como positivos.
- **Recall**: Proporción de verdaderos positivos detectados respecto al total real.
- **F1-Score**: Media armónica entre precisión y recall.
- **ROC-AUC**: Capacidad del modelo para distinguir entre clases.

El modelo con mejor balance en F1 y ROC-AUC será elegido para optimización.


In [22]:
y_test, y_pred, y_pred_rand, y_pred_optuna


(2241    0
 2094    0
 1675    0
 3686    0
 1348    0
        ..
 2112    0
 814     0
 2825    0
 2709    0
 3818    0
 Name: Sismo_Fuerte, Length: 804, dtype: int64,
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 

In [23]:
# Evaluación final con las 5 métricas clave
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Construir diccionario de resultados
comparacion_optimizaciones = {
    "GridSearchCV": {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1-Score": f1_score(y_test, y_pred),
        "ROC-AUC": roc_auc_score(y_test, y_pred)
    },
    "RandomizedSearchCV": {
        "Accuracy": accuracy_score(y_test, y_pred_rand),
        "Precision": precision_score(y_test, y_pred_rand),
        "Recall": recall_score(y_test, y_pred_rand),
        "F1-Score": f1_score(y_test, y_pred_rand),
        "ROC-AUC": roc_auc_score(y_test, y_pred_rand)
    },
    "Optuna": {
        "Accuracy": accuracy_score(y_test, y_pred_optuna),
        "Precision": precision_score(y_test, y_pred_optuna),
        "Recall": recall_score(y_test, y_pred_optuna),
        "F1-Score": f1_score(y_test, y_pred_optuna),
        "ROC-AUC": roc_auc_score(y_test, y_pred_optuna)
    }
}

# Mostrar tabla ordenada por F1-Score
import pandas as pd
df_comparacion_final = pd.DataFrame(comparacion_optimizaciones).T.sort_values(by="F1-Score", ascending=False)
df_comparacion_final


Unnamed: 0,Accuracy,Precision,Recall,F1-Score,ROC-AUC
RandomizedSearchCV,0.972637,1.0,0.083333,0.153846,0.541667
Optuna,0.972637,1.0,0.083333,0.153846,0.541667
GridSearchCV,0.970149,0.5,0.041667,0.076923,0.520192


### Comparación Final de Modelos Optimizados

Se evaluaron tres métodos de optimización:

- `GridSearchCV`: búsqueda exhaustiva.
- `RandomizedSearchCV`: búsqueda aleatoria.
- `Optuna`: optimización bayesiana con pruning.

Las métricas mostradas permiten comparar el rendimiento real de los modelos optimizados en el conjunto de prueba. El modelo con mayor F1 y ROC-AUC será considerado el mejor resultado final.


In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# Definir el modelo base
rf = RandomForestClassifier(random_state=42)

# Definir la grilla de hiperparámetros
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'max_features': ['sqrt', 'log2']
}

# Dividir los datos transformados en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y, test_size=0.2, stratify=y, random_state=42
)
# Implementar GridSearchCV
grid = GridSearchCV(estimator=rf, param_grid=param_grid,
                    scoring='f1', cv=5, verbose=2, n_jobs=-1)

# Entrenar con los datos ya transformados
grid.fit(X_train, y_train)


Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [10]:
from sklearn.metrics import classification_report

# Predicciones y métricas
y_pred = grid.predict(X_test)
print("Mejores parámetros:", grid.best_params_)
print("\nReporte de Clasificación:")
print(classification_report(y_test, y_pred))


Mejores parámetros: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

Reporte de Clasificación:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98       780
           1       0.40      0.08      0.14        24

    accuracy                           0.97       804
   macro avg       0.69      0.54      0.56       804
weighted avg       0.96      0.97      0.96       804



In [11]:
from sklearn.model_selection import train_test_split

# Dividir los datos transformados
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y, test_size=0.2, stratify=y, random_state=42
)


In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Definir modelo base
rf = RandomForestClassifier(random_state=42)

# Hiperparámetros a evaluar
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'max_features': ['sqrt', 'log2']
}

# Crear búsqueda con validación cruzada
grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='f1',
    cv=5,
    verbose=2,
    n_jobs=-1
)

# Entrenar
grid.fit(X_train, y_train)


Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [13]:
from sklearn.metrics import classification_report

# Mostrar mejores hiperparámetros
print("Mejores parámetros:", grid.best_params_)

# Predicción y reporte
y_pred = grid.predict(X_test)
print("\nReporte de clasificación en test:")
print(classification_report(y_test, y_pred))


Mejores parámetros: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

Reporte de clasificación en test:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98       780
           1       0.40      0.08      0.14        24

    accuracy                           0.97       804
   macro avg       0.69      0.54      0.56       804
weighted avg       0.96      0.97      0.96       804



In [14]:
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

# Definir el modelo base
rf = RandomForestClassifier(random_state=42)

# Espacio de búsqueda (más amplio que en GridSearchCV)
param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2', None]
}

# RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=20,               # Número de combinaciones aleatorias a probar
    scoring='f1',
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1
)

# Entrenamiento
random_search.fit(X_train, y_train)


Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [15]:
# Mostrar mejores parámetros encontrados
print("Mejores parámetros (RandomizedSearchCV):", random_search.best_params_)

# Evaluación en conjunto de prueba
y_pred_rand = random_search.predict(X_test)
print("\nReporte de clasificación (RandomizedSearchCV):")
print(classification_report(y_test, y_pred_rand))


Mejores parámetros (RandomizedSearchCV): {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': 20}

Reporte de clasificación (RandomizedSearchCV):
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       780
           1       1.00      0.08      0.15        24

    accuracy                           0.97       804
   macro avg       0.99      0.54      0.57       804
weighted avg       0.97      0.97      0.96       804



In [16]:
!pip install optuna --quiet


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/395.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/247.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.0/247.0 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [17]:
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Función objetivo que Optuna optimizará
def objective(trial):
    # Espacio de búsqueda
    n_estimators = trial.suggest_int('n_estimators', 100, 300)
    max_depth = trial.suggest_int('max_depth', 5, 30)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 4)
    max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2', None])

    # Modelo con los hiperparámetros propuestos
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        random_state=42
    )

    # Validación cruzada (F1)
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='f1')
    return scores.mean()

# Crear y ejecutar el estudio
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)


[I 2025-07-23 05:33:32,638] A new study created in memory with name: no-name-7edf7313-1954-44d7-9fc0-76864d9f3f9e
[I 2025-07-23 05:33:40,774] Trial 0 finished with value: 0.11252555540411613 and parameters: {'n_estimators': 213, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': None}. Best is trial 0 with value: 0.11252555540411613.
[I 2025-07-23 05:33:44,941] Trial 1 finished with value: 0.019047619047619046 and parameters: {'n_estimators': 276, 'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 0 with value: 0.11252555540411613.
[I 2025-07-23 05:33:48,986] Trial 2 finished with value: 0.0 and parameters: {'n_estimators': 168, 'max_depth': 16, 'min_samples_split': 9, 'min_samples_leaf': 4, 'max_features': 'log2'}. Best is trial 0 with value: 0.11252555540411613.
[I 2025-07-23 05:33:51,688] Trial 3 finished with value: 0.0 and parameters: {'n_estimators': 197, 'max_depth': 8, 'min_samples_split': 7, 'min_s

In [18]:
# Mostrar los mejores hiperparámetros encontrados
print("Mejores parámetros (Optuna):")
print(study.best_params)

# Entrenar modelo con los mejores parámetros
best_model = RandomForestClassifier(**study.best_params, random_state=42)
best_model.fit(X_train, y_train)

# Evaluar en test
y_pred_optuna = best_model.predict(X_test)

from sklearn.metrics import classification_report
print("\nReporte clasificación (Optuna):")
print(classification_report(y_test, y_pred_optuna))


Mejores parámetros (Optuna):
{'n_estimators': 256, 'max_depth': 16, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': None}

Reporte clasificación (Optuna):
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       780
           1       1.00      0.08      0.15        24

    accuracy                           0.97       804
   macro avg       0.99      0.54      0.57       804
weighted avg       0.97      0.97      0.96       804

