<a href="https://colab.research.google.com/github/cristiangodoyangel/Analisis-Inicial-y-Seleccion-de-Problema/blob/main/Sismos_en_Chile.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import plotly.express as px
import geopandas as gpd
from shapely.geometry import Point
from geopandas.tools import sjoin

# Load the seismic dataset from its CSV file
# NOTE(review): the path looks like a mounted Google Drive folder in Colab — confirm the drive is mounted before running
path = '/content/drive/MyDrive/0002 Python - Data Science/Datasets/seismic_data.csv'
df = pd.read_csv(filepath_or_buffer=path)


In [2]:
# Preview the first rows of the freshly loaded DataFrame
df.head()

Unnamed: 0,Date(UTC),Latitude,Longitude,Depth,Magnitude
0,2025-05-26 03:50:27,-19.63,-69.49,97,5.6
1,2025-05-13 00:47:58,-51.25,-72.28,28,5.1
2,2025-05-05 09:46:48,-29.49,-71.84,48,5.0
3,2025-05-05 02:17:48,-31.89,-70.88,88,5.1
4,2025-05-02 15:23:49,-27.52,-72.48,30,5.8


In [8]:
# Parse the raw 'Date(UTC)' strings into timezone-naive datetimes
df['Fecha'] = pd.to_datetime(df['Date(UTC)'])

# Tag the naive timestamps as UTC, then shift them to Chilean local time
df['Fecha_Chile'] = (
    df['Fecha']
    .dt.tz_localize('UTC')
    .dt.tz_convert('America/Santiago')
)

# Hour of day and month number, both taken from the local (Chilean) timestamp
df['Hora'] = df['Fecha_Chile'].dt.hour
df['Mes'] = df['Fecha_Chile'].dt.month

# Lookup table: month number (1-12) -> Spanish month name
meses = dict(enumerate(
    [
        "Enero", "Febrero", "Marzo", "Abril",
        "Mayo", "Junio", "Julio", "Agosto",
        "Septiembre", "Octubre", "Noviembre", "Diciembre",
    ],
    start=1,
))
df['Mes_Nombre'] = df['Mes'].map(meses)

# Summary statistics, transposed so each column becomes a row
df.describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
Latitude,4018.0,-27.837778,-62.35,-31.82,-29.595,-21.5125,-11.07,5.961963
Longitude,4018.0,-70.851127,-75.73,-71.59,-71.18,-70.03,-58.39,1.293387
Depth,4018.0,61.524639,2.0,36.0,50.0,86.0,624.0,38.27661
Magnitude,4018.0,4.428945,2.3,4.0,4.4,4.8,8.4,0.688666
Fecha,4018.0,2019-04-08 11:26:32.820308992,2012-03-03 11:01:47,2016-07-14 17:58:09,2019-06-01 21:22:07,2021-09-02 22:38:11,2025-05-26 03:50:27,
Hora,4018.0,11.336237,0.0,5.0,11.0,17.0,23.0,6.905795
Mes,4018.0,6.574664,1.0,4.0,7.0,9.0,12.0,3.419187


In [9]:
# Preview the first rows again to inspect the derived date columns
# (Fecha, Fecha_Chile, Hora, Mes, Mes_Nombre)
df.head()

Unnamed: 0,Date(UTC),Latitude,Longitude,Depth,Magnitude,Fecha,Fecha_Chile,Hora,Mes,Mes_Nombre
0,2025-05-26 03:50:27,-19.63,-69.49,97,5.6,2025-05-26 03:50:27,2025-05-25 23:50:27-04:00,23,5,Mayo
1,2025-05-13 00:47:58,-51.25,-72.28,28,5.1,2025-05-13 00:47:58,2025-05-12 20:47:58-04:00,20,5,Mayo
2,2025-05-05 09:46:48,-29.49,-71.84,48,5.0,2025-05-05 09:46:48,2025-05-05 05:46:48-04:00,5,5,Mayo
3,2025-05-05 02:17:48,-31.89,-70.88,88,5.1,2025-05-05 02:17:48,2025-05-04 22:17:48-04:00,22,5,Mayo
4,2025-05-02 15:23:49,-27.52,-72.48,30,5.8,2025-05-02 15:23:49,2025-05-02 11:23:49-04:00,11,5,Mayo


In [10]:


# Geographic zone classification by latitude
def clasificar_zona(lat, limite_norte=-27, limite_centro=-36):
    """Classify a latitude into a broad geographic zone.

    Parameters
    ----------
    lat : float
        Latitude in decimal degrees (negative values are south).
    limite_norte : float, default -27
        Latitudes greater than or equal to this value are labelled 'Norte'.
    limite_centro : float, default -36
        Latitudes below ``limite_norte`` but greater than or equal to this
        value are labelled 'Centro'.

    Returns
    -------
    str or None
        'Norte', 'Centro' or 'Sur'; ``None`` when ``lat`` is missing.
    """
    # NaN compares False against every threshold, so without this guard a
    # missing latitude would silently fall through to 'Sur'.
    if pd.isna(lat):
        return None
    if lat >= limite_norte:
        return 'Norte'
    if lat >= limite_centro:
        return 'Centro'
    return 'Sur'

# Label every event with its geographic zone, one latitude at a time
df['Zona'] = df['Latitude'].map(clasificar_zona)


In [12]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


# Binary target: 1 when the magnitude is at least 6.0, otherwise 0
df['Sismo_Fuerte'] = df['Magnitude'].ge(6.0).astype(int)

# Column groups: numeric features are standardised, categorical ones are one-hot encoded
num_cols = ['Depth', 'Hora']
cat_cols = ['Mes', 'Zona']

# Model inputs and target
X = df[['Depth', 'Mes', 'Hora', 'Zona']]
y = df['Sismo_Fuerte']

# Preprocessor: scale the numeric columns; one-hot encode the categorical
# columns, dropping the first level of each
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_cols),
        ('cat', OneHotEncoder(drop='first'), cat_cols),
    ]
)

# Single-step pipeline wrapping the preprocessor
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# Fit the pipeline on X and transform it
X_final = pipeline.fit_transform(X)

# Report the shapes before/after preprocessing and the class balance of the target
print("X original:", X.shape)
print("X transformado:", X_final.shape)
print("y clases:", y.value_counts())


X original: (4018, 4)
X transformado: (4018, 15)
y clases: Sismo_Fuerte
0    3897
1     121
Name: count, dtype: int64
