In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
tf.get_logger().setLevel('ERROR')

df = pd.read_csv("CatalogoChile2000_2022T.csv")

# Row filter: keep events with z <= 70, lat >= -60 and long >= -90
# (presumably depth and geographic bounds -- confirm units against the catalogue).
in_region = (df['z'] <= 70) & (df['lat'] >= -60) & (df['long'] >= -90)

# Columns that together encode the event timestamp.
timestamp_parts = ['year', 'month', 'day', 'hour', 'minute', 'second']

# Build a single datetime column from the timestamp parts, discard the parts
# and the two leftover export columns, and index the frame by event time.
df = (
    df.loc[in_region]
    .assign(fecha=lambda frame: pd.to_datetime(frame[timestamp_parts]))
    .drop(timestamp_parts + ['Unnamed: 0', 'X'], axis=1)
    .set_index('fecha')
)



In [3]:
def train_test_split(data, train_portion):
    """Split `data` in order into train, test and validation parts.

    The first ``int(n * train_portion)`` rows form the training part. The
    remaining rows are cut in half: the first half is the test part and the
    second half (which receives the extra row when the remainder is odd) is
    the validation part. No shuffling is performed.

    Args:
        data: Sliceable object exposing ``.shape[0]`` (e.g. a DataFrame or
            NumPy array).
        train_portion: Fraction of rows assigned to the training part.

    Returns:
        Tuple ``(train_data, test_data, val_data)``.
    """
    cut = int(data.shape[0] * train_portion)
    train_data, holdout = data[:cut], data[cut:]

    midpoint = len(holdout) // 2
    return train_data, holdout[:midpoint], holdout[midpoint:]

In [4]:
# Fraction of the catalogue used for training; the rest is halved into test/validation.
train_rate = 0.7
splits = train_test_split(df, train_rate)
train_data, test_data, val_data = splits

# Report the shape of each partition.
for label, part in zip(("Train data: ", "Test data: ", "Validation data: "), splits):
    print(label, part.shape)

Train data:  (46151, 5)
Test data:  (9889, 5)
Validation data:  (9890, 5)


In [5]:
# Columns fed to the sequence builder; 'magn1' sits at index 3, which is the
# column create_sequences reads as the target.
FEATURE_COLUMNS = ['lat', 'long', 'z', 'magn1']
train_data_features, test_data_features, val_data_features = (
    split[FEATURE_COLUMNS].values for split in (train_data, test_data, val_data)
)

In [6]:
def create_sequences(data, seq_length, target_col=3):
    """Build sliding-window inputs and next-step targets from a 2-D array.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is the value of
    column ``target_col`` in the row immediately after the window.

    Args:
        data: 2-D NumPy array indexed as ``data[row, column]``.
        seq_length: Number of consecutive rows in each window.
        target_col: Column index read as the target. Defaults to 3, the
            position previously hard-coded in this function.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays holding the windows and their
        targets. When ``len(data) <= seq_length`` both arrays are empty.
    """
    n_windows = len(data) - seq_length
    xs = [data[start:start + seq_length] for start in range(n_windows)]
    ys = [data[start + seq_length, target_col] for start in range(n_windows)]
    return np.array(xs), np.array(ys)

# Number of consecutive events in each input window.
SEQ_LENGTH = 21

# Build (windows, next-step target) pairs for every partition.
(trainX, trainY), (testX, testY), (valX, valY) = (
    create_sequences(features, SEQ_LENGTH)
    for features in (train_data_features, test_data_features, val_data_features)
)

In [7]:
from tensorflow.keras import backend as K

def weighted_binary_crossentropy(y_true, y_pred):
    """Binary cross-entropy in which positive samples are weighted 15x.

    Each element's cross-entropy is multiplied by 15.0 where the label is 1
    and by 1.0 where the label is 0; the mean of the weighted values is
    returned.

    Args:
        y_true: Ground-truth labels (cast to float32 before use).
        y_pred: Predicted probabilities.

    Returns:
        Scalar tensor with the mean weighted loss.
    """
    # Class weights
    positive_weight = 15.0 ## PESO_PARAMETRO
    negative_weight = 1.0

    labels = tf.cast(y_true, tf.float32)

    # Per-element weight: positive_weight where label == 1, negative_weight where label == 0
    per_sample_weight = labels * positive_weight + (1.0 - labels) * negative_weight

    # Unweighted element-wise binary cross-entropy
    per_sample_bce = K.binary_crossentropy(labels, y_pred)

    return K.mean(per_sample_weight * per_sample_bce)

In [19]:
def binarize_data(data, threshold):
    """Map values to 0/1 labels: 1 where ``data > threshold``, else 0.

    The comparison is strict, so values equal to ``threshold`` map to 0.

    Args:
        data: Array-like supporting element-wise ``>`` comparison.
        threshold: Cut-off value.

    Returns:
        NumPy array of 0/1 integers with the same shape as ``data``.
    """
    exceeds = data > threshold
    return np.where(exceeds, 1, 0)

# Targets strictly above this value are labelled as the positive class.
threshold = 4.0 ## THRESHOLD_PARAMETRO
binarized_trainY, binarized_testY, binarized_valY = (
    binarize_data(targets, threshold) for targets in (trainY, testY, valY)
)

In [9]:
# Show the shape of the current training inputs as the cell result.
trainX.shape

(46130, 21, 4)

In [10]:
# NOTE: disabled experiment -- everything below is commented out and does not run.
# # Define the LSTM model
# model = Sequential([
#     LSTM(64, input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=False),  # LSTM layer
#     Dropout(0.2),  # Regularization to reduce overfitting
#     Dense(32, activation='relu'),  # Intermediate dense layer
#     Dense(1, activation='sigmoid')  # Output layer for binary classification
# ])

# # Compile the model
# model.compile(optimizer='adam', loss=weighted_binary_crossentropy, metrics=['binary_accuracy'])

# # Train the model
# history = model.fit(trainX, binarized_trainY, epochs=100, batch_size=16, validation_data=(valX, binarized_valY))


In [20]:
# Switch from windowed sequences to per-event (tabular) inputs: every column
# except 'magn1' is a feature and 'magn1' is the raw target.
trainX = train_data.drop(columns=["magn1"])
trainY = train_data["magn1"]

testX = test_data.drop(columns=["magn1"])
testY = test_data["magn1"]

valX = val_data.drop(columns=["magn1"])
valY = val_data["magn1"]

# Re-derive the binary labels from the per-event targets defined above.
# The labels computed earlier came from the windowed targets, which have
# SEQ_LENGTH fewer rows and are shifted relative to these frames; reusing them
# makes model.fit fail (or train on misaligned labels) on a fresh kernel.
binarized_trainY = binarize_data(trainY, threshold)
binarized_testY = binarize_data(testY, threshold)
binarized_valY = binarize_data(valY, threshold)

# Inputs and labels must line up row for row.
assert len(trainX) == len(binarized_trainY)
assert len(testX) == len(binarized_testY)
assert len(valX) == len(binarized_valY)

In [24]:
# Build the feed-forward classifier.
# The input width is taken from the training frame instead of a hard-coded 4,
# so the model stays consistent if the feature columns change.
n_features = trainX.shape[1]

model = Sequential()
# Explicit input specification
model.add(tf.keras.Input(shape=(n_features,)))
# First hidden layer
model.add(Dense(units=128, activation='relu'))
# Second hidden layer
model.add(Dense(units=64, activation='relu'))
# Output layer: a single sigmoid unit for binary classification
model.add(Dense(units=1, activation='sigmoid'))

# Compile with the class-weighted loss defined earlier in the notebook
model.compile(optimizer='adam', loss=weighted_binary_crossentropy, metrics=['binary_accuracy'])

# Train, monitoring the validation partition after every epoch
history = model.fit(trainX, binarized_trainY, epochs=100, batch_size=16, validation_data=(valX, binarized_valY))


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [25]:
# Report the final-epoch losses. "val_loss" is measured on the validation_data
# passed to model.fit, so it is labelled as validation loss rather than test loss.
print(
    "Train loss: ",
    history.history["loss"][-1],
    "\nValidation loss:",
    history.history["val_loss"][-1],
)

Train loss:  1.7459062337875366 
Test loss: 1.4688032865524292


In [26]:
# Run the trained model on the test inputs; the final layer is a sigmoid unit,
# so `yhat` holds values between 0 and 1.
yhat = model.predict(testX)



In [27]:
# Turn the predicted values into hard 0/1 labels using a strict 0.5 cut-off.
yhat_bin = (yhat > 0.5).astype(int)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Hard 0/1 predictions for the test partition (strict 0.5 cut-off), flattened to 1-D.
yhat_bin = (yhat > 0.5).astype(int)
yhat_bin_flatten = yhat_bin.ravel()

# Ground-truth labels of the TEST partition. These were previously stored under
# a "val"-prefixed name even though they come from binarized_testY.
binarized_testY_flatten = binarized_testY.ravel()
# Old name kept as an alias in case other cells still reference it.
binarized_valY_flatten = binarized_testY_flatten

accuracy = accuracy_score(binarized_testY_flatten, yhat_bin_flatten)
precision = precision_score(binarized_testY_flatten, yhat_bin_flatten)
recall = recall_score(binarized_testY_flatten, yhat_bin_flatten)
f1 = f1_score(binarized_testY_flatten, yhat_bin_flatten)
# Rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(binarized_testY_flatten, yhat_bin_flatten)

print(f'Accuracy: {round(accuracy, 4)}')
print(f'Precision: {round(precision, 4)}')
print(f'Recall: {round(recall, 4)}')
print(f'F1-Score: {round(f1, 4)}')
print(f'Matriz de Confusión:\n{conf_matrix}')

Accuracy: 0.0716
Precision: 0.0716
Recall: 1.0
F1-Score: 0.1336
Matriz de Confusión:
[[   0 9181]
 [   0  708]]
