In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import joblib
import tensorflow as tf

# ===============================
# Step 1: Import CSVs
# ===============================
# Load the train and test splits in one pass; the generator yields the frames
# in the same order as the paths, so the unpacking binds train first.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/mlpr-split-encoded-data/train (1).csv",
        "/kaggle/input/mlpr-split-encoded-data/test (2).csv",
    )
)

# ===============================
# Step 2: Feature list
# ===============================
# Model input columns. The order is significant: it fixes the column order of
# every feature matrix built from this list (and therefore what the fitted
# scaler and the network see at each position).
features = [
    'DEATHS_INDIRECT',
    'INJURIES_DIRECT',
    'INJURIES_INDIRECT',
    'DEATHS_DIRECT',
    'DAMAGE_PROPERTY',
    'DAMAGE_CROPS',
    'duration_hours',
    'desc_word_count',
    # has_* columns: presumably keyword indicator flags -- TODO confirm
    'has_tornado',
    'has_hail',
    'has_flood',
    'has_wind',
    'has_tree',
    'has_broken',
    'has_blown',
    'tmin',
    'tmax',
    'tavg',
    'ppt',
    'MAGNITUDE_IMPUTED',
    'STATE_FIPS',
]

# ===============================
# Step 3: Sequence creation
# ===============================
def create_sequences(df, features, max_seq_length):
    """Build one fixed-length, zero-padded feature window per row, per state.

    Rows are grouped by ``st_abb`` and ordered by ``power_outage_datetime``
    within each group. For every row, the window holds that row and up to
    ``max_seq_length - 1`` rows preceding it in the same group; windows near
    the start of a group are left-padded with zeros so the most recent row is
    always the last timestep. The label of a window is the
    ``is_storm_lagged`` value of its last row.

    Args:
        df: DataFrame with ``st_abb``, ``power_outage_datetime``,
            ``is_storm_lagged`` and every column named in ``features``.
        features: Column names used as model inputs, in the desired order.
        max_seq_length: Number of timesteps per window. A value below 1
            produces no windows.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape
        ``(n_rows, max_seq_length, len(features))`` and ``y`` has shape
        ``(n_rows,)``. Groups appear in order of first appearance in ``df``.
        When no window is produced, ``X`` is still 3-D with a leading
        dimension of 0, so downstream reshapes keep working.
    """
    n_features = len(features)
    window_blocks, target_blocks = [], []

    if max_seq_length >= 1:
        # Timestep offsets inside a single window: 0 .. max_seq_length - 1.
        offsets = np.arange(max_seq_length)

        # One pass over the frame instead of a boolean mask per state.
        # sort=False keeps groups in order of first appearance, and rows with
        # a missing state are dropped, as an equality mask would do.
        for _, state_df in df.groupby('st_abb', sort=False):
            state_df = state_df.sort_values('power_outage_datetime')
            n_rows = len(state_df)

            # Prepend max_seq_length - 1 zero rows once, so every window is a
            # plain contiguous slice of `padded` ending at the current row.
            padded = np.zeros((max_seq_length - 1 + n_rows, n_features))
            padded[max_seq_length - 1:] = state_df[features].values

            # row_index[i, k] = i + k selects the k-th timestep of window i.
            row_index = np.arange(n_rows)[:, None] + offsets[None, :]
            window_blocks.append(padded[row_index])
            target_blocks.append(state_df['is_storm_lagged'].values)

    if not window_blocks:
        # Keep X three-dimensional even when there is nothing to return.
        empty_windows = np.zeros((0, max(max_seq_length, 0), n_features))
        return empty_windows, np.array([])

    return np.concatenate(window_blocks), np.concatenate(target_blocks)

# ===============================
# Step 4: Data preparation
# ===============================
max_seq_length = 6  # timesteps per window handed to the LSTM

# Window each split on its own so no test row ends up inside a training window.
X_train, y_train = create_sequences(train_df, features, max_seq_length)
X_test, y_test = create_sequences(test_df, features, max_seq_length)

# StandardScaler expects 2-D input, so collapse (windows, timesteps, features)
# into (windows * timesteps, features), scale, then restore the 3-D shape.
n_features = len(features)
X_train_reshaped = X_train.reshape(-1, n_features)
X_test_reshaped = X_test.reshape(-1, n_features)

# Statistics come from the training split only; the test split reuses them.
# NOTE(review): the flattened training rows include the all-zero padding
# timesteps added by create_sequences, so they contribute to the fitted
# mean/variance and are no longer zero after scaling -- confirm this is
# intended.
scaler = StandardScaler().fit(X_train_reshaped)
X_train_scaled = scaler.transform(X_train_reshaped).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test_reshaped).reshape(X_test.shape)

# ===============================
# Step 5: Train final model with best params
# ===============================
# Best parameters from Optuna
best_params = {
    'lstm_units': 66,
    'dropout_rate': 0.5377800075562813,
    'dense_units': 151,
    'l2_reg': 0.00012795282112026385,
    'learning_rate': 0.003748564127369342,
    'batch_size': 16
}

# Architecture: LSTM -> Dropout -> Dense(relu) -> Dense(sigmoid).
# The input shape is declared with an explicit Input entry rather than the
# legacy `input_shape=` keyword on the first layer, which current Keras
# releases warn about for Sequential models.
final_model = Sequential([
    tf.keras.Input(shape=(max_seq_length, len(features))),
    LSTM(
        best_params['lstm_units'],
        kernel_regularizer=l2(best_params['l2_reg']),
    ),
    Dropout(best_params['dropout_rate']),
    Dense(
        best_params['dense_units'],
        activation='relu',
        kernel_regularizer=l2(best_params['l2_reg']),
    ),
    # Single sigmoid unit: one probability per window for the binary target.
    Dense(1, activation='sigmoid'),
])

final_model.compile(
    optimizer=Adam(learning_rate=best_params['learning_rate']),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Trains on the full training set; no validation data is passed here, so the
# per-epoch accuracy/loss printed by Keras are training metrics only.
final_model.fit(
    X_train_scaled, y_train,
    epochs=10,
    batch_size=best_params['batch_size'],
    verbose=1
)

# ===============================
# Step 6: Evaluate and save
# ===============================
# The sigmoid head outputs one probability per window; values above 0.5 are
# labelled as the positive class.
y_pred_prob = final_model.predict(X_test_scaled)
y_pred = np.greater(y_pred_prob, 0.5).astype(int)

test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_accuracy)
report_text = classification_report(y_test, y_pred)
print("Classification Report:\n", report_text)

# Persist the model together with the scaler fitted on the training split, so
# inference can apply the same standardisation.
# NOTE(review): the Keras model is pickled through joblib; Keras documents
# model.save() as its native persistence format -- confirm consumers really
# need the .pkl before changing it.
joblib.dump(final_model, 'storm_lstm_model.pkl')
joblib.dump(scaler, 'storm_scaler.pkl')

2025-05-17 21:23:08.770366: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747516988.962181      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747516989.018538      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
I0000 00:00:1747517011.426143      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1747517011.426813      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability:

Epoch 1/10


I0000 00:00:1747517016.505313      99 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m9200/9200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 5ms/step - accuracy: 0.8843 - loss: 0.3268
Epoch 2/10
[1m9200/9200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 5ms/step - accuracy: 0.8943 - loss: 0.2994
Epoch 3/10
[1m9200/9200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 5ms/step - accuracy: 0.8920 - loss: 0.3024
Epoch 4/10
[1m9200/9200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 5ms/step - accuracy: 0.8952 - loss: 0.2970
Epoch 5/10
[1m9200/9200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 5ms/step - accuracy: 0.8960 - loss: 0.2948
Epoch 6/10
[1m9200/9200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 5ms/step - accuracy: 0.8953 - loss: 0.2945
Epoch 7/10
[1m9200/9200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 5ms/step - accuracy: 0.8945 - loss: 0.2962
Epoch 8/10
[1m9200/9200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 5ms/step - accuracy: 0.8936 - loss: 0.2980
Epoch 9/10
[1m9200/9200[0

['storm_scaler.pkl']