In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# Data Cleaning

In [2]:
df_1 = pd.read_csv('divisions daily weather.csv')
sylhet_daily_weather = df_1[df_1['Division'] == 'Sylhet']
sylhet_daily_weather_sorted = sylhet_daily_weather.sort_values(by=['Year','Month','Day'])

In [3]:
month_dict = {'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6,
              'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12}
# Convert month names to numeric values
sylhet_daily_weather_sorted['Month'] = sylhet_daily_weather_sorted['Month'].map(month_dict)

In [4]:
sylhet_daily_weather_sorted['Date'] = pd.to_datetime(sylhet_daily_weather_sorted[['Year', 'Month', 'Day']])
sylhet_daily_weather_Date = sylhet_daily_weather_sorted

In [5]:
sylhet_daily_weather_Date = sylhet_daily_weather_Date[['Date','cloudcover','dew','humidity','precip','tempmax','tempmin','windspeed','Flood']]

In [6]:
# 1 = Flood = False
# 0 = Flood = True
sylhet_daily_weather_Date = sylhet_daily_weather_Date.copy()
sylhet_daily_weather_Date.loc[:, 'Flood'] = sylhet_daily_weather_Date.loc[:, 'Flood'].astype(int)

# Child Model 

In [7]:
sylhet_daily_weather_child = sylhet_daily_weather_Date

In [8]:
sylhet_daily_weather_child = sylhet_daily_weather_child.set_index('Date')

In [9]:
features = sylhet_daily_weather_child.columns[:-1]

In [10]:
# DataFrame to numpy array
data = sylhet_daily_weather_child[features].values
target = sylhet_daily_weather_child['Flood'].values

In [11]:
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)

In [12]:
# Sequences for LSTM
sequence_length = 10  
X, y = [], []
for i in range(len(data_scaled) - sequence_length):
    X.append(data_scaled[i:i+sequence_length, :])
    y.append(target[i+sequence_length])

X, y = np.array(X), np.array(y)

In [13]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [14]:
def create_lstm_model(units=50, optimizer='adam'):
    model = Sequential()
    model.add(LSTM(units=units, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(units=units))
    model.add(Dense(units=1, activation='sigmoid'))
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# KerasClassifier 
model = KerasClassifier(build_fn=create_lstm_model, epochs=10, batch_size=32, verbose=0)

# hyperparameter grid
param_grid = {
    'units': [50, 100, 150],
    'optimizer': ['adam', 'rmsprop',]
}

# GridSearchCV to find the best hyperparameters
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', cv=3)
grid_result = grid.fit(X_train, y_train)
print(f"Best Parameters: {grid_result.best_params_}")
print(f"Best Accuracy: {grid_result.best_score_}")


  model = KerasClassifier(build_fn=create_lstm_model, epochs=10, batch_size=32, verbose=0)


Best Parameters: {'optimizer': 'adam', 'units': 100}
Best Accuracy: 0.8015350877192983


In [15]:
# Train the model with best hyperparameters
best_params = grid_result.best_params_
final_model = create_lstm_model(units=best_params['units'], optimizer=best_params['optimizer'])
final_model.fit(X_train, y_train, epochs=19, batch_size=32)  # Adjust epochs and batch_size as needed

Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19


<keras.callbacks.History at 0x28c4211ceb0>

In [16]:
test_loss, test_accuracy = final_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")


Test Accuracy: 0.7800511717796326


# Evaluation of Model Performance

In [63]:
predictions = final_model.predict(X_test)



In [64]:

mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error (MSE): {mse:.4f}")

Mean Squared Error (MSE): 0.1584


In [65]:
# Calculate the R-squared (R2) score
r2 = r2_score(y_test, predictions)
print(f"R-squared (R2) Score: {r2:.4f}")

R-squared (R2) Score: 0.3584


In [66]:
predicted_labels = final_model.predict(X_test)
result_df = pd.DataFrame({'Date': sylhet_daily_weather_child.index[-len(X_test):], 'Predicted_Flood': predicted_labels.flatten()})
result_df.set_index('Date', inplace=True)
result_df = sylhet_daily_weather_child.join(result_df, how='right')
print(result_df)

            cloudcover   dew  humidity  precip  tempmax  tempmin  windspeed  \
Date                                                                          
2021-03-04         8.4  50.8      51.1   0.000     88.4     53.0        8.9   
2021-03-05        17.2  57.6      66.1   0.008     89.3     56.9        9.4   
2021-03-06        46.8  64.7      76.0   0.024     87.0     62.2        8.3   
2021-03-07        31.4  62.2      74.1   0.274     87.5     53.7        6.9   
2021-03-08        18.1  59.6      69.9   0.391     85.0     54.2        5.6   
...                ...   ...       ...     ...      ...      ...        ...   
2023-09-26        31.5  76.7      78.6   0.008     93.1     77.1        5.4   
2023-09-27        35.6  77.1      78.6   0.008     92.9     78.2        5.6   
2023-09-28        57.5  77.7      76.4   0.004     96.7     78.9        5.6   
2023-09-29        89.6  76.8      72.3   0.031     97.1     79.4        6.6   
2023-09-30        83.7  76.8      80.0   0.024     9

In [67]:
predicted_probabilities = final_model.predict(X_test)

# threshold of 0.5 to convert probabilities to binary predictions
predicted_labels = (predicted_probabilities > 0.5).astype(int)

# Maping binary predictions to labels 
predicted_labels_mapped = ['No Flood Risk' if label == 1 else 'High Flood Risk' for label in predicted_labels.flatten()]

# Data Frame with Date and  Predicted Flood Risk
result_df = pd.DataFrame({'Date': sylhet_daily_weather_child.index[-len(X_test):], 'Predicted_Flood': predicted_labels_mapped})
result_df.set_index('Date', inplace=True)



In [69]:
result_df.tail()

Unnamed: 0_level_0,Predicted_Flood
Date,Unnamed: 1_level_1
2023-09-26,No Flood Risk
2023-09-27,No Flood Risk
2023-09-28,High Flood Risk
2023-09-29,No Flood Risk
2023-09-30,High Flood Risk
