In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

In [6]:
# Read the raw CSV into a DataFrame.
csv_path = 'C:/Users/Checkout/Documents/Courses/GWAR/Datasets/CNDS.csv'
df = pd.read_csv(csv_path)

# Columns retained for modelling. 'D0-D4' is listed last on purpose: the
# modelling cells take the final column as the label and the rest as features.
train_cols = [
    'LAT', 'LON',
    'T2M', 'T2MDEW', 'TS', 'T2M_RANGE', 'T2M_MAX', 'T2M_MIN',
    'QV2M', 'RH2M', 'PS',
    'WS10M', 'WS10M_MAX', 'WS10M_MIN', 'WS10M_RANGE', 'WD10M',
    'ndvi_value',
    'D0-D4',
]

# Keep only those columns, in the order given above.
df = df.loc[:, train_cols]

# Two-stage split with a fixed seed: 10% of all rows are held out for testing,
# then 10% of the remaining rows are held out for validation.
train_data, test_data = train_test_split(df, test_size=0.1, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Report the size of each split.
for split_name, split_frame in (
    ("Train", train_data),
    ("Validation", val_data),
    ("Test", test_data),
):
    print(f"{split_name} set shape: {split_frame.shape}")

Train set shape: (985035, 18)
Validation set shape: (109449, 18)
Test set shape: (121610, 18)


In [None]:

# Separate every split into a feature matrix and a label vector.
def _features_and_label(frame):
    """Return (all columns except the last, the last column) of `frame`."""
    return frame.iloc[:, :-1], frame.iloc[:, -1]


train_X, train_y = _features_and_label(train_data)
val_X, val_y = _features_and_label(val_data)
test_X, test_y = _features_and_label(test_data)

# Min/max scaling: the ranges are learned from the training features only and
# then applied unchanged to the validation and test features.
scaler = MinMaxScaler()
scaler.fit(train_X)
train_X = scaler.transform(train_X)
val_X = scaler.transform(val_X)
test_X = scaler.transform(test_X)

# Decision Tree Classifier
# The estimator below is a classifier, so the printed figure is classification
# accuracy expressed as a percentage.
dt = DecisionTreeClassifier(random_state=0)
dt.fit(train_X, train_y)
dt_pred = dt.predict(test_X)
# Score the predictions that were just computed; dt.score(test_X, test_y) would
# run predict on the test set a second time to produce the same accuracy.
print("Decision Tree Accuracy:", accuracy_score(test_y, dt_pred)*100)
# Additional diagnostics, currently disabled:
# print("Decision Tree F1 Score:", f1_score(test_y, dt_pred, average='weighted'))
# print("Decision Tree Confusion Matrix:")
# print(confusion_matrix(test_y, dt_pred))
# print(classification_report(test_y, dt_pred))

# Random Forest Classifier
# RandomForestRegressor is not imported anywhere in this notebook, so using it
# raised NameError. The imported RandomForestClassifier matches the decision-tree
# step, which also treats the last column as a class label.
# NOTE(review): assumes 'D0-D4' holds discrete class labels - confirm.
rf = RandomForestClassifier(n_estimators=100, random_state=0)
rf.fit(train_X, train_y)
rf_pred = rf.predict(test_X)
# Reuse rf_pred instead of rf.score, which would predict on test_X again.
print("Random Forest Accuracy:", accuracy_score(test_y, rf_pred)*100)
# Additional diagnostics, currently disabled:
# print("Random Forest F1 Score:", f1_score(test_y, rf_pred, average='weighted'))
# print("Random Forest Confusion Matrix:")
# print(confusion_matrix(test_y, rf_pred))
# print(classification_report(test_y, rf_pred))

# LSTM
# Insert a time axis of length 1 so each row becomes a one-step sequence:
# (samples, features) -> (samples, 1, features).
lstm_train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
lstm_val_X = np.reshape(val_X, (val_X.shape[0], 1, val_X.shape[1]))
lstm_test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))

# One LSTM layer followed by a single linear output unit, trained with MSE.
lstm_model = Sequential()
lstm_model.add(LSTM(100, input_shape=(lstm_train_X.shape[1], lstm_train_X.shape[2])))
lstm_model.add(Dense(1))
lstm_model.compile(loss='mse', optimizer='adam')

lstm_model.fit(lstm_train_X, train_y, epochs=100, batch_size=32, validation_data=(lstm_val_X, val_y), verbose=2, shuffle=False)
lstm_pred = lstm_model.predict(lstm_test_X)
# `scaler` was fitted on the feature columns and the labels were never scaled,
# so the previous scaler.inverse_transform(lstm_pred) call was both unnecessary
# and a shape mismatch (one prediction column vs. the fitted feature columns).
# accuracy_score also rejects continuous predictions, so round the network
# output to the nearest integer label and flatten it to one value per sample.
# NOTE(review): assumes 'D0-D4' holds integer class labels - confirm.
lstm_pred = np.round(lstm_pred).astype(int).ravel()
print("LSTM Accuracy:", accuracy_score(test_y, lstm_pred)*100)
# Additional diagnostics, currently disabled:
# print("LSTM F1 Score:", f1_score(test_y, lstm_pred, average='weighted'))
# print("LSTM Confusion Matrix:")
# print(confusion_matrix(test_y, lstm_pred))
# print(classification_report(test_y, lstm_pred))

# ANN
# Fully connected network: 100 -> 50 ReLU units, then one linear output unit,
# trained with MSE on the scaled features.
ann_model = Sequential()
ann_model.add(Dense(100, input_dim=train_X.shape[1], activation='relu'))
ann_model.add(Dense(50, activation='relu'))
ann_model.add(Dense(1))
ann_model.compile(loss='mse', optimizer='adam')

ann_model.fit(train_X, train_y, epochs=100, batch_size=32, validation_data=(val_X, val_y), verbose=2, shuffle=False)
ann_pred = ann_model.predict(test_X)
# The network emits continuous values, which accuracy_score rejects when the
# true labels are classes. Round to the nearest integer label and flatten to
# one value per sample before scoring.
# NOTE(review): assumes 'D0-D4' holds integer class labels - confirm.
ann_pred = np.round(ann_pred).astype(int).ravel()
print("ANN Accuracy:", accuracy_score(test_y, ann_pred)*100)
# Additional diagnostics, currently disabled:
# print("ANN F1 Score:", f1_score(test_y, ann_pred, average='weighted'))
# print("ANN Confusion Matrix:")
# print(confusion_matrix(test_y, ann_pred))
# print(classification_report(test_y, ann_pred))


Decision Tree Accuracy: -25.98505111040026


In [5]:
# Count missing values in each column of df.
# NOTE(review): the recorded output lists an SPEI column that is not in
# train_cols, so it appears to come from a run made before the column
# selection - re-run after the load cell to refresh it.
df.isna().sum()

LAT                 0
LON                 0
T2M                 0
T2MDEW              0
TS                  0
T2M_RANGE           0
T2M_MAX             0
T2M_MIN             0
QV2M                0
RH2M                0
PS                  0
WS10M               0
WS10M_MAX           0
WS10M_MIN           0
WS10M_RANGE         0
WD10M               0
ndvi_value          0
SPEI           313058
D0-D4               0
dtype: int64

In [None]:
# For each split, the last column is the label and everything before it is a feature.
(train_X, train_y), (val_X, val_y), (test_X, test_y) = (
    (part.iloc[:, :-1], part.iloc[:, -1])
    for part in (train_data, val_data, test_data)
)

# Scale the data: learn the min/max ranges from the training features only,
# then apply the same transformation to all three feature sets.
scaler = MinMaxScaler()
scaler.fit(train_X)
train_X, val_X, test_X = (
    scaler.transform(features) for features in (train_X, val_X, test_X)
)

# Decision Tree Classifier: fit on the scaled training features, predict the test set.
dt = DecisionTreeClassifier(random_state=0).fit(train_X, train_y)
dt_pred = dt.predict(test_X)

# Accuracy is reported as a percentage; F1 is averaged weighted by class support.
dt_accuracy_pct = accuracy_score(test_y, dt_pred) * 100
print("Decision Tree Accuracy:", dt_accuracy_pct)
dt_weighted_f1 = f1_score(test_y, dt_pred, average='weighted')
print("Decision Tree F1 Score:", dt_weighted_f1)

# Per-class breakdown.
print("Decision Tree Confusion Matrix:")
dt_confusion = confusion_matrix(test_y, dt_pred)
print(dt_confusion)
dt_report = classification_report(test_y, dt_pred)
print(dt_report)

# Random Forest Classifier: 100 trees with a fixed seed.
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=0,
)
rf.fit(train_X, train_y)
rf_pred = rf.predict(test_X)

# Headline metrics: accuracy in percent and support-weighted F1.
print(
    "Random Forest Accuracy:",
    accuracy_score(test_y, rf_pred) * 100,
)
print(
    "Random Forest F1 Score:",
    f1_score(test_y, rf_pred, average='weighted'),
)

# Per-class breakdown.
print("Random Forest Confusion Matrix:")
print(confusion_matrix(test_y, rf_pred))
print(classification_report(test_y, rf_pred))

# LSTM
# Add a time axis of length 1 to each feature matrix:
# (samples, features) -> (samples, 1, features).
lstm_train_X = np.expand_dims(train_X, axis=1)
lstm_val_X = np.expand_dims(val_X, axis=1)
lstm_test_X = np.expand_dims(test_X, axis=1)

# One 100-unit LSTM layer feeding a single linear output, trained with MSE.
lstm_model = Sequential([
    LSTM(100, input_shape=lstm_train_X.shape[1:]),
    Dense(1),
])
lstm_model.compile(loss='mse', optimizer='adam')

lstm_model.fit(
    lstm_train_X,
    train_y,
    epochs=100,
    batch_size=32,
    validation_data=(lstm_val_X, val_y),
    verbose=2,
    shuffle=False,
)

# Round the continuous network output to the nearest integer before scoring it
# with the classification metrics.
lstm_pred = np.rint(lstm_model.predict(lstm_test_X)).astype(int)
print("LSTM Accuracy:", accuracy_score(test_y, lstm_pred) * 100)
print("LSTM F1 Score:", f1_score(test_y, lstm_pred, average='weighted'))
print("LSTM Confusion Matrix:")
print(confusion_matrix(test_y, lstm_pred))
print(classification_report(test_y, lstm_pred))

# ANN
ann_model = Sequential()
ann_model.add(Dense(100, input_dim=train_X.shape[1], activation='relu'))
ann_model.add(Dense(50, activation='relu'))
ann_model.add(Dense(1))
ann_model.compile(loss='mse', optimizer='adam')

ann_model.fit(train_X, train_y, epochs=100, batch_size=32, validation_data=(val_X, val_y), verbose=2, shuffle=False)
ann_pred = ann_model.predict(test_X)
ann_pred = np.round(ann_pred
