In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import tensorflow
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
import matplotlib.pyplot as plt
from keras.models import load_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pickle
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support, classification_report, r2_score

2024-05-27 22:05:36.505394: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Import the datasets

In [2]:
# Feature-engineered dataset; its 'Rain Tomorrow' column is split off as the target further down.
df_feat_eng = pd.read_csv(filepath_or_buffer='df_feature_engineering.csv')

In [3]:
# Complete ("full data") dataset; its 'Rain Tomorrow' column is split off as the target further down.
df_complete = pd.read_csv(filepath_or_buffer='final_df.csv')

## Import the models

In [4]:
# Restore the saved network that is evaluated on the unscaled feature-engineered test split.
NN_feat_eng = load_model(filepath='NN_feat_eng.keras')

2024-05-27 22:05:40.171250: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-27 22:05:40.172212: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [5]:
# Restore the saved network that is evaluated on the unscaled complete-data test split.
NN_full_data = load_model(filepath='NN_full_data.keras')

In [6]:
# Restore the saved network that is evaluated on the min-max scaled feature-engineered test split.
NN_feat_eng_scaled = load_model(filepath='NN_feat_eng_scaled.keras')

In [7]:
# Restore the saved network that is evaluated on the min-max scaled complete-data test split.
NN_full_data_scaled = load_model(filepath='NN_full_data_scaled.keras')

In [19]:
# NOTE(review): pickle.load can execute arbitrary code stored in the file,
# so only load model files that come from a trusted source.
with open('xgboost_model.pkl', mode='rb') as file:
    # Unpickle the stored model object that this notebook evaluates as the XGBoost model
    xgboost_model = pickle.load(file)

In [20]:
# NOTE(review): pickle.load can execute arbitrary code stored in the file,
# so only load model files that come from a trusted source.
with open('random_forest_model.pkl', mode='rb') as file:
    # Unpickle the stored model object that this notebook evaluates as the random forest
    random_forest_model = pickle.load(file)

## Defining the test data

In [9]:
# Separate the 'Rain Tomorrow' target column from the remaining input columns,
# first for the feature-engineered dataset and then for the complete one.
df_feat_eng_inputs = df_feat_eng.drop(labels=['Rain Tomorrow'], axis='columns')
df_feat_eng_target = df_feat_eng['Rain Tomorrow']

df_complete_inputs = df_complete.drop(labels=['Rain Tomorrow'], axis='columns')
df_complete_target = df_complete['Rain Tomorrow']

In [10]:
# Hold out 10% of each (unscaled) dataset as the test split; only the X_test_* / y_test_*
# parts are used for the evaluation below.
# NOTE(review): presumably the same test_size and random_state as in the training
# notebook, so that these rows were never seen during training — confirm.
(
    X_train_val_complete,
    X_test_complete,
    y_train_val_complete,
    y_test_complete,
) = train_test_split(
    df_complete_inputs.values,
    df_complete_target.values,
    test_size=0.1,
    random_state=42,
)
(
    X_train_val_feat_eng,
    X_test_feat_eng,
    y_train_val_feat_eng,
    y_test_feat_eng,
) = train_test_split(
    df_feat_eng_inputs.values,
    df_feat_eng_target.values,
    test_size=0.1,
    random_state=42,
)

## Defining the scaled test data

In [11]:
# Min-max scale every input column of the feature-engineered dataset.
# NOTE(review): the scaler is fitted on ALL rows, including the ones that end up in the
# test split. Presumably this mirrors what was done when the scaled model was trained
# (the evaluation has to use the same scaling) — confirm, and consider fitting on the
# training rows only in both places.
transformer = MinMaxScaler()
transformer.fit(df_feat_eng_inputs)

x_feat_eng_scaled = pd.DataFrame(
    transformer.transform(df_feat_eng_inputs),
    columns=df_feat_eng_inputs.columns,
)
y_feat_eng_scaled = df_feat_eng_target

# First split: 90% train+validation, 10% test.
(
    X_train_val,
    X_test_feat_eng_scaled,
    y_train_val,
    y_test_feat_eng_scaled,
) = train_test_split(
    x_feat_eng_scaled.values,
    y_feat_eng_scaled.values,
    test_size=0.1,
    random_state=42,
)
# Second split: 21% of the 90% becomes validation, i.e. 0.9 * 0.21 ~= 19% of all rows
# are validation and ~71% are training.
(
    X_train_feat_eng_scaled,
    X_val_feat_eng_scaled,
    y_train_feat_eng_scaled,
    y_val_feat_eng_scaled,
) = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.21,
    random_state=42,
)

In [12]:
# Min-max scale every input column of the complete dataset.
# This re-binds `transformer`, `X_train_val` and `y_train_val`, which the
# feature-engineered scaling cell also assigns.
# NOTE(review): the scaler is fitted on ALL rows, including the ones that end up in the
# test split. Presumably this mirrors what was done when the scaled model was trained
# (the evaluation has to use the same scaling) — confirm, and consider fitting on the
# training rows only in both places.
transformer = MinMaxScaler()
transformer.fit(df_complete_inputs)

x_complete_scaled = pd.DataFrame(
    transformer.transform(df_complete_inputs),
    columns=df_complete_inputs.columns,
)
y_complete_scaled = df_complete_target

# First split: 90% train+validation, 10% test.
(
    X_train_val,
    X_test_complete_scaled,
    y_train_val,
    y_test_complete_scaled,
) = train_test_split(
    x_complete_scaled.values,
    y_complete_scaled.values,
    test_size=0.1,
    random_state=42,
)
# Second split: 21% of the 90% becomes validation, i.e. 0.9 * 0.21 ~= 19% of all rows
# are validation and ~71% are training.
(
    X_train_complete_scaled,
    X_val_complete_scaled,
    y_train_complete_scaled,
    y_val_complete_scaled,
) = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.21,
    random_state=42,
)

## Making the predictions for each model

In [22]:
# Store the hard class predictions of every model on its matching test split.
# For the networks, predict() output above 0.5 becomes class 1 and everything else class 0
# (presumably a single sigmoid probability per row — confirm against the model definitions).
predictions_complete = (
    NN_full_data.predict(X_test_complete) > 0.5
).astype("int32")
predictions_feat_eng = (
    NN_feat_eng.predict(X_test_feat_eng) > 0.5
).astype("int32")
predictions_feat_eng_scaled = (
    NN_feat_eng_scaled.predict(X_test_feat_eng_scaled) > 0.5
).astype("int32")
predictions_complete_scaled = (
    NN_full_data_scaled.predict(X_test_complete_scaled) > 0.5
).astype("int32")

# The two pickled models both receive the unscaled feature-engineered test inputs,
# and their predict() output is used as-is.
predictions_rf = random_forest_model.predict(X_test_feat_eng)
predictions_xgb = xgboost_model.predict(X_test_feat_eng)

[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


## Neural Networks

In [23]:
# Test-set report for the full-data network (unscaled inputs).
print('Test Confusion Matrix for the NN_full_data')
print()

# Rows are the true classes, columns the predicted classes
# (confusion_matrix receives the true labels first).
print(
    pd.DataFrame(
        confusion_matrix(y_true=y_test_complete, y_pred=predictions_complete),
        index=['No rain tomorrow', 'Rains Tomorrow'],
        columns=['No rain tomorrow', 'Rains Tomorrow'],
    )
)

print('\nAccuracy: ', accuracy_score(y_true=y_test_complete, y_pred=predictions_complete))
print()
print(classification_report(y_true=y_test_complete, y_pred=predictions_complete))

Test Confusion Matrix for the NN_full_data

                  No rain tomorrow  Rains Tomorrow
No rain tomorrow             10308             418
Rains Tomorrow                1695            1286

Accuracy:  0.8458451885897716

              precision    recall  f1-score   support

           0       0.86      0.96      0.91     10726
           1       0.75      0.43      0.55      2981

    accuracy                           0.85     13707
   macro avg       0.81      0.70      0.73     13707
weighted avg       0.84      0.85      0.83     13707



In [24]:
# Test-set report for the full-data network fed with min-max scaled inputs.
print('Test Confusion Matrix for the NN_full_data_scaled')
print()

# Rows are the true classes, columns the predicted classes
# (confusion_matrix receives the true labels first).
print(
    pd.DataFrame(
        confusion_matrix(y_true=y_test_complete_scaled, y_pred=predictions_complete_scaled),
        index=['No rain tomorrow', 'Rains Tomorrow'],
        columns=['No rain tomorrow', 'Rains Tomorrow'],
    )
)

print('\nAccuracy: ', accuracy_score(y_true=y_test_complete_scaled, y_pred=predictions_complete_scaled))
print()
print(classification_report(y_true=y_test_complete_scaled, y_pred=predictions_complete_scaled))

Test Confusion Matrix for the NN_full_data_scaled

                  No rain tomorrow  Rains Tomorrow
No rain tomorrow             10186             540
Rains Tomorrow                1532            1449

Accuracy:  0.8488363609834391

              precision    recall  f1-score   support

           0       0.87      0.95      0.91     10726
           1       0.73      0.49      0.58      2981

    accuracy                           0.85     13707
   macro avg       0.80      0.72      0.75     13707
weighted avg       0.84      0.85      0.84     13707



In [25]:
# Test-set report for the feature-engineered network (unscaled inputs).
print('Test Confusion Matrix for the NN_feat_eng')
print()

# Rows are the true classes, columns the predicted classes
# (confusion_matrix receives the true labels first).
print(
    pd.DataFrame(
        confusion_matrix(y_true=y_test_feat_eng, y_pred=predictions_feat_eng),
        index=['No rain tomorrow', 'Rains Tomorrow'],
        columns=['No rain tomorrow', 'Rains Tomorrow'],
    )
)

print('\nAccuracy: ', accuracy_score(y_true=y_test_feat_eng, y_pred=predictions_feat_eng))
print()
print(classification_report(y_true=y_test_feat_eng, y_pred=predictions_feat_eng))

Test Confusion Matrix for the NN_feat_eng

                  No rain tomorrow  Rains Tomorrow
No rain tomorrow              2301             665
Rains Tomorrow                 708            2360

Accuracy:  0.7724560822008618

              precision    recall  f1-score   support

           0       0.76      0.78      0.77      2966
           1       0.78      0.77      0.77      3068

    accuracy                           0.77      6034
   macro avg       0.77      0.77      0.77      6034
weighted avg       0.77      0.77      0.77      6034



In [27]:
# Test-set report for the feature-engineered network fed with min-max scaled inputs.
print('Test Confusion Matrix for the NN_feat_eng_scaled')
print()

# Rows are the true classes, columns the predicted classes
# (confusion_matrix receives the true labels first).
print(
    pd.DataFrame(
        confusion_matrix(y_true=y_test_feat_eng_scaled, y_pred=predictions_feat_eng_scaled),
        index=['No rain tomorrow', 'Rains Tomorrow'],
        columns=['No rain tomorrow', 'Rains Tomorrow'],
    )
)

print('\nAccuracy: ', accuracy_score(y_true=y_test_feat_eng_scaled, y_pred=predictions_feat_eng_scaled))
print()
print(classification_report(y_true=y_test_feat_eng_scaled, y_pred=predictions_feat_eng_scaled))

Test Confusion Matrix for the NN_feat_eng_scaled

                  No rain tomorrow  Rains Tomorrow
No rain tomorrow              2304             662
Rains Tomorrow                 722            2346

Accuracy:  0.770633079217766

              precision    recall  f1-score   support

           0       0.76      0.78      0.77      2966
           1       0.78      0.76      0.77      3068

    accuracy                           0.77      6034
   macro avg       0.77      0.77      0.77      6034
weighted avg       0.77      0.77      0.77      6034



# Neural networks conclusions

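Comparing the full-data and the feature-engineering NNs, one can see that the full-data NNs score better on some aggregate statistics, such as accuracy, but are much worse at correctly predicting when it will rain. The full-data NNs even have more false negatives than true positives (1695 vs. 1286 unscaled, 1532 vs. 1449 scaled), as opposed to the feature-engineered ones. Note that the two datasets have different test sets (13707 rows with about 22% rainy days vs. 6034 roughly balanced rows), so accuracy is not directly comparable between them.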

The feature-engineering NNs have a much better recall and f1-score, and a slightly better precision, for the days on which it rains the following day, which is the main priority. The scaled and unscaled feature-engineering NNs are very similar: the unscaled one has a slightly more balanced performance and a slightly better accuracy, but the difference is so small that it is negligible.

For the purpose of this problem, the NNs that work with the feature-engineered data perform better, which suggests that the feature-engineering step was well done.

## Ensemble methods

In [28]:
# Test-set report for the random forest (unscaled feature-engineered inputs).
print('Test Confusion Matrix for the random forest')
print()

# Rows are the true classes, columns the predicted classes
# (confusion_matrix receives the true labels first).
print(
    pd.DataFrame(
        confusion_matrix(y_true=y_test_feat_eng, y_pred=predictions_rf),
        index=['No rain tomorrow', 'Rains Tomorrow'],
        columns=['No rain tomorrow', 'Rains Tomorrow'],
    )
)

print('\nAccuracy: ', accuracy_score(y_true=y_test_feat_eng, y_pred=predictions_rf))
print()
print(classification_report(y_true=y_test_feat_eng, y_pred=predictions_rf))

Test Confusion Matrix for the random forest

                  No rain tomorrow  Rains Tomorrow
No rain tomorrow              2295             671
Rains Tomorrow                 706            2362

Accuracy:  0.7717931720251906

              precision    recall  f1-score   support

           0       0.76      0.77      0.77      2966
           1       0.78      0.77      0.77      3068

    accuracy                           0.77      6034
   macro avg       0.77      0.77      0.77      6034
weighted avg       0.77      0.77      0.77      6034



In [29]:
# Test-set report for the XGBoost model (unscaled feature-engineered inputs).
print('Test Confusion Matrix for the xgboost model')
print()

# Rows are the true classes, columns the predicted classes
# (confusion_matrix receives the true labels first).
print(
    pd.DataFrame(
        confusion_matrix(y_true=y_test_feat_eng, y_pred=predictions_xgb),
        index=['No rain tomorrow', 'Rains Tomorrow'],
        columns=['No rain tomorrow', 'Rains Tomorrow'],
    )
)

print('\nAccuracy: ', accuracy_score(y_true=y_test_feat_eng, y_pred=predictions_xgb))
print()
print(classification_report(y_true=y_test_feat_eng, y_pred=predictions_xgb))

Test Confusion Matrix for the xgboost model

                  No rain tomorrow  Rains Tomorrow
No rain tomorrow              2308             658
Rains Tomorrow                 713            2355

Accuracy:  0.7727875372886974

              precision    recall  f1-score   support

           0       0.76      0.78      0.77      2966
           1       0.78      0.77      0.77      3068

    accuracy                           0.77      6034
   macro avg       0.77      0.77      0.77      6034
weighted avg       0.77      0.77      0.77      6034



# Ensemble methods conclusions

Both the random forest and the XGBoost models have virtually equal performance. As with the NNs, the differences are so small that they are negligible. Both models are about equally good at predicting when it will or won't rain, being slightly better at predicting when it will rain.

# Models conclusions

Both the ensemble methods and the feature-engineering NNs have very similar performance, although the fact that the ensemble methods take much longer to train has to be considered. The random forest took 1.5 hours for the best model to be found, XGBoost took 3 hours (even though it searched over fewer values for the number of estimators), the full-data NN took 20 minutes and the feature-engineering NN took 10 minutes. Ironically, the best model took the least time to train.