# Accuracy Summary Table

|      Base Setup     	|     	|     	|     	|         	|            	|     	|     	|     	|         	|
|:-------------------:	|-----	|-----	|-----	|---------	|------------	|-----	|-----	|-----	|---------	|
| SciKit              	|  1  	|  2  	|  3  	| Average 	| Tensorflow 	|  1  	|  2  	|  3  	| Average 	|
| K Nearest Neighbors 	| 100 	| 100 	| 100 	|   100   	| LSTM       	| 100 	| 100 	| 100 	|   100   	|
| Linear & RBF SVM    	| 100 	| 100 	| 100 	|   100   	| GRU        	| 100 	| 100 	| 100 	|   100   	|
| QDA                 	| 100 	| 100 	| 100 	|   100   	| Simple RNN 	| 100 	| 100 	| 100 	|   100   	|
| Gaussian Process    	| 100 	| 100 	| 100 	|   100   	| Bi LSTM    	| 100 	| 100 	| 100 	|   100   	|
| Decision Tree       	| 100 	| 100 	| 100 	|   100   	| Bi GRU     	| 100 	| 100 	| 100 	|   100   	|
| Random Forest       	| 100 	| 100 	| 100 	|   100   	| Bi RNN     	| 100 	| 100 	| 100 	|   100   	|
| Neural Net          	| 100 	| 100 	| 100 	|   100   	| Dense      	| 100 	| 100 	| 100 	|   100   	|
| AdaBoost            	| 100 	| 100 	| 100 	|   100   	|            	|     	|     	|     	|         	|
| Naive Bayes         	| 100 	| 100 	| 100 	|   100   	|            	|     	|     	|     	|         	|



In [1]:
import pandas as pd 
import matplotlib.pyplot as plt 
import pandas as pd 
import numpy as np 
import tensorflow as tf 
import pickle 

from pprint import pprint
from tqdm import tqdm

from tensorflow import keras 
from tensorflow.keras import layers 
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError
from tensorflow.keras.optimizers import Adam 

from sklearn.model_selection import train_test_split, GridSearchCV 
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, minmax_scale

from sklearn.ensemble import RandomForestRegressor

from sklearn.neural_network import BernoulliRBM
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, classification_report
from sklearn.datasets import load_digits

from tensorflow.keras.callbacks import ModelCheckpoint

from scipy.ndimage import convolve

from scikeras.wrappers import KerasClassifier

from keras.utils import to_categorical

import collections 
import random

# Data Preparation

In [2]:
FULL_DATA = False
LE = LabelEncoder()
WINDOW_LENGTH = 5
ADD_ELEMENTS = True


concatenated_data = pd.read_csv("Concatenated Data.csv")

if FULL_DATA:
    raw_dataset = concatenated_data[["tempmax", "tempmin", "temp", "feelslikemax", "feelslikemin", "feelslike", "dew", "humidity", "windspeed", "sealevelpressure", "conditions"]]
else:
    raw_dataset = concatenated_data[["temp", "feelslike", "humidity", "windspeed", "sealevelpressure", "conditions"]]

raw_dataset["conditions"] = [word.replace(", ", "\n") if ("," in word) else word for word in raw_dataset["conditions"]]
raw_dataset["conditions"] =  LE.fit_transform(raw_dataset["conditions"])

t_arr = raw_dataset.copy().to_numpy()
X = []
y = []

for i in range(len(t_arr) - WINDOW_LENGTH):
    t_row = []
    for j in t_arr[i : i + WINDOW_LENGTH]:
        t_row.append(j[:-1])
    t_row = np.array(t_row).flatten()
    X.append(t_row)
    y.append(t_arr[i + WINDOW_LENGTH][-1])

X = np.array(X, "float32")
X = minmax_scale(X, feature_range=(0, 1))
y = np.array(y)

counts = dict(collections.Counter(y))
max_count = max(counts.values())

# Data Augmentation

if ADD_ELEMENTS:
    for key, value in counts.items():
        curX = []
        curY = []
        li, = np.where(y == key)
        for i in range((max_count - value) * 10):
            ci = random.choice(li)
            curX.append(X[ci])
            curY.append(y[ci])
        tX = X.tolist()
        tY = y.tolist()
        tX.extend(curX)
        tY.extend(curY)
        X = np.array(tX)
        y = np.array(tY)        

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size = 0.2)
print(X_train.shape, X_test.shape, X_valid.shape, y_train.shape, y_valid.shape, y_test.shape)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  raw_dataset["conditions"] = [word.replace(", ", "\n") if ("," in word) else word for word in raw_dataset["conditions"]]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  raw_dataset["conditions"] =  LE.fit_transform(raw_dataset["conditions"])


(23497, 25) (7344, 25) (5875, 25) (23497,) (5875,) (7344,)


# 1x Unit Multiplier, Saves

In [3]:
RBM_MULTIPLIER = 1

classifiers = [
    ("KNN", KNeighborsClassifier()),
    ("RBF SVM", SVC()),
    ("LINEAR SVM", SVC(kernel = "linear")),
    ("DECISION TREE", DecisionTreeClassifier()),
    ("RANDOM FOREST", RandomForestClassifier()),
    ("MLP", MLPClassifier()),
    ("ADA BOOST", AdaBoostClassifier()),    
    ("LOGISTIC", LogisticRegression())
]

Results = {

}

for (name, _clf) in tqdm(classifiers):

    for rbm_layer in tqdm(range(0, 6)):
        comb = []

        comb.append((f"mms0", MinMaxScaler()))
        # Constant
        for j in range(0, rbm_layer):
            comb.append((f"rbm{j}", BernoulliRBM(n_components = int(X_train.shape[1] * RBM_MULTIPLIER), learning_rate = 0.01, n_iter = 10, verbose = 0)))

            if j == rbm_layer - 1:
                comb.append((f"mms{j + 1}", MinMaxScaler()))


        
        comb.append((name, _clf))
        predictor = Pipeline(comb)

        predictor.fit(X_train, y_train)
        y_pred = predictor.predict(X_test)
        
        current_filename = f"{name}_{rbm_layer}_Layers.pkl"

        with open(current_filename, "wb") as f:
            pickle.dump(predictor, f)

        accuracy = accuracy_score(y_test, y_pred) * 100
        if name not in Results:
            Results[name] = {}
        Results[name][rbm_layer] = accuracy
        print(f"{name}\tLayer: {rbm_layer}\tAccuracy: {accuracy}")
        
with open("SciKitAccuracySaves.pkl", "wb") as f:
    pickle.dump(Results, f)


  0%|          | 0/8 [00:00<?, ?it/s]

KNN	Layer: 0	Accuracy: 95.23420479302833




KNN	Layer: 1	Accuracy: 94.0495642701525




KNN	Layer: 2	Accuracy: 93.65468409586056




KNN	Layer: 3	Accuracy: 93.69553376906318




KNN	Layer: 4	Accuracy: 93.4776688453159


100%|██████████| 6/6 [01:08<00:00, 11.49s/it]
 12%|█▎        | 1/8 [01:08<08:02, 68.92s/it]

KNN	Layer: 5	Accuracy: 93.88616557734206




RBF SVM	Layer: 0	Accuracy: 89.69226579520696




RBF SVM	Layer: 1	Accuracy: 56.590413943355124




RBF SVM	Layer: 2	Accuracy: 60.15795206971678




RBF SVM	Layer: 3	Accuracy: 62.20043572984749




RBF SVM	Layer: 4	Accuracy: 56.10021786492375


100%|██████████| 6/6 [06:03<00:00, 60.51s/it]
 25%|██▌       | 2/8 [07:11<24:11, 241.93s/it]

RBF SVM	Layer: 5	Accuracy: 58.30610021786492




LINEAR SVM	Layer: 0	Accuracy: 82.33932461873638




LINEAR SVM	Layer: 1	Accuracy: 55.936819172113296




LINEAR SVM	Layer: 2	Accuracy: 57.01252723311547




LINEAR SVM	Layer: 3	Accuracy: 59.40904139433552




LINEAR SVM	Layer: 4	Accuracy: 58.10185185185185


100%|██████████| 6/6 [05:12<00:00, 52.10s/it]
 38%|███▊      | 3/8 [12:24<22:50, 274.19s/it]

LINEAR SVM	Layer: 5	Accuracy: 59.20479302832244




DECISION TREE	Layer: 0	Accuracy: 97.08605664488017




DECISION TREE	Layer: 1	Accuracy: 96.62309368191721




DECISION TREE	Layer: 2	Accuracy: 96.25544662309368




DECISION TREE	Layer: 3	Accuracy: 96.29629629629629




DECISION TREE	Layer: 4	Accuracy: 96.3371459694989


100%|██████████| 6/6 [01:04<00:00, 10.72s/it]
 50%|█████     | 4/8 [13:28<12:45, 191.35s/it]

DECISION TREE	Layer: 5	Accuracy: 96.35076252723312




RANDOM FOREST	Layer: 0	Accuracy: 97.93028322440087




RANDOM FOREST	Layer: 1	Accuracy: 96.77287581699346




RANDOM FOREST	Layer: 2	Accuracy: 96.32352941176471




RANDOM FOREST	Layer: 3	Accuracy: 96.35076252723312




RANDOM FOREST	Layer: 4	Accuracy: 96.25544662309368


100%|██████████| 6/6 [01:37<00:00, 16.18s/it]
 62%|██████▎   | 5/8 [15:05<07:52, 157.36s/it]

RANDOM FOREST	Layer: 5	Accuracy: 96.84095860566448




MLP	Layer: 0	Accuracy: 91.72113289760348




MLP	Layer: 1	Accuracy: 53.894335511982575




MLP	Layer: 2	Accuracy: 57.870370370370374




MLP	Layer: 3	Accuracy: 57.16230936819172




MLP	Layer: 4	Accuracy: 56.86274509803921


100%|██████████| 6/6 [02:45<00:00, 27.52s/it]
 75%|███████▌  | 6/8 [17:51<05:19, 160.00s/it]

MLP	Layer: 5	Accuracy: 60.974945533769066




ADA BOOST	Layer: 0	Accuracy: 79.11220043572985




ADA BOOST	Layer: 1	Accuracy: 76.44335511982571




ADA BOOST	Layer: 2	Accuracy: 78.67647058823529




ADA BOOST	Layer: 3	Accuracy: 76.49782135076254




ADA BOOST	Layer: 4	Accuracy: 72.09967320261438


100%|██████████| 6/6 [01:28<00:00, 14.76s/it]
 88%|████████▊ | 7/8 [19:19<02:16, 136.64s/it]

ADA BOOST	Layer: 5	Accuracy: 74.95915032679738


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LOGISTIC	Layer: 0	Accuracy: 75.95315904139433




LOGISTIC	Layer: 1	Accuracy: 56.11383442265795




LOGISTIC	Layer: 2	Accuracy: 57.16230936819172




LOGISTIC	Layer: 3	Accuracy: 57.16230936819172




LOGISTIC	Layer: 4	Accuracy: 60.92047930283224


100%|██████████| 6/6 [01:04<00:00, 10.73s/it]
100%|██████████| 8/8 [20:24<00:00, 153.01s/it]

LOGISTIC	Layer: 5	Accuracy: 57.89760348583878





In [4]:
pprint(Results)

{'ADA BOOST': {0: 79.11220043572985,
               1: 76.44335511982571,
               2: 78.67647058823529,
               3: 76.49782135076254,
               4: 72.09967320261438,
               5: 74.95915032679738},
 'DECISION TREE': {0: 97.08605664488017,
                   1: 96.62309368191721,
                   2: 96.25544662309368,
                   3: 96.29629629629629,
                   4: 96.3371459694989,
                   5: 96.35076252723312},
 'KNN': {0: 95.23420479302833,
         1: 94.0495642701525,
         2: 93.65468409586056,
         3: 93.69553376906318,
         4: 93.4776688453159,
         5: 93.88616557734206},
 'LINEAR SVM': {0: 82.33932461873638,
                1: 55.936819172113296,
                2: 57.01252723311547,
                3: 59.40904139433552,
                4: 58.10185185185185,
                5: 59.20479302832244},
 'LOGISTIC': {0: 75.95315904139433,
              1: 56.11383442265795,
              2: 57.16230936819172,
        

In [5]:
def BuildGenericModel(input_dimension, output_dimension, layerType, Kernel_Count = 64, Layer_Count = 2, Dense_Flag = False, Bidirectional_Flag = False):
    PredictorModel = Sequential()
    PredictorModel.add(layers.InputLayer((input_dimension, 1)))

    if Dense_Flag:
        # No Return Sequences for Dense Layer
        for i in range(1, Layer_Count):
            PredictorModel.add(layerType(Kernel_Count))

        # Final Layer
        PredictorModel.add(layerType(Kernel_Count))
        
    elif Bidirectional_Flag:
        # Return Sequences (Only Adds if More than 1 Layers)
        for i in range(1, Layer_Count):
            PredictorModel.add(layerType)

        # Add a Final SimpleRNN Layer at End of Bidirectional Layers
        PredictorModel.add(layers.SimpleRNN(Kernel_Count))

    else:
        # Return Sequences (Only Adds if More than 1 Layers)
        for i in range(1, Layer_Count):
            PredictorModel.add(layerType(Kernel_Count, return_sequences = True))
        
        # Final Layer
        PredictorModel.add(layerType(Kernel_Count))

    # Flatten Layer
    PredictorModel.add(layers.Flatten()) 

    # Output Dimension
    PredictorModel.add(layers.Dense(output_dimension, activation = "softmax"))

    # Compile Model
    PredictorModel.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])

    return PredictorModel

def buildModels(X_train_shape, Y_train_shape, Kernel = 64, Layer_Count = 2):
    LSTMModel = BuildGenericModel(X_train_shape[1], Y_train_shape[1], layers.LSTM, Kernel, Layer_Count)
    GRUModel = BuildGenericModel(X_train_shape[1], Y_train_shape[1], layers.GRU, Kernel, Layer_Count)
    SimpleRNNModel = BuildGenericModel(X_train_shape[1], Y_train_shape[1], layers.SimpleRNN, Kernel, Layer_Count)
    BiLSTMModel = BuildGenericModel(X_train_shape[1], Y_train_shape[1], layers.Bidirectional(layers.LSTM(Kernel, return_sequences = True)), Kernel, Layer_Count, Bidirectional_Flag = True)
    BiGRUModel = BuildGenericModel(X_train_shape[1], Y_train_shape[1], layers.Bidirectional(layers.GRU(Kernel, return_sequences = True)), Kernel, Layer_Count, Bidirectional_Flag = True)
    BiSimpleRNNModel = BuildGenericModel(X_train_shape[1], Y_train_shape[1], layers.Bidirectional(layers.SimpleRNN(Kernel, return_sequences = True)), Kernel, Layer_Count, Bidirectional_Flag = True)
    DenseModel = BuildGenericModel(X_train_shape[1], Y_train_shape[1], layers.Dense, Kernel, Layer_Count, Dense_Flag = True)

    return (LSTMModel, GRUModel, SimpleRNNModel, BiLSTMModel, BiGRUModel, BiSimpleRNNModel, DenseModel)


In [9]:

tf_X_train = minmax_scale(X_train, feature_range = (0, 1))
tf_X_test = minmax_scale(X_test, feature_range = (0, 1))

tf_y_train = to_categorical(y_train)
tf_y_test = to_categorical(y_test)

LSTM_Predictor, GRU_Predictor, RNN_Predictor, Bi_LSTM_Predictor, Bi_GRU_Predictor, Bi_RNN_Predictor, Dense_Predictor = buildModels(tf_X_train.shape, tf_y_train.shape)

In [7]:
TRAIN_EPOCHS = 100
BATCH_SIZE = 1024

classifiers = [
    ("LSTM", LSTM_Predictor), 
    ("GRU", GRU_Predictor), 
    ("RNN", RNN_Predictor), 
    ("BI LSTM", Bi_LSTM_Predictor), 
    ("BI GRU", Bi_GRU_Predictor), 
    ("BI RNN", Bi_RNN_Predictor), 
    ("DENSE", Dense_Predictor)
]

Results = {}

In [10]:
RBM_MULTIPLIER = 1 

for (name, _clf) in classifiers:

    for rbm_layer in range(0, 6):
        comb = []

        comb.append((f"mms0", MinMaxScaler()))

        # Constant
        for j in range(0, rbm_layer):
            
            comb.append((f"rbm{j}", BernoulliRBM(n_components = int(X_train.shape[1] * RBM_MULTIPLIER), learning_rate = 0.01, n_iter = 10, verbose = 0)))

            if j == rbm_layer - 1:
                comb.append((f"mms1", MinMaxScaler()))

        predictor = Pipeline(comb)
        
        copy_tf_X_train = predictor.fit_transform(tf_X_train)
        

        mcp_save = ModelCheckpoint(f"{name}_{rbm_layer}_Layers.keras", save_best_only = True, monitor = "accuracy", mode = "max")
        current_model = tf.keras.models.clone_model(_clf)
        current_model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])
        history = current_model.fit(
            copy_tf_X_train, tf_y_train, 
            batch_size = BATCH_SIZE, 
            epochs = TRAIN_EPOCHS, 
            validation_split = 0.1,
            callbacks = [mcp_save])
        y_pred = np.argmax(current_model.predict(tf_X_test), axis = 1)
        accuracy = accuracy_score(y_test, y_pred) * 100
        
        if name not in Results:
            Results[name] = {}
        
        Results[name][rbm_layer] = {}
        Results[name][rbm_layer]["accuracy"] = accuracy
        Results[name][rbm_layer]["history"] = history.history
        
        print(f"{name}\tConstant\tLayer: {rbm_layer}\tAccuracy: {accuracy}")
        
        
        

with open("TensorflowAccuracySaves.pkl", "wb") as f:
    pickle.dump(Results, f)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [11]:
pprint(Results)

{'BI GRU': {0: {'accuracy': 93.50490196078431,
                'history': {'accuracy': [0.4973282217979431,
                                         0.609306275844574,
                                         0.6368752121925354,
                                         0.658911406993866,
                                         0.7234596014022827,
                                         0.7741523385047913,
                                         0.8049368858337402,
                                         0.8324112296104431,
                                         0.8502388000488281,
                                         0.8607367277145386,
                                         0.8677826523780823,
                                         0.871707558631897,
                                         0.8749231696128845,
                                         0.878186047077179,
                                         0.837234616279602,
                                         0.