In [186]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np


from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import activations

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [187]:
keras.losses.CategoricalCrossentropy(from_logits=True).name

'categorical_crossentropy'

In [188]:
qc_raw_data_df = pd.read_csv("QC_2016_01_01_TO_2018_08_31.csv")
qc_raw_data_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tempmax,974.0,32.170329,2.092003,25.9,30.9,32.0,33.8,38.3
tempmin,974.0,24.803285,1.422087,18.0,24.0,25.0,25.8,28.0
temp,974.0,28.304209,1.473135,23.8,27.3,28.2,29.3,32.3
feelslikemax,974.0,37.993429,3.596872,25.9,35.5,38.5,40.6,54.9
feelslikemin,974.0,25.070534,2.026257,18.0,24.0,25.0,25.8,33.2
feelslike,974.0,31.719302,3.0597,23.8,29.3,31.8,34.375,38.4
dew,974.0,23.710678,2.099124,16.1,22.2,24.3,25.4,27.3
humidity,974.0,77.893634,10.132088,56.9,69.425,78.0,86.175,98.9
precip,974.0,7.687579,19.881731,0.0,0.0,0.2015,5.88625,252.496
precipprob,974.0,66.529774,47.21287,0.0,0.0,100.0,100.0,100.0


In [189]:
qc_data = qc_raw_data_df.copy()
qc_data.columns

Index(['name', 'datetime', 'tempmax', 'tempmin', 'temp', 'feelslikemax',
       'feelslikemin', 'feelslike', 'dew', 'humidity', 'precip', 'precipprob',
       'precipcover', 'preciptype', 'snow', 'snowdepth', 'windgust',
       'windspeed', 'winddir', 'sealevelpressure', 'cloudcover', 'visibility',
       'solarradiation', 'solarenergy', 'uvindex', 'severerisk', 'sunrise',
       'sunset', 'moonphase', 'conditions', 'description', 'icon', 'stations'],
      dtype='object')

In [190]:
features = ["tempmin", "tempmax", "temp", "feelslikemax", "feelslikemin", "feelslike", "dew", "humidity", "windspeed", "cloudcover", "visibility"]

# Weather Condition
target = ["conditions"]
# target = ["conditions", "description"]

# Precipitation Coverage, Amount, Probability
# target = ["precipcover"]
# target = ["precipprob", "precipcover"]
# target = ["precipprob", "precipcover", "preciptype"]

X = qc_data[features]
X.head()


Unnamed: 0,tempmin,tempmax,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,windspeed,cloudcover,visibility
0,24.0,29.9,26.3,34.1,24.0,27.9,23.5,85.3,20.3,81.7,7.7
1,23.5,30.9,27.3,36.4,23.5,29.4,23.3,79.6,20.3,50.5,9.4
2,23.6,32.5,27.8,35.8,23.6,29.2,22.2,74.2,18.4,36.6,9.4
3,22.1,32.5,27.4,35.8,22.1,28.9,21.0,69.3,16.5,32.4,9.3
4,22.4,31.0,27.1,33.8,22.4,28.2,21.0,71.1,20.1,32.2,9.4


In [191]:
X.isna().sum()

tempmin         0
tempmax         0
temp            0
feelslikemax    0
feelslikemin    0
feelslike       0
dew             0
humidity        0
windspeed       0
cloudcover      0
visibility      0
dtype: int64

In [192]:
Y = qc_data[target]
Y["conditions"].unique()


array(['Rain, Partially cloudy', 'Partially cloudy', 'Rain, Overcast',
       'Overcast'], dtype=object)

In [193]:
Y = Y.conditions.to_list()
Y

['Rain, Partially cloudy',
 'Rain, Partially cloudy',
 'Rain, Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Rain, Partially cloudy',
 'Partially cloudy',
 'Rain, Partially cloudy',
 'Rain, Partially cloudy',
 'Rain, Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Rain, Partially cloudy',
 'Rain, Partially cloudy',
 'Rain, Overcast',
 'Rain, Partially cloudy',
 'Partially cloudy',
 'Rain, Partially cloudy',
 'Rain, Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Partially cloudy',
 'Part

In [194]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
Le = LabelEncoder()
Le.fit(Y_train)
y_train_encoded = Le.transform(Y_train)
y_test_encoded = Le.transform(Y_test)
x_train_encoded = X_train.to_numpy()
x_test_encoded = X_test.to_numpy()

In [195]:
np.unique(y_train_encoded)

array([0, 1, 2, 3])

In [196]:
x_train_encoded.shape

(779, 11)

In [197]:
es_function = EarlyStopping(
    patience = 5,
    min_delta = 0.001,
    restore_best_weights = True
)
checkpoint_filepath = "tmp/best_weights"

model_checkpoint_function = ModelCheckpoint(
    filepath = checkpoint_filepath,
    save_weights_only = True,
    monitor = 'val_accuracy',
    mode = 'max',
    save_best_only=True
)

In [199]:
ann_model = Sequential(
    [
        Flatten(input_shape=[11]),
        Dense(128, activation=activations.relu),
        #Dense(512, activation="sigmoid"),
        #Dense(512, activation="sigmoid"),
        #Dense(512, activation="sigmoid"),
        Dense(4)
    ]
)

print(ann_model.summary())

loss_function = keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer_function = keras.optimizers.SGD(learning_rate=0.0001)
metrics = ["accuracy"]

ann_model.compile(loss=loss_function, optimizer=optimizer_function, metrics=metrics)

batch_size = 512
epochs = 100

history = ann_model.fit(x_train_encoded, y_train_encoded, 
              #batch_size=batch_size, 
              epochs=epochs, 
              validation_data = (x_test_encoded, y_test_encoded),
              callbacks = [model_checkpoint_function],
              shuffle=True, verbose=2)

Model: "sequential_25"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_25 (Flatten)        (None, 11)                0         
                                                                 
 dense_96 (Dense)            (None, 128)               1536      
                                                                 
 dense_97 (Dense)            (None, 4)                 516       
                                                                 
Total params: 2,052
Trainable params: 2,052
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/100


ValueError: in user code:

    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1024, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1082, in compute_loss
        return self.compiled_loss(
    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 152, in __call__
        losses = call_fn(y_true, y_pred)
    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 284, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 2004, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "c:\Users\Alaric\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\backend.py", line 5532, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 4) are incompatible


In [None]:
history.params

In [None]:
history_df = pd.DataFrame(history.history)
history_df.loc[0:, ['loss', 'val_loss']].plot()
print(("Minimum Validation Loss: {:0.4f}").format(history_df['val_loss'].min()))

plt.show()


In [None]:
history_df.loc[0:, ['accuracy', 'val_accuracy']].plot()
print(("Maximum Obtained Accuracy: {:0.4f}").format(history_df['val_accuracy'].max()))

plt.show()

#0.8051 without early stopping
#0.7795 with early stopping

In [None]:
ann_model.load_weights(checkpoint_filepath)
ann_model.predict(x_test_encoded).flatten()