In [None]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np


from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import activations

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# Scratch check: display the `name` attribute of a logits-based
# categorical cross-entropy loss object.
keras.losses.CategoricalCrossentropy(
    from_logits=True,
).name

In [None]:
# Read the raw CSV export, then display its summary statistics transposed so
# each described column becomes a row.
qc_raw_data_df = pd.read_csv("QC_2016_01_01_TO_2018_08_31.csv")
qc_raw_data_df.describe().transpose()

In [None]:
# Work on an independent (deep) copy so the raw frame stays untouched, and list
# the available column names.
qc_data = qc_raw_data_df.copy(deep=True)
qc_data.columns

In [None]:
# Columns used as model inputs.
features = [
    "temp",
    "feelslike",
    "dew",
    "humidity",
    "windspeed",
    "cloudcover",
    "visibility",
]

# Weather Condition (the active prediction target)
target = ["conditions"]
# target = ["conditions", "description"]

# Precipitation Coverage, Amount, Probability (alternative targets, disabled)
# target = ["precipcover"]
# target = ["precipprob", "precipcover"]
# target = ["precipprob", "precipcover", "preciptype"]

# Feature frame restricted to the selected input columns; preview the first rows.
X = qc_data.loc[:, features]
X.head()


In [None]:
# Count missing values per feature column.
X.isnull().sum()

In [None]:
# Target frame (single "conditions" column); show the distinct labels it holds.
Y = qc_data.loc[:, target]
Y.loc[:, "conditions"].unique()


In [None]:
# Flatten the target column into a plain list of label strings.
# Read from `qc_data` rather than from `Y` itself: rebinding `Y` from its own
# previous value made this cell fail on a second run (a list has no
# `.conditions` attribute). Sourcing it from the frame keeps the cell idempotent.
Y = qc_data["conditions"].to_list()

# Preview only the first few labels instead of dumping the whole list.
Y[:10]

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Fit the encoder on the full label set, not only on the training labels:
# LabelEncoder.transform raises ValueError for labels it has not seen, so a
# class that happened to land only in the test split would crash the encoding.
# The label -> integer mapping is sorted by label, so whenever the training
# split already contains every class the resulting codes are unchanged.
Le = LabelEncoder()
Le.fit(Y)
y_train_encoded = Le.transform(Y_train)
y_test_encoded = Le.transform(Y_test)

# Keras consumes plain NumPy arrays.
x_train_encoded = X_train.to_numpy()
x_test_encoded = X_test.to_numpy()

In [None]:
# Show the distinct integer codes present in the encoded training labels.
np.unique(y_train_encoded)

In [None]:
# Display the dimensions of the training feature array.
np.shape(x_train_encoded)

In [None]:
# Location where ModelCheckpoint writes the best weights; the same path is
# passed to load_weights() later in this notebook.
checkpoint_filepath = "tmp/best_weights"

# Early-stopping callback: tolerate 5 epochs without an improvement of at least
# 0.001, then restore the best weights seen. No `monitor` is given, so the
# Keras default applies.
# NOTE: the fit() call in this notebook passes only the checkpoint callback, so
# this object is defined but not currently active.
es_function = EarlyStopping(patience=5, min_delta=0.001, restore_best_weights=True)

# Checkpoint callback: save weights only, and only when validation accuracy
# reaches a new maximum.
model_checkpoint_function = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Derive the layer sizes from the data instead of hard-coding them. The input
# width was previously fixed at 11 although the feature array has one column
# per entry in `features`, which made fit() fail on a shape mismatch.
n_features = x_train_encoded.shape[1]
n_classes = len(Le.classes_)

# Small fully connected classifier: one hidden ReLU layer followed by a linear
# layer that emits one raw logit per class (no softmax; the loss applies it).
ann_model = Sequential(
    [
        Flatten(input_shape=[n_features]),
        Dense(128, activation=activations.relu),
        #Dense(512, activation="sigmoid"),
        #Dense(512, activation="sigmoid"),
        #Dense(512, activation="sigmoid"),
        Dense(n_classes)
    ]
)

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None" line).
ann_model.summary()

# The targets are integer class codes from LabelEncoder, not one-hot vectors,
# so the sparse variant of categorical cross-entropy is required.
# from_logits=True matches the activation-free output layer.
loss_function = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer_function = keras.optimizers.SGD(learning_rate=0.0001)
metrics = ["accuracy"]

ann_model.compile(loss=loss_function, optimizer=optimizer_function, metrics=metrics)

# batch_size is kept for experimentation but is not passed to fit() below, so
# the Keras default batch size is used.
batch_size = 512
epochs = 100

# Train with the held-out split as validation data; only the checkpoint
# callback is active, so training always runs for the full number of epochs.
history = ann_model.fit(x_train_encoded, y_train_encoded,
              #batch_size=batch_size,
              epochs=epochs,
              validation_data = (x_test_encoded, y_test_encoded),
              callbacks = [model_checkpoint_function],
              shuffle=True, verbose=2)

In [None]:
# Show the `params` attribute of the History object returned by fit().
history.params

In [None]:
# Collect the per-epoch metrics recorded by fit() into a DataFrame.
history_df = pd.DataFrame(history.history)

# Plot training and validation loss together, and report the best (lowest)
# validation loss reached.
loss_columns = ["loss", "val_loss"]
history_df[loss_columns].plot()
print("Minimum Validation Loss: {:0.4f}".format(history_df["val_loss"].min()))

plt.show()


In [None]:
# Plot training and validation accuracy together, and report the best
# (highest) validation accuracy reached.
accuracy_columns = ["accuracy", "val_accuracy"]
history_df[accuracy_columns].plot()
print("Maximum Obtained Accuracy: {:0.4f}".format(history_df["val_accuracy"].max()))

plt.show()

# Recorded results from earlier runs:
#0.8051 without early stopping
#0.7795 with early stopping

In [None]:
# Restore the best weights saved by the checkpoint callback before predicting.
ann_model.load_weights(checkpoint_filepath)

# predict() returns one row of raw logits per sample (one column per class).
# Flattening that matrix interleaves every sample's class scores into a single
# vector, so instead take the highest-scoring class per row and map the integer
# codes back to the original condition labels.
test_logits = ann_model.predict(x_test_encoded)
predicted_class_ids = np.argmax(test_logits, axis=1)
predicted_conditions = Le.inverse_transform(predicted_class_ids)

# Preview the first few predicted labels.
predicted_conditions[:10]