In [None]:
# Restrict CUDA to the GPU at index 1. This is set before TensorFlow is
# imported below so the value is already in the environment when TensorFlow
# first looks for devices; keep this ordering.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras

# Sanity check: report the TensorFlow version and how many GPUs it can see.
print(tf. __version__) 
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

## Processing Data

In [None]:
# For NOAA CSV Files

# def get_features_and_target(csv_file):
#     df = pd.read_csv(f'../spark_output/colorado/{csv_file}')
#     df.drop(['year_month_day'], axis=1, inplace=True)

#     features = df.iloc[:, 1:-1].values
#     target = df.iloc[:, 0].values.reshape(-1,1)

#     assert features.shape[0] == target.shape[0]
#     assert target.shape[1] == 1

#     return features.astype('float64'), target.astype('float64')


In [None]:
def get_features_and_target(csv_file, data_dir='../datasets/csu-weather-data/colorado/2020-2021/'):
    """Load one county's weather CSV and split it into model inputs and target.

    Parameters
    ----------
    csv_file : str
        File name of the county CSV inside ``data_dir`` (e.g. ``'Gunnison.csv'``).
    data_dir : str, optional
        Directory that holds the county CSV files. Defaults to the 2020-2021
        Colorado dataset location used throughout this notebook.

    Returns
    -------
    features : numpy.ndarray
        ``float64`` array of shape ``(n_rows, 3)`` holding the columns
        ``max_temperature``, ``min_temperature`` and ``precipitation``
        (in that order).
    target : numpy.ndarray
        ``float64`` array of shape ``(n_rows, 1)`` holding ``snowfall``.

    Raises
    ------
    FileNotFoundError
        If the CSV file does not exist.
    KeyError
        If any of the four required columns is missing from the file.
    """
    feature_columns = ['max_temperature', 'min_temperature', 'precipitation']
    target_column = 'snowfall'

    df = pd.read_csv(os.path.join(data_dir, csv_file))

    # Columns are selected by name, so any other column in the file (such as
    # ``date``) is simply ignored; there is no need to drop it first.
    features = df[feature_columns].to_numpy(dtype='float64')
    # Double brackets keep the target two-dimensional: (n_rows, 1).
    target = df[[target_column]].to_numpy(dtype='float64')

    assert features.shape[0] == target.shape[0]
    assert target.shape[1] == 1

    return features, target


In [None]:
def split_train_test_val(X, T):
    """Split the data 60/20/20 into train/test/validation and standardize it.

    The split happens first and the standardization parameters (mean and
    standard deviation per column) are computed from the training split only.
    Those training statistics are then applied to all three splits, so no
    information from the test or validation rows leaks into the scaling.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix of shape ``(n_rows, n_features)``.
    T : numpy.ndarray
        Target matrix of shape ``(n_rows, n_targets)``.

    Returns
    -------
    list of numpy.ndarray
        ``[x_train, x_test, x_validate]`` - standardized features.
    list of numpy.ndarray
        ``[t_train, t_test, t_validate]`` - standardized targets.
    """
    x_train, x_test, t_train, t_test = train_test_split(X, T, test_size=0.20, random_state=42)

    # 0.25 of the remaining 80% is 20% of the full dataset.
    x_train, x_validate, t_train, t_validate = train_test_split(x_train, t_train, test_size=0.25, random_state=42)

    # Standardization parameters come from the training split only.
    x_means = np.mean(x_train, axis=0)
    x_stds = np.std(x_train, axis=0)
    x_stds[x_stds == 0] = 1  # constant column: avoid division by zero

    t_means = np.mean(t_train, axis=0)
    t_stds = np.std(t_train, axis=0)
    t_stds[t_stds == 0] = 1  # constant column: avoid division by zero

    x_train, x_test, x_validate = [(part - x_means) / x_stds for part in (x_train, x_test, x_validate)]
    t_train, t_test, t_validate = [(part - t_means) / t_stds for part in (t_train, t_test, t_validate)]

    assert x_train.shape[0] == t_train.shape[0]
    assert x_test.shape[0] == t_test.shape[0]
    assert x_validate.shape[0] == t_validate.shape[0]

    return [x_train, x_test, x_validate], [t_train, t_test, t_validate]

## Model Building

In [None]:
def model_builder(hp):
    """Build and compile the tunable regression network used by the tuner.

    Architecture: a 3-feature input, three ``Dense(relu)`` + ``Dropout(0.2)``
    blocks that all share one tuned width, and a single-unit linear output.

    Hyperparameters registered on ``hp``:
      * ``units`` - width of every hidden layer, 2 to 50 in steps of 2.
      * ``learning_rate`` - Adam learning rate, one of
        1e-1, 5e-2, 1e-2, 5e-3, 1e-3, 5e-4, 1e-4.

    Parameters
    ----------
    hp
        Hyperparameter container passed in by the tuner.

    Returns
    -------
    A compiled ``keras.Sequential`` model with MAE as both loss and metric.
    """
    # A single tuned width is reused for all three hidden layers.
    hidden_units = hp.Int('units', min_value=2, max_value=50, step=2)
    # Candidate learning rates for the Adam optimizer.
    learning_rate = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3, 1e-4, 5e-2, 5e-3, 5e-4])

    network = keras.Sequential()
    network.add(keras.layers.Input(shape=(3, )))

    # Three identical hidden blocks: Dense(relu) followed by Dropout(0.2).
    for _ in range(3):
        network.add(keras.layers.Dense(units=hidden_units, activation='relu'))
        network.add(keras.layers.Dropout(0.2))

    # Single linear output unit for the regression target.
    network.add(keras.layers.Dense(1))

    adam = keras.optimizers.Adam(learning_rate=learning_rate)
    network.compile(optimizer=adam, loss='mae', metrics=['mae'])
    return network

In [None]:
from IPython.display import clear_output
def run_model_search(county):
    """Tune, train and evaluate a snowfall model for one county CSV.

    Steps:
      1. Load and split the county data (train / test / validation).
      2. Run a Hyperband search over ``model_builder``, ranking trials by
         validation loss.
      3. Train a model with the best hyperparameters for up to 200 epochs and
         pick the epoch with the lowest validation loss.
      4. Rebuild the model and retrain it for exactly that many epochs.
      5. Evaluate the retrained model on the held-out test split.

    Parameters
    ----------
    county : str
        File name of the county CSV; also used as the tuner project name and
        stored under ``history.history['county']`` for later plotting.

    Returns
    -------
    hypermodel
        The retrained Keras model.
    history
        The Keras ``History`` of the retraining run, with the extra
        ``'county'`` entry added to ``history.history``.
    """
    max_training_epochs = 200  # upper bound for the best-epoch search below

    features, target = get_features_and_target(county)
    X, T = split_train_test_val(features, target)

    x_train, x_test, x_validate = X
    t_train, t_test, t_validate = T
    validation_data = (x_validate, t_validate)

    # Rank trials by validation loss: the training loss would favour
    # configurations that merely fit the training rows best.
    tuner = kt.Hyperband(
        model_builder,
        objective='val_loss',
        max_epochs=10,
        factor=3,
        directory='SnowPredictions',
        project_name=county)

    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
    # No ``epochs`` argument here: Hyperband sets the epoch budget of every
    # trial itself (bounded by ``max_epochs`` above).
    tuner.search(x_train, t_train, validation_data=validation_data, callbacks=[stop_early])

    # Get the optimal hyperparameters
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

    # Train once with the optimal hyperparameters to find the best epoch count.
    model = tuner.hypermodel.build(best_hps)
    history = model.fit(x_train, t_train, epochs=max_training_epochs, validation_data=validation_data)

    # The loss is an error (MAE), so the best epoch is the one with the
    # *lowest* validation value; +1 converts the 0-based index to a count.
    val_loss_per_epoch = history.history['val_loss']
    best_epoch = val_loss_per_epoch.index(min(val_loss_per_epoch)) + 1
    print('Best epoch: %d' % (best_epoch,))

    # Retrain a fresh model for exactly the best number of epochs.
    hypermodel = tuner.hypermodel.build(best_hps)
    history = hypermodel.fit(x_train, t_train, epochs=best_epoch, validation_data=validation_data)
    # graph_loss() reads the county name back from this entry.
    history.history['county'] = county

    clear_output(wait=True)
    eval_result = hypermodel.evaluate(x_test, t_test)
    print("[test loss, test mae]:", eval_result)

    return hypermodel, history

## Graph Loss

In [None]:
def graph_loss(model_history):
    """Plot training and validation loss per epoch and save the figure.

    The figure is written to ``LossGraphs/<county>`` where ``<county>`` is the
    value of ``model_history.history['county']`` up to its first ``'.'``
    (the whole value when it contains no dot). The ``LossGraphs`` directory is
    created if it does not exist.

    Parameters
    ----------
    model_history
        Object whose ``.history`` dict contains the keys ``'county'``,
        ``'loss'`` and ``'val_loss'`` (as produced by ``run_model_search``).
    """
    history = model_history.history

    # Keep the part before the first dot; partition() does not raise when the
    # name has no dot at all.
    county = history['county'].partition('.')[0]
    train_loss = history['loss']
    val_loss = history['val_loss']

    # 1-based epoch numbers for the x-axis.
    epochs = range(1, len(train_loss) + 1)

    fig, ax = plt.subplots()
    ax.plot(epochs, train_loss, '-b', label='Training loss')
    ax.plot(epochs, val_loss, '-g', label='Validation loss')
    ax.set_title(f"{county}'s Training and Validation Loss")
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.legend()

    os.makedirs('LossGraphs', exist_ok=True)
    fig.savefig(f'LossGraphs/{county}', bbox_inches='tight')
    # Close explicitly so figures do not accumulate when called in a loop.
    plt.close(fig)

## Running Experiment

In [None]:
from os import listdir
from os.path import isfile, join

# Directory holding one weather CSV per Colorado county.
counties_path = '../datasets/csu-weather-data/colorado/2020-2021/'

# Keep only regular CSV files (skips sub-directories and stray files such as
# OS metadata) and sort them, because listdir() returns entries in arbitrary
# order and the experiment loop should run in a reproducible order.
colorado_counties = sorted(
    f for f in listdir(counties_path)
    if isfile(join(counties_path, f)) and f.lower().endswith('.csv')
)
colorado_counties

## All Models and History For All Counties

In [None]:
# Parallel result lists: index i of both lists belongs to the same county.
colorado_county_models, colorado_county_model_histories = [], []

# Tune and train one model per county file, saving its loss graph as we go.
for county in colorado_counties:
    model, history = run_model_search(county)
    graph_loss(history)
    colorado_county_models += [model]
    colorado_county_model_histories += [history]

<br />
<br />
<br />

## For NOAA

In [None]:
# gunnison = "Gunnison.csv"
# fremont = "Fremont.csv"

### Gunnison

In [None]:
# gunnison_model, gunnison_history = run_model_search(gunnison)

In [None]:
# graph_loss(gunnison_history)

### Fremont

In [None]:
# fremont_model, fremont_history = run_model_search(fremont)

In [None]:
# graph_loss(fremont_history)