In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras

print(tf. __version__) 
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

2.6.2
Num GPUs Available:  1


## Processing Data

In [2]:
# For NOAA CSV Files

# def get_features_and_target(csv_file):
#     df = pd.read_csv(f'../spark_output/colorado/{csv_file}')
#     df.drop(['year_month_day'], axis=1, inplace=True)

#     features = df.iloc[:, 1:-1].values
#     target = df.iloc[:, 0].values.reshape(-1,1)

#     assert features.shape[0] == target.shape[0]
#     assert target.shape[1] == 1

#     return features.astype('float64'), target.astype('float64')


In [3]:
def get_features_and_target(csv_file):
    df = pd.read_csv(f'../datasets/csu-weather-data/colorado/2020-2021/{csv_file}')
    df.drop(['date'], axis=1, inplace=True)
    columns = ['max_temperature', 'min_temperature', 'precipitation', 'snowfall']

    features = df[['max_temperature', 'min_temperature', 'precipitation']].values
    target = df['snowfall'].values.reshape(-1, 1)
    target

    assert features.shape[0] == target.shape[0]
    assert target.shape[1] == 1

    return features.astype('float64'), target.astype('float64')


In [4]:
def split_train_test_val(X, T):
    
    
    # Calculate standardization parameters 
    x_means = np.mean(X, axis=0)
    x_stds = np.std(X , axis=0)
    x_stds[x_stds == 0] = 1
    
    t_means = np.mean(T, axis=0)
    t_stds = np.std(T, axis=0)
    t_stds[t_stds == 0] = 1
    
    X = (X - x_means) / x_stds
    T = (T - t_means) / t_stds
    
    x_train, x_test, t_train, t_test = train_test_split(X, T, test_size=0.20, random_state=42)

    # 0.25 x 0.8 = 0.2
    x_train, x_validate, t_train, t_validate = train_test_split(x_train, t_train, test_size=0.25, random_state=42)

    assert x_train.shape[0] == t_train.shape[0]
    assert x_test.shape[0] == t_test.shape[0]
    assert x_validate.shape[0] == t_validate.shape[0]

    return [x_train, x_test, x_validate], [t_train, t_test, t_validate]

## Model Building

In [5]:
def model_builder(hp):
    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(3, )))
    
    # Tune the number of units in the first Dense layer
    hp_units = hp.Int('units', min_value=2, max_value=50, step=2)
    model.add(keras.layers.Dense(units=hp_units, activation='relu'))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(units=hp_units, activation='relu'))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(units=hp_units, activation='relu'))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(1))
    
    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3, 1e-4, 5e-2, 5e-3, 5e-4])
    
              
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                loss='mae',
                metrics=['mae'])
    return model

In [6]:
from IPython.display import clear_output
def run_model_search(county):
 
    features, target = get_features_and_target(county)
    X, T = split_train_test_val(features, target)
  
    x_train, x_test, x_validate = X
    t_train, t_test, t_validate = T
    
    
    tuner = kt.Hyperband(
        model_builder,
        objective='loss',
        max_epochs=10,
        factor=3,
        directory='SnowPredictions',
        project_name=county)
    
    
    stop_early = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
    tuner.search(x_train, t_train, epochs=200, validation_data=(x_validate, t_validate), callbacks=[stop_early])
    
    # Get the optimal hyperparameters
    best_hps=tuner.get_best_hyperparameters(num_trials=100)[0]
    
    
    # Build the model with the optimal hyperparameters
    model = tuner.hypermodel.build(best_hps)
    history = model.fit(x_train, t_train, epochs=200, validation_data=(x_validate, t_validate))
    
    mae_per_epoch = history.history['mae']
    best_epoch = mae_per_epoch.index(max(mae_per_epoch)) + 1
    print('Best epoch: %d' % (best_epoch,))
    
    hypermodel = tuner.hypermodel.build(best_hps)
    
    # Retrain the model
    history = hypermodel.fit(x_train, t_train, epochs=200, validation_data=(x_validate, t_validate))
    history.history['county'] = county
    
    clear_output(wait=True)
    eval_result = hypermodel.evaluate(x_test, t_test)
    print("[test loss, test mae]:", eval_result)
    
    return hypermodel, history

## Graph Loss

In [7]:
def graph_loss(model_history):

    history = model_history.history
    
    county = history['county']
    county = county[:county.index('.')]
    train_loss = history['loss']
    val_loss = history['val_loss']

    epochs = range(1, len(train_loss) + 1)

    plt.plot(train_loss, '-b', label='Training loss')
    plt.plot(val_loss, '-g', label='Validation loss')
    plt.title(f"{county}'s Training and Validation Loss")
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.savefig(f'LossGraphs/{county}', bbox_inches='tight')
    plt.close()

## Running Experiment

In [8]:
from os import listdir
from os.path import isfile, join
counties_path = '../datasets/csu-weather-data/colorado/2020-2021/'
colorado_counties = [f for f in listdir(counties_path) if isfile(join(counties_path, f))]
colorado_counties

['park-bailey.csv',
 'fremont-canon-city.csv',
 'jackson-walden.csv',
 'grand-grand-lake-6-ssw.csv',
 'larimer-hourglass-reservoir.csv',
 'larimer-fort-collins.csv',
 'boulder-gross-reservoir.csv',
 'larimer-rustic-9.csv']

## All Models and History For All Counties

In [None]:
colorado_county_models = []
colorado_county_model_histories = []

for county in colorado_counties:
    model, history = run_model_search(county)
    graph_loss(history)
    colorado_county_models.append(model)
    colorado_county_model_histories.append(history)

Trial 30 Complete [00h 00m 01s]
loss: 0.16711686551570892

Best loss So Far: 0.09350863844156265
Total elapsed time: 00h 00m 25s
INFO:tensorflow:Oracle triggered exit
Epoch 1/200

2021-11-30 17:11:32.602687: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/200
Epoch 3/200
 1/14 [=>............................] - ETA: 0s - loss: 0.0951 - mae: 0.0951

2021-11-30 17:11:32.860320: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200


Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200


Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200


Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
Best epoch: 1
Epoch 1/200

2021-11-30 17:11:50.103979: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/200
Epoch 3/200
 1/14 [=>............................] - ETA: 0s - loss: 0.0973 - mae: 0.0973

2021-11-30 17:11:50.351510: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200


Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
 1/14 [=>............................] - ETA: 0s - loss: 0.0032 - mae: 0.0032

<br />
<br />
<br />

## For NOAA

In [None]:
# gunnison = "Gunnison.csv"
# fremont = "Fremont.csv"

### Gunnison

In [None]:
# gunnison_model, gunnison_history = run_model_search(gunnison)

In [None]:
# display_loss([gunnison_history])

### Fremont

In [None]:
# fremont_model, fremont_history = run_model_search(fremont)

In [None]:
# display_loss([fremont_history])