In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras

print(tf. __version__) 
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

2.6.2
Num GPUs Available:  1


## Processing Data

In [2]:
def get_features_and_target(csv_file):
    df = pd.read_csv(f'../datasets/csu-weather-data/colorado/2020-2021/{csv_file}')
    df.drop(['date'], axis=1, inplace=True)
    columns = ['max_temperature', 'min_temperature', 'precipitation', 'snowfall']

    features = df[['max_temperature', 'min_temperature', 'precipitation']].values
    target = df['snowfall'].values.reshape(-1, 1)
    target

    assert features.shape[0] == target.shape[0]
    assert target.shape[1] == 1

    return features.astype('float64'), target.astype('float64')


In [3]:
def split_train_test_val(X, T):
    
    
    # Calculate standardization for cut_off
    t_means = np.mean(T, axis=0)
    t_stds = np.std(T, axis=0)
    t_stds[t_stds == 0] = 1
    
    cut_off = t_stds * 3
    lower, upper = t_means - cut_off, t_means + cut_off
    
    # outliers
    outliers_index = [i for i in range(T.size) if T[i] < lower or T[i] > upper]
    T = np.delete(T, outliers_index).reshape(-1,1)
    X = np.delete(X, outliers_index, axis=0).reshape(-1,3)

    
    # Now standardize
    x_means = np.mean(X, axis=0)
    x_stds = np.std(X , axis=0)
    x_stds[x_stds == 0] = 1
    
    t_means = np.mean(T, axis=0)
    t_stds = np.std(T, axis=0)
    t_stds[t_stds == 0] = 1

    X = (X - x_means) / x_stds
    T = (T - t_means) / t_stds
    
    x_train, x_test, t_train, t_test = train_test_split(X, T, test_size=0.20, random_state=42)

    assert x_train.shape[0] == t_train.shape[0]
    assert x_test.shape[0] == t_test.shape[0]

    return [x_train, x_test], [t_train, t_test]

## Model Building

In [4]:
def model_builder(hp):
    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(3, )))
    
    # Tune the number of units in the first Dense layer
    hp_units = hp.Int('units', min_value=2, max_value=128, step=2)
    
#     kernel_regularizer=tf.keras.regularizers.l2(0.001), 
    
    model.add(keras.layers.Dense(units=hp_units, 
                                 
                                 activation='relu'))
#     model.add(tf.keras.layers.Dropout(0.5))
    model.add(keras.layers.Dense(units=hp_units, 
                                  
                                 activation='relu'))
#     model.add(tf.keras.layers.Dropout(0.5))
    model.add(keras.layers.Dense(1))
    
    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 5e-2, 5e-3])
    
              
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                loss='mse',
                metrics=['mse'])
    return model

In [5]:
def run_model_search(county):
 
    features, target = get_features_and_target(county)
    X, T = split_train_test_val(features, target)
  
    x_train, x_test = X
    t_train, t_test = T
    
    batch_size=64
    epochs=150
    
    tuner = kt.Hyperband(
        model_builder,
        objective='val_loss',
        max_epochs=50,
        factor=3,
        directory='SnowPredictions',
        project_name=county)
    
    
    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=5)
    tuner.search(x_train, t_train, batch_size=batch_size, epochs=epochs, validation_split=0.2)
    
    # Get the optimal hyperparameters
    best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
    
    
    # Build the model with the optimal hyperparameters
    model = tuner.hypermodel.build(best_hps)
    model.fit(x_train, t_train, batch_size=batch_size, epochs=epochs, validation_split=0.2, callbacks=[stop_early])
    
    hypermodel = tuner.hypermodel.build(best_hps)
    
    # Retrain the model
    history = hypermodel.fit(x_train, t_train, batch_size=batch_size, epochs=epochs, validation_split=0.2, callbacks=[stop_early])
    history.history['county'] = county
    
    eval_result = hypermodel.evaluate(x_test, t_test)
    print("[test loss, test mse]:", eval_result)
    
    return hypermodel, history

## Graph Loss

In [6]:
def graph_loss(model_history):

    history = model_history.history
    
    county = history['county']
    county = county[:county.index('.')]
    train_loss = history['loss']
    val_loss = history['val_loss']

    epochs = range(1, len(train_loss) + 1)

    plt.plot(train_loss, '-b', label='Training loss')
    plt.plot(val_loss, '-', color='orange', label='Validation loss')
    plt.title(f"{county}'s Training and Validation Loss")
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.savefig(f'SnowLossGraphs/{county}', bbox_inches='tight')
    plt.close()

## Running Experiment

In [7]:
from os import listdir
from os.path import isfile, join
counties_path = '../datasets/csu-weather-data/colorado/2020-2021/'
colorado_counties = [f for f in listdir(counties_path) if isfile(join(counties_path, f))]
colorado_counties

['park-bailey.csv',
 'fremont-canon-city.csv',
 'jackson-walden.csv',
 'grand-grand-lake-6-ssw.csv',
 'larimer-hourglass-reservoir.csv',
 'larimer-fort-collins.csv',
 'boulder-gross-reservoir.csv',
 'larimer-rustic-9.csv']

## All Models and History For All Counties

In [8]:
colorado_county_models = []
colorado_county_model_histories = []

for county in colorado_counties:
    model, history = run_model_search(county)
    graph_loss(history)
    colorado_county_models.append(model)
    colorado_county_model_histories.append(history)

Trial 51 Complete [00h 00m 01s]
val_loss: 0.1914362758398056

Best val_loss So Far: 0.1914362758398056
Total elapsed time: 00h 00m 28s

Search: Running Trial #52

Hyperparameter    |Value             |Best Value So Far 
units             |122               |116               
learning_rate     |0.01              |0.01              
tuner/epochs      |50                |50                
tuner/initial_e...|17                |17                
tuner/bracket     |3                 |3                 
tuner/round       |3                 |3                 
tuner/trial_id    |e95fd154ffee3c1...|449fc82b0f399f1...

Epoch 18/50

2021-12-01 13:24:47.057696: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-12-01 13:24:47.231905: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50

KeyboardInterrupt: 

In [None]:
colorado_county_models[0].summary()

<br />
<br />
<br />