In [37]:
import numpy as np
import pandas as pd
import re
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import optuna
import torch
from torch.autograd import Variable
import plotly
import tqdm as notebook_tqdm


In [2]:
x_train = pd.read_csv(
    "../data/processed/x_train_w_OHE.csv", index_col=0, dtype=str
)
x_test = pd.read_csv(
    "../data/processed/x_test_w_OHE.csv", index_col=0, dtype=str
)
y_train = pd.read_csv(
    "../data/processed/y_train.csv", index_col=0, dtype=float
).squeeze("columns").reset_index(drop=True)
y_test = pd.read_csv(
    "../data/processed/y_test.csv", index_col=0, dtype=float
).squeeze("columns").reset_index(drop=True)

x_train, x_valid, y_train, y_valid = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42)


In [3]:
x_train.head()

Unnamed: 0,circuit_name,deenergize_time,restoration_time,key_communities,hftd_tier,total_affected,residential_affected,zip_code,longitude,latitude,...,zip_is_96035,zip_is_96051,zip_is_96055,zip_is_96059,zip_is_96069,zip_is_96073,zip_is_96076,zip_is_96080,zip_is_96096,zip_is_96137
452,SWIFT2110,2019-10-10 00:05:00,2019-10-11 14:17:00,"SAN JOSE, LIVERMORE",2.0,2232.0,2047.0,95148,-121.796959988177,37.3225680192999,...,0,0,0,0,0,0,0,0,0,0
443,STATION E EUREKA1105,2019-10-09 01:20:00,2019-10-09 23:10:00,EUREKA,0.0,1618.0,1264.0,95501,-124.180313467792,40.7934324220744,...,0,0,0,0,0,0,0,0,0,0
1804,SILVERADO,2021-08-17 18:22:00,2021-08-18 23:55:00,NAPA,3.0,1815.0,1516.0,94574,-122.459110675812,38.4998179385502,...,0,0,0,0,0,0,0,0,0,0
340,OREGON TRAIL1103,2019-10-09 01:39:00,2019-10-11 11:32:00,"REDDING, BELLA VISTA",2.0,1706.0,1599.0,96003,-122.322060242415,40.619045588007,...,0,0,0,0,0,0,0,0,0,0
1390,OREGON TRAIL,2020-10-22 03:23:00,2020-10-23 11:30:00,"PALO CEDRO, REDDING",2.0,952.0,843.0,96003,-122.322060242415,40.619045588007,...,0,0,0,0,0,0,0,0,0,0


In [4]:
zip_cols = x_train.columns[
    [re.search('zip_is', col) is not None for col in x_train.columns]
]


In [5]:
def get_correct_types_x(df, numeric_cols):
    for col in ['deenergize_time', 'restoration_time']:
        df[col] = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')
    for col in numeric_cols:
        df[col] = df[col].astype(float)
    return df


numeric_cols = [
    'hftd_tier', 'total_affected', 'residential_affected',
    'longitude', 'latitude', 'total_pop', 'median_age', 'median_income',
    'white_pct', 'tmin_d-5', 'tmax_d-5', 'wspd_d-5', 'tmin_d-4', 'tmax_d-4',
    'wspd_d-4', 'tmin_d-3', 'tmax_d-3', 'wspd_d-3', 'tmin_d-2', 'tmax_d-2',
    'wspd_d-2', 'tmin_d-1', 'tmax_d-1', 'wspd_d-1'
]
x_train = get_correct_types_x(x_train, numeric_cols)
x_valid = get_correct_types_x(x_valid, numeric_cols)
x_test = get_correct_types_x(x_test, numeric_cols)
rel_x_train = x_train[numeric_cols]
rel_x_valid = x_valid[numeric_cols]
rel_x_test = x_test[numeric_cols]

scaler = StandardScaler()
scaler.fit(rel_x_train)
scaled_x_train = scaler.transform(rel_x_train)
scaled_x_valid = scaler.transform(rel_x_valid)
scaled_x_test = scaler.transform(rel_x_test)


In [25]:
class base_model(torch.nn.Module):

    def __init__(self, n_hidden_layers, n_hidden_units, p=0.5, activation=torch.nn.ReLU()):
        super(base_model, self).__init__()
        if n_hidden_layers == 0:
            self.linears = torch.nn.ModuleList([
                torch.nn.Linear(scaled_x_train.shape[1], 1)
            ])
            self.activation = activation
            self.dropout = torch.nn.Dropout(p)
        else:
            assert len(n_hidden_units) == n_hidden_layers
            self.layers = []

            for layer, n_units in enumerate(n_hidden_units):
                if layer == 0:
                    curr_layer = torch.nn.Linear(
                        scaled_x_train.shape[1], n_units)
                else:
                    curr_layer = torch.nn.Linear(
                        n_hidden_units[layer - 1], n_units)
                self.layers.append(curr_layer)
            self.layers.append(torch.nn.Linear(n_hidden_units[-1], 1))
            self.linears = torch.nn.ModuleList(self.layers)
            self.activation = activation

    def forward(self, x):
        for layer in self.linears:
            x = self.activation(layer(x))
        return x


In [7]:
# x = torch.from_numpy(scaled_x_train).float()
# y = torch.from_numpy(y_train.values.reshape(-1, 1)).float()

# inputs = Variable(x)
# targets = Variable(y)

# # base = base_model(1, [1], activation=torch.nn.Tanh())
# base = base_model(2, [6, 3])
# print(base)
# optimizer = torch.optim.Adagrad(base.parameters(), lr=0.2)
# loss_func = torch.nn.MSELoss()

# for i in range(100000):
#    prediction = base(inputs)
#    loss = loss_func(prediction, targets)
#    if i % 100 == 0:
#       print(loss)
#    optimizer.zero_grad()
#    loss.backward()
#    optimizer.step()


In [8]:
# pd.Series(prediction.detach().numpy().reshape(-1))

In [9]:
# np.sqrt(loss.detach().numpy())

In [10]:
# test_x = Variable(torch.from_numpy(scaled_x_test).float())
# test_y = Variable(torch.from_numpy(y_test.values.reshape(-1, 1)).float())
# test_predictions = base(test_x)
# valid_x = Variable(torch.from_numpy(scaled_x_valid).float())
# valid_y = Variable(torch.from_numpy(y_valid.values.reshape(-1, 1)).float())
# valid_predictions = base(valid_x)

In [11]:
# loss = loss_func(valid_predictions, valid_y)
# print(np.sqrt(loss.detach().numpy()))


In [12]:
# 1. Define an objective function to be maximized.
def objective(trial):

    # 2. Suggest values of the hyperparameters using a trial object.
    n_layers = trial.suggest_int('n_layers', 0, 3)
    n_hidden_units = [0] * n_layers
    print(n_layers)
    for i in range(n_layers):
        n_hidden_units[i] = trial.suggest_int(f"n_h_{i}", 1, 100)
    lr = trial.suggest_float("lr", 1e-5, 5e-1, log=True)
    n_epochs = trial.suggest_int("n_epochs", 1000, 100000)
    print(f"""Params:
          n_layers: {n_layers}
          n_hidden_units: {n_hidden_units}
          lr: {lr}
          n_epochs: {n_epochs}""")
        
    x = torch.from_numpy(scaled_x_train).float()
    y = torch.from_numpy(y_train.values.reshape(-1, 1)).float()

    inputs = Variable(x)
    targets = Variable(y)

    # base = base_model(1, [1], activation=torch.nn.Tanh())
    base = base_model(n_layers, n_hidden_units)
    optimizer = torch.optim.Adagrad(base.parameters(), lr=lr)
    loss_func = torch.nn.MSELoss()

    for i in range(n_epochs):
        prediction = base(inputs)
        loss = loss_func(prediction, targets)
        if i % 1000 == 0:
            print(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        
    valid_x = Variable(torch.from_numpy(scaled_x_valid).float())
    valid_y = Variable(torch.from_numpy(y_valid.values.reshape(-1, 1)).float())
    valid_predictions = base(valid_x)
    loss = loss_func(valid_predictions, valid_y)
    print(f"Final valid loss: {loss}")
    print("#################")
    return np.sqrt(loss.detach().numpy())


In [None]:
# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)
pd.DataFrame.from_dict({"value": study.best_trial.values, "params": str(
    study.best_trial.params)}).to_csv("nn_hpo/run_1.csv", index=False)

fig = optuna.visualization.plot_optimization_history(study)
# fig.show()
fig.write_image("nn_hpo/run_1.png")


In [15]:
study.best_trial

FrozenTrial(number=34, values=[869.7608032226562], datetime_start=datetime.datetime(2022, 11, 25, 14, 54, 52, 398277), datetime_complete=datetime.datetime(2022, 11, 25, 14, 55, 24, 652445), params={'n_layers': 2, 'n_h_0': 25, 'n_h_1': 70, 'lr': 0.0065996011510886, 'n_epochs': 75122}, distributions={'n_layers': IntDistribution(high=3, log=False, low=0, step=1), 'n_h_0': IntDistribution(high=100, log=False, low=1, step=1), 'n_h_1': IntDistribution(high=100, log=False, low=1, step=1), 'lr': FloatDistribution(high=0.5, log=True, low=1e-05, step=None), 'n_epochs': IntDistribution(high=100000, log=False, low=1000, step=1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=34, state=TrialState.COMPLETE, value=None)

In [32]:
# best_params_dict = {
#     "n_layers": 2,
#     "n_hidden_units": [47, 72],
#     "lr": 0.003220255014661397,
#     "n_epochs": 3584372,
#     "act_function": torch.nn.ReLU(),
#     "dropout": 0.003755084817386567
# }

best_params = pd.read_csv("nn_hpo/run_1.csv")
best_params_dict = eval(best_params["params"].values[0])

In [33]:
best_params_dict

{'n_layers': 2,
 'n_h_0': 46,
 'n_h_1': 96,
 'lr': 0.011578444404576697,
 'n_epochs': 44718}

In [46]:
x = torch.from_numpy(scaled_x_train).float()
y = torch.from_numpy(y_train.values.reshape(-1, 1)).float()

inputs = Variable(x)
targets = Variable(y)

base = base_model(1, [10])
# best = base_model(best_params_dict["n_layers"], 
#                   [46, 96],
#                   # best_params_dict["n_hidden_units"], 
#                   # activation=best_params_dict["act_function"],
#                   # p=best_params_dict["dropout"])
# )
print(base)
optimizer = torch.optim.Adagrad(base.parameters(), lr=best_params_dict["lr"])
loss_func = torch.nn.MSELoss()

# for i in range(best_params_dict["n_epochs"]):
for i in range(100000):
   prediction = base(inputs)
   loss = loss_func(prediction, targets)
   if i % 1000 == 0:
      print(loss)
   optimizer.zero_grad()
   loss.backward()
   optimizer.step()


base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9496224., grad_fn=<MseLossBackward0>)
tensor(9319894., grad_fn=<MseLossBackward0>)
tensor(9134559., grad_fn=<MseLossBackward0>)
tensor(8954075., grad_fn=<MseLossBackward0>)
tensor(8778147., grad_fn=<MseLossBackward0>)
tensor(8606591., grad_fn=<MseLossBackward0>)
tensor(8439298., grad_fn=<MseLossBackward0>)
tensor(8276132.5000, grad_fn=<MseLossBackward0>)
tensor(8116994.5000, grad_fn=<MseLossBackward0>)
tensor(7961791., grad_fn=<MseLossBackward0>)
tensor(7810437.5000, grad_fn=<MseLossBackward0>)
tensor(7662857., grad_fn=<MseLossBackward0>)
tensor(7518958., grad_fn=<MseLossBackward0>)
tensor(7378659.5000, grad_fn=<MseLossBackward0>)
tensor(7241892.5000, grad_fn=<MseLossBackward0>)
tensor(7108579.5000, grad_fn=<MseLossBackward0>)
tensor(6978643., grad_fn=<MseLossBackward0>)
tensor(6852013.5000, grad_

In [48]:
test_x = Variable(torch.from_numpy(scaled_x_test).float())
test_y = Variable(torch.from_numpy(y_test.values.reshape(-1, 1)).float())
test_predictions_base = base(test_x)
loss_base = loss_func(test_predictions_base, test_y)
baseline_rmse = np.sqrt(loss_base.detach().numpy())
print(baseline_rmse)


1716.3685


In [39]:
def calc_test_r2(pred_vals, true_vals, baseline_rmse):
    sse = mean_squared_error(pred_vals, true_vals) * len(true_vals)
    sst = (baseline_rmse ** 2) * len(true_vals)
    return (
        1 - sse / sst, np.sqrt(sse / len(true_vals)),
        mean_absolute_error(pred_vals, true_vals)
    )


In [49]:
calc_test_r2(test_predictions.detach().numpy(), y_test.values.reshape(-1, 1), baseline_rmse)

(0.7521819208473581, 854.4310777544767, 597.6407546802939)