In [None]:
%load_ext autoreload
%autoreload 2
from copy import deepcopy
import os
os.chdir("/home/jakob/doktor/projects/EnsembleUncertainty/code")
"""Learing "logit" distribution in regression example"""
from pathlib import Path
from datetime import datetime
import logging
import numpy as np
import matplotlib.pyplot as plt
import torch

import src.dataloaders.uci.wine as uci_wine
import src.utils as utils
from src.distilled import logits_probability_distribution
from src.ensemble import ensemble
from src.ensemble import sep_regressor, mean_regressor, simple_regressor
import src.metrics as metrics

LOGGER = logging.getLogger(__name__)
EXPERIMENT_NAME = "regression_logits"


# Settings
class Args:
    """Plain attribute container used in place of parsed command-line arguments.

    Attributes are attached after construction; the experiment cells further
    down override some of them (number of members, epochs, learning rate,
    log level) before training.
    """

    def __repr__(self):
        """Return ``Args(name=value, ...)`` listing every attribute currently set."""
        # Without this, "Args: {}".format(args) only logs the default
        # "<__main__.Args object at 0x...>", which hides the actual settings.
        fields = ", ".join("{}={!r}".format(name, value)
                           for name, value in vars(self).items())
        return "{}({})".format(type(self).__name__, fields)


args = Args()
args.seed = 1                      # passed to utils.torch_settings
args.gpu = False                   # passed to utils.torch_settings
args.log_dir = Path("./logs")      # joined with Path.cwd() when the log file is created
args.log_level = logging.INFO
args.retrain = True                # NOTE(review): not read by any cell visible here

# Training defaults; individual experiment cells overwrite these.
args.num_ensemble_members = 10
args.num_epochs = 100
args.lr = 0.01

# Objects shared by the cells below: the default mini-batch size and the
# RMSE metric that each ensemble receives through add_metrics().
BATCH_SIZE = 100
rmse = metrics.Metric(function=metrics.root_mean_squared_error,
                      name="RMSE")

In [None]:
def make_plots(distilled_model, data):
    """Scatter the data and overlay the distilled model's predictions as error bars.

    Args:
        distilled_model: Object exposing ``output_size`` and
            ``predict(inputs, t=None)``; the prediction is converted with
            ``.data.numpy()``. Column 0 of the output is plotted as the
            predicted value and the square root of column 1 as the error bar
            (presumably mean and variance -- TODO confirm against the model).
        data: Dataset exposing ``n_samples`` that yields ``(inputs, targets)``
            batches when wrapped in a ``DataLoader``. Inputs and targets are
            stored in single-column buffers, so each is assumed to have one
            value per sample.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    test_loader = torch.utils.data.DataLoader(data,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=0)
    batch_size = test_loader.batch_size

    predictions = np.zeros((data.n_samples, distilled_model.output_size))
    all_x = np.zeros((data.n_samples, 1))
    all_y = np.zeros((data.n_samples, 1))

    for idx, (inputs, targets) in enumerate(test_loader):
        # Rows filled by this batch; the slice is clipped automatically for a
        # shorter final batch.
        rows = slice(idx * batch_size, (idx + 1) * batch_size)

        # `predictions` is 2-D (n_samples, output_size). The previous
        # three-index form `[rows, :, :]` raised "IndexError: too many
        # indices for array" on the first batch.
        predictions[rows, :] = \
            distilled_model.predict(inputs, t=None).data.numpy()
        all_x[rows, :] = inputs
        all_y[rows, :] = targets

    plt.scatter(np.squeeze(all_x), np.squeeze(all_y), label="Data", marker=".")

    plt.errorbar(np.squeeze(all_x),
                 predictions[:, 0],
                 np.sqrt(predictions[:, 1]),
                 label="Distilled model predictions",
                 marker=".",
                 ls="none")

    plt.legend()
    plt.show()

In [None]:
# Experiment: ensemble of MeanRegressor members.
#
# The run-specific overrides are applied first. Previously they were assigned
# after the logger had been configured and the "Args" line had been logged, so
# the logger used whatever level an earlier cell had left in `args` and the
# logged settings did not describe this run.
args.num_ensemble_members = 1
args.num_epochs = 40
args.lr = 0.001
args.log_level = logging.INFO

# One timestamped log file per execution of this cell.
log_file = Path("{}_{}.log".format(
    EXPERIMENT_NAME,
    datetime.now().strftime("%Y%m%d_%H%M%S")))
utils.setup_logger(log_path=Path.cwd() / args.log_dir / log_file,
                   log_level=args.log_level)
LOGGER.info("Args: {}".format(args))
device = utils.torch_settings(args.seed, args.gpu)
LOGGER.info("Creating dataloader")
training_data, validation_data = uci_wine.WineData(
    "~/doktor/datasets/UCI/wine/winequality-red.csv").create_train_val_split(
    training_samples_ratio=0.9)

# Network shape: input width, one hidden layer of 50 units, one output.
input_size = 11  # presumably the number of feature columns in the CSV -- confirm
layer_sizes = [input_size, 50, 1]
# The ensemble is created with twice the network's output width.
ensemble_output_size = layer_sizes[-1] * 2

train_loader = torch.utils.data.DataLoader(training_data,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           num_workers=0)
validation_loader = torch.utils.data.DataLoader(validation_data,
                                                batch_size=BATCH_SIZE,
                                                shuffle=True,
                                                num_workers=0)

prob_ensemble = ensemble.Ensemble(ensemble_output_size)
for _ in range(args.num_ensemble_members):
    model = mean_regressor.MeanRegressor(layer_sizes,
                                         device=device,
                                         learning_rate=args.lr)
    prob_ensemble.add_member(model)
prob_ensemble.add_metrics([rmse])

prob_ensemble.train(train_loader, args.num_epochs, validation_loader=validation_loader)

In [None]:
# Experiment: ensemble of SepRegressor members, trained with mean_only set.
#
# The run-specific overrides are applied first. Previously `args.log_level`
# was set to DEBUG only after utils.setup_logger() had been called, so the
# DEBUG level never applied to this cell's run (it leaked into the next cell
# instead), and the "Args" line was logged before the overrides.
args.num_ensemble_members = 5
args.num_epochs = 40
args.lr = 0.00001
args.log_level = logging.DEBUG

# One timestamped log file per execution of this cell.
log_file = Path("{}_{}.log".format(
    EXPERIMENT_NAME,
    datetime.now().strftime("%Y%m%d_%H%M%S")))
utils.setup_logger(log_path=Path.cwd() / args.log_dir / log_file,
                   log_level=args.log_level)
LOGGER.info("Args: {}".format(args))
device = utils.torch_settings(args.seed, args.gpu)
LOGGER.info("Creating dataloader")
training_data, validation_data = uci_wine.WineData(
    "~/doktor/datasets/UCI/wine/winequality-red.csv").create_train_val_split(
    training_samples_ratio=0.9)

# Network shape: input width, one hidden layer of 100 units, one output.
input_size = 11  # presumably the number of feature columns in the CSV -- confirm
layer_sizes = [input_size, 100, 1]
# The ensemble is created with twice the network's output width.
ensemble_output_size = layer_sizes[-1] * 2

# This cell uses a batch size of 32 rather than the shared BATCH_SIZE.
train_loader = torch.utils.data.DataLoader(training_data,
                                           batch_size=32,
                                           shuffle=True,
                                           num_workers=0)

validation_loader = torch.utils.data.DataLoader(validation_data,
                                                batch_size=32,
                                                shuffle=True,
                                                num_workers=0)

prob_ensemble = ensemble.Ensemble(ensemble_output_size)
for _ in range(args.num_ensemble_members):
    model = sep_regressor.SepRegressor(layer_sizes,
                                       device=device,
                                       learning_rate=args.lr)
    prob_ensemble.add_member(model)
prob_ensemble.add_metrics([rmse])

# Set the mean_only flag on every member before training.
for member in prob_ensemble.members:
    member.mean_only = True

prob_ensemble.train(train_loader, args.num_epochs, validation_loader=validation_loader)

In [None]:
# Run a validation pass for the first ensemble member only; whatever the call
# returns is shown as the cell output.
first_member = prob_ensemble.members[0]
first_member._validate_epoch(validation_loader=validation_loader)

In [None]:
# Experiment: ensemble of SimpleRegressor members, first stage (mean_only set).
#
# The run-specific overrides are applied first. Previously they were assigned
# after the logger had been configured and the "Args" line had been logged, so
# the logger used whatever level an earlier cell had left in `args` and the
# logged settings did not describe this run.
args.num_ensemble_members = 1
args.num_epochs = 40
args.lr = 0.001
args.log_level = logging.DEBUG

# One timestamped log file per execution of this cell.
log_file = Path("{}_{}.log".format(
    EXPERIMENT_NAME,
    datetime.now().strftime("%Y%m%d_%H%M%S")))
utils.setup_logger(log_path=Path.cwd() / args.log_dir / log_file,
                   log_level=args.log_level)
LOGGER.info("Args: {}".format(args))
device = utils.torch_settings(args.seed, args.gpu)
LOGGER.info("Creating dataloader")
training_data, validation_data = uci_wine.WineData(
    "~/doktor/datasets/UCI/wine/winequality-red.csv").create_train_val_split(
    training_samples_ratio=0.7)

# Network shape: input width, two hidden layers of 50 units, two outputs.
input_size = 11  # presumably the number of feature columns in the CSV -- confirm
layer_sizes = [input_size, 50, 50, 2]
# Here the ensemble output width equals the network's own output width.
ensemble_output_size = layer_sizes[-1]

# This cell uses a batch size of 32 rather than the shared BATCH_SIZE.
train_loader = torch.utils.data.DataLoader(training_data,
                                           batch_size=32,
                                           shuffle=True,
                                           num_workers=0)
validation_loader = torch.utils.data.DataLoader(validation_data,
                                                batch_size=32,
                                                shuffle=True,
                                                num_workers=0)

prob_ensemble = ensemble.Ensemble(ensemble_output_size)
for _ in range(args.num_ensemble_members):
    model = simple_regressor.SimpleRegressor(layer_sizes,
                                             device=device,
                                             learning_rate=args.lr)
    # The flag is set on each member before it is added to the ensemble.
    model.mean_only = True
    prob_ensemble.add_member(model)
prob_ensemble.add_metrics([rmse])
prob_ensemble.train(train_loader, args.num_epochs, validation_loader=validation_loader)

In [None]:
# Second stage: clear the mean_only flag on every member, then call train()
# again on the same ensemble with the same loaders and epoch count.
for member in prob_ensemble.members:
    member.mean_only = False

prob_ensemble.train(
    train_loader,
    args.num_epochs,
    validation_loader=validation_loader,
)