In [1]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
from dataset import Dataset, to_device
from model import NNSingleFeatureModel, NNMeanAnomalyModel, NNBiEpochBiasModel
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import pickle
import normalize_data
import random
from time import time
from tqdm.notebook import tqdm

# Notebook-wide pandas display settings: show up to 999 columns and
# print floats with 10 digits of precision.
pd.options.display.max_columns = 999
pd.options.display.precision = 10

In [2]:
# Cache of loaded dataframes keyed by dataset key; filled lazily by load_data.
raw_data = dict()

def dataset_key(dataset='', validation=False):
    '''
    Build the cache / file key for a dataset split.

    The key is the dataset prefix followed by 'test' when validation is
    truthy and by 'train' otherwise.
    '''
    if validation:
        return dataset + 'test'
    return dataset + 'train'


def load_data(raw, dataset='', validation=False):
    '''
    Return the dataframe for the requested dataset split, reading it from disk
    only the first time the split is requested.

    Parameters
    ----------
    raw : dict
        cache mapping dataset keys to loaded objects; updated in place when a
        split is read from disk

    dataset : str
        dataset prefix. valid input includes: empty str for full set, sample_, and secret_

    validation : bool
        if true load the "<dataset>test" split, otherwise "<dataset>train".
        Note secret_ doesn't have a train split

    Returns
    -------
    The object stored in ``raw`` under the split key, i.e. whatever
    ``pd.read_pickle`` produced for ``<GP_HIST_PATH>/../t5_data/<key>.pkl``.

    Raises
    ------
    KeyError
        if the GP_HIST_PATH environment variable is not set and the split is
        not cached yet
    '''
    # Use the shared helper so the cache key is built in exactly one place.
    key = dataset_key(dataset, validation)
    if key not in raw:
        print(f"Loading data to cache for: {key}")
        # NOTE: unpickling can execute arbitrary code; only point GP_HIST_PATH
        # at data files you trust.
        raw[key] = pd.read_pickle(f'{os.environ["GP_HIST_PATH"]}/../t5_data/{key}.pkl')
    return raw[key]

In [3]:
def load_sub_model_with_config(train_config, model_configs, sub_model_key, X_count=0, force_recreate=False):
    '''
    Load the checkpoint of one sub model, or create (and immediately save) a new one.

    Parameters
    ----------
    train_config : dict
        reads 'model_path' and 'model_prefix' (checkpoint location) and the
        optional 'device' (defaults to 'cpu') used to place loaded tensors

    model_configs : dict
        per sub model settings; the entry for ``sub_model_key`` must exist

    sub_model_key : str
        key into ``model_configs``; also part of the checkpoint file name

    X_count : int
        number of input features; only needed when a new model is created

    force_recreate : bool
        ignore an existing checkpoint and build a fresh model, overwriting the file

    Returns
    -------
    tuple
        (net, loss_func, optimizer, mean_losses, next_epoch)

    Raises
    ------
    ValueError
        if a new model has to be created and ``X_count`` is not a positive integer
    '''
    # a bit hacky, but in the training phase, we never load and use the minmax scalers
    # just putting it here for when we want to load the model elsewhere THEN revert scaling
    # probably better to have the scalers saved separately....

    path = train_config['model_path']
    prefix = train_config['model_prefix']
    model_config = model_configs[sub_model_key]
    f = f"{path}/{prefix}{sub_model_key}.pth"
    if os.path.exists(f) and not force_recreate:
        print("Loading existing model")
        # Remap storages onto the configured device so a checkpoint written on a
        # different device (e.g. a GPU) can still be opened here, and so the net
        # and the optimizer state end up on the same device.
        checkpoint = torch.load(f, map_location=train_config.get('device', 'cpu'))
        net = checkpoint['net']
        loss_func = checkpoint['loss_func']
        optimizer = checkpoint['optimizer']
        mean_losses = checkpoint['mean_losses']
        next_epoch = checkpoint['next_epoch']
    else:
        # Fail early with a clear message instead of handing a bad size
        # (0, negative, or a non-integer object) to the model constructor.
        if not isinstance(X_count, (int, np.integer)) or X_count <= 0:
            raise ValueError('Cannot create model without a positive integer X_count (number of input features)')
        print("New model created")
        net = NNSingleFeatureModel(X_count,
                                   model_config['head_out_size'],
                                   model_config['feature_index'],
                                   model_config['epoch_diff_index'],
                                   model_config['mag_index'],
                                   model_config['alternate_model'],
                                   model_config['model_definition'])
        loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss
#         loss_func = torch.nn.L1Loss()
        # just setup a dummy optimizer and override it later
        optimizer = torch.optim.Adam(net.parameters())
#         optimizer = torch.optim.SGD(net.parameters(), lr=config['lr'], momentum=config['momentum'], weight_decay=config['weight_decay'])
        mean_losses = []
        next_epoch = 0
        # Persist right away so later loads find the freshly created model.
        save_model_with_config(train_config, sub_model_key, net=net, loss_func=loss_func,
                               optimizer=optimizer, mean_losses=mean_losses, next_epoch=next_epoch )
    return net, loss_func, optimizer, mean_losses, next_epoch

def save_model_with_config(train_config, sub_model_key, **kwargs):
    '''
    Write the given keyword arguments as one checkpoint dict with torch.save.

    The file is "<model_path>/<model_prefix><sub_model_key>.pth", where
    model_path and model_prefix are read from train_config.
    '''
    target = "{}/{}{}.pth".format(
        train_config['model_path'],
        train_config['model_prefix'],
        sub_model_key,
    )
    torch.save(kwargs, target)

In [4]:
def train_model(X_train, y_train, X_test, y_test, train_config, model_configs, sub_model_key, force_optimizer=None):
    '''
    Train (or resume training of) one sub model, checkpointing after every epoch.

    Parameters
    ----------
    X_train, y_train
        training features / targets, wrapped in ``Dataset``. ``X_train.shape[1]``
        is used as the input feature count when a new model has to be created

    X_test, y_test
        validation features / targets, wrapped in ``Dataset``

    train_config : dict
        keys read here: 'random_seed' (default 0), 'device' (default 'cpu'),
        'train_params' / 'test_params' (keyword arguments for the DataLoaders),
        'max_epochs', 'do_validate', 'model_path' and 'model_prefix'

    model_configs : dict
        per sub model settings, forwarded to load_sub_model_with_config

    sub_model_key : str
        which sub model to train; also used in the checkpoint file names

    force_optimizer : optimizer or None
        when given, replaces the optimizer that came with the loaded model.
        NOTE(review): it presumably has to be built over the parameters of the
        net loaded inside this function to have any effect - confirm

    Returns
    -------
    tuple
        (net, mean_losses): the network in eval mode and a list with one
        (mean training loss, mean validation loss) pair per finished epoch.
        The validation entry is 0 when 'do_validate' is false.
    '''
    torch.manual_seed(train_config.get('random_seed', 0))
    pyt_device = torch.device(train_config.get('device', 'cpu'))

    training_set = Dataset(X_train, y_train)
    training_generator = torch.utils.data.DataLoader(training_set, **train_config['train_params'])
    testing_set = Dataset(X_test, y_test)
    testing_generator = torch.utils.data.DataLoader(testing_set, **train_config['test_params'])

    # X_count must be the number of input features (as in the other callers),
    # not the Dataset object itself, otherwise creating a new model breaks.
    net, loss_func, optimizer, mean_losses, next_epoch = load_sub_model_with_config(
        train_config, model_configs, sub_model_key, X_train.shape[1])

    if force_optimizer is not None:
        optimizer = force_optimizer
    to_device(net, pyt_device)
    net.train()

    max_epochs = train_config['max_epochs']
    # >= so a checkpoint that is already past max_epochs is reported as finished too.
    if next_epoch >= max_epochs:
        print("Model finished training. To retrain set force_train = True ")
        net.eval()
        return net, mean_losses

    prefix = train_config['model_prefix']
    history_dir = f"{train_config['model_path']}/model_history"
    # Make sure the snapshot directory exists before spending an epoch of
    # training only to fail on the first torch.save.
    os.makedirs(history_dir, exist_ok=True)

    epbar = tqdm(range(next_epoch, max_epochs))
    for epoch in epbar:
        epbar.set_description(f"Epoch {epoch}")

        running_eloss = 0
        running_vloss = 0

        ipbar = tqdm(training_generator, leave=False)
        ipbar.set_description("Training")

        for x, y in ipbar:
            x = to_device(x, pyt_device)
            y = to_device(y, pyt_device)

            optimizer.zero_grad()
            prediction = net(x)     # input x and predict based on x
            loss = loss_func(prediction, y)     # must be (1. nn output, 2. target)
            loss.backward()         # backpropagation, compute gradients
            optimizer.step()        # apply gradients
            running_eloss += loss.item()

        # Evaluate (and snapshot) in eval mode so dropout is disabled.
        net.eval()
        mean_vlosses = 0
        if train_config['do_validate']:
            with torch.no_grad():
                vpbar = tqdm(testing_generator, leave=False)
                vpbar.set_description("Validating")
                for x, y in vpbar:
                    x = to_device(x, pyt_device)
                    y = to_device(y, pyt_device)
                    prediction = net(x)
                    loss = loss_func(prediction, y)
                    running_vloss += loss.item()
            mean_vlosses = running_vloss / len(testing_generator)

        # Per-epoch snapshot; next_epoch still holds the index of the epoch
        # that just finished, so snapshot i matches mean_losses[i].
        torch.save(net, f"{history_dir}/{prefix}{sub_model_key}_{next_epoch:04}.pth")

        mean_elosses = running_eloss / len(training_generator)
        mean_losses.append((mean_elosses, mean_vlosses))
        next_epoch = len(mean_losses)

        # Resumable checkpoint with the full training state.
        save_model_with_config(train_config, sub_model_key, net=net, loss_func=loss_func,
                               optimizer=optimizer, mean_losses=mean_losses, next_epoch=next_epoch )
        net.train()

        epbar.set_postfix({'train_loss':f"{mean_elosses:.12f}", 'val_loss':f"{mean_vlosses:.12f}"})
    net.eval()
    return net, mean_losses

In [5]:
def get_ref_X_y(df):
    '''
    Split a dataframe column-wise by column name prefix.

    Returns a 3-tuple of dataframes holding, in order, the columns whose
    names start with '__', with 'X_' and with 'y_'.
    '''
    def columns_with(prefix):
        return [name for name in df.columns if name.startswith(prefix)]

    selections = [columns_with(prefix) for prefix in ('__', 'X_', 'y_')]
    return tuple(df[names] for names in selections)

In [6]:
# Run-level settings shared by data loading, model (de)serialisation and
# train_model. Requires the GP_HIST_PATH environment variable to be set.
train_config = {
    # dataset prefix handed to load_data
    'dataset' : 'sample2_', # eg: '', 'sample_', 'sample2_', 'secret_'
    # checkpoints are written to "<model_path>/<model_prefix><sub_model_key>.pth"
    'model_path' : f"{os.environ['GP_HIST_PATH']}/../t5_models",
    'model_prefix' : "TRY_2_", 
    # device string passed to torch.device in train_model
    'device' : 'cpu',
    # seed passed to torch.manual_seed at the start of train_model
    'random_seed' : 0,
    # train_model stops once the checkpoint's epoch counter reaches this value
    'max_epochs' : 500,
    # when False the validation pass is skipped and the validation loss is recorded as 0
    'do_validate' : True,
    # keyword arguments for the training DataLoader
    'train_params' : {
        'batch_size': 100000,
        'shuffle': True,
        'num_workers': 4,
        'pin_memory': True,
    },
    # keyword arguments for the validation DataLoader
    'test_params' : {
        'batch_size': 200000,
        'num_workers': 4,
        'pin_memory': True,
    },
}

In [7]:
%%time

# Load the train and test splits of the configured dataset (cached in raw_data),
# pass them through normalize_data.normalize_all_columns and drop every row
# that still contains a missing value.
# NOTE(review): the recorded output below mentions "sample_train"/"sample_test"
# while train_config['dataset'] is 'sample2_' - the output looks stale; confirm.
train_df = normalize_data.normalize_all_columns(load_data(raw_data,dataset=train_config['dataset'],validation=False)).dropna()
test_df = normalize_data.normalize_all_columns(load_data(raw_data,dataset=train_config['dataset'],validation=True)).dropna()
# Split each frame by column prefix: '__' reference columns, 'X_' features, 'y_' targets.
ref_train, X_train, y_train = get_ref_X_y(train_df)
ref_test, X_test, y_test = get_ref_X_y(test_df)
# Keep only the six target columns that have a sub model in model_configs.
y_cols = ['y_INCLINATION', 'y_ECCENTRICITY', 'y_MEAN_MOTION', 'y_RA_OF_ASC_NODE_REG', 'y_ARG_OF_PERICENTER_REG', 'y_REV_MA_REG']
y_train = y_train[y_cols]
y_test = y_test[y_cols]

Loading data to cache for: sample_train
Loading data to cache for: sample_test
CPU times: user 9.94 s, sys: 10.8 s, total: 20.8 s
Wall time: 17.2 s


In [8]:
# Settings for every sub model, keyed by the target column it predicts.
# Each entry supplies the constructor arguments that load_sub_model_with_config
# forwards to NNSingleFeatureModel: column positions inside X_train, the
# alternate model class, the layer specification and the head output size.
model_configs = {
    'y_INCLINATION': {
        'feature_index': X_train.columns.get_loc('X_INCLINATION_1'),
        'epoch_diff_index': X_train.columns.get_loc('X_delta_EPOCH'),
        'mag_index': None,
        'alternate_model': NNBiEpochBiasModel,
        # 14 repetitions of the same (layer, relu, drop) triple
        'model_definition': [('layer', 100), ('relu', None), ('drop', 0.5)] * 14,
        'head_out_size': 100,
    },
    'y_ECCENTRICITY': {
        'feature_index': X_train.columns.get_loc('X_ECCENTRICITY_1'),
        'epoch_diff_index': X_train.columns.get_loc('X_delta_EPOCH'),
        'mag_index': None,
        'alternate_model': NNBiEpochBiasModel,
        # 14 repetitions of the same (layer, relu, drop) triple
        'model_definition': [('layer', 150), ('relu', None), ('drop', 0.5)] * 14,
        'head_out_size': 150,
    },
    'y_MEAN_MOTION': {
        'feature_index': X_train.columns.get_loc('X_MEAN_MOTION_1'),
        'epoch_diff_index': X_train.columns.get_loc('X_delta_EPOCH'),
        'mag_index': None,
        'alternate_model': NNBiEpochBiasModel,
        'model_definition': [
            ('layer', 1000), ('relu', None),
            ('layer', 500), ('relu', None),
        ],
        'head_out_size': 100,
    },
    'y_RA_OF_ASC_NODE_REG': {
        'feature_index': X_train.columns.get_loc('X_RA_OF_ASC_NODE_1'),
        'epoch_diff_index': X_train.columns.get_loc('X_delta_EPOCH'),
        'mag_index': None,
        'alternate_model': NNBiEpochBiasModel,
        'model_definition': [
            ('layer', 1000), ('relu', None),
            ('layer', 500), ('relu', None),
        ],
        'head_out_size': 100,
    },
    'y_ARG_OF_PERICENTER_REG': {
        'feature_index': X_train.columns.get_loc('X_ARG_OF_PERICENTER_1'),
        'epoch_diff_index': X_train.columns.get_loc('X_delta_EPOCH'),
        'mag_index': None,
        'alternate_model': NNBiEpochBiasModel,
        'model_definition': [
            ('layer', 1000), ('relu', None),
            ('layer', 500), ('relu', None),
        ],
        'head_out_size': 100,
    },
    'y_REV_MA_REG': {
        'feature_index': X_train.columns.get_loc('X_MEAN_ANOMALY_1'),
        'epoch_diff_index': X_train.columns.get_loc('X_delta_EPOCH'),
        # the only sub model that also reads a second input column
        'mag_index': X_train.columns.get_loc('X_MEAN_MOTION_1'),
        'alternate_model': NNMeanAnomalyModel,
        'model_definition': [('layer', 80), ('relu', None), ('drop', 0.5)] * 3,
        'head_out_size': 40,
    },
}

In [9]:
# Load every sub model (creating and saving any that has no checkpoint yet)
# and keep the loaded state per target in all_models.
all_models = {}
for sub_key in model_configs:
    # When new models are created, a dummy optimizer is used
    model, loss_func, optimizer, mean_losses, next_epoch = load_sub_model_with_config(
        train_config,
        model_configs,
        sub_key,
        X_count=len(X_train.columns),
        force_recreate=False,
    )
    print(f'Model for "{sub_key}" __ Last loss: {mean_losses[-1:]} __ Epoch: {next_epoch}')
    all_models[sub_key] = (model, loss_func, optimizer, mean_losses, next_epoch)

Loading existing model
Model for "y_INCLINATION" __ Last loss: [(3.455963329329612e-08, 7.591381967486655e-10)] __ Epoch: 44
Loading existing model
Model for "y_ECCENTRICITY" __ Last loss: [(9.142103758652778e-07, 3.299049993188419e-07)] __ Epoch: 43
Loading existing model
Model for "y_MEAN_MOTION" __ Last loss: [] __ Epoch: 0
Loading existing model
Model for "y_RA_OF_ASC_NODE_REG" __ Last loss: [] __ Epoch: 0
Loading existing model
Model for "y_ARG_OF_PERICENTER_REG" __ Last loss: [] __ Epoch: 0
Loading existing model
Model for "y_REV_MA_REG" __ Last loss: [(8.267218959818463e-06, 3.879294938867263e-06)] __ Epoch: 4


In [10]:
# Choose the sub model to work on; one of the keys of model_configs:
# ['y_INCLINATION', 'y_ECCENTRICITY', 'y_MEAN_MOTION', 'y_RA_OF_ASC_NODE_REG', 'y_ARG_OF_PERICENTER_REG', 'y_REV_MA_REG']
submodel_to_train = "y_REV_MA_REG"

# Fetch its current state (loaded from the checkpoint, or newly created).
net, loss_func, optimizer, mean_losses, next_epoch = load_sub_model_with_config(
    train_config,
    model_configs,
    submodel_to_train,
    X_count=len(X_train.columns),
    force_recreate=False,
)

New model created


In [11]:
# # revert to the best validation loss epoch
# min_val_loss_idx = np.argmin(np.array(mean_losses)[:,1])
# print(f'Reverting to epoch: {min_val_loss_idx}')
# f = f"{train_config['model_path']}/model_history/{train_config['model_prefix']}{submodel_to_train}_{min_val_loss_idx:04}.pth"
# net = torch.load(f)
# mean_losses = mean_losses[:min_val_loss_idx+1]
# next_epoch = len(mean_losses)

In [12]:
# Override optimizer or loss_func here
optimizer = torch.optim.AdamW(
    params=net.parameters(),
    lr=1e-3,
    weight_decay=0.1,
    amsgrad=True,
)

In [13]:
# Persist the (possibly overridden) state: train_model reloads the sub model
# from its checkpoint file, so overrides only take effect once saved.
save_model_with_config(
    train_config,
    submodel_to_train,
    net=net,
    loss_func=loss_func,
    optimizer=optimizer,
    mean_losses=mean_losses,
    next_epoch=next_epoch,
)

In [None]:
# Train the selected sub model against its single target column.
model, mean_losses = train_model(
    X_train,
    y_train[[submodel_to_train]],
    X_test,
    y_test[[submodel_to_train]],
    train_config,
    model_configs,
    submodel_to_train,
)

Loading existing model


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]