In [2]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import pickle
import clean_data
import random
import train
import json
from tqdm.notebook import tqdm
import time

ts = time.time()
init_cols = ['BSTAR', 'INCLINATION', 'RA_OF_ASC_NODE', 'ECCENTRICITY', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
             'MEAN_MOTION', 'NORAD_CAT_ID', 'EPOCH', 'SUNSPOTS_1D', 'SUNSPOTS_3D', 'SUNSPOTS_7D',
             'AIR_MONTH_AVG_TEMP', 'WATER_MONTH_AVG_TEMP',
            ]

def load_raw(name=None):
    if not name:
        train_df = pd.read_pickle(os.environ['GP_HIST_PATH'] + '/../3_min/train.pkl' ) # Time: 25.7s
        test_df = pd.read_pickle(os.environ['GP_HIST_PATH'] + '/../3_min/test.pkl' ) # Time: 5
        return {'train': train_df, 'test': test_df}
    else:
        return {name: pd.read_pickle(os.environ['GP_HIST_PATH'] + f'/../3_min/{name}.pkl' )}

def create_save(perc, name=None):
    print('>>> Loading raw data')
    df_in = load_raw(name)

    df_out = []
    for name, df in df_in.items():
        print(f'>>> Truncating {name} data...')
        df = df[init_cols].reset_index(drop=True)  # 4s

        print(f'>>> Normalizing {name} data...')
        df = clean_data.normalize_all_columns(df) # 53.4s

        print(f'>>> Building {name} index map...')
        try:
            idx_map = clean_data.load_index_map(name=name, path='data')
        except:
            idx_map = clean_data.create_index_map(df, write=True, name=name, path='data') # 3min 29s

        print(f'>>> Building {name} inputs and labels')
        X,y = clean_data.build_xy(df, idx_map) # 59min 41s
        X = clean_data.normalize_epoch_diff(X, drop_epoch=False) # 19s

        print(f'>>> Saving {name} data')
        X.to_pickle(f'data/x_{name}1.pkl')
        y.to_pickle(f'data/y_{name}1.pkl')
        df_out.extend([X,y])
    
    return df_out

def load(perc, force_update=False):
    if not force_update:
        print('>>> Loading data', end='')
        
        X_train = pd.read_pickle('data/x_train1.pkl')
        print('.', end='')
        y_train = pd.read_pickle('data/y_train1.pkl')
        print('.', end='')
        X_test = pd.read_pickle('data/x_test1.pkl')
        print('.', end='')
        y_test = pd.read_pickle('data/y_test1.pkl')
        print('.')
        return X_train, y_train, X_test, y_test
    else:
        return create_save(perc)
        

perc = 0.05
force_update = True

# X_train, y_train, X_test, y_test = load(perc, force_update)
print(f'>>> Complete. {round(time.time()-ts)} sec')

>>> Complete. 0 sec


In [3]:
%time create_save(perc, 'secret_test')

>>> Loading raw data
>>> Truncating secret_test data...
>>> Normalizing secret_test data...
>>> Building secret_test index map...


Creating index map:   0%|          | 0/2711 [00:00<?, ?it/s]

>>> Building secret_test inputs and labels
>>> Saving secret_test data
Wall time: 1min 32s


[          BSTAR  INCLINATION  RA_OF_ASC_NODE  ECCENTRICITY  ARG_OF_PERICENTER  \
 0        0.0001     0.553438        0.085252      0.024740           0.186124   
 1        0.0000     0.552689        0.550223      0.024460           0.865216   
 2        0.0001     0.553558        0.308731      0.024688           0.112939   
 3        0.0001     0.552696        0.209585      0.024756           0.397970   
 4        0.0001     0.553406        0.142681      0.024429           0.395079   
 ...         ...          ...             ...           ...                ...   
 9532777  0.0000     0.352270        0.410539      0.001217           0.745323   
 9532778  0.0000     0.352269        0.405774      0.001360           0.776715   
 9532779  0.0000     0.352273        0.393067      0.001356           0.770533   
 9532780  0.0000     0.352267        0.407358      0.001305           0.775244   
 9532781  0.0000     0.352454        0.411607      0.001134           0.068958   
 
          MEAN

In [2]:
configurations = {
    'model_identifier' : "full_remote_2",
    'model_path' : './data',
    'device' : 'cpu',
    'random_seed' : 0,
    'lr' : 0.001,
    'momentum' : 0.9,
    'weight_decay' : 1e-6,
    'max_epochs' : 100,
    'do_validate' : True,
    'model_width' : 128,
    'train_params' : {
        'batch_size': 2000,
        'shuffle': True,
        'num_workers': 2,
        'pin_memory': True,
    },
    'test_params' : {
        'batch_size': 20000,
        'num_workers': 2,
        'pin_memory': True,
    },
}
model_id=configurations['model_identifier']
with open(f'data/{model_id}.cfg', 'w') as f:
    json.dump(configurations, f)

In [3]:
#model_id='full_remote_1'
model_id='full_remote_2'

with open(f'data/{model_id}.cfg') as f:
    configurations = json.load(f)

In [4]:
# configurations['lr'] = 0.01
# configurations['max_epochs'] = 600

# model_id=configurations['model_identifier']
# with open(f'data/{model_id}.cfg', 'w') as f:
#     json.dump(configurations, f)

In [5]:

cols = ['BSTAR', 'INCLINATION', 'RA_OF_ASC_NODE', 'ECCENTRICITY',
       'ARG_OF_PERICENTER', 'MEAN_ANOMALY', 'MEAN_MOTION', 'SUNSPOTS_1D',
       'SUNSPOTS_3D', 'SUNSPOTS_7D', 'AIR_MONTH_AVG_TEMP',
       'WATER_MONTH_AVG_TEMP', 'year', 'month_sin', 'month_cos', 'hour_sin',
       'hour_cos', 'minute_sin', 'minute_cos', 'second_sin', 'second_cos',
       'ms_sin', 'ms_cos', 'year_y', 'month_sin_y', 'month_cos_y',
       'hour_sin_y', 'hour_cos_y', 'minute_sin_y', 'minute_cos_y',
       'second_sin_y', 'second_cos_y', 'ms_sin_y', 'ms_cos_y',
       'epoch_day_diff', 'epoch_sec_diff', 'epoch_ms_diff']

model, mean_losses = train.train_model(X_train[cols], y_train, X_test[cols], y_test, configurations, force_train=False)

New model created
ResNet28(
  (stack1): ResNetStack(
    (stack): Sequential(
      (dense_1): ResnetDenseBlock(
        (dense1): Linear(in_features=37, out_features=128, bias=True)
        (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (dense2): Linear(in_features=128, out_features=128, bias=True)
        (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (dense3): Linear(in_features=128, out_features=128, bias=True)
        (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (shortcut): Sequential(
          (dense_sc): Linear(in_features=37, out_features=128, bias=True)
          (bn_sc): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (identity_1a): ResnetIdentityBlock(
        (dense1): Linear(in_features=128, out_features=128, bias=True)
        (bn1): 

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/24385 [00:00<?, ?it/s]

  0%|          | 0/482 [00:00<?, ?it/s]

  0%|          | 0/24385 [00:00<?, ?it/s]

  0%|          | 0/482 [00:00<?, ?it/s]

  0%|          | 0/24385 [00:00<?, ?it/s]

  0%|          | 0/482 [00:00<?, ?it/s]

  0%|          | 0/24385 [00:00<?, ?it/s]

  0%|          | 0/482 [00:00<?, ?it/s]

  0%|          | 0/24385 [00:00<?, ?it/s]

  0%|          | 0/482 [00:00<?, ?it/s]

  0%|          | 0/24385 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# model, _, _, mean_losses, _ = train.load_model_with_config(configurations)
    
# tl, vl = zip(*mean_losses)

# fig,ax = plt.subplots()
# ax.plot(tl, label="Training Loss")
# ax.plot(vl, label="Validation Loss")

# fig.legend()
# plt.show()