In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error

files = ['train']

# Columns handed to the model. The two epoch columns are kept last on purpose:
# a later cell labels the predictions with `test_df.columns[:-2]`.
model_cols = [
    'BSTAR',
    'INCLINATION',
    'RA_OF_ASC_NODE',
    'ECCENTRICITY',
    'ARG_OF_PERICENTER',
    'MEAN_ANOMALY',
    'MEAN_MOTION',
    'epoch_jd',
    'epoch_fr',
    # 'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT',
]

# Full-size dataset alternative (kept for reference):
#%time train_df = pd.read_pickle(os.environ['GP_HIST_PATH'] + '/raw_compiled/train.pkl' ) # Time: 25.7s
#%time test_df = pd.read_pickle(os.environ['GP_HIST_PATH'] + '/raw_compiled/test.pkl' ) # Time: 5s

# Sample dataset, located under the directory named by the `my_home_path`
# environment variable. NOTE: unpickling executes code - only load trusted files.
raw_compiled_dir = os.environ['my_home_path'] + '/data/space-track-gp-hist-sample/raw_compiled'
train_df = pd.read_pickle(raw_compiled_dir + '/train.pkl')
test_df = pd.read_pickle(raw_compiled_dir + '/test.pkl')

In [2]:
# Fraction (0-1) of the distinct NORAD IDs to keep for training.
# NOTE: this cell overwrites train_df, so re-running it without reloading the
# data shrinks the training set again.
perc = 0.5
unique_norads = train_df.NORAD_CAT_ID.unique()  # distinct IDs in order of first appearance
norad_count = int(len(unique_norads) * perc)
keep_rows = train_df.NORAD_CAT_ID.isin(unique_norads[:norad_count])
train_df = train_df[keep_rows].reset_index(drop=True)

In [3]:
%%time
import clean_data

for df in [train_df, test_df]:
    df = clean_data.add_epoch_data(df)
    df = clean_data.normalize_all_columns(df)
    
train_idx_map = clean_data.create_index_map(train_df)
test_idx_map = clean_data.create_index_map(test_df)

train_df = train_df[model_cols]
test_df = test_df[model_cols]

X_test,y_test = clean_data.build_xy(test_df,test_idx_map)  # create input/label pair

100%|████████████████████████████████████████████████████████████████████████████| 1519/1519 [00:00<00:00, 2409.91it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 599/599 [00:00<00:00, 2696.60it/s]

Wall time: 1.32 s





In [4]:
%%time
import train

device='cuda'

model = train.train_model(train_df, train_idx_map, batchSize=200,
                          print_itr=10, numEpochs=10, model_cols=model_cols,
                          learningRate=0.0001, device=device, num_workers=0,
                          loss='L2', hiddenSize=300)

y_pred = train.predict(model, X_test, device=device) # get predictions for each train
y_pred_df = pd.DataFrame(y_pred, columns=test_df.columns[:-2])  # put results into a dataframe

>>> Loading model
>>> Loading dataset
>>> Beginning training!


RuntimeError: cannot pin 'torch.cuda.FloatTensor' only dense CPU tensors can be pinned

In [5]:
# Report both error metrics of the predictions against the test labels.
test_mae = mean_absolute_error(y_test, y_pred_df)
print(f'    Test set MAE (L1) loss: {test_mae}')
test_mse = mean_squared_error(y_test, y_pred_df)
print(f'    Test set MSE (L2) loss: {test_mse}')

NameError: name 'y_pred_df' is not defined

# Loss Tracking

|Test L1 Loss | Test L2 Loss | NN Change History | Time |
|:-|:-|:-|-|
|0.2905|0.2089|norads=10%, epochs=10, batchSize=200,<br> learn=0.0001, device=cpu, loss=l2,<br> num_workers=5, hidden=10| 8s|
|0.1501|0.0651|norads=50%| 11s|
|0.1257|0.0543|norads=100%| 13s|
|0.1392|0.0620|norads=10%, hidden=100| 9s|
|0.0999|0.0557|norads=50%| 12s|
|0.0944|0.0518|norads=100%| 15s|
|0.1162|0.0587|norads=10%, hidden=300| 9s|
|0.0967|0.0544|norads=50%| 12s|
|0.0932|0.0511|norads=100%| 15s|
|0.0931|0.0560|norads=10%, loss=l1| 9s|
|0.0816|0.0649|norads=50%| 12s|
|0.0803|0.0635|norads=100%| 15s|

In [None]:
# Preview the first test labels with the normalization reversed; work on a copy
# so y_test itself stays untouched.
y_test_preview = y_test.head().copy()
clean_data.normalize_all_columns(y_test_preview, reverse=True)

In [None]:
# Preview the first predictions with the normalization reversed; work on a copy
# so y_pred_df itself stays untouched.
y_pred_preview = y_pred_df.head().copy()
clean_data.normalize_all_columns(y_pred_preview, reverse=True)