In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error

files = ['train']

# Columns kept for modelling. The two MEAN_MOTION derivative columns are
# deliberately left out for now (see the commented entry at the end).
model_cols = [
    'BSTAR',
    'INCLINATION',
    'RA_OF_ASC_NODE',
    'ECCENTRICITY',
    'ARG_OF_PERICENTER',
    'MEAN_ANOMALY',
    'MEAN_MOTION',
    'epoch_jd',
    'epoch_fr',
    #'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT',
]

# Root directory is taken from the `my_home_path` environment variable;
# a missing variable raises KeyError here rather than further down.
home_path = os.environ['my_home_path']

#%time df = pd.read_pickle(os.environ['GP_HIST_PATH'] + '/raw_compiled/train.pkl' )  # Takes about 20s
# Load the pickled sample splits (train first, then test).
train_df = pd.read_pickle(home_path + '/data/space-track-gp-hist-sample/raw_compiled/train.pkl')
test_df = pd.read_pickle(home_path + '/data/space-track-gp-hist-sample/raw_compiled/test.pkl')

In [2]:
# Fraction (0-1) of distinct NORAD IDs to keep for training.
# NOTE: this cell overwrites train_df, so re-running it without reloading
# the data filters the already-filtered frame again.
perc = 0.5
norad_ids = train_df.NORAD_CAT_ID.unique()
norad_count = int(len(norad_ids) * perc)
# Keep only rows belonging to the first `norad_count` IDs, in order of appearance.
keep_rows = train_df.NORAD_CAT_ID.isin(norad_ids[:norad_count])
train_df = train_df[keep_rows].reset_index(drop=True)

In [3]:
%%time
import clean_data

for df in [train_df, test_df]:
    df = clean_data.add_epoch_data(df)
    df = clean_data.normalize_all_columns(df)
    
train_idx_map = clean_data.create_index_map(train_df)
test_idx_map = clean_data.create_index_map(test_df)

train_df = train_df[model_cols]
test_df = test_df[model_cols]

X_test,y_test = clean_data.build_xy(test_df,test_idx_map)  # create input/label pair

100%|████████████████████████████████████████████████████████████████████████████| 1519/1519 [00:00<00:00, 2402.24it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 599/599 [00:00<00:00, 2692.94it/s]

Wall time: 1.32 s





In [4]:
%%time
import train

device='cpu'

model = train.train_model(train_df, train_idx_map, batchSize=200,
                          print_itr=10, numEpochs=10, model_cols=model_cols,
                          learningRate=0.0001, device=device, num_workers=5,
                          loss='L2', hiddenSize=300)

y_pred = train.predict(model, X_test, device=device) # get predictions for each train
y_pred_df = pd.DataFrame(y_pred, columns=test_df.columns[:-2])  # put results into a dataframe

>>> Loading model
>>> Loading dataset
>>> Beginning training!
Epoch [1/10], Batch [10/297], Loss: 0.41383862495422363, Time: 1s
Epoch [1/10], Batch [20/297], Loss: 0.273721843957901, Time: 0s
Epoch [1/10], Batch [30/297], Loss: 0.2569553256034851, Time: 0s
Epoch [1/10], Batch [40/297], Loss: 0.2058221399784088, Time: 0s
Epoch [1/10], Batch [50/297], Loss: 0.17379535734653473, Time: 0s
Epoch [1/10], Batch [60/297], Loss: 0.13270550966262817, Time: 0s
Epoch [1/10], Batch [70/297], Loss: 0.17171163856983185, Time: 0s
Epoch [1/10], Batch [80/297], Loss: 0.11921536177396774, Time: 0s
Epoch [1/10], Batch [90/297], Loss: 0.11288155615329742, Time: 0s
Epoch [1/10], Batch [100/297], Loss: 0.08324187994003296, Time: 0s
Epoch [1/10], Batch [110/297], Loss: 0.08921951055526733, Time: 0s
Epoch [1/10], Batch [120/297], Loss: 0.08388017117977142, Time: 0s
Epoch [1/10], Batch [130/297], Loss: 0.07992846518754959, Time: 0s
Epoch [1/10], Batch [140/297], Loss: 0.06264731287956238, Time: 0s
Epoch [1/10],

Epoch [5/10], Batch [170/297], Loss: 0.037134215235710144, Time: 0s
Epoch [5/10], Batch [180/297], Loss: 0.03536238148808479, Time: 0s
Epoch [5/10], Batch [190/297], Loss: 0.02625701017677784, Time: 0s
Epoch [5/10], Batch [200/297], Loss: 0.032337773591279984, Time: 0s
Epoch [5/10], Batch [210/297], Loss: 0.038995981216430664, Time: 0s
Epoch [5/10], Batch [220/297], Loss: 0.031844377517700195, Time: 0s
Epoch [5/10], Batch [230/297], Loss: 0.04245379939675331, Time: 0s
Epoch [5/10], Batch [240/297], Loss: 0.039061713963747025, Time: 0s
Epoch [5/10], Batch [250/297], Loss: 0.029483569785952568, Time: 0s
Epoch [5/10], Batch [260/297], Loss: 0.026435939595103264, Time: 0s
Epoch [5/10], Batch [270/297], Loss: 0.03345818445086479, Time: 0s
Epoch [5/10], Batch [280/297], Loss: 0.037202708423137665, Time: 0s
Epoch [5/10], Batch [290/297], Loss: 0.03468162566423416, Time: 0s
Epoch [6/10], Batch [10/297], Loss: 0.03425326570868492, Time: 1s
Epoch [6/10], Batch [20/297], Loss: 0.03674475848674774

Epoch [10/10], Batch [10/297], Loss: 0.029420094564557076, Time: 1s
Epoch [10/10], Batch [20/297], Loss: 0.02880764752626419, Time: 0s
Epoch [10/10], Batch [30/297], Loss: 0.02982618659734726, Time: 0s
Epoch [10/10], Batch [40/297], Loss: 0.03563801199197769, Time: 0s
Epoch [10/10], Batch [50/297], Loss: 0.03392162546515465, Time: 0s
Epoch [10/10], Batch [60/297], Loss: 0.026278866454958916, Time: 0s
Epoch [10/10], Batch [70/297], Loss: 0.029527824372053146, Time: 0s
Epoch [10/10], Batch [80/297], Loss: 0.031557485461235046, Time: 0s
Epoch [10/10], Batch [90/297], Loss: 0.02765764854848385, Time: 0s
Epoch [10/10], Batch [100/297], Loss: 0.03136088326573372, Time: 0s
Epoch [10/10], Batch [110/297], Loss: 0.03074968047440052, Time: 0s
Epoch [10/10], Batch [120/297], Loss: 0.03424854949116707, Time: 0s
Epoch [10/10], Batch [130/297], Loss: 0.023816155269742012, Time: 0s
Epoch [10/10], Batch [140/297], Loss: 0.036441683769226074, Time: 0s
Epoch [10/10], Batch [150/297], Loss: 0.02812570333

In [5]:
# Score predictions against the held-out labels (both are in normalized units).
test_mae = mean_absolute_error(y_test, y_pred_df)
test_mse = mean_squared_error(y_test, y_pred_df)
print(f'    Test set MAE (L1) loss: {test_mae}')
print(f'    Test set MSE (L2) loss: {test_mse}')

    Test set MAE (L1) loss: 0.09666776418831212
    Test set MSE (L2) loss: 0.05394306212580342


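# Loss Tracking

Each row lists only the settings that changed relative to the row above it; every setting not mentioned carries over from the previous row.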

|Test L1 Loss | Test L2 Loss | NN Change History | Time |
|:-|:-|:-|-|
|0.2905|0.2089|norads=10%, epochs=10, batchSize=200,<br> learn=0.0001, device=cpu, loss=l2,<br> num_workers=5, hidden=10| 8s|
|0.1501|0.0651|norads=50%| 11s|
|0.1257|0.0543|norads=100%| 13s|
|0.1392|0.0620|norads=10%, hidden=100| 9s|
|0.0999|0.0557|norads=50%| 12s|
|0.0944|0.0518|norads=100%| 15s|
|0.1162|0.0587|norads=10%, hidden=300| 9s|
|0.0967|0.0544|norads=50%| 12s|
|0.0932|0.0511|norads=100%| 15s|
|0.0931|0.0560|norads=10%, loss=l1| 9s|
|0.0816|0.0649|norads=50%| 12s|
|0.0803|0.0635|norads=100%| 15s|

In [6]:
# First few true labels with the normalization reversed, for side-by-side
# comparison with the predictions in the next cell. Work on a copy so that
# y_test itself is left untouched.
y_test_preview = y_test.head().copy()
clean_data.normalize_all_columns(y_test_preview, reverse=True)

Unnamed: 0,BSTAR,INCLINATION,RA_OF_ASC_NODE,ECCENTRICITY,ARG_OF_PERICENTER,MEAN_ANOMALY,MEAN_MOTION
0,0.000198,99.2262,99.0297,0.006856,181.2493,178.8503,13.885952
1,0.000169,99.2266,99.5291,0.006851,179.9495,180.1667,13.88595
2,0.000241,99.2237,88.1223,0.006796,211.1098,148.6028,13.885913
3,0.000214,99.2263,98.032,0.006846,183.9584,176.1041,13.885948
4,0.000268,99.2242,79.1433,0.006793,235.6724,123.8004,13.885863


In [7]:
# First few predictions with the normalization reversed; a copy is passed so
# that y_pred_df itself is left untouched.
y_pred_preview = y_pred_df.head().copy()
clean_data.normalize_all_columns(y_pred_preview, reverse=True)

Unnamed: 0,BSTAR,INCLINATION,RA_OF_ASC_NODE,ECCENTRICITY,ARG_OF_PERICENTER,MEAN_ANOMALY,MEAN_MOTION
0,0.000615,99.404831,140.879852,0.006192,187.490829,172.788605,13.920848
1,6e-06,97.8797,134.398941,0.006359,180.949326,175.144318,13.961693
2,0.000737,98.088058,150.278137,0.005778,179.294693,181.378052,13.919862
3,0.000616,99.335999,136.496399,0.006369,198.032944,163.733322,13.918449
4,0.000383,99.145355,140.204926,0.006108,182.397659,175.754379,13.938741
