In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error

model_cols = ['BSTAR', 'INCLINATION', 'RA_OF_ASC_NODE', 'ECCENTRICITY', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
              'MEAN_MOTION', 'epoch_jd', 'epoch_fr',
              #'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT', 
             ]

# Load all data...
train_df = pd.read_pickle('data/train.pkl' ) # Time: 26.9s
test_df = pd.read_pickle('data/test.pkl' ) # Time: 5s

In [2]:
%%time
# Percent of NORADs to use during training...
perc = 0.2
norad_count = int(len(train_df.NORAD_CAT_ID.unique()) * perc)
train_df = train_df[train_df.NORAD_CAT_ID.isin(train_df.NORAD_CAT_ID.unique()[:norad_count])].reset_index(drop=True)
# 8s

CPU times: user 9.66 s, sys: 4.43 s, total: 14.1 s
Wall time: 14.1 s


In [3]:
%%time
import clean_data

for df in [train_df, test_df]:
    df = clean_data.add_epoch_data(df)
    df = clean_data.normalize_all_columns(df)
    
train_idx_map = clean_data.create_index_map(train_df)
test_idx_map = clean_data.create_index_map(test_df)

train_df = train_df[model_cols]
test_df = test_df[model_cols]

X_test,y_test = clean_data.build_xy(test_df,test_idx_map)  # create input/label pair
#5min

100%|██████████| 2925/2925 [04:19<00:00, 11.27it/s]
100%|██████████| 3150/3150 [00:52<00:00, 59.80it/s]


CPU times: user 9min 2s, sys: 1min 1s, total: 10min 4s
Wall time: 10min 3s


In [4]:
%%time
import train

device='cpu'

model = train.train_model(train_df, train_idx_map, batchSize=200,
                          print_itr=10000, numEpochs=10, model_cols=model_cols,
                          learningRate=0.0001, device=device, num_workers=5,
                          loss='L2', hiddenSize=300)

y_pred = train.predict(model, X_test, device=device) # get predictions for each train
y_pred_df = pd.DataFrame(y_pred, columns=test_df.columns[:-2])  # put results into a dataframe

>>> Loading model
>>> Loading dataset
>>> Beginning training!
Epoch [1/10], Batch [10000/173433], Loss: 0.036586690694093704, Time: 23s
Epoch [1/10], Batch [20000/173433], Loss: 0.038230035454034805, Time: 18s
Epoch [1/10], Batch [30000/173433], Loss: 0.03909628093242645, Time: 18s
Epoch [1/10], Batch [40000/173433], Loss: 0.038707248866558075, Time: 17s
Epoch [1/10], Batch [50000/173433], Loss: 0.04308190941810608, Time: 18s
Epoch [1/10], Batch [60000/173433], Loss: 0.03843908756971359, Time: 18s
Epoch [1/10], Batch [70000/173433], Loss: 0.043522223830223083, Time: 18s
Epoch [1/10], Batch [80000/173433], Loss: 0.03598054125905037, Time: 18s
Epoch [1/10], Batch [90000/173433], Loss: 0.05094822868704796, Time: 17s
Epoch [1/10], Batch [100000/173433], Loss: 0.042732518166303635, Time: 19s
Epoch [1/10], Batch [110000/173433], Loss: 0.04045610874891281, Time: 18s
Epoch [1/10], Batch [120000/173433], Loss: 0.03525495156645775, Time: 17s
Epoch [1/10], Batch [130000/173433], Loss: 0.038510926

Epoch [7/10], Batch [160000/173433], Loss: 0.042547713965177536, Time: 18s
Epoch [7/10], Batch [170000/173433], Loss: 0.03517116606235504, Time: 18s
Epoch [8/10], Batch [10000/173433], Loss: 0.04109114408493042, Time: 31s
Epoch [8/10], Batch [20000/173433], Loss: 0.04577181115746498, Time: 19s
Epoch [8/10], Batch [30000/173433], Loss: 0.03894698992371559, Time: 20s
Epoch [8/10], Batch [40000/173433], Loss: 0.05133158713579178, Time: 19s
Epoch [8/10], Batch [50000/173433], Loss: 0.04001729562878609, Time: 19s
Epoch [8/10], Batch [60000/173433], Loss: 0.03941473364830017, Time: 19s
Epoch [8/10], Batch [70000/173433], Loss: 0.04232988879084587, Time: 18s
Epoch [8/10], Batch [80000/173433], Loss: 0.04176436737179756, Time: 18s
Epoch [8/10], Batch [90000/173433], Loss: 0.03941004350781441, Time: 19s
Epoch [8/10], Batch [100000/173433], Loss: 0.040433578193187714, Time: 19s
Epoch [8/10], Batch [110000/173433], Loss: 0.03736703097820282, Time: 19s
Epoch [8/10], Batch [120000/173433], Loss: 0.

In [5]:
print(f'    Test set MAE (L1) loss: {mean_absolute_error(y_test, y_pred_df)}')
print(f'    Test set MSE (L2) loss: {mean_squared_error(y_test, y_pred_df)}')

    Test set MAE (L1) loss: 0.12223098437615441
    Test set MSE (L2) loss: 30.82074311507277


# Loss Tracking

|Test L1 Loss | Test L2 Loss | NN Change History | Time |
|:-|:-|:-|-|
|0.1234|21.7614|norads=10%, epochs=10, batchSize=200,<br> learn=0.0001, device=cpu, loss=l2,<br> num_workers=5, hidden=300|28min 39s|
|0.1235|34.4868|num_workers=20|28min 54s|
|0.1222|30.8207|norads=20%, num_workers=5|53min 47s|

In [6]:
clean_data.normalize_all_columns(y_test.head().copy(), reverse=True)

Unnamed: 0,BSTAR,INCLINATION,RA_OF_ASC_NODE,ECCENTRICITY,ARG_OF_PERICENTER,MEAN_ANOMALY,MEAN_MOTION
0,0.000221,99.4264,110.9001,0.006699,244.489,114.9328,13.893971
1,0.000236,99.3119,27.3184,0.006715,228.1724,131.368,13.891869
2,0.000147,99.3765,184.6182,0.006671,263.64,95.7151,13.897359
3,-4e-06,98.9771,322.9819,0.006616,272.4335,142.4944,13.902615
4,0.000166,98.8683,105.1738,0.006899,299.2842,60.143,13.876077


In [7]:
clean_data.normalize_all_columns(y_pred_df.head().copy(), reverse=True)  # reverse the normalization

Unnamed: 0,BSTAR,INCLINATION,RA_OF_ASC_NODE,ECCENTRICITY,ARG_OF_PERICENTER,MEAN_ANOMALY,MEAN_MOTION
0,0.000582,98.921455,178.598007,0.006357,178.737259,183.953232,13.921946
1,0.000632,99.409027,176.339615,0.006961,178.76062,183.723083,13.825844
2,0.000283,99.315933,173.922211,0.006942,178.134064,183.17511,13.866077
3,0.000186,99.340118,176.764664,0.006574,177.71701,182.23024,13.912745
4,8.6e-05,99.169548,176.04776,0.007385,176.449661,185.455872,13.757996


In [8]:
y_diff = y_test.to_numpy() - y_pred_df.to_numpy()

In [9]:
y_se = y_diff**2

In [10]:
np.mean(y_se)

30.8207431150799

In [11]:
np.mean(y_se, axis=0)

array([1.01653415e+00, 2.31445814e-02, 1.86952793e-01, 1.76418569e+00,
       1.68059100e-01, 2.72048774e-01, 2.12314277e+02])

In [12]:
np.mean(y_se, axis=1).max()

269411518.21497357

In [13]:
y_se[:,5].max()

1991057.259245302

In [14]:
np.argmax(y_se[:,5])

10316904

In [15]:
display(y_pred_df.iloc[10315138])
display(y_test.iloc[10315138])

BSTAR                0.003506
INCLINATION          0.351029
RA_OF_ASC_NODE       0.499284
ECCENTRICITY         0.270057
ARG_OF_PERICENTER    0.500381
MEAN_ANOMALY         0.506806
MEAN_MOTION         -0.146564
Name: 10315138, dtype: float32

BSTAR                0.000353
INCLINATION          0.352251
RA_OF_ASC_NODE       0.678309
ECCENTRICITY         0.243508
ARG_OF_PERICENTER    0.006970
MEAN_ANOMALY         0.993507
MEAN_MOTION         -0.141822
Name: 10315138, dtype: float64

In [16]:
y_pred_df.sort_values(by='MEAN_MOTION', ascending=False)

Unnamed: 0,BSTAR,INCLINATION,RA_OF_ASC_NODE,ECCENTRICITY,ARG_OF_PERICENTER,MEAN_ANOMALY,MEAN_MOTION
6863186,-125.660843,-4.464447,1.291618,-9.589123,-22.176479,20.472567,115.048454
7502729,-28.951485,-0.654898,0.588967,-2.020561,-5.654107,5.879601,28.738697
9984746,-28.384354,-0.928154,0.714697,-1.929229,-5.444254,5.717077,27.248369
7601514,-23.055277,-0.471195,0.552392,-1.991779,-4.081438,4.533108,26.557798
9955894,-18.745260,-0.157442,0.388579,-1.920563,-3.241878,3.946548,24.508139
...,...,...,...,...,...,...,...
9260439,1.513265,0.339901,0.316094,1.564108,0.010864,1.122360,-7.186084
9867849,-0.612881,0.315095,0.432969,1.846200,0.246573,0.922218,-8.801364
6461391,-1.336556,0.473748,0.370254,2.355693,0.157528,1.090706,-11.682398
7574589,-11.931201,-2.287872,6.065210,24.081017,-4.909996,8.417613,-234.294342


In [21]:
y_test.iloc[10316904]

BSTAR                0.000194
INCLINATION          0.352234
RA_OF_ASC_NODE       0.491539
ECCENTRICITY         0.264074
ARG_OF_PERICENTER    0.009096
MEAN_ANOMALY         0.991511
MEAN_MOTION         -0.141696
Name: 10316904, dtype: float64

In [22]:
X_test.iloc[10316904]

BSTAR                    0.000899
INCLINATION              5.555555
RA_OF_ASC_NODE           2.777778
ECCENTRICITY             0.886922
ARG_OF_PERICENTER        0.000000
MEAN_ANOMALY             0.000000
MEAN_MOTION          20309.414930
epoch_jd                 0.943785
epoch_fr                 0.856977
epoch_jd_y               0.987177
epoch_fr_y               0.398860
Name: 10316904, dtype: float64

In [19]:
train.predict(model, X_test, device=device)

array([[ 0.01164756,  0.54956365,  0.49610558, ...,  0.4964924 ,
         0.5109812 ,  0.07628661],
       [ 0.0126374 ,  0.5522724 ,  0.48983225, ...,  0.4965573 ,
         0.5103419 ,  0.03285605],
       [ 0.00565292,  0.5517552 ,  0.48311725, ...,  0.49481684,
         0.50881976,  0.05103861],
       ...,
       [-0.02729335,  0.29908   ,  0.49535504, ...,  0.49486732,
         0.5235591 ,  0.652351  ],
       [-0.02441044,  0.29933   ,  0.49657908, ...,  0.49561357,
         0.5215944 ,  0.66232955],
       [-0.02610679,  0.3004011 ,  0.4964425 , ...,  0.4968096 ,
         0.5220763 ,  0.66305244]], dtype=float32)

In [32]:
clean_data.normalize_all_columns(test_df.iloc[test_idx_map[10316904][0]].copy())

NORAD_CAT_ID                                                     40342
OBJECT_TYPE                                                     DEBRIS
OBJECT_NAME                                                  CZ-4C DEB
TLE_LINE1            1 40342U 14080E   18339.81488163 -.00000045 +0...
TLE_LINE2            2 40342 063.4051 191.5333 0133403 002.2971 357...
MEAN_MOTION_DOT                                                   -0.0
MEAN_MOTION_DDOT                                                   0.0
BSTAR                                                         0.029474
INCLINATION                                                   0.001957
RA_OF_ASC_NODE                                                0.001478
ECCENTRICITY                                                  0.618992
ARG_OF_PERICENTER                                             0.000018
MEAN_ANOMALY                                                  0.002761
MEAN_MOTION                                                  -6.279436
EPOCH 