In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import itertools
import matplotlib.pyplot as plt
from sgp4.api import Satrec, WGS72
from sgp4.conveniences import jday_datetime
from datetime import datetime, timezone

# Raise pandas' display limit to 500 columns so wide frames are not truncated when shown
pd.set_option('display.max_columns', 500)

In [None]:
# Read the NORAD catalogue IDs that were used in training and keep them as a plain list.
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
train_norad_df = pd.read_pickle('train_norads.pkl.gz')
train_norad_list = train_norad_df['norad'].tolist()

In [2]:
# Directory holding the gzipped GP-history CSV files.
#csv_store_path = os.environ['GP_HIST_PATH']
# Built with os.path.join rather than a '\data\...' literal: '\d' and '\s' are
# invalid escape sequences, and the hand-written separators were mixed with '/'.
csv_store_path = os.path.join(os.environ['my_home_path'], 'data', 'space-track-gp-hist-sample')

files = sorted(x for x in os.listdir(csv_store_path) if x.endswith(".csv.gz"))
if not files:
    # Fail with a clear message instead of an AttributeError further down
    raise FileNotFoundError(f'No .csv.gz files found in {csv_store_path}')

# Collect the per-file frames and concatenate once; concatenating inside the
# loop re-copies everything loaded so far on every iteration.
frames = []
for f in tqdm(files):
    # parse_dates alone is enough; infer_datetime_format is deprecated in recent pandas
    df = pd.read_csv(os.path.join(csv_store_path, f), parse_dates=['EPOCH'], compression='gzip')
    # LEO = Mean Motion > 11.25 and Eccentricity < 0.25
    #df = df[(df.MEAN_MOTION > 11.25) & (df.ECCENTRICITY < 0.25)]
    # Keep only the objects that were used in training
    frames.append(df[df.NORAD_CAT_ID.isin(train_norad_list)])
dfs = pd.concat(frames)

# Remove unnecessary columns to save memory
# ('index' is the column created by reset_index() below)
unnecessary_columns = ['CCSDS_OMM_VERS', 'COMMENT', 'CREATION_DATE', 'ORIGINATOR', 'OBJECT_NAME', 'OBJECT_ID',
                       'CENTER_NAME', 'REF_FRAME', 'TIME_SYSTEM', 'MEAN_ELEMENT_THEORY', 'EPHEMERIS_TYPE',
                       'CLASSIFICATION_TYPE', 'ELEMENT_SET_NO', 'REV_AT_EPOCH', 'SEMIMAJOR_AXIS', 'PERIOD',
                       'APOAPSIS', 'PERIAPSIS', 'OBJECT_TYPE', 'RCS_SIZE', 'COUNTRY_CODE', 'LAUNCH_DATE',
                       'SITE', 'DECAY_DATE', 'FILE', 'GP_ID', 'TLE_LINE0', 'index']
dfs = dfs.reset_index().drop(columns=unnecessary_columns)

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.14it/s]


In [3]:
def get_satellite_position_data():
    """Add SGP4-derived columns to the notebook-level ``dfs`` frame, in place.

    Columns written:
      - ``satobj``: ``Satrec`` object built from ``TLE_LINE1`` / ``TLE_LINE2``
      - ``epoch_jd`` / ``epoch_fr``: the two values returned by ``jday_datetime``
        for ``EPOCH`` (treated as UTC)
      - ``epoch_days``: days elapsed since 1949 December 31 00:00
      - ``satpos``: element 1 of ``satobj.sgp4(epoch_jd, epoch_fr)`` as a numpy array
    """

    def _build_satrec(row):
        return Satrec.twoline2rv(row['TLE_LINE1'], row['TLE_LINE2'])

    def _julian_parts(epoch):
        # EPOCH is parsed without a timezone; tag it as UTC before converting
        return jday_datetime(epoch.replace(tzinfo=timezone.utc))

    def _position_at_epoch(row):
        # Index 1 of the sgp4() result is kept (presumably the position vector -- confirm against sgp4 docs)
        return np.array(row['satobj'].sgp4(row['epoch_jd'], row['epoch_fr'])[1])

    # Satellite object used to evaluate positions
    dfs['satobj'] = dfs.apply(_build_satrec, axis=1)

    # Julian date of the EPOCH, split over two columns
    dfs[['epoch_jd', 'epoch_fr']] = dfs['EPOCH'].apply(_julian_parts).to_list()

    # Days since 1949 December 31 00:00 UT.
    # Needed later to rebuild a Satrec through sgp4init() from generated TLE values.
    sgp4_epoch_origin = datetime(1949, 12, 31, 0, 0, 0)
    one_day = np.timedelta64(1, 'D')
    dfs['epoch_days'] = dfs['EPOCH'].apply(lambda epoch: (epoch - sgp4_epoch_origin) / one_day)

    # x, y, z taken from each TLE evaluated at its own epoch
    dfs['satpos'] = dfs.apply(_position_at_epoch, axis=1)

get_satellite_position_data()

In [4]:
def create_xy(random_state=None):
    '''
    Build paired reference/target frames from the notebook-level ``dfs``.

    ML structure
      Input:  reference TLE data (+ EPOCH) and the target EPOCH
      Output: target TLE data

    For every NORAD_CAT_ID with at least two rows, the row labels are shuffled
    and each one is paired with its successor (the last wraps round to the
    first), so every row is used exactly once as a reference and once as a
    target.

    Parameters
    ----------
    random_state : int or None, default None
        Seed for the shuffle. ``None`` keeps the previous behaviour of using
        numpy's global random state.

    Returns
    -------
    (X_all, Y_all) : tuple of DataFrame
        Row ``i`` of ``X_all`` is the reference and row ``i`` of ``Y_all`` the
        target. ``X_all`` additionally carries ``target_epoch_jd`` and
        ``target_epoch_fr`` copied from ``Y_all``. Both frames are empty when
        no NORAD has two or more rows.
    '''
    if random_state is None:
        shuffle = np.random.shuffle
    else:
        shuffle = np.random.RandomState(random_state).shuffle

    def groups(lst):
        # Shuffle a copy, then pair element i with element i+1 (cyclically)
        arr = lst.copy()
        shuffle(arr)
        return np.column_stack((arr, np.roll(arr, -1)))

    # One pass over the frame via groupby instead of a boolean scan per NORAD.
    # sort=False keeps the groups in order of first appearance.
    pair_blocks = []
    for _, members in dfs.groupby('NORAD_CAT_ID', sort=False)['NORAD_CAT_ID']:
        norad_idxs = members.index.values
        # was `len(norad_idxs > 1)`, which measured the boolean mask instead
        if len(norad_idxs) > 1:
            pair_blocks.append(groups(norad_idxs))

    if pair_blocks:
        idx_pairs = np.concatenate(pair_blocks)
        ref_idx, target_idx = idx_pairs[:, 0], idx_pairs[:, 1]
    else:
        # Nothing to pair: select zero rows rather than indexing an empty 1-D array
        ref_idx = target_idx = dfs.index[:0]

    # Build our X/Y datasets
    X_all = dfs.loc[ref_idx].reset_index()
    Y_all = dfs.loc[target_idx].reset_index()

    # These columns link x and y: the target epoch becomes an input feature
    key_columns = ['epoch_jd', 'epoch_fr']
    target_columns = ['target_epoch_jd', 'target_epoch_fr']
    for target_col, key_col in zip(target_columns, key_columns):
        X_all[target_col] = Y_all[key_col].values

    return X_all, Y_all

# Build the paired frames: X_all holds the reference rows, y_all the matching target rows
X_all, y_all = create_xy()

In [5]:
def clean_X(X):
    """Return the model-input columns of ``X``.

    No value cleaning is performed yet; the function only selects the
    orbital-element columns followed by the reference and target epoch columns.
    Raises ``KeyError`` if any of them is missing from ``X``.
    """
    orbital_elements = [
        'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT', 'BSTAR',
        'INCLINATION', 'RA_OF_ASC_NODE', 'ECCENTRICITY',
        'ARG_OF_PERICENTER', 'MEAN_ANOMALY', 'MEAN_MOTION',
    ]
    epoch_features = ['epoch_jd', 'epoch_fr', 'target_epoch_jd', 'target_epoch_fr']

    selected = orbital_elements + epoch_features
    return X[selected]
    
# Feature matrix: only the columns selected by clean_X
X_all_clean = clean_X(X_all)

In [6]:
def clean_Y(Y):
    """Return the target-side columns of ``Y``.

    No value cleaning is performed yet; the function selects the
    orbital-element columns followed by ``epoch_days``, ``epoch_jd``,
    ``epoch_fr`` and ``satpos``. Raises ``KeyError`` if any of them is missing.
    """
    orbital_elements = [
        'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT', 'BSTAR',
        'INCLINATION', 'RA_OF_ASC_NODE', 'ECCENTRICITY',
        'ARG_OF_PERICENTER', 'MEAN_ANOMALY', 'MEAN_MOTION',
    ]
    epoch_and_position = ['epoch_days', 'epoch_jd', 'epoch_fr', 'satpos']

    selected = orbital_elements + epoch_and_position
    return Y[selected]
    
# Target frame: only the columns selected by clean_Y
y_all_clean = clean_Y(y_all)

In [7]:
from sklearn.model_selection import train_test_split

# Shuffle and hold out 33% of the reference/target pairs for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X_all_clean,
    y_all_clean,
    test_size=0.33,
    random_state=42,
    shuffle=True,
)

# These y columns are not regression targets, so strip them from y_train
non_training_cols = ['epoch_days', 'epoch_jd', 'epoch_fr', 'satpos']
y_train = y_train.drop(columns=non_training_cols)

In [8]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR

# Score against the regression targets only, but leave y_test itself untouched:
# get_predicted_error() further down still reads its epoch_*/satpos columns, and
# overwriting y_test here also made this cell fail with KeyError when re-run.
y_test_targets = y_test.drop(columns=non_training_cols)

# Candidate regressors to compare; commented entries are disabled
ESTIMATORS = {
    "Extra trees": ExtraTreesRegressor(n_estimators=10, random_state=0, n_jobs=-1),
    "Random Forest": RandomForestRegressor(n_estimators=10, random_state=0, n_jobs=-1),
    #"GradientBoost": MultiOutputRegressor(GradientBoostingRegressor(n_estimators=10, random_state=0)),
    "K-nn": KNeighborsRegressor(n_jobs=-1),
    "Linear regression": LinearRegression(n_jobs=-1),
    #"Ridge": RidgeCV(),
    #"SVM": MultiOutputRegressor(SVR()),
}

# Let's see how it does on the same NORAD:
# fit each estimator on the training pairs and print its score() on the held-out pairs
for name, estimator in ESTIMATORS.items():
    estimator.fit(X_train, y_train)
    score = estimator.score(X_test, y_test_targets)
    print (f'{name} got score {score}')

Extra trees got score 0.516434536110915
Random Forest got score 0.5414598281573768
K-nn got score 0.37590439661058755
Linear regression got score 0.419811728932068


itertools.combinations()
```
Extra trees got score 0.9080013082091861
Random Forest got score 0.883921000656464
GradientBoost got score 0.5516253957176365
K-nn got score 0.7644015286756071
Linear regression got score 0.4746575589454271
Ridge got score 0.4744900285859706
```
groups()
```
Extra trees got score 0.534262241603748
Random Forest got score 0.4692323383272387
GradientBoost got score 0.356691868881702
K-nn got score 0.36862432289895664
Linear regression got score 0.4765033608802853
Ridge got score 0.45090731103313353
```

In [24]:
# Spot-check one raw angle value (the displayed 186.7557 is larger than 2*pi, so it is not in radians)
dfs.loc[0,'RA_OF_ASC_NODE']

186.7557

In [None]:
from sklearn.ensemble import ExtraTreesRegressor
# Fit the single model whose predictions are evaluated in the cells below.
# Alternative kept for experimentation:
#model = ExtraTreesRegressor(n_estimators=10, random_state=0, n_jobs=-1).fit(X_train, y_train)
model = LinearRegression(n_jobs=-1)
model.fit(X_train, y_train)

In [None]:
# Predicted target values for the held-out reference rows
results = model.predict(X_test)

In [None]:
def get_predicted_error(results):
    '''
    Return the mean-squared error between the satellite position computed
    from the predicted TLE values and the actual TLE's satellite position.

    Parameters
    ----------
    results : array-like, one row per row of ``y_test``
        Model predictions, with columns in the order of the non-metadata
        columns of ``y_all_clean``.

    Returns
    -------
    DataFrame
        The predicted values (angles converted to radians, MEAN_MOTION to
        radians/minute), the target rows' ``epoch_days`` / ``epoch_jd`` /
        ``epoch_fr`` / ``satpos``, plus ``satpos_calc`` (position from the
        predicted values) and ``pos_predict_error`` (per-row mean squared
        difference of the position components).
    '''
    # Metadata kept alongside the targets for evaluation only. Both the column
    # names and the metadata are taken from y_all_clean via y_test.index, so
    # this works whether or not an earlier cell stripped these columns from y_test.
    meta_cols = ['epoch_days', 'epoch_jd', 'epoch_fr', 'satpos']
    element_cols = [col for col in y_all_clean.columns if col not in meta_cols]

    def get_mserror(x):
        return ((x['satpos']-x['satpos_calc'])**2).mean()

    def get_satpos(x):
        # Build a satellite record directly from the predicted values and
        # evaluate it at the target epoch.
        # NOTE(review): MEAN_MOTION_DOT / MEAN_MOTION_DDOT are passed without
        # unit conversion; the argument notes below mark them as not used.
        sat = Satrec()
        sat.sgp4init(
             WGS72,           # gravity model
             'i',             # 'a' = old AFSPC mode, 'i' = improved mode
             0,               # satnum: Satellite number
             x['epoch_days'],       # epoch: days since 1949 December 31 00:00 UT
             x['BSTAR'],      # bstar: drag coefficient (/earth radii)
             x['MEAN_MOTION_DOT'], # ndot (NOT USED): ballistic coefficient (revs/day)
             x['MEAN_MOTION_DDOT'],             # nddot (NOT USED): mean motion 2nd derivative (revs/day^3)
             x['ECCENTRICITY'],       # ecco: eccentricity
             x['ARG_OF_PERICENTER'], # argpo: argument of perigee (radians)
             x['INCLINATION'], # inclo: inclination (radians)
             x['MEAN_ANOMALY'], # mo: mean anomaly (radians)
             x['MEAN_MOTION'], # no_kozai: mean motion (radians/minute)
             x['RA_OF_ASC_NODE'], # nodeo: right ascension of ascending node (radians)
        )
        return np.array(sat.sgp4(x['epoch_jd'], x['epoch_fr'])[1])

    # Join our results with the target rows' metadata (row order follows y_test)
    target_meta = y_all_clean.loc[y_test.index, meta_cols].reset_index(drop=True)
    y_test_error = pd.DataFrame(results, columns=element_cols) \
                     .merge(target_meta, left_index=True, right_index=True)

    # Convert angle columns from degrees to radians
    cols_to_radians = ['RA_OF_ASC_NODE', 'MEAN_ANOMALY', 'INCLINATION', 'ARG_OF_PERICENTER']
    y_test_error[cols_to_radians] = y_test_error[cols_to_radians]*np.pi/180
    # revs/day -> radians/minute: 2*pi / 1440 == pi / (4*180)
    y_test_error['MEAN_MOTION'] = y_test_error['MEAN_MOTION']*np.pi/(4*180)

    # Calculate position based on predicted values
    y_test_error['satpos_calc'] = y_test_error.apply(get_satpos, axis=1)

    # Get the error between calculated position and TLE position
    y_test_error['pos_predict_error'] = y_test_error.apply(get_mserror, axis=1)

    return y_test_error

y_predict_error = get_predicted_error(results)

In [None]:
# Summary statistics of the per-row squared position error of the model's predictions
y_predict_error['pos_predict_error'].describe()

In [None]:
def get_propigated_error():
    '''
    Return the mean-squared error between the reference TLE propagated to the
    target epoch and the actual (target) TLE's satellite position.

    Uses the notebook-level ``X_all``, ``y_all`` and ``X_test``.

    Returns
    -------
    DataFrame
        The ``X_all`` rows selected by ``X_test.index`` with three extra columns:
        ``satpos_target`` (the target row's ``satpos`` from ``y_all``),
        ``satpos_prop`` (the reference ``satobj`` evaluated at
        ``target_epoch_jd`` / ``target_epoch_fr``) and ``pos_propigate_error``
        (per-row mean squared difference of the position components).
    '''
    def get_satpos(x):
        return np.array(x.satobj.sgp4(x.target_epoch_jd, x.target_epoch_fr)[1])

    def get_mserror(x):
        # Compare with the target TLE's position. X_all's own 'satpos' is the
        # reference TLE evaluated at the reference epoch, so using it would
        # measure how far the satellite moved rather than the propagation error.
        return ((x['satpos_target']-x['satpos_prop'])**2).mean()

    # Copy so the new columns are not written onto a slice of X_all
    X_propigation_error = X_all.loc[X_test.index].copy()
    # X_all and y_all are row-aligned, so the same labels select the matching targets
    X_propigation_error['satpos_target'] = y_all.loc[X_test.index, 'satpos']
    X_propigation_error['satpos_prop'] = X_propigation_error.apply(get_satpos, axis=1)
    X_propigation_error['pos_propigate_error'] = X_propigation_error.apply(get_mserror, axis=1)

    return X_propigation_error

# Propagation baseline evaluated on the same held-out rows as the model (selected via X_test.index)
X_prop_error = get_propigated_error()

In [None]:
# Summary statistics of the per-row squared position error of the propagation baseline
X_prop_error['pos_propigate_error'].describe()

In [None]:
# Report the square root of the mean per-row squared position error for each approach
propagation_rmse = np.sqrt(X_prop_error['pos_propigate_error'].mean())
print(f'Mean Error in Propigation {propagation_rmse}')
prediction_rmse = np.sqrt(y_predict_error['pos_predict_error'].mean())
print(f'Mean Error in Prediction {prediction_rmse}')