In [1]:
from kessler import EventDataset
from kessler.nn import LSTMPredictor
from kessler.data import kelvins_to_event_dataset
import pandas as pd

# Set the random number generator seed for reproducibility
#kessler.seed(1)

# Data Loading

Kessler accepts CDMs either in KVN format or as pandas DataFrames. The example below loads them from a pandas DataFrame:

In [2]:
#As an example, we first show the case in which the data comes from the Kelvins competition.
#For this, we built a specific converter that takes care of the conversion from Kelvins format
#to standard CDM format (the data can be downloaded at https://kelvins.esa.int/collision-avoidance-challenge/data/):
#file_name = '/home/gunes/data/kelvins/train_data/train_data.csv'
#events = kelvins_to_event_dataset(file_name, drop_features=['c_rcs_estimate', 't_rcs_estimate'], num_events=1000) #we use only 1000 events

In [3]:
# Load the CDM data and build the `events` dataset that every later cell
# (statistics, train/test split, training, prediction) relies on.

# NOTE(review): absolute, machine-specific path; point it to your local copy of train_data.csv.
file_name = '/Users/nicolas/Desktop/Etudes/4_centrale_sup/CentraleSupelec/cours/centralesup_S_1/lab_project/project/dependencies/kessler/data/train_data.csv'

# Raw table, kept for inspection only (preview below and the column listing further down).
df = pd.read_csv(file_name)

# The columns of this file (event_id, time_to_tca, mission_id, risk, ...) follow the
# Kelvins competition naming, so the Kelvins converter is used to obtain CDM-format events.
# Without this assignment `events` is undefined and the notebook fails on a fresh kernel.
# NOTE(review): num_events=1000 limits how many events are loaded; confirm this is the intended subset.
events = kelvins_to_event_dataset(file_name, drop_features=['c_rcs_estimate', 't_rcs_estimate'], num_events=1000)

# For data that is already in standard CDM format, use the generic pandas loader instead:
#events = EventDataset.from_pandas(df)

# Last expression of the cell: preview of the first rows of the raw table.
df.head()

Unnamed: 0,event_id,time_to_tca,mission_id,risk,max_risk_estimate,max_risk_scaling,miss_distance,relative_speed,relative_position_r,relative_position_t,...,t_sigma_rdot,c_sigma_rdot,t_sigma_tdot,c_sigma_tdot,t_sigma_ndot,c_sigma_ndot,F10,F3M,SSN,AP
0,0,1.566798,5,-10.204955,-7.834756,8.602101,14923.0,13792.0,453.8,5976.6,...,0.14735,58.272095,0.004092,0.165044,0.002987,0.386462,89.0,83.0,42.0,11.0
1,0,1.207494,5,-10.355758,-7.848937,8.956374,14544.0,13792.0,474.3,5821.2,...,0.059672,57.966413,0.003753,0.164383,0.002933,0.386393,89.0,83.0,42.0,11.0
2,0,0.952193,5,-10.345631,-7.847406,8.932195,14475.0,13792.0,474.6,5796.2,...,0.039258,57.907599,0.003576,0.164352,0.002967,0.386381,89.0,83.0,42.0,11.0
3,0,0.579669,5,-10.337809,-7.84588,8.913444,14579.0,13792.0,472.7,5838.9,...,0.022066,57.993905,0.003298,0.164309,0.002918,0.3864,89.0,83.0,40.0,14.0
4,0,0.257806,5,-10.39126,-7.852942,9.036838,14510.0,13792.0,478.7,5811.1,...,0.015075,57.946717,0.00367,0.164172,0.00322,0.386388,89.0,83.0,40.0,14.0


# Descriptive Statistics

In [6]:
# List every column label of the raw table, one label per line.
for column_name in df.columns:
    print(column_name)

event_id
time_to_tca
mission_id
risk
max_risk_estimate
max_risk_scaling
miss_distance
relative_speed
relative_position_r
relative_position_t
relative_position_n
relative_velocity_r
relative_velocity_t
relative_velocity_n
t_time_lastob_start
t_time_lastob_end
t_recommended_od_span
t_actual_od_span
t_obs_available
t_obs_used
t_residuals_accepted
t_weighted_rms
t_rcs_estimate
t_cd_area_over_mass
t_cr_area_over_mass
t_sedr
t_j2k_sma
t_j2k_ecc
t_j2k_inc
t_ct_r
t_cn_r
t_cn_t
t_crdot_r
t_crdot_t
t_crdot_n
t_ctdot_r
t_ctdot_t
t_ctdot_n
t_ctdot_rdot
t_cndot_r
t_cndot_t
t_cndot_n
t_cndot_rdot
t_cndot_tdot
c_object_type
c_time_lastob_start
c_time_lastob_end
c_recommended_od_span
c_actual_od_span
c_obs_available
c_obs_used
c_residuals_accepted
c_weighted_rms
c_rcs_estimate
c_cd_area_over_mass
c_cr_area_over_mass
c_sedr
c_j2k_sma
c_j2k_ecc
c_j2k_inc
c_ct_r
c_cn_r
c_cn_t
c_crdot_r
c_crdot_t
c_crdot_n
c_ctdot_r
c_ctdot_t
c_ctdot_n
c_ctdot_rdot
c_cndot_r
c_cndot_t
c_cndot_n
c_cndot_rdot
c_cndot_tdot
t

In [None]:
# Descriptive statistics of the events: convert the dataset to a DataFrame
# and summarise it with describe().
kessler_stats = (
    events
    .to_dataframe()
    .describe()
)
print(kessler_stats)


# LSTM Training

In [None]:
# Only features with numeric content are used for the training.
# nn_features is the list of feature names taken into account for the training;
# edit it to add or remove features. It is passed to LSTMPredictor below.
nn_features = events.common_features(only_numeric=True)
# Show the selected feature names.
print(nn_features)

In [None]:
# Split the data: the last 5% of the events form the test set, everything
# before them is used for training and validation.
len_test_set = int(0.05 * len(events))

# Index of the first test event. An explicit positive index keeps the split
# correct when len_test_set is 0 (fewer than 20 events): the negative slices
# events[-0:] and events[:-0] would put every event in the test set and leave
# the training/validation set empty.
split_index = len(events) - len_test_set

print('Test data:', len_test_set)
events_test = events[split_index:]
print(events_test)

# The rest of the data will be used for training and validation
print('Training and validation data:', split_index)
events_train_and_val = events[:split_index]
print(events_train_and_val)

In [None]:
# Build an LSTM predictor specialised to the feature names in nn_features.
model = LSTMPredictor(
    features=nn_features,  # list of feature names to use in the LSTM
    lstm_size=256,         # number of hidden units per LSTM layer
    lstm_depth=2,          # number of stacked LSTM layers
    dropout=0.2,           # dropout probability
)

# Train on the training/validation events.
model.learn(
    events_train_and_val,
    # Number of epochs (one epoch is one full pass through the training dataset).
    epochs=10,
    # Learning rate; can be decreased if training diverges.
    lr=1e-3,
    # Minibatch size; can be decreased if there are issues with memory use.
    batch_size=16,
    # Can be 'cuda' if there is a GPU available.
    device='cpu',
    # Proportion of the data to use as a validation set internally.
    valid_proportion=0.15,
    # Number of dataloader workers; 4 is good for performance, but if there are
    # any issues or errors, try num_workers=1, which solves most PyTorch issues.
    num_workers=4,
    # Number of events used to compute the NN normalization factors; make this
    # as large as possible (and at least a few thousands).
    event_samples_for_stats=1000,
)

In [None]:
# Save the model to a file after training.
# NOTE(review): the file name mentions 20 epochs and lr 10-4, while the training
# cell uses epochs=10 and lr=1e-3 — confirm which one is intended.
model.save(file_name="LSTM_20epochs_lr10-4_batchsize16")

In [None]:
# Plot the NN loss and write the figure to the file 'plot_loss.pdf'.
model.plot_loss(file_name='plot_loss.pdf')

In [None]:
# Show an example CDM from the training/validation set
# (presumably the first CDM of the first event — TODO confirm the indexing order).
events_train_and_val[0][0]

In [None]:
# Take a single event from the test set, remove its last CDM and try to predict it.
event = events_test[3]
print(event)

# Everything except the final CDM is given to the model as the known history.
event_len = len(event)
event_beginning = event[:event_len - 1]
print(event_beginning)

event_evolution = model.predict_event(
    event_beginning,
    num_samples=100,
    max_length=14,
)

In [None]:
# Prediction drawn in red; the axes are returned so the ground truth can be
# drawn on the same plots.
axs = event_evolution.plot_features(
    ['RELATIVE_SPEED', 'MISS_DISTANCE', 'OBJECT1_CT_T'],
    return_axs=True,
    linewidth=0.1,
    color='red',
    alpha=0.33,
    label='Prediction',
)
# Ground truth values on the same axes (no colour is passed here; blue according
# to the original note, presumably the plotting default).
event.plot_features(
    ['RELATIVE_SPEED', 'MISS_DISTANCE', 'OBJECT1_CT_T'],
    axs=axs,
    label='Real',
    legend=True,
)

In [None]:
# Uncertainty prediction for the covariance matrix elements of both OBJECT1 and
# OBJECT2, drawn in red; the returned axes are reused for the real values.
axs = event_evolution.plot_uncertainty(
    return_axs=True,
    linewidth=0.5,
    label='Prediction',
    alpha=0.5,
    color='red',
    legend=True,
    diagonal=False,
)
# Real values of the same event on the same axes.
event.plot_uncertainty(
    axs=axs,
    label='Real',
    diagonal=False,
)