In [1]:
#############################################################################################################
# SCRIPT TO CREATE DATASET INPUT FOR THE LIGHT GBM MODEL
# IT SAMPLES AN EQUALLY DISTRIBUTED DATAFRAME CONSIDERING ONLY RISKY EVENTS
# RISKY EVENTS ARE THOSE WHOSE COLLISSION_PROBABILITY IS GREATER THAN 10 E-6
##############################################################################################################
import pandas as pd
import datetime as dt
import numpy as np
import os

from preparing_data import *



# Load the raw training data.
df = pd.read_csv("./data/train_data.csv")

# Convert the Kelvin dataset to CDM format to simulate the actual input.
cdm = convertKelvinDatasetToCDMFormat(df)

# Drop rows with a null OBJECT2_TIME_LASTOB_START: the time conversions below need it.
# Reassigning (instead of inplace=True) keeps this cell re-runnable without mutating
# whatever object the helper returned.
cdm = cdm.dropna(subset=["OBJECT2_TIME_LASTOB_START"])

# Convert time strings to datetimes.
cdm = convertTimestringToTimedate(cdm)
# Convert datetimes to a range in days.
cdm = convertTimedateToDaysRange(cdm)
# Convert the risk from logarithmic scale to the natural scale used by the
# collision probability in the CDMs.
cdm = convertPCto10logaritmicscale(cdm)

# Drop rows with nulls in any of the remaining columns.
cdm = cdm.dropna()

# Drop non-numeric columns. select_dtypes(exclude="number") returns the NON-numeric
# part of the frame, so take its column labels explicitly before dropping them.
non_numeric_cols = cdm.select_dtypes(exclude="number").columns
cdm = cdm.drop(columns=non_numeric_cols)

print("Adding correlation matrix elements to the dataframe \n")

# Calculate and add correlation columns to improve the machine learning model.
cdm = addCorrelationColumns(cdm)

# Delete the non-diagonal elements of the covariance matrix.
print("Deleting covariance matrix elements from the dataframe \n")

cdm = deleteCovarianceNonDiagonalElements(cdm)
print("Dataframe size without feature engineering {} x {}".format(cdm.shape[0], cdm.shape[1]))

# Drop the observation-window columns, which are not needed by the model.
cdm = cdm.drop(
    columns=[
        "OBJECT1_TIME_LASTOB_START",
        "OBJECT1_TIME_LASTOB_END",
        "OBJECT2_TIME_LASTOB_START",
        "OBJECT2_TIME_LASTOB_END",
    ]
)

# Reorder columns so that __time_to_tca comes first.
cdm = cdm[["__time_to_tca"] + [col for col in cdm.columns if col != "__time_to_tca"]]

# Sort by event_id ascending, then by __time_to_tca descending within each event.
cdm = cdm.sort_values(by=["event_id", "__time_to_tca"], ascending=[True, False])

Adding correlation matrix elements to the dataframe 

Deleting covariance matrix elements from the dataframe 

Dataframe size without feature engineering 153393 x 81


In [2]:
# Build the `prueba` ("test") dataframe from the prepared CDM frame using the
# CreateSingleRowEventDataFrame helper imported from preparing_data.
# NOTE(review): presumably this collapses each event's CDMs into a single row;
# the meaning of the positional 200 and of progress_indicator=50 is defined in
# preparing_data and is not visible here — TODO confirm.
# The recorded run of this cell took about 1.5 minutes and the helper reported
# saving a timestamped .pkl file in the working directory.
prueba=CreateSingleRowEventDataFrame(cdm,200,progress_indicator=50)

Creating dataframe...
 Starting at: 2021-12-25 11:40:05.790223
Dataframe successfully created...
Dimension: 132 x 382
Finished at: 2021-12-25 11:41:28.111487
Total time elapsed: 0h 1min 22sec.
Saving dataframe for future usage filename = full_dataframe_20211225_114128.pkl
Dataframe was successfully saved at working directory: /home/esteban/automatic-collision-avoidance/automatic-collision-detection


In [3]:
# Sanity check: print the (rows, columns) shape of `prueba`, then display its
# first rows as the cell output.
print(prueba.shape)
prueba.head()

(132, 382)


Unnamed: 0,__time_to_tca,event_id,MISS_DISTANCE,RELATIVE_SPEED,RELATIVE_POSITION_R,RELATIVE_POSITION_T,RELATIVE_POSITION_N,RELATIVE_VELOCITY_R,RELATIVE_VELOCITY_T,RELATIVE_VELOCITY_N,...,OBJECT2_CORR_CTDOT_R_4,OBJECT2_CORR_CTDOT_T_4,OBJECT2_CORR_CTDOT_N_4,OBJECT2_CORR_CTDOT_RDOT_4,OBJECT2_CORR_CNDOT_R_4,OBJECT2_CORR_CNDOT_T_4,OBJECT2_CORR_CNDOT_N_4,OBJECT2_CORR_CNDOT_RDOT_4,OBJECT2_CORR_CNDOT_TDOT_4,RELATIVE_VELOCITY_N_5
0,1.883868,2.0,18753.0,14347.0,-700.1,-5172.4,18012.1,14.4,-13791.4,-3957.2,...,-0.999926,0.378319,-0.845081,-0.301615,0.901438,-0.372185,0.971804,0.2585,-0.901937,-3957.2
1,1.900422,3.0,23982.0,13574.0,22.3,10104.3,21749.5,-39.7,-12310.9,5718.9,...,-0.999511,-0.675979,-0.776708,0.66741,0.911423,0.652982,0.860347,-0.641016,-0.9119,5718.9
2,1.797727,4.0,23709.0,12093.0,188.2,-13921.3,19191.2,19.7,-9788.5,-7101.8,...,-0.999995,-0.973987,0.792263,0.961697,-0.517752,-0.54337,0.926369,0.531127,0.517949,-7101.8
3,1.754397,5.0,314.0,2001.0,-18.4,-311.4,-42.7,0.5,-268.6,1983.8,...,-0.999942,-0.12133,-0.007675,0.161742,0.087987,0.03435,0.360326,-0.070074,-0.088322,1983.8
4,1.690361,6.0,10978.0,10027.0,132.6,-8226.7,-7268.6,8.8,-6638.6,7515.2,...,-0.99934,0.352616,-0.301764,-0.332939,0.229495,-0.077586,0.700588,0.085,-0.229904,7515.2


In [4]:
# Print every column label of `prueba` as a plain Python list.
print(prueba.columns.tolist())

['__time_to_tca', 'event_id', 'MISS_DISTANCE', 'RELATIVE_SPEED', 'RELATIVE_POSITION_R', 'RELATIVE_POSITION_T', 'RELATIVE_POSITION_N', 'RELATIVE_VELOCITY_R', 'RELATIVE_VELOCITY_T', 'RELATIVE_VELOCITY_N', 'COLLISSION_PROBABILITY', 'OBJECT1_CR_R', 'OBJECT1_CT_T', 'OBJECT1_CN_N', 'OBJECT1_CRDOT_RDOT', 'OBJECT1_CTDOT_TDOT', 'OBJECT1_CNDOT_NDOT', 'OBJECT1_RECOMMENDED_OD_SPAN', 'OBJECT1_ACTUAL_OD_SPAN', 'OBJECT1_OBS_AVAILABLE', 'OBJECT1_OBS_USED', 'OBJECT1_RESIDUALS_ACCEPTED', 'OBJECT1_WEIGHTED_RMS', 'OBJECT1_SEDR', 'OBJECT1_CD_AREA_OVER_MASS', 'OBJECT1_CR_AREA_OVER_MASS', 'OBJECT1_APOGEE_ALTITUDE', 'OBJECT1_PERIGEE_ALTITUDE', 'OBJECT1_INCLINATION', 'OBJECT2_CR_R', 'OBJECT2_CT_T', 'OBJECT2_CN_N', 'OBJECT2_CRDOT_RDOT', 'OBJECT2_CTDOT_TDOT', 'OBJECT2_CNDOT_NDOT', 'OBJECT2_RECOMMENDED_OD_SPAN', 'OBJECT2_ACTUAL_OD_SPAN', 'OBJECT2_OBS_AVAILABLE', 'OBJECT2_OBS_USED', 'OBJECT2_RESIDUALS_ACCEPTED', 'OBJECT2_WEIGHTED_RMS', 'OBJECT2_SEDR', 'OBJECT2_CD_AREA_OVER_MASS', 'OBJECT2_CR_AREA_OVER_MASS', 'OBJE

In [8]:
# Select every CDM row that belongs to event_id 3 for manual inspection.
one_event = cdm.loc[cdm["event_id"] == 3]

In [9]:
# Display the rows selected into `one_event` (rich DataFrame output of the cell).
one_event

Unnamed: 0,__time_to_tca,event_id,MISS_DISTANCE,RELATIVE_SPEED,RELATIVE_POSITION_R,RELATIVE_POSITION_T,RELATIVE_POSITION_N,RELATIVE_VELOCITY_R,RELATIVE_VELOCITY_T,RELATIVE_VELOCITY_N,...,OBJECT2_CORR_CRDOT_N,OBJECT2_CORR_CTDOT_R,OBJECT2_CORR_CTDOT_T,OBJECT2_CORR_CTDOT_N,OBJECT2_CORR_CTDOT_RDOT,OBJECT2_CORR_CNDOT_R,OBJECT2_CORR_CNDOT_T,OBJECT2_CORR_CNDOT_N,OBJECT2_CORR_CNDOT_RDOT,OBJECT2_CORR_CNDOT_TDOT
29,6.950088,3,29654.0,13574.0,19.2,12494.9,26893.3,-42.3,-12311.1,5718.7,...,-0.050083,-0.813999,0.207397,-0.818841,-0.207178,0.507124,0.124765,0.970631,-0.124688,-0.840472
30,6.591328,3,28462.0,13574.0,28.2,11987.9,25815.2,-41.8,-12311.1,5718.7,...,-0.05047,-0.817824,0.200633,-0.820201,-0.200408,0.518152,0.123383,0.97073,-0.123303,-0.842971
31,6.218261,3,28496.0,13574.0,24.4,12005.1,25844.5,-41.8,-12311.1,5718.7,...,-0.050457,-0.817667,0.201171,-0.820086,-0.200946,0.51749,0.123461,0.970724,-0.123382,-0.842783
32,5.848699,3,29373.0,13574.0,14.7,12374.4,26639.5,-42.2,-12311.1,5718.7,...,-0.050209,-0.814434,0.20779,-0.818698,-0.20757,0.507592,0.124502,0.970653,-0.124424,-0.840427
33,5.53223,3,29181.0,13574.0,16.0,12291.7,26466.2,-42.1,-12311.1,5718.7,...,-0.050273,-0.814962,0.206624,-0.818937,-0.206403,0.509322,0.124276,0.970669,-0.124198,-0.840858
34,5.169648,3,29202.0,13574.0,18.2,12300.9,26485.4,-42.1,-12311.1,5718.7,...,-0.050257,-0.815074,0.207144,-0.818823,-0.206923,0.509035,0.124323,0.970665,-0.124245,-0.840682
35,4.870712,3,30024.0,13574.0,9.8,12649.7,27229.4,-42.4,-12311.1,5718.7,...,-0.050056,-0.812617,0.213617,-0.817426,-0.2134,0.500249,0.125276,0.970601,-0.1252,-0.838354
36,4.577064,3,24809.0,13574.0,136.7,10452.5,22499.9,-40.1,-12311.3,5718.4,...,-0.014933,-0.978625,0.076234,-0.948868,-0.081122,0.889809,0.025069,0.975491,-0.022132,-0.930122
37,4.241684,3,23811.0,13574.0,25.2,10032.7,21595.0,-39.7,-12310.9,5718.9,...,0.037754,-0.968048,0.20393,-0.899976,-0.213701,0.714923,-0.006718,0.811578,0.011682,-0.77878
38,3.929682,3,23853.0,13574.0,25.1,10045.9,21634.4,-39.7,-12310.9,5718.9,...,0.037407,-0.96686,0.205986,-0.899631,-0.215663,0.712364,-0.006161,0.811576,0.01108,-0.778356
