In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [2]:
import data_handling as dh

In [3]:
# Columns requested from the training CSV (handed to dh.load_data with `path`).
fields = [
    # Event identification and timing
    "event_id",
    "time_to_tca",
    # Conjunction summary values
    "miss_distance",
    "risk",
    "relative_speed",
    # Relative position components
    "relative_position_r",
    "relative_position_t",
    "relative_position_n",
    # Relative velocity components
    "relative_velocity_r",
    "relative_velocity_t",
    "relative_velocity_n",
    "mahalanobis_distance",
]
# Location of the training data file, relative to the notebook.
path = "./data/train_data.csv"

In [4]:
# Load the training data through the project helper, passing the CSV path and
# the list of columns of interest.
df=dh.load_data(path,fields)

In [5]:
# Convert the risk values from log scale to decimal; the preview of `df` below
# shows a `pc` column and no `risk` column after this step.
# NOTE(review): `df` is rebound from itself, so re-running this cell on its own
# would feed already-converted data to the helper — confirm that is harmless.
df=dh.convert_pc_from_log_to_dec(df)

In [6]:
df.head()

Unnamed: 0,event_id,time_to_tca,miss_distance,relative_speed,relative_position_r,relative_position_t,relative_position_n,relative_velocity_r,relative_velocity_t,relative_velocity_n,mahalanobis_distance,pc
0,0,1.566798,14923.0,13792.0,453.8,5976.6,-13666.8,-7.2,-12637.0,-5525.9,129.430951,6.238e-11
1,0,1.207494,14544.0,13792.0,474.3,5821.2,-13319.8,-7.0,-12637.0,-5525.9,271.540424,4.408e-11
2,0,0.952193,14475.0,13792.0,474.6,5796.2,-13256.1,-7.0,-12637.0,-5525.9,347.899292,4.512e-11
3,0,0.579669,14579.0,13792.0,472.7,5838.9,-13350.7,-7.0,-12637.0,-5525.9,435.376626,4.594e-11
4,0,0.257806,14510.0,13792.0,478.7,5811.1,-13288.0,-7.0,-12637.0,-5525.9,469.178802,4.062e-11


In [7]:
from cdm import Event, Cdm
    

In [8]:
# One Event object per distinct event_id, in order of first appearance in df.
events_list = [Event(event_id) for event_id in df.event_id.unique()]

In [None]:
# Build one Cdm object per dataframe row.
# Each name below is both a dataframe column and the Cdm keyword it is passed as;
# the order matches the order in which the keywords are supplied.
cdm_columns = [
    "event_id",
    "time_to_tca",
    "miss_distance",
    "pc",
    "relative_position_r",
    "relative_position_t",
    "relative_position_n",
    "relative_velocity_r",
    "relative_velocity_t",
    "relative_velocity_n",
    "relative_speed",
    "mahalanobis_distance",
]
cdm_list = [
    Cdm(**{column: df.loc[row, column] for column in cdm_columns})
    for row in df.index
]


In [None]:
def get_cdm_from_event_id(event):
    """Collect the attribute dictionaries of every CDM that belongs to one event.

    Args:
        event (int): event_id to look for.

    Returns:
        list: the ``__dict__`` of each object in the module-level ``cdm_list``
        whose ``event_id`` equals ``event``, in ``cdm_list`` order. The list is
        empty when nothing matches. The dictionaries are the objects' own
        attribute dictionaries, not copies.
    """
    return [report.__dict__ for report in cdm_list if report.event_id == event]

In [None]:
def get_closest_cdm_to_tca_by_event_id(event):
    """Return the CDM record of an event that has the smallest ``time_to_tca``.

    Args:
        event (int): event_id whose CDMs are searched.

    Returns:
        dict: attribute dictionary of the selected CDM. When several CDMs share
        the smallest ``time_to_tca``, the one appearing last in the event's CDM
        list is returned.

    Raises:
        IndexError: if the event has no CDMs.
    """
    event_cdms = list(get_cdm_from_event_id(event))
    # Descending (stable) order leaves the smallest time_to_tca at the end.
    event_cdms.sort(key=lambda record: record['time_to_tca'], reverse=True)
    return event_cdms[-1]

In [None]:
# Attribute dictionary of every Event, in the same order as events_list.
events_list_dict = [event_obj.__dict__ for event_obj in events_list]

In [None]:
def classify_with_last_available_cdm(event:int,pc:float=0.00001,md:float=400):
    """Flag whether an event needs a maneuver, judged from its CDM closest to TCA.

    The event is marked as requiring a maneuver when that CDM's ``pc`` is above
    the ``pc`` threshold and its ``miss_distance`` is below the ``md`` threshold.

    Args:
        event (int): event_id; also used as the position in ``events_list_dict``.
        pc (float): threshold the CDM's ``pc`` must exceed.
        md (float): threshold the CDM's ``miss_distance`` must stay under.

    Returns:
        dict: the ``events_list_dict`` entry for the event, updated in place with
        a boolean ``"maneuver_required"`` key.

    NOTE(review): ``events_list_dict`` is a list indexed by ``event``, so the right
    record is only updated if event ids coincide with list positions — confirm.
    """
    last_cdm = get_closest_cdm_to_tca_by_event_id(event)
    maneuver_required = bool(
        last_cdm.get("pc") > pc and last_cdm.get("miss_distance") < md
    )
    event_record = events_list_dict[event]
    event_record["maneuver_required"] = maneuver_required
    return event_record


In [None]:
# Build one feature frame per event: every CDM except the one closest to TCA,
# labelled with that closest CDM's miss distance as the regression target.

# Group the CDM records by event in a single pass over cdm_list. Looking each
# event up with get_cdm_from_event_id() rescans the whole list for every event
# (and did so twice per event), which is quadratic in practice.
cdms_by_event = {}
for report in cdm_list:
    cdms_by_event.setdefault(report.event_id, []).append(report.__dict__)

list_of_dataframes = []
# Iterate over the event ids actually present (first-appearance order) rather
# than range(len(events_list)), which only works if the ids are exactly 0..N-1.
for event, event_cdms in cdms_by_event.items():
    # Stable descending sort: the last row is the CDM with the smallest
    # time_to_tca, and rows with equal time_to_tca keep their original order.
    event_df = pd.DataFrame(event_cdms).sort_values(
        "time_to_tca", ascending=False, kind="mergesort"
    )
    target_md = event_df["miss_distance"].iloc[-1]
    # Drop the row that supplies the target, so it can never remain among the
    # features even when the input is not already ordered by time_to_tca.
    event_df = event_df.iloc[:-1, :].copy()
    event_df["TARGET_MD"] = target_md
    list_of_dataframes.append(event_df)


In [None]:
# Stack the per-event frames into a single dataset. pd.concat keeps each frame's
# own index labels by default, so labels may repeat across events.
result = pd.concat(list_of_dataframes)


In [None]:
result.head()

In [None]:
result.tail()

In [None]:
# Persist the assembled dataset next to the notebook.
result.to_pickle("./dataframe.pkl")

In [9]:
# Load a previously pickled dataframe.
# NOTE(review): this reads "./dataframe_prueba.pkl", not the "./dataframe.pkl"
# written by the preceding cell, and the modelling cells below use `result`
# rather than `hola` — confirm which file/variable is intended.
# Only unpickle files you produced yourself: unpickling can execute arbitrary code.
hola=pd.read_pickle("./dataframe_prueba.pkl") 

In [13]:
hola.tail()

Unnamed: 0,event_id,time_to_tca,miss_distance,pc,relative_position_r,relative_position_t,relative_position_n,relative_velocity_r,relative_velocity_t,relative_velocity_n,relative_speed,mahalanobis_distance,TARGET_MD
7,13153,3.408859,2205.0,8.309e-08,-472.5,-159.4,-2148.3,-5.6,-14937.0,1082.7,14976.0,43.161322,2185.0
8,13153,3.029751,2307.0,7.787e-08,-472.1,-159.9,-2253.1,-5.6,-14937.0,1082.7,14976.0,45.789081,2185.0
9,13153,2.799253,2254.0,8.51e-08,-469.7,-163.3,-2199.4,-5.6,-14937.0,1082.7,14976.0,44.748673,2185.0
10,13153,2.385399,2259.0,8.586e-08,-469.3,-158.8,-2204.7,-5.6,-14937.0,1082.7,14976.0,45.020671,2185.0
11,13153,2.043339,2257.0,9.369e-08,-465.2,-160.3,-2203.0,-5.6,-14937.0,1082.7,14976.0,45.2623,2185.0


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    root_mean_squared_error,
)
from sklearn.model_selection import GroupShuffleSplit, train_test_split

In [None]:
# Hold out 30% of the *events* (not 30% of the rows) for testing. Every row of
# an event carries the same TARGET_MD, so a row-level random split would place
# CDMs of one event in both sets and make the test scores overly optimistic.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.30, random_state=42)
train_idx, test_idx = next(splitter.split(result, groups=result["event_id"]))
train, test = result.iloc[train_idx], result.iloc[test_idx]
# The following cells read the training split through the name `df`.
df=train
print("Train dataframe dimension {} x {}".format(train.shape[0],train.shape[1]))
print("Test dataframe dimension {} x {}".format(test.shape[0],test.shape[1]))

In [None]:
# Separate the regression target from the predictors of the training split.
columnt_to_predict_name = "TARGET_MD"
Y_df = df[columnt_to_predict_name]
# NOTE(review): every remaining column, event_id included, is kept as a
# predictor — confirm that an identifier is meant to be a model input.
X_df = df.drop(columns=[columnt_to_predict_name])

In [None]:
# NumPy copies of the training predictors and target, used for fitting.
x = np.array(X_df)
y = np.array(Y_df)

In [None]:
# Fit a linear regression on the training arrays; fit() returns the estimator
# itself, so `model` is the fitted LinearRegression.
model = LinearRegression().fit(x, y)


In [None]:
# Goodness of fit on the training data, followed by the fitted parameters.
r_sq = model.score(x, y)
training_report = (
    ('coefficient of determination:', r_sq),
    ('intercept:', model.intercept_),
    ('slope:', model.coef_),
)
for label, value in training_report:
    print(label, value)

In [None]:
# Separate target and predictors of the held-out split, mirroring the training split.
y_test = test[columnt_to_predict_name]
x_test = test.drop(columns=[columnt_to_predict_name])

In [None]:
x_test.shape


In [None]:
# Show the predictions for the held-out rows. The model was fitted on a plain
# NumPy array, so an array is passed here as well; handing over the DataFrame
# makes scikit-learn warn that X has feature names the estimator was fitted without.
model.predict(np.array(x_test))


In [None]:
# Predictions for the held-out rows. The model was fitted on a plain NumPy
# array, so the test predictors are converted the same way; predicting on the
# DataFrame triggers scikit-learn's "X has feature names" warning.
pred = model.predict(np.array(x_test))


In [None]:
# REGRESSION MODEL METRICS
# Compute MSE with the dedicated (already imported) metric instead of squaring
# the RMSE, and evaluate each metric once.
mse = mean_squared_error(y_test, pred)
rmse = root_mean_squared_error(y_test, pred)
print("The r2 of prediction is:", r2_score(y_test, pred))
print("The MSE of prediction is:", mse)
print("The RMSE of prediction is:", rmse)
print("The MAE of prediction is:", mean_absolute_error(y_test, pred))