In [10]:
from src.utils.check_mps_device import check_mps_device

from src.utils.data_loading import load_data
from src.utils.filtering import filter_data

from src.utils.label_encoding import label_encode_column

import numpy as np

# Check if PyTorch Multi-Process Service (MPS) is available (GPU)
check_mps_device()

tensor([1.], device='mps:0')


In [11]:
selected_features = [
    "geocentric_latitude",  # Latitude of conjunction point [deg]
    "c_sigma_rdot",  # covariance; radial velocity standard deviation (sigma) of chaser [m/s]
    "c_obs_used",  # number of observations used for orbit determination (per CDM) of chaser
    "c_time_lastob_start",
    # start of the time in days of the last accepted observation used in the orbit determination of chaser
    "c_time_lastob_end",
    # end of the time interval in days of the last accepted observation used in the orbit determination of chaser
    "mahalanobis_distance",  # The distance between the chaser and target
    "miss_distance",  # relative position between chaser & target at tca [m
    "time_to_tca",  # Time interval between CDM creation and time-of-closest approach [days]
    "t_cndot_r",
    # covariance; correlation of normal (cross-track) velocity vs radial position of chaser
    "c_cr_area_over_mass",
    # solar radiation coefficient . A/m (ballistic coefficient equivalent) of chaser
    "max_risk_estimate",  # maximum collision probability obtained by scaling combined covariance
    "c_span",  # size used by the collision risk computation algorithm of chaser [m]
    "max_risk_scaling",  # scaling factor used to compute maximum collision probability
    "t_rcs_estimate",  # radar cross-sectional area [m2m2] of target
    "c_sigma_t",
    # covariance; transverse (along-track) position standard deviation (sigma) of chaser [m]
    "c_obs_available",  # number of observations available for orbit determination (per CDM),
    "risk",
]

In [12]:
# Load and data and filter it
df = load_data()
df_filtered = filter_data(df)

# Remove rows with missing values
df_filtered.dropna(axis=0, how="any", inplace=True)

# Label encode the categorical column "c_object_type"
label_encode_column(df_filtered, "c_object_type")

# Call the function to get the processed DataFrame
df_processed = df_filtered[selected_features]

# Separate features and target variable
X = df_processed.drop('risk', axis=1)  # features (17)
y = df_processed['risk']  # target

Raw data
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162634 entries, 0 to 162633
Columns: 103 entries, event_id to AP
dtypes: float64(98), int64(4), object(1)
memory usage: 127.8+ MB
Filtered data
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3032 entries, 0 to 3031
Columns: 103 entries, event_id to AP
dtypes: float64(98), int64(4), object(1)
memory usage: 2.4+ MB


In [13]:
from sklearn.model_selection import train_test_split

y_class = np.where(y >= -6, 1, 0)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True,
                                                    stratify=y_class)
# 
# X_train, X_val, y_train_real, y_val_real = train_test_split(X_t, y_t_real, 
#                                                             test_size=0.20, 
#                                                             random_state=21,  
#                                                             shuffle=True,
#                                                             stratify=y_t_class)
# 
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(1058, 16)
(1058,)
(265, 16)
(265,)


In [14]:
from imblearn.over_sampling import SMOTE

# Apply SMOTE to the training data to balance the classes
y_train = np.where(y_train >= -6, 1, 0)

smote = SMOTE(random_state=42, k_neighbors=20)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [15]:
from sklearn.preprocessing import StandardScaler

# Perform scaling after splitting the data
scaler = StandardScaler()

# Fit the scaler on the training set
X_train_final = scaler.fit_transform(X_train_resampled)
y_train_final = y_train_resampled

X_test_final = scaler.transform(X_test)
y_test_final = y_test
# # Display the shapes of the sets
print("Testing set shape:", X_test_final.shape, y_test_final.shape)
print("Training set shape:", X_train_final.shape, y_train_final.shape)

Testing set shape: (265, 16) (265,)
Training set shape: (1962, 16) (1962,)


In [16]:
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense

# Initialize and define the neural network model
model = Sequential([
    Dense(64, input_dim=X_train_final.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer
])

model_name = 'Feed Forward Neural Network'
# Compile the neural network model
model.compile(optimizer='adam', loss='mean_squared_error')

In [17]:
# Train the neural network model
model.fit(X_train_final, y_train_final, epochs=10, batch_size=32, verbose=1)

# Evaluate the neural network model on the test data
mse_nn = model.evaluate(X_test_final, y_test_final)
print("Neural Network Mean Squared Error:", mse_nn)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Neural Network Mean Squared Error: 241.5624542236328


In [18]:
from src.utils.f_beta import calculate_f_beta_sklearn

# Create dictionaries to store results
f_beta_scores = {}
mse_hr_scores = {}


# Predict on the test set
y_pred_test = model.predict(X_test_final)
y_pred = np.where(y_pred_test >= -6, -5, -6.00001)

# Check the shapes before calculating the scores
print(f"Shapes before calculation for {model_name}:")
print("y_test_final shape:", y_test_final.shape)
print("y_pred shape:", y_pred.shape)

# Calculate F-beta and MSE_HR scores
score2, f_beta2, mse_hr2 = calculate_f_beta_sklearn(y_test_final, y_pred, beta=2, threshold=-6)

print(f"Best Model: {model_name}")
print(f"Score: {score2}")
print(f"F-beta Score: {f_beta2}")
print(f"MSE_HR Score: {mse_hr2}")

Shapes before calculation for Feed Forward Neural Network:
y_test_final shape: (265,)
y_pred shape: (265, 1)
Best Model: Feed Forward Neural Network
Score: 1.289534146107888
F-beta Score: 0.2785923753665689
MSE_HR Score: 0.3592543808804966


In [19]:
print(y_pred)

[[-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]
 [-5.]

In [20]:
print(y_pred_test)

[[ 3.15610319e-01]
 [-3.35189961e-02]
 [-1.41815871e-01]
 [-5.40789329e-02]
 [-6.40127510e-02]
 [ 6.70694351e-01]
 [-2.82474421e-02]
 [-1.55452192e-01]
 [-6.67191446e-02]
 [-5.66445105e-02]
 [ 1.04958177e-01]
 [ 3.08476716e-01]
 [ 2.40786538e-01]
 [ 8.82634893e-03]
 [ 1.77726783e-02]
 [ 7.20064044e-01]
 [ 9.74106789e-01]
 [-9.33485925e-02]
 [-1.04119517e-02]
 [ 7.40434527e-01]
 [ 1.88381672e-01]
 [ 4.71651345e-01]
 [ 2.56069273e-01]
 [ 4.38093811e-01]
 [ 3.35618585e-01]
 [ 1.14436030e-01]
 [ 1.62418224e-02]
 [ 5.85043803e-03]
 [ 9.82493758e-02]
 [ 7.25975037e-01]
 [ 9.46916342e-02]
 [ 6.47541165e-01]
 [ 2.65810281e-01]
 [ 3.60486299e-01]
 [ 1.36392787e-01]
 [-1.94764525e-01]
 [ 2.46146113e-01]
 [-5.21868728e-02]
 [ 4.25952286e-01]
 [-7.26324171e-02]
 [ 3.01229358e-01]
 [ 9.37167406e-02]
 [ 3.79870355e-01]
 [-1.65721148e-01]
 [-3.73656787e-02]
 [ 3.65808047e-02]
 [ 3.63743812e-01]
 [ 8.16834420e-02]
 [ 1.03699207e-01]
 [-7.28254318e-02]
 [-2.18974315e-02]
 [ 1.25081226e-01]
 [-6.2430489

In [21]:
print(y_test_final)

2790    -8.111821
1921   -30.000000
1990   -30.000000
556    -10.236048
1829    -9.498804
          ...    
249     -5.264880
2387   -30.000000
1391    -5.813326
2202   -11.787280
1982    -4.388595
Name: risk, Length: 265, dtype: float64
