## Data Preprocessing

In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import joblib

In [35]:
# Load the dataset
data = pd.read_excel('data/space_traffic.xlsx')

In [36]:
# Check for missing values in the dataset
missing_values = data.isnull().sum()

# Display the missing values count for each column
print(missing_values)


Timestamp          0
Location           0
Object_Type        0
Traffic_Density    0
Peak_Time          0
dtype: int64


In [37]:
# Clean column names by stripping any extra spaces or special characters
data.columns = data.columns.str.strip()


In [38]:
# Convert 'Timestamp' to datetime format
data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce')

# Extract Year, Month, Day, and Time from the Timestamp
data['Year'] = data['Timestamp'].dt.year
data['Month'] = data['Timestamp'].dt.month
data['Day'] = data['Timestamp'].dt.day
data['Time'] = data['Timestamp'].dt.strftime('%H:%M:%S')


In [39]:
# One-hot encoding on 'Object_Type'
data_encoded = pd.get_dummies(data, columns=['Object_Type'], drop_first=False)
data_encoded['Object_Type'] = data['Object_Type']
data = data_encoded.copy()

In [40]:
# Label Encoding on 'Location'
label_encoder = LabelEncoder()

# Fit the label encoder and transform the 'Location' column
data['Location_Encoded'] = label_encoder.fit_transform(data['Location'])

# Check the encoded data and label mapping
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

print(data[['Location', 'Location_Encoded']])
print("\nLabel Mapping:")
print(label_mapping)

# Save the LabelEncoder object 
joblib.dump(label_encoder, 'model/label_encoder.joblib')

print("Label Encoder saved successfully.")


                Location  Location_Encoded
0      Lagrange Point L2                 1
1              Orbit LEO                 4
2              Orbit LEO                 4
3    Mars Transfer Orbit                 2
4      Lagrange Point L1                 0
..                   ...               ...
995            Orbit MEO                 5
996    Lagrange Point L1                 0
997            Orbit MEO                 5
998    Lagrange Point L2                 1
999    Lagrange Point L2                 1

[1000 rows x 2 columns]

Label Mapping:
{'Lagrange Point L1': np.int64(0), 'Lagrange Point L2': np.int64(1), 'Mars Transfer Orbit': np.int64(2), 'Orbit GEO': np.int64(3), 'Orbit LEO': np.int64(4), 'Orbit MEO': np.int64(5)}
Label Encoder saved successfully.


In [41]:
data.head()

Unnamed: 0,Timestamp,Location,Traffic_Density,Peak_Time,Year,Month,Day,Time,Object_Type_Asteroid Mining Ship,Object_Type_Manned Spacecraft,Object_Type_Satellite,Object_Type_Scientific Probe,Object_Type_Space Debris,Object_Type_Space Station,Object_Type,Location_Encoded
0,2024-10-21 21:00:00,Lagrange Point L2,17,15:00:00,2024,10,21,21:00:00,False,False,False,False,False,True,Space Station,1
1,2024-10-11 05:00:00,Orbit LEO,21,15:00:00,2024,10,11,05:00:00,False,False,True,False,False,False,Satellite,4
2,2024-10-29 13:00:00,Orbit LEO,88,06:00:00,2024,10,29,13:00:00,False,False,False,False,False,True,Space Station,4
3,2024-10-24 08:00:00,Mars Transfer Orbit,65,08:00:00,2024,10,24,08:00:00,False,False,False,True,False,False,Scientific Probe,2
4,2024-10-23 17:00:00,Lagrange Point L1,9,06:00:00,2024,10,23,17:00:00,False,False,False,False,False,True,Space Station,0


In [42]:
data.shape

(1000, 16)

In [43]:
data.to_excel('data/processed_data.xlsx')

## Train Model

In [44]:
X = data_encoded.drop(columns=['Traffic_Density', 'Timestamp', 'Location', 'Peak_Time', 'Time'])
y = data_encoded['Traffic_Density']

In [45]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [46]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# Define the features and target
features = ['Location_Encoded','Year', 'Month', 'Day', 
            'Object_Type_Asteroid Mining Ship', 'Object_Type_Manned Spacecraft', 
            'Object_Type_Satellite', 'Object_Type_Scientific Probe', 
            'Object_Type_Space Debris', 'Object_Type_Space Station']
target = 'Traffic_Density'

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)

# Initialize the RandomForestRegressor
random_forest_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model
random_forest_model.fit(X_train, y_train)

# Predict on the training set
y_pred_train = random_forest_model.predict(X_train)

# Evaluate the model on the training set using various metrics
mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)

print(f"Training Set Mean Squared Error: {mse_train}")
print(f"Training Set Mean Absolute Error: {mae_train}")
print(f"Training Set R-squared: {r2_train}")

# Predict on the test set
y_pred_test = random_forest_model.predict(X_test)

# Evaluate the model on the test set using various metrics
mse_test = mean_squared_error(y_test, y_pred_test)
mae_test = mean_absolute_error(y_test, y_pred_test)
r2_test = r2_score(y_test, y_pred_test)

print(f"Test Set Mean Squared Error: {mse_test}")
print(f"Test Set Mean Absolute Error: {mae_test}")
print(f"Test Set R-squared: {r2_test}")

# Save the trained model
joblib.dump(random_forest_model, 'model/RandomForestRegressor.joblib')

print("Model RandomForestRegressor saved in 'model/RandomForestRegressor.joblib'")

Training Set Mean Squared Error: 318.2471972649969
Training Set Mean Absolute Error: 13.869703314879565
Training Set R-squared: 0.5922010105979878
Test Set Mean Squared Error: 936.9052801443287
Test Set Mean Absolute Error: 25.420753253968254
Test Set R-squared: -0.17004112185484233
Model RandomForestRegressor saved in 'model/RandomForestRegressor.joblib'


In [47]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# Define the features and target
features = ['Location_Encoded', 'Year', 'Month', 'Day', 
            'Object_Type_Asteroid Mining Ship', 'Object_Type_Manned Spacecraft', 
            'Object_Type_Satellite', 'Object_Type_Scientific Probe', 
            'Object_Type_Space Debris', 'Object_Type_Space Station']
target = 'Traffic_Density'

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)

# Initialize the LinearRegression model
linear_regression_model = LinearRegression()

# Train the model
linear_regression_model.fit(X_train, y_train)

# Predict on the training set
y_pred_train = linear_regression_model.predict(X_train)

# Evaluate the model on the training set using various metrics
mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)

print(f"Training Set Mean Squared Error: {mse_train}")
print(f"Training Set Mean Absolute Error: {mae_train}")
print(f"Training Set R-squared: {r2_train}")

# Predict on the test set
y_pred_test = linear_regression_model.predict(X_test)

# Evaluate the model on the test set using various metrics
mse_test = mean_squared_error(y_test, y_pred_test)
mae_test = mean_absolute_error(y_test, y_pred_test)
r2_test = r2_score(y_test, y_pred_test)

print(f"Test Set Mean Squared Error: {mse_test}")
print(f"Test Set Mean Absolute Error: {mae_test}")
print(f"Test Set R-squared: {r2_test}")

# Save the trained model
joblib.dump(linear_regression_model, 'model/LinearRegression.joblib')

print("Model LinearRegression saved in 'model/LinearRegression.joblib'")

Training Set Mean Squared Error: 774.2231233170689
Training Set Mean Absolute Error: 23.637366383894136
Training Set R-squared: 0.007917713105666468
Test Set Mean Squared Error: 802.3211727159644
Test Set Mean Absolute Error: 24.731522353621543
Test Set R-squared: -0.001967632061873781
Model LinearRegression saved in 'model/LinearRegression.joblib'


In [48]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# Define the features and target
features = ['Location_Encoded', 'Year', 'Month', 'Day', 
            'Object_Type_Asteroid Mining Ship', 'Object_Type_Manned Spacecraft', 
            'Object_Type_Satellite', 'Object_Type_Scientific Probe', 
            'Object_Type_Space Debris', 'Object_Type_Space Station']
target = 'Traffic_Density'

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)

# Initialize the SVR model
svr_model = SVR(kernel='rbf')

# Train the model
svr_model.fit(X_train, y_train)

# Predict on the training set
y_pred_train = svr_model.predict(X_train)

# Evaluate the model on the training set using various metrics
mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)

print(f"Training Set Mean Squared Error: {mse_train}")
print(f"Training Set Mean Absolute Error: {mae_train}")
print(f"Training Set R-squared: {r2_train}")

# Predict on the test set
y_pred_test = svr_model.predict(X_test)

# Evaluate the model on the test set using various metrics
mse_test = mean_squared_error(y_test, y_pred_test)
mae_test = mean_absolute_error(y_test, y_pred_test)
r2_test = r2_score(y_test, y_pred_test)

print(f"Test Set Mean Squared Error: {mse_test}")
print(f"Test Set Mean Absolute Error: {mae_test}")
print(f"Test Set R-squared: {r2_test}")

# Save the trained model
joblib.dump(svr_model, 'model/SVR.joblib')

print("Model SVR saved in 'model/SVR.joblib'")

Training Set Mean Squared Error: 780.6619779905138
Training Set Mean Absolute Error: 23.771465830051067
Training Set R-squared: -0.00033297519984865076
Test Set Mean Squared Error: 800.7477340473764
Test Set Mean Absolute Error: 24.73196848974163
Test Set R-squared: -2.6650753703894026e-06
Model SVR saved in 'model/SVR.joblib'


## Testing Prediction

In [49]:
import joblib
import pandas as pd

# Load the trained RandomForestRegressor model
model = joblib.load('model/RandomForestRegressor.joblib')

# Load the trained LabelEncoder
location_encoder = joblib.load('model/label_encoder.joblib')  # Updated to match the correct saved file name

# Example input data for prediction (replace this with actual input data)
input_data = {
    'Location': ['Lagrange Point L2'],  
    'Year': [2024],
    'Month': [10],
    'Day': [21],
    'Object_Type_Asteroid Mining Ship': [False],
    'Object_Type_Manned Spacecraft': [False],
    'Object_Type_Satellite': [False],
    'Object_Type_Scientific Probe': [False],
    'Object_Type_Space Debris': [False],
    'Object_Type_Space Station': [True]
}

# Convert the input data into a pandas DataFrame
input_df = pd.DataFrame(input_data)

# Encode the 'Location' column using the loaded LabelEncoder
try:
    input_df['Location_Encoded'] = location_encoder.transform(input_df['Location'])
except ValueError as e:
    print(f"Error encoding location: {e}. Ensure the input data matches the training data locations.")

# Drop the 'Location' column (as it's no longer needed after encoding)
input_df = input_df.drop('Location', axis=1)

# Ensure the column order matches the model's training data
columns_order = ['Location_Encoded','Year', 'Month', 'Day', 'Object_Type_Asteroid Mining Ship', 'Object_Type_Manned Spacecraft', 
            'Object_Type_Satellite', 'Object_Type_Scientific Probe', 'Object_Type_Space Debris', 
            'Object_Type_Space Station']

input_df = input_df[columns_order]

# Make prediction using the trained model
prediction = model.predict(input_df)

# Display the prediction result
print("Predicted Traffic Density:", prediction[0])


Predicted Traffic Density: 20.983
