In [1]:
import pandas as pd
import joblib
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# Restore the previously saved artifacts: the trained model, the feature
# scaler, and the mapping from status labels to the integer codes the model
# predicts.
# NOTE: joblib.load unpickles the file contents, so these paths must only
# ever point at artifacts from a trusted source.
model, scaler, status_mapping = (
    joblib.load(artifact_path)
    for artifact_path in (
        '../artifacts/best_random_forest_orbital_parameters_model.joblib',
        '../artifacts/scaler.joblib',
        '../artifacts/status_mapping.joblib',
    )
)

# Invert the mapping (integer code -> status label) so that predictions can
# be translated back into readable statuses.
reverse_status_mapping = {
    code: label for label, code in status_mapping.items()
}

# Columns used as model inputs.
features = [
    'period_mins',
    'perigee_km',
    'apogee_km',
    'inclination',
    'object_type',
    'object_owner',
]

# Read the dataset; low_memory=False turns off chunked parsing so each
# column's dtype is inferred from the whole file.
file_path = '../data/combined_df.csv'
data = pd.read_csv(file_path, low_memory=False)

# Fill missing values in the feature columns with each column's most
# frequent value.
# NOTE(review): the imputer is fit on the data being scored rather than
# loaded from the training run -- confirm this matches how the model was
# trained.
# NOTE(review): numeric and string columns are imputed together, so the
# result is presumably an object array and the assigned-back columns may all
# end up with object dtype -- verify, because the dtype check below would
# then label-encode the numeric features as well.
imputer = SimpleImputer(strategy='most_frequent')
data[features] = imputer.fit_transform(data[features])

# Replace every object-typed feature column with integer codes, keeping the
# encoder used for each column in `label_encoders`.
# NOTE(review): the encoders are fit here, so the codes depend on the values
# present in this dataset -- confirm they line up with the training codes.
label_encoders = {}
for col in features:
    if data[col].dtype != 'object':
        continue  # non-object columns are passed through untouched
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Scale the prepared feature columns with the loaded scaler (transform only;
# the scaler is not refit here).
test_data = data[features]
test_data_scaled = scaler.transform(test_data)

# Run the loaded model on the scaled features to get integer status codes.
predictions = model.predict(test_data_scaled)

# Translate each predicted code into its status label; a code missing from
# the reverse mapping raises KeyError.
predicted_statuses = list(
    map(reverse_status_mapping.__getitem__, predictions)
)

# Wrap the labels in a Series and print how often each status was predicted.
predicted_statuses_series = pd.Series(predicted_statuses)
print(predicted_statuses_series.value_counts())

# Print the code -> label mapping for reference.
print(f"Mapping of numbers to statuses: {reverse_status_mapping}")


R      28673
O      27851
L       1096
ERR      925
DK       675
N        289
D        182
E         93
Name: count, dtype: int64
Mapping of numbers to statuses: {0: 'R', 1: 'O', 2: 'N', 3: 'ERR', 4: 'L', 5: 'D', 6: 'E', 7: 'DK'}
