In [None]:
import os

import joblib
import numpy as np
import pandas as pd

# File Paths (all relative to the current working directory)
model_path = '../models/voting_model.pkl'
test_data_path = '../data/raw/test.csv'
sample_submission_path = '../data/raw/sample_submission.csv'
scaler_rf_path = '../models/scaler_rf.pkl'
scaler_xgb_path = '../models/scaler_xgb.pkl'
# Single definition of the output location so the write and the log message
# below can never disagree.
submission_path = '../submission/voting_classifier_sample_submission.csv'

# Load Test Data
# read_csv already returns a fresh DataFrame, so no extra copy is needed.
test_data = pd.read_csv(test_data_path)
# ID_code is the row identifier, not a model input.
X_test = test_data.drop(columns=['ID_code'])

# Load Saved Scalers
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts that come
# from a trusted source.
scaler_rf = joblib.load(scaler_rf_path)
scaler_xgb = joblib.load(scaler_xgb_path)

# Define Important Features for Random Forest and XGBoost
# NOTE(review): the order of each list presumably has to match the column order
# the corresponding scaler was fitted on -- confirm against the training code.
rf_features = [
    "var_81", "var_146", "var_12", "var_76", "var_174", "var_34", "var_21", "var_165",
    "var_109", "var_44", "var_166", "var_198", "var_192", "var_148", "var_33", "var_80",
    "var_169", "var_115", "var_92", "var_149", "var_154", "var_121", "var_107", "var_127",
    "var_122", "var_172", "var_177", "var_36", "var_108", "var_75", "var_188", "var_123",
    "var_87", "var_197", "var_86", "var_93", "var_31"
]
xgb_features = [
    "var_6", "var_53", "var_26", "var_110", "var_99", "var_190", "var_133", "var_22",
    "var_179", "var_2", "var_94", "var_40", "var_78", "var_173", "var_184", "var_170",
    "var_0", "var_1", "var_191", "var_67", "var_118", "var_147", "var_18", "var_164",
    "var_89", "var_35", "var_48", "var_95", "var_199", "var_155", "var_32", "var_5",
    "var_91", "var_90", "var_71", "var_157", "var_162", "var_130", "var_135", "var_52"
]

# Selecting by label raises KeyError if any listed feature is missing from the test file.
X_rf_test = X_test[rf_features]
X_xgb_test = X_test[xgb_features]

# Scale Test Data (using saved scalers; transform only, nothing is re-fitted here)
X_rf_test_scaled = scaler_rf.transform(X_rf_test)
X_xgb_test_scaled = scaler_xgb.transform(X_xgb_test)

# Load Voting Model
voting_model = joblib.load(model_path)

# Make Predictions with Model
# The model receives the RF feature block followed by the XGB feature block,
# stacked column-wise.
# NOTE(review): this layout presumably mirrors the training-time input -- confirm.
X_combined_test = np.hstack((X_rf_test_scaled, X_xgb_test_scaled))
# Column 1 of predict_proba is taken as the probability of class 1.
proba_predictions = voting_model.predict_proba(X_combined_test)[:, 1]

# Save Submission File
submission = pd.read_csv(sample_submission_path)

# Predictions are assigned positionally, so the sample submission rows must be
# in the same order as the test rows. Fail loudly instead of silently writing
# probabilities against the wrong IDs.
if 'ID_code' in submission.columns and not np.array_equal(
    submission['ID_code'].to_numpy(), test_data['ID_code'].to_numpy()
):
    raise ValueError(
        "Row IDs/order in the sample submission do not match the test data; "
        "refusing to assign predictions positionally."
    )

submission['target'] = proba_predictions

# to_csv does not create missing parent directories; make sure the target
# folder exists so a finished inference run is not lost to an OSError.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission.to_csv(submission_path, index=False)

print(f"Submission file saved: '{submission_path}'")