In [1]:
import joblib
import numpy as np
import pandas as pd

In [2]:
# Step 1: Restore the trained stacked model from disk.
# 'stacked_model.joblib' is resolved relative to the current working directory,
# so the file has to be present there before this cell runs.
# NOTE: joblib files are pickle-based -- only load model files you trust.
# NOTE: the recorded output of this cell shows an XGBoost warning about a model
# serialized by an older XGBoost version; re-exporting it with
# `Booster.save_model` from that version is what the warning recommends.
model_path = 'stacked_model.joblib'
stacked_model = joblib.load(model_path)


configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.



In [3]:
# Step 2: Manually define the feature columns based on the preprocessed data.
# The list is assembled group by group (screening answers, numeric fields,
# then one prefix per one-hot encoded categorical). The resulting order is the
# order the prediction cell uses to align its input columns.
# The spellings 'austim' and 'contry_of_res' are kept exactly as they are --
# presumably they mirror the column names of the training data (TODO confirm).
_answer_columns = [f'A{number}' for number in range(1, 11)]

_ethnicity_levels = [
    'Black', 'Hispanic', 'Latino', 'Middle Eastern', 'Others', 'Pasifika',
    'South Asian', 'Turkish', 'White-European',
]

_country_levels = [
    'AmericanSamoa', 'Angola', 'Armenia', 'Aruba', 'Australia', 'Austria',
    'Bahamas', 'Bangladesh', 'Belgium', 'Bolivia', 'Brazil', 'Burundi',
    'Canada', 'Chile', 'China', 'Costa Rica', 'Cyprus', 'Czech Republic',
    'Ecuador', 'Egypt', 'Ethiopia', 'Finland', 'France', 'Germany',
    'Iceland', 'India', 'Indonesia', 'Iran', 'Ireland', 'Italy',
    'Jordan', 'Malaysia', 'Mexico', 'Nepal', 'Netherlands', 'New Zealand',
    'Nicaragua', 'Niger', 'Oman', 'Pakistan', 'Philippines', 'Portugal',
    'Romania', 'Russia', 'Saudi Arabia', 'Serbia', 'Sierra Leone',
    'South Africa', 'Spain', 'Sri Lanka', 'Sweden', 'Tonga', 'Turkey',
    'Ukraine', 'United Arab Emirates', 'United Kingdom', 'United States',
    'Uruguay', 'Viet Nam',
]

_relation_levels = ['Others', 'Parent', 'Relative', 'Self']

feature_columns = (
    _answer_columns
    + ['age', 'result', 'Sex_m']
    + [f'Ethnicity_{level}' for level in _ethnicity_levels]
    + ['Jaundice_yes', 'austim_yes']
    + [f'contry_of_res_{level}' for level in _country_levels]
    + ['used_app_before_yes']
    + [f'relation_{level}' for level in _relation_levels]
)

In [4]:
# Step 3: Define a new input in the original (pre-encoding) format (example)
new_input = {
    'A1': 1, 'A2': 1, 'A3': 1, 'A4': 1, 'A5': 1, 'A6': 1, 'A7': 0, 'A8': 0, 'A9': 1, 'A10': 1,
    'age': 25, 'Sex': 'm', 'Ethnicity': 'White-European', 'Jaundice': 'yes', 'austim': 'no',
    'contry_of_res': 'Canada', 'used_app_before': 'yes', 'result': 8, 'age_desc': '18 and more', 'relation': 'Parent'
}

# Step 4: Preprocess the input (convert original input to one-hot encoded format)
input_df = pd.DataFrame([new_input])

print(input_df)

# One-hot encode the categorical fields. `dtype=int` keeps the dummy columns as
# 0/1 integers; without it recent pandas versions emit bool columns, which sit
# next to the int columns and leave the frame without a common numeric dtype.
categorical_columns = ['Sex', 'Ethnicity', 'Jaundice', 'austim', 'contry_of_res', 'used_app_before', 'relation']
input_df_encoded = pd.get_dummies(input_df, columns=categorical_columns, dtype=int)

# Step 5: Align the encoded row with `feature_columns` in a single step:
#   * columns listed in `feature_columns` but absent here (categories this
#     input does not take) are created and filled with 0,
#   * columns not listed there are dropped ('age_desc', and dummy columns such
#     as 'austim_no' that have no entry in `feature_columns`),
#   * the column order becomes exactly that of `feature_columns`.
# NOTE(review): a category value that is misspelled or absent from
# `feature_columns` is dropped silently as well and the row is then encoded as
# all zeros for that field -- validate raw inputs upstream if that matters.
input_df_encoded = input_df_encoded.reindex(columns=feature_columns, fill_value=0)

# Step 6: Use the trained model to make predictions
prediction = stacked_model.predict(input_df_encoded)

   A1  A2  A3  A4  A5  A6  A7  A8  A9  A10  age Sex       Ethnicity Jaundice  \
0   1   1   1   1   1   1   0   0   1    1   25   m  White-European      yes   

  austim contry_of_res used_app_before  result     age_desc relation  
0     no        Canada             yes       8  18 and more   Parent  


In [23]:
# Since it's a binary classification (0 = Not Autistic, 1 = Autistic).
# `prediction` is the array-like returned by `stacked_model.predict`; by the
# scikit-learn convention it holds one label per input row (assumed here --
# confirm for this model). Extract the label of the single input row
# explicitly: comparing the whole array inside `if` only works while it has
# exactly one element and raises ValueError (ambiguous truth value) as soon as
# more than one row is predicted at once.
predicted_label = np.asarray(prediction).ravel()[0]

if predicted_label == 1:
    print("The person is predicted to be autistic.")
else:
    print("The person is predicted to be not autistic.")

The person is predicted to be autistic.
