In [1]:
import warnings

import joblib
import numpy as np
import pandas as pd

In [2]:
# Step 1: Load the trained children's model. The file named below must be present in the
# current working directory.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary code, so only
# load model artifacts that come from a trusted source.
MODEL_PATH_CHILDREN = 'stacked_model_children.joblib'
stacked_model = joblib.load(MODEL_PATH_CHILDREN)


configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.



In [8]:
# Step 2: Feature columns of the preprocessed children's data.
# The order is significant: the encoded input is re-indexed to this exact list before
# it is passed to the model.
feature_columns_children = [
    # Screening question answers A1..A10
    *(f'A{question}' for question in range(1, 11)),
    # Score and single-column features
    'Qchat-10-Score',
    'Jaundice',
    'Family_mem_with_ASD',
    'Age_Years',
    # One-hot encoded categorical features
    'Sex_m',
    *(f'Ethnicity_{group}' for group in (
        'Latino', 'Native Indian', 'Others', 'Pacifica', 'White European',
        'asian', 'black', 'middle eastern', 'mixed', 'south asian',
    )),
    *(f'Who completed the test_{respondent}' for respondent in (
        'Health care professional', 'Others', 'Self', 'family member',
    )),
]

# Step 3: Define a new input for the children's data in the original (raw) format (example).
# Categorical values must match the spelling and case used in the one-hot feature names
# (e.g. 'Ethnicity_asian', 'Who completed the test_family member'); a value that does not
# match produces a column the model does not know, which is then dropped and silently
# encoded as all zeros.
new_input_children = {
    'A1': 1, 'A2': 1, 'A3': 1, 'A4': 1, 'A5': 1,
    'A6': 1, 'A7': 1, 'A8': 1, 'A9': 1, 'A10': 1,
    'Age_Mons': 12,
    'Qchat-10-Score': 10,
    # 'Sex_m' is the only Sex feature, so any value other than 'm' encodes as Sex_m = 0.
    # NOTE(review): 'f' is assumed to be the other category label — confirm against the training data.
    'Sex': 'f',
    'Ethnicity': 'asian',
    'Jaundice': 'no',
    'Family_mem_with_ASD': 'no',
    # NOTE(review): 'Parent' has no matching feature column; 'family member' is assumed to be
    # the intended category — confirm against the training data.
    'Who completed the test': 'family member',
    # Not a model feature: this key is absent from the feature column list and is dropped
    # before prediction. Presumably the target label — kept only to mirror the raw record layout.
    'Class/ASD Traits': 'no'
}

In [9]:
# Step 4: Preprocess the input (convert the raw record into the model's feature layout)
input_df_children = pd.DataFrame([new_input_children])

print("input data: ",input_df_children)

# 'Jaundice' and 'Family_mem_with_ASD' appear in feature_columns_children as single columns,
# so they have to be encoded in place as 0/1. One-hot encoding them would replace them with
# 'Jaundice_no' / 'Jaundice_yes'-style columns that are not model features; the original
# columns would then be re-created as constant 0 and the actual answer would be lost.
# NOTE(review): assumes training encoded 'yes' as 1 and 'no' as 0 — confirm against the
# training notebook.
yes_no_to_int = {'yes': 1, 'no': 0}
input_df_encoded_children = input_df_children.copy()
for binary_col in ('Jaundice', 'Family_mem_with_ASD'):
    normalized = input_df_encoded_children[binary_col].astype(str).str.strip().str.lower()
    if not normalized.isin(list(yes_no_to_int)).all():
        # Fail loudly instead of feeding a silently wrong feature value to the model.
        raise ValueError(
            f"Column {binary_col!r} must contain 'yes' or 'no', got: "
            f"{input_df_encoded_children[binary_col].tolist()}"
        )
    input_df_encoded_children[binary_col] = normalized.map(yes_no_to_int)

# One-hot encoding for the remaining categorical features
input_df_encoded_children = pd.get_dummies(
    input_df_encoded_children,
    columns=['Sex', 'Ethnicity', 'Who completed the test'],
)

# Convert 'Age_Mons' to 'Age_Years'
input_df_encoded_children['Age_Years'] = input_df_encoded_children['Age_Mons'] / 12

input data:     A1  A2  A3  A4  A5  A6  A7  A8  A9  A10  Age_Mons  Qchat-10-Score Sex  \
0   1   1   1   1   1   1   1   1   1    1         2               5   m   

        Ethnicity Jaundice Family_mem_with_ASD    Who completed the test  \
0  White European      yes                 yes  Health care professional   

  Class/ASD Traits  
0              yes  


In [10]:
# Step 5: Align the encoded input with the model's feature columns.

# Columns that are not model features are dropped by the re-indexing below. Report them
# first: a dropped one-hot column usually means a categorical value was misspelled or
# mis-cased, in which case that feature is silently encoded as all zeros.
# 'Age_Mons' is excluded from the report because it has already been converted to
# 'Age_Years'; 'Class/ASD Traits' is excluded because it is not a model feature
# (presumably the target label).
ignored_raw_columns = {'Age_Mons', 'Class/ASD Traits'}
unexpected_cols_children = [
    col for col in input_df_encoded_children.columns
    if col not in feature_columns_children and col not in ignored_raw_columns
]
if unexpected_cols_children:
    warnings.warn(
        "Encoded input contains columns that are not model features and will be dropped: "
        f"{unexpected_cols_children}. Check the spelling and case of the categorical values."
    )

# Fill in any feature columns that the single-row one-hot encoding did not create
missing_cols_children = set(feature_columns_children) - set(input_df_encoded_children.columns)
for col in missing_cols_children:
    input_df_encoded_children[col] = 0

# Ensure the input data has the same column order as the training data
input_df_encoded_children = input_df_encoded_children[feature_columns_children]

# Step 6: Use the trained model to make predictions
prediction_children = stacked_model.predict(input_df_encoded_children)

In [11]:
# Output the prediction result.
# prediction_children holds one entry per input row; test the single row explicitly.
# Comparing the whole array to 1 inside `if` only works for exactly one element and is
# ambiguous (raises) as soon as more than one row is predicted.
if prediction_children[0] == 1:
    print("The child is predicted to be autistic.")
else:
    print("The child is predicted to be not autistic.")

The child is predicted to be autistic.


In [19]:
# Show the raw label predicted for the first (only) input row
first_prediction_children = prediction_children[0]
print(first_prediction_children)

0
