In [2]:
import pickle
import pandas as pd
import joblib
import numpy as np

# Load the models
PCA_model = pickle.load(open('./ADSv1.3/data/AA_models/vascular/RF_vascular_PCA.pkl', 'rb'))
cerebellum_model = pickle.load(open('./ADSv1.3/data/AA_models/lobe/RF_lobe_cerebellum.pkl', 'rb'))
midbrain_model = pickle.load(open('./ADSv1.3/data/AA_models/lobe/RF_lobe_midbrain&pons&brainstem.pkl', 'rb'))
thalamus_model = pickle.load(open('./ADSv1.3/data/AA_models/lobe/RF_lobe_thalamus.pkl', 'rb'))

# Load the laterality models and scalers
PCA_laterality_model = joblib.load('PCA_logistic_regression_model.pkl')
PCA_scaler = joblib.load('PCA_scaler.pkl')
cerebellum_laterality_model = joblib.load('cerebellum_logistic_regression_model.pkl')
cerebellum_scaler = joblib.load('cerebellum_scaler.pkl')
thalamus_laterality_model = joblib.load('Thalamus_logistic_regression_model.pkl')
thalamus_scaler = joblib.load('Thalamus_scaler.pkl')

# Load the dataset
data = pd.read_csv('final_expanded_qfv_data.csv')
subject_id = data['ID']  # Replace 'ID' with the actual column name for the subject ID in your dataset

# Prepare data for model inference
PCA_features = data[['logVol', 'ACA', 'MCA', 'PCA', 'cerebellar', 'basilar', 'Lenticulostriate', 'Choroidal&Thalamoperfurating', 'watershed']]
other_features = data[['logVol', 'basal ganglia', 'deep white matter', 'cerebellum', 'frontal', 'insula', 'internal capsule', 'brainstem', 'occipital', 'parietal', 'temporal', 'thalamus']]

# Make predictions
PCA_predictions = PCA_model.predict(PCA_features)
cerebellum_predictions = cerebellum_model.predict(other_features)
midbrain_predictions = midbrain_model.predict(other_features)
thalamus_predictions = thalamus_model.predict(other_features)

def predict_laterality(data, predictions, affected_value, left_col, right_col, laterality_model, scaler):
    if left_col in data.columns and right_col in data.columns:
        laterality_features = data[[left_col, right_col]]
        laterality_features_scaled = scaler.transform(laterality_features)

        laterality_predictions = np.zeros(len(predictions), dtype=int)
        mask_affected = (predictions == affected_value)
        laterality_predictions[mask_affected] = laterality_model.predict(laterality_features_scaled[mask_affected])

        def map_laterality(prediction):
            if prediction == 1:
                return 'Left'
            elif prediction == 2:
                return 'Right'
            elif prediction == 3:
                return 'Bilateral'
            else:
                return 'Not Affected'

        laterality_labels = [map_laterality(pred) if affected else 'Not Affected' 
                             for pred, affected in zip(laterality_predictions, mask_affected)]
    else:
        print(f"Warning: '{left_col}' and/or '{right_col}' columns not found. Skipping laterality prediction.")
        laterality_labels = ['Not Available'] * len(predictions)
    
    return laterality_labels

# Predict laterality for PCA, cerebellum, and thalamus
PCA_laterality_labels = predict_laterality(data, PCA_predictions, 1, 'PCAL_vol', 'PCAR_vol', PCA_laterality_model, PCA_scaler)
cerebellum_laterality_labels = predict_laterality(data, cerebellum_predictions, 1, 'cerebellumL_vol', 'cerebellumR_vol', cerebellum_laterality_model, cerebellum_scaler)
thalamus_laterality_labels = predict_laterality(data, thalamus_predictions, 1, 'ThalamusL_vol', 'ThalamusR_vol', thalamus_laterality_model, thalamus_scaler)

# Analyze the results
results = pd.DataFrame({
    'subject_id': subject_id,
    'PCA': PCA_predictions,
    'PCA_Laterality': PCA_laterality_labels,
    'Cerebellum': cerebellum_predictions,
    'Cerebellum_Laterality': cerebellum_laterality_labels,
    'Midbrain': midbrain_predictions,
    'Thalamus': thalamus_predictions,
    'Thalamus_Laterality': thalamus_laterality_labels
})

# Display results
print(results.head())

# Save results
results.to_csv('results.csv', index=False)

  subject_id  PCA PCA_Laterality  Cerebellum Cerebellum_Laterality  Midbrain  \
0    3338494    0   Not Affected         0.0          Not Affected         1   
1    4405376    0   Not Affected         0.0          Not Affected         1   
2    5087200    0   Not Affected         1.0                 Right         0   
3    9532194    1           Left         0.0          Not Affected         0   
4   20089560    0   Not Affected         0.0          Not Affected         1   

   Thalamus Thalamus_Laterality  
0         0        Not Affected  
1         0        Not Affected  
2         0        Not Affected  
3         0        Not Affected  
4         1                Left  


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
Feature names unseen at fit time:
- PCAL_vol
- PCAR_vol
Feature names seen at fit time, yet now missing:
- PCAL_x
- PCAR_x

Feature names unseen at fit time:
- cerebellumL_vol
- cerebellumR_vol
Feature names seen at fit time, yet now missing:
- cerebellumL_x
- cerebellumR_x

Feature names unseen at fit time:
- ThalamusL_vol
- ThalamusR_vol
Feature names seen at fit time, yet now missing:
- ThalamusL_x
- ThalamusR_x

