In [66]:
# Ignore all warnings
from warnings import simplefilter
simplefilter(action='ignore')

# Import dependencies
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt

# Import the models from SKLearn (Model 1 through Model 6)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import pickle

# Local module
from ml_classification import model_performance

## Load data

In [67]:
# Read the general-population records from disk
genpop_df = pd.read_csv(Path('model_blackbox_genpop.csv'))
# NOTE(review): the frame displayed at the end of this notebook carries an
# 'Unnamed: 0' column, which looks like a saved index read back as data and
# would flow into X as a feature — confirm whether that is intended.

# Feature table: every column except 'Classification'
X = genpop_df.drop(columns='Classification')

# Dummy-encode the non-Age columns with pd.get_dummies, then put Age back
# in front of the encoded columns
age_column = X['Age']
categorical_df = X.drop(columns=['Age'])
dummies_df = pd.get_dummies(categorical_df)
X = pd.concat([age_column, dummies_df], axis=1)

# Column labels of the encoded dataset
dataset_features = X.columns

## Load model

In [68]:
# Deserialize the stored model from 'blackbox.model'
# NOTE(review): unpickling can execute code embedded in the file — only load
# model files that come from a trusted source.
with Path('blackbox.model').open('rb') as f:
    model_1 = pickle.load(f)

# Names held in the model's feature_names_in_ attribute (for scikit-learn
# estimators these are the column names seen during fit)
model_features = model_1.feature_names_in_

## Prepare dataset for predictions

In [69]:
# Compare the dataset's columns with the columns the model expects.
# Both directions are always checked: a dataset can be missing some model
# features AND carry extra ones at the same time, whatever the two counts are.
dataset_feature_set = set(dataset_features)
model_feature_set = set(model_features)

# Model features that are absent from the dataset (kept in model order)
missing_features = [
    feature for feature in model_features
    if feature not in dataset_feature_set
]

# Dataset features the model does not know about (kept in dataset order)
extra_features = [
    feature for feature in dataset_features
    if feature not in model_feature_set
]

# Align X with the model in one step: add each missing feature as a column
# of 0, drop the extra features, and put the columns in the model's own order.
# The order matters because X is later converted to a bare array, after which
# the model can only match columns by position.
X = X.reindex(columns=list(model_features), fill_value=0)

In [70]:
# Data Preparation
# Convert the feature table to a plain NumPy array. np.asarray returns an
# existing array unchanged, so re-running this cell is harmless, whereas
# `X.values` raises AttributeError once X is already an array.
X = np.asarray(X)

## Make predictions

In [80]:
# Run the loaded model over every row of the prepared feature array
predictions_1 = model_1.predict(X)

# Store the model's output in the 'Classification' column of the dataset
genpop_df = genpop_df.assign(Classification=predictions_1)

In [82]:
# Show only the rows whose Classification value is 1
is_positive = genpop_df['Classification'] == 1
genpop_df[is_positive]

Unnamed: 0,Age,Gender,Race,Immigrant,Education,RelStatus,Employed,Work,MilService,Arrested,ParentDivorce,SES,MentalIllness,MentalIllnessHistory,Autism,HealthIssues,Classification
31,15,Female,White,No,Some college/trade school,Single,Not working,In between,No,Yes,No evidence,Middle class,No evidence,No evidence,Diagnosed or extremely likely,No evidence,1
41,31,Male,White,No,Bachelor's degree,Divorced/separated/widowed,Not Working,White collar,No,No,No evidence,Lower class,Yes,No evidence,No evidence,No evidence,1
72,50,Male,White,No,Graduate school/advanced degree,Single,Not Working,White collar,No,Yes,No evidence,Upper class,No evidence,No evidence,No evidence,No evidence,1
83,31,Female,Black,Yes,Bachelor's degree,Married,Working,White collar,No,No,No evidence,Upper class,Yes,No evidence,Diagnosed or extremely likely,No evidence,1
86,27,Male,White,No,Graduate school/advanced degree,Divorced/separated/widowed,Working,White collar,No,No,No evidence,Lower class,No evidence,No evidence,No evidence,No evidence,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9766,62,Female,White,No,Graduate school/advanced degree,Divorced/separated/widowed,Not Working,White collar,No,Yes,No evidence,Middle class,Yes,Yes,No evidence,No evidence,1
9815,47,Female,White,No,Graduate school/advanced degree,Married,Working,White collar,No,Yes,No evidence,Lower class,No evidence,No evidence,No evidence,No evidence,1
9826,39,Female,White,No,Bachelor's degree,Married,Not Working,White collar,No,Yes,No evidence,Upper class,Yes,No evidence,No evidence,No evidence,1
9853,36,Male,Latinx,Yes,Bachelor's degree,Single,Working,White collar,No,No,No evidence,Lower class,Yes,Yes,No evidence,Yes,1
