# Automated Model Evaluation

Now that we have a general model function, we can create a script that makes models out of different combinations of features and displays the best results.

In [8]:
import itertools
import pandas as pd
import GeneralModel as gm
from tqdm import tqdm_notebook as tqdm

In [2]:
# retrieves data
merged = pd.read_csv('../../DataPlus/feature_dataframe.csv')

In [3]:
model_df = gm.prepare_df(merged, cat_vars=['edu_binary', 'marry_binary', 'Advice1'], cont_vars=[])

# of Data Points: 358


In [5]:
fscore, metrics, auc_score, feat_info = gm.general_model(model_df, algorithm='rf', print_feat=True)


             Feature    Weight
0  No College Degree  0.193892
1        Not Married  0.112416
2                 AR  0.044520
3                 AS  0.051509
4                ASR  0.280635
5                  R  0.100839
6                  S  0.133658
7                 SR  0.082531


F-score: 0.469
AUC: 0.607


In [46]:
"""
    Given a set of categorical and continuous features, the function will iterate
    over the combinations of features and present the results in a dataframe
    
    Columns in dataframe: features (list), fscore, auc, feature importance (if applicable) 
    
    input:
        - df: original dataframe
        - sort: column by which to sort dataframe (by AUC or fscore)
        - algorithm: type of algorithm
        - cat_vars: list of categorical variables
        - cont_vars: list of continuous variables
"""
def model_evaluation(df, sort=None, algorithm='rf', cat_vars=['gleason'], cont_vars=['age']):
    total_features = cat_vars + cont_vars;
    results = pd.DataFrame();
    
    for var_set in tqdm(itertools.combinations(cat_vars, 2)):
        model_df = gm.prepare_df(df, cont_vars=['age'], cat_vars=list(var_set), print_dims=False)
        
        fscore, _, auc_score, feat_importance_df = gm.general_model(model_df, algorithm=algorithm, print_metrics=False, tqdm_on=False)
        
        feature_string = ', '.join(list(var_set))
        
        results = results.append(pd.DataFrame({'features': feature_string, 'fscore': fscore, 'auc': auc_score}, index=[0]))
        print(results)
    
    if sort is not None : results.sort(sort, ascending=False)
    
    return results

In [47]:
result = model_evaluation(merged, cat_vars=['marry_binary', 'gleason', 'edu_binary'])

        auc               features   fscore
0  0.722274  marry_binary, gleason  0.65751
        auc                  features    fscore
0  0.722274     marry_binary, gleason  0.657510
0  0.561897  marry_binary, edu_binary  0.419785
        auc                  features    fscore
0  0.722274     marry_binary, gleason  0.657510
0  0.561897  marry_binary, edu_binary  0.419785
0  0.695579       gleason, edu_binary  0.621407



In [41]:
result.head()

Unnamed: 0,AUC,features,fscore
0,0.730968,"marry_binary, gleason",0.665025
0,0.544274,"marry_binary, edu_binary",0.399158
0,0.692151,"gleason, edu_binary",0.610539
