# Setup stuff

In [None]:
#=====[ Setup - don't modify ]=====
%matplotlib inline
%load_ext autoreload
%autoreload 2
import sys
import csv
import pandas as pd
import string
import os
from plotting_tools import *
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
import datetime

from helper_functions import *

## Define run parameters

In [None]:
# Run-wide configuration for the notebook.
# purpose_of_run selects the specificity bin width here and, further down,
# whether the LaTeX tables get written ('for_tables').
purpose_of_run = 'for_plotting'
#purpose_of_run = 'for_tables'

desired_specificity_bin_width = 0.01 if purpose_of_run=='for_tables' else 0.05

# Scenarios that only exist for training-side comparisons.
training_only_scenarios_dict = collections.OrderedDict([
    ('default', {'age_split': False, 'full_desc': 'full default, no encoding or filtering, one age bin, default features selection', 'latex_desc': 'all default'}),
    ('balanced_tally', {'age_split': False, 'full_desc': 'no encoding or filtering, one age bin, tally features selection, balanced', 'latex_desc': 'tally selection, others default, balanced'}),
    ('age_binned', {'age_split': True, 'young_prefix': 'young_age', 'old_prefix': 'old_age', 'full_desc': 'no encoding or filtering, tally features selection, age binned', 'latex_desc': 'age binned, no encoding'}),
    ('age_binned_encoded', {'age_split': True, 'young_prefix': 'young_encoded', 'old_prefix': 'old_encoded', 'full_desc': 'tally features selection, age binned, encoded', 'latex_desc': 'age binned, with encoding'}),
])

# Scenarios evaluated on clinical data. Only entries carrying
# 'young_prefix' / 'old_prefix' name a model file prefix to look up.
clinical_scenarios_dict = collections.OrderedDict([
     ('scalar_encoding_all_features', {'encoding': 'scalar', 'feature_restriction': None, 'latex_desc': 'Scalar, all features'}),
     ('scalar_encoding_restricted_features', {'encoding': 'scalar', 'feature_restriction': [], 'latex_desc': 'Scalar, restricted features'}),
     ('production_encoding_restricted_features', {'encoding': 'production', 'feature_restriction': [], 'latex_desc': 'Encoded, restricted features'}),
     ('production_encoding_restricted_features_proportional_loss', {'encoding': 'production', 'feature_restriction': [], 'inject_loss': 'proportional', 'latex_desc': 'Proportional loss'}),
     ('exact_production_optimization', {'encoding': 'production', 'feature_restriction': [], 'inject_loss': 'proportional', 'latex_desc': 'Exact prod'}),
     ('age_binned_official', {'age_split': True, 'young_prefix': 'young_age_official', 'old_prefix': 'old_age_official', 'full_desc': 'no encoding or filtering, official features selection, age binned', 'latex_desc': 'Age silo variant', 'color': 'red', 'linestyle': 'dashed', 'linewidth': 5}),
     ('age_binned_encoded_official', {'age_split': True, 'young_prefix': 'young_encoded_official', 'old_prefix': 'old_encoded_official','full_desc': 'official features selection, encoded', 'latex_desc': 'Severity-level feature encoding variant', 'color': 'blue', 'linestyle': 'dotted', 'linewidth': 5}),
     ('engineering_official', {'age_split': True, 'young_prefix': 'young_engineered_official', 'old_prefix': 'old_engineered_official','full_desc': 'official features selection, encoded, feature engineering', 'latex_desc': 'Aggregate features variant'}),
     ('exact_optimization', {'age_split': True, 'young_prefix': 'young_age_exact', 'old_prefix': 'old_age_exact', 'latex_desc': 'Exact'}),
])

# Algorithm name -> column/value used to select that algorithm's rows in the
# clinical CSVs. The video names use the dotted form ('video.module.1') so
# that they match the keys of ages_dict and the names used by the table and
# concatenation code further down.
# NOTE(review): ' Video Version' has a leading space - confirm that this is
# really how the CSV column is spelled.
algorithms = collections.OrderedDict([
    ('guardian.qnnaire.3-', {'filter_key': 'Guardian Qnnaire Version', 'filter_value': '3-'}),
    ('guardian.qnnaire.4+', {'filter_key': 'Guardian Qnnaire Version', 'filter_value': '4+'}),
    ('video.module.1', {'filter_key': ' Video Version', 'filter_value': 'module1'}),
    ('video.module.2', {'filter_key': ' Video Version', 'filter_value': 'module2'})
])
# A real list (not a keys view) because later cells concatenate it with a list.
clinical_scenarios = list(clinical_scenarios_dict.keys())

# Metrics to report. 'desc' is the display name; the optional 'key' is the
# column name of that metric in the training-optimization CSVs.
# Each metric appears exactly once: a repeated key would silently replace the
# earlier entry and drop its 'key'.
metrics_to_do = collections.OrderedDict([
    ('AUC', {'desc': 'AUC'}),
    ('average_recall', {'desc': 'Average Recall'}),
    ('average_precision_dataset', {'desc': 'Average Precision [Dataset]', 'key': 'average precision [Dataset]'}),
    ('autism_recall', {'desc': 'Autism Recall'}),
    ('not_recall', {'desc': 'Not Recall'})
])

# Age group covered by each algorithm / combined dataset.
ages_dict = {
    'guardian.qnnaire.3-': 'young',
    'guardian.qnnaire.4+': 'old',
    'questionnaire': 'combined',
    'video.module.1': 'young',
    'video.module.2': 'old',
    'video': 'combined'
}
# Single-argument print: same output under the Python 2 print statement and
# the Python 3 print function.
print('baseline stuff defined. purpose of run is  %s' % (purpose_of_run,))
sys.stdout.flush()


## Helper functions

In [None]:
def get_most_recent_filename_with_prefix_suffix_and_date(prefix, suffix, directory='.', debug=False, max_date=None):
    ''' Get most recent filename with a given prefix, suffix, and date in a given directory

    File names are expected to look like <prefix>[_]<mm.dd.yy><suffix>. Names
    whose middle part is not a date in that format are ignored.

    prefix, suffix: required start and end of the file name.
    directory: directory to list.
    debug: if True, print the candidates and the parsing steps.
    max_date: optional datetime.datetime; files dated after it are skipped.

    Returns directory + '/' + filename for the newest dated match, or None
    when no file qualifies.
    '''
    filenames_matching_constraints = [my_file for my_file in os.listdir(directory)
                                      if my_file.startswith(prefix) and my_file.endswith(suffix)]
    max_date_so_far = None
    latest_filename = None
    # Each print takes one pre-formatted string, which gives the same output
    # under the Python 2 print statement and the Python 3 print function.
    if debug:
        print('get most recent file from  %s' % (filenames_matching_constraints,))
    for filename in filenames_matching_constraints:
        # Slice off prefix and suffix by length. Splitting on the prefix text
        # picks the wrong piece when the prefix occurs again later in the name.
        date_str = filename[len(prefix):len(filename) - len(suffix)]
        # startswith() is safe on an empty string (name == prefix + suffix).
        if date_str.startswith('_'):
            date_str = date_str[1:]
        if debug:
            print('date_str:  %s' % (date_str,))
        try:
            my_datetime = datetime.datetime.strptime(date_str, '%m.%d.%y')
        except ValueError:
            # Not a dated file (e.g. a longer prefix variant); skip it.
            if debug:
                print('File  %s , has no recognized datetime' % (filename,))
            continue
        if max_date is not None and my_datetime > max_date:
            print('datetime  %s  is past max date of  %s , so skip this.' % (my_datetime, max_date))
            continue
        if debug:
            print('my_datetime:  %s' % (my_datetime,))
        if max_date_so_far is None or my_datetime > max_date_so_far:
            max_date_so_far = my_datetime
            latest_filename = directory + '/' + filename
    if debug:
        print('most recent one is  %s' % (latest_filename,))
    return latest_filename

### Load baseline comparison models

In [None]:
dataFilePath = # PATH TO MCHAT-R BASELINE DATA FILE
f = open(dataFilePath, 'rU')
dictReader = csv.DictReader(f, delimiter='\t')
data = []
for row in dictReader:
    data.append(row)

mchat_df = pd.DataFrame(data)
mchat_df['Mchat Final Score'] = mchat_df['Mchat Final Score'].apply(lambda x: None if x=='' else float(x))
mchat_df['Patient Id'] = mchat_df['Patient Id'].astype(int)
mchat_df.shape

dataFilePath = # PATH DO CBCL BASELINE DATA FILE
f = open(dataFilePath, 'rU')
dictReader = csv.DictReader(f, delimiter='\t')
data = []
for row in dictReader:
    data.append(row)

cbcl_df = pd.DataFrame(data)

cbcl_df['CBCL Score'] = cbcl_df['Autism Spectrum Problems Total Score'].apply(lambda x: float(x) if len(x)>0 else np.NaN)
cbcl_df['Patient Id'] = cbcl_df['Patient Id'].astype(int)
cbcl_df.shape

In [None]:
def make_clinical_latex_tables(clinical_metric_dfs, desc):
    metrics_to_combine = ['AUC', 'not_precision', 'autism_precision', 'average_precision_dataset', 'not_recall']
    comb_latex_df = None
    full_metric_dfs = cp.deepcopy(clinical_metric_dfs)

    def get_metric_column(metric_name):
        if 'key' in metrics_to_do[metric_name].keys():
            return metrics_to_do[metric_name]['key']
        else:
            return metric_name.replace('_', ' ')

    ### Determine training results and merge with clinical results df
    for metric_name, metric_details in metrics_to_do.iteritems():
        for algorithm in algorithms.keys():
            if algorithm == 'video.module.1' or algorithm == 'video.module.2':
                continue


            optimized_filename = '../training_code/optimized_settings/optimal_parameters_for_'+algorithm+'_only_official_features.csv'
            optimized_df = pd.read_csv(optimized_filename)
            print 'for algorithm: ', algorithm, ', filename: ', optimized_filename
            print 'clinical scenarios dict: ', clinical_scenarios_dict
            print 'scenarios: ', optimized_df['scenario'].values
            optimized_df['latex_scenario'] = [clinical_scenarios_dict[scenario]['latex_desc'] for scenario in\
                                    optimized_df['scenario'].values ]
            if 'average precision [Dataset]' not in optimized_df.columns:
                optimized_df['average precision [Dataset]'] = 0.5*(optimized_df['autism precision [Dataset]'] + optimized_df['not precision [Dataset]'])
            print 'metric: ', metric_name, ', algo: ', algorithm, ', optimized_df: ', optimized_df



            columns_to_use = ['latex_scenario', get_metric_column(metric_name)]
                    
            print 'optimized_df columns: ', list(optimized_df.columns)
            print 'columns_to_use: ', columns_to_use
            relevant_part_of_df = cp.deepcopy(optimized_df[columns_to_use])
            relevant_part_of_df.columns = ['latex_scenario', 'train '+algorithm]
            full_metric_dfs[metric_name] = full_metric_dfs[metric_name].merge(relevant_part_of_df, on='latex_scenario',
                                                            how='outer')

        print 'For metric: ', metric_name, ', cols: ', list(full_metric_dfs[metric_name].columns)
        full_metric_dfs[metric_name].index = full_metric_dfs[metric_name]['latex_scenario']
        full_metric_dfs[metric_name].index.names = [metric_details['desc']]
        #numeric_columns = [ 'guardian.qnnaire.3-', 'guardian.qnnaire.4+', 'video.module.1', 'video.module.2', 'train guardian.qnnaire.3-', 'train guardian.qnnaire.4+', 'train video.module.1', 'train video.module.2']
        numeric_columns = [ 'guardian.qnnaire.3-', 'guardian.qnnaire.4+', 'questionnaire']
        full_metric_dfs[metric_name][numeric_columns] = full_metric_dfs[metric_name][numeric_columns].round(2)

        print 'For metric: ', metric_name, ', after merging training optimizations, full avg recall df: ', full_metric_dfs[metric_name]

        cols_to_write = ['guardian.qnnaire.3-', 'guardian.qnnaire.4+', 'video.module.1', 'video.module.2', 'train guardian.qnnaire.3-', 'train guardian.qnnaire.4+', 'train video.module.1', 'train video.module.2']
        latex_filename = 'clinical_optimized_performance_'+metric_name+'.tex'

        to_latex_df = cp.deepcopy(full_metric_dfs[metric_name][cols_to_write])
            #to_latex_df.columns = ['scenario', 'All '+metric, '$< 4$ '+metric, '$\geq 4$ '+metric]


        to_latex_df.columns = ['$< 4$ yr', '$\\geq 4$ yr', 'All']
        to_latex_df = to_latex_df[['All', '$< 4$ yr', '$\\geq 4$ yr']]
        to_latex_df.to_latex(buf=latex_filename, index=True)
        to_latex_df.columns = ['All '+metric_name, '$< 4$ yr '+metric_name, '$\\geq 4$ yr '+metric_name]


        if metric_name in metrics_to_combine:
            if comb_latex_df is None:
                comb_latex_df = cp.deepcopy(to_latex_df)
            else:
                comb_latex_df = comb_latex_df.merge(to_latex_df, left_index=True, right_index=True)

    comb_latex_df.index.name = 'scenario'
    print 'comb_latex_df to write, before conversions of strings: ', comb_latex_df
    latex_string = comb_latex_df.to_latex(escape=False, index=True)
    #### These lines are to make the table multicell and put the metrics in the multicell contents
    latex_string = latex_string.replace('lrrrrrrrrr', 'p{3.5cm}|p{1cm}p{1cm}p{1cm}|p{1cm}p{1cm}p{1cm}|p{1cm}p{1cm}p{1cm}')
    latex_string = latex_string.replace('\\toprule', '\\hline\n & \\multicolumn{3}{|c|}{AUC} & \\multicolumn{3}{c|}{Average precision @ 80\% sensitivity} & \\multicolumn{3}{c}{Specificity @ 80\% sensitivity} \\\\')
    #### This is to correct a multicell display error
    latex_string = latex_string.replace('\\midrule', '\\hline')
    latex_string = latex_string.replace('\\bottomrule', '\\hline')
    ##### This part is to remove the extra column labels that will now go in the header of the table
    #latex_string = latex_string.replace(' AUC ', '')
    #latex_string = latex_string.replace(' autism_recall ', '')
    #latex_string = latex_string.replace(' not_recall ', '')
    #latex_string = latex_string.replace(' average_precision_dataset ', '')
    ##### This part is to deal with some weird bug that splits a row
    latex_string = latex_string.replace('scenario                 &          &            &               &                    &                      &                         &                 &                   &                      \\\\', '')
    latex_string = latex_string.replace('scenario                 &          &            &               &                                 &                                   &                                      &                 &                   &                      \\\\', '')
    latex_string = latex_string.replace('{}', 'scenario')


    latex_file_out = open(desc+'_clinical_optimization_results_questionnaire_combined.tex', 'w')
    latex_file_out.write(latex_string)
    latex_file_out.close()



## def make_clinical_latex_tables(clinical_metric_dfs, desc):
# Disabled older variant of make_clinical_latex_tables. Its `def` line above
# is commented out, so the body is commented out as well: left indented and
# live, it is parsed as a continuation of the preceding function and fails
# (it uses an undefined name `metric` and assigns 3 or 4 labels to an
# 8-column selection). Kept for reference only.
#     metrics_to_combine = ['AUC', 'average_precision_dataset', 'not_recall']
#     comb_latex_df = None
#     full_metric_dfs = cp.deepcopy(clinical_metric_dfs)
#
#     def get_metric_column(metric_name):
#         if 'key' in metrics_to_do[metric_name].keys():
#             return metrics_to_do[metric_name]['key']
#         else:
#             return metric_name.replace('_', ' ')
#
#     ### Determine training results and merge with clinical results df
#     for metric_name, metric_details in metrics_to_do.iteritems():
#         for algorithm in algorithms.keys():
#             if algorithm == 'video.module.1' or algorithm == 'video.module.2':
#                 continue
#
#
#             optimized_filename = '../training_code/optimized_settings/optimal_parameters_for_'+algorithm+'_only_official_features.csv'
#             optimized_df = pd.read_csv(optimized_filename)
#             print 'for algorithm: ', algorithm, ', filename: ', optimized_filename
#             print 'clinical scenarios dict: ', clinical_scenarios_dict
#             print 'scenarios: ', optimized_df['scenario'].values
#             optimized_df['latex_scenario'] = [clinical_scenarios_dict[scenario]['latex_desc'] for scenario in\
#                                     optimized_df['scenario'].values ]
#             print 'metric: ', metric_name, ', algo: ', algorithm, ', optimized_df: ', optimized_df
#
#
#
#             columns_to_use = ['latex_scenario', get_metric_column(metric_name)]
#             relevant_part_of_df = cp.deepcopy(optimized_df[columns_to_use])
#             relevant_part_of_df.columns = ['latex_scenario', 'train '+algorithm]
#             full_metric_dfs[metric_name] = full_metric_dfs[metric_name].merge(relevant_part_of_df, on='latex_scenario',
#                                                             how='outer')
#
#         print 'For metric: ', metric_name, ', cols: ', list(full_metric_dfs[metric_name].columns)
#         full_metric_dfs[metric_name].index = full_metric_dfs[metric_name]['latex_scenario']
#         full_metric_dfs[metric_name].index.names = [metric_details['desc']]
#         #numeric_columns = [ 'guardian.qnnaire.3-', 'guardian.qnnaire.4+', 'video.module.1', 'video.module.2', 'train guardian.qnnaire.3-', 'train guardian.qnnaire.4+', 'train video.module.1', 'train video.module.1']
#         numeric_columns = [ 'guardian.qnnaire.3-', 'guardian.qnnaire.4+', 'questionnaire']
#         full_metric_dfs[metric_name][numeric_columns] = full_metric_dfs[metric_name][numeric_columns].round(3)
#
#         print 'For metric: ', metric_name, ', after merging training optimizations, full avg recall df: ', full_metric_dfs[metric_name]
#
#         cols_to_write = ['guardian.qnnaire.3-', 'guardian.qnnaire.4+', 'video.module.1', 'video.module.2', 'train guardian.qnnaire.3-', 'train guardian.qnnaire.4+', 'train video.module.1', 'train video.module.1']
#
#
#
#
#
#         latex_filename = 'clinical_optimized_performance_'+metric_name+'.tex'
#
#         to_latex_df = cp.deepcopy(full_metric_dfs[metric_name][cols_to_write])
#         to_latex_df.columns = ['scenario', 'All '+metric, '$< 4$ '+metric, '$\geq 4$ '+metric]
#
#
#         to_latex_df.columns = ['$< 4$', '$\geq 4$', 'All']
#         to_latex_df = to_latex_df[['All', '$< 4$', '$\geq 4$']]
#         to_latex_df.to_latex(buf=latex_filename, index=True)
#         to_latex_df.columns = ['All '+metric_name, '$< 4$ '+metric_name, '$\geq 4$ '+metric_name]
#
#
#         if metric_name in metrics_to_combine:
#             if comb_latex_df is None:
#                 comb_latex_df = cp.deepcopy(to_latex_df)
#             else:
#                 comb_latex_df = comb_latex_df.merge(to_latex_df, left_index=True, right_index=True)
#
#     comb_latex_df.index.name = 'scenario'
#     latex_string = comb_latex_df.to_latex(escape=False, index=True)
#     #### These lines are to make the table multicell and put the metrics in the multicell contents
#     latex_string = latex_string.replace('lrrrrrrrrr', 'p{3.5cm}|p{1cm}p{1cm}p{1cm}|p{1cm}p{1cm}p{1cm}|p{1cm}p{1cm}p{1cm}')
#     latex_string = latex_string.replace('\\toprule', '\\hline\n & \\multicolumn{3}{|c|}{AUC} & \\multicolumn{3}{c|}{Average precision} & \\multicolumn{3}{c}{Specificity} \\\\')
#     #### This is to correct a multicell display error
#     latex_string = latex_string.replace('\\midrule', '\\hline')
#     latex_string = latex_string.replace('\\bottomrule', '\\hline')
#     ##### This part is to remove the extra column labels that will now go in the header of the table
#     latex_string = latex_string.replace(' AUC ', '')
#     latex_string = latex_string.replace(' autism_recall ', '')
#     latex_string = latex_string.replace(' not_recall ', '')
#     latex_string = latex_string.replace(' average_precision_dataset ', '')
#     ##### This part is to deal with some weird bug that splits a row
#     latex_string = latex_string.replace('scenario                 &          &            &               &                    &                      &                         &                 &                   &                      \\\\', '')
#     latex_string = latex_string.replace('scenario                 &          &            &               &                                 &                                   &                                      &                 &                   &                      \\\\', '')
#     latex_string = latex_string.replace('{}', 'scenario')
#
#
#     latex_file_out = open(desc+'_clinical_optimization_results_questionnaire_combined.tex', 'w')
#     latex_file_out.write(latex_string)
#     latex_file_out.close()


## Load models and clinical data

In [None]:

model_directory = # INSERT PATH TO YOUR OWN MODELS TO VALIDATE
clinical_data_directory = # INSERT PATH TO YOUR OWN CLINICAL VALIDATION DATA
input_video_data = clinical_data_directory + 'clinical_study_video_model_application_response.csv'
input_questionnaire_data = clinical_data_directory + 'clinical_study_guardian_qnnaire_model_application_response.csv'

### This is what we will save out
clinical_data_dfs = {}   ### one dataframe for each algorithm (qnnaires and videos)
models_structure_dict = collections.OrderedDict(    ### a model for each combination of algorithm and scenario
    (algorithm, {}) for algorithm in algorithms
)

def convert_to_int_str(in_val):
    ''' Return in_val as the string of its integer value, or 'missing'.

    'missing' is returned for anything int() rejects: None, non-numeric
    text, NaN and infinities.
    '''
    try:
        return str(int(in_val))
    # Only conversion failures are caught, so unrelated errors (for example a
    # keyboard interrupt) are no longer swallowed.
    except (TypeError, ValueError, OverflowError):
        return 'missing'

### Get data to use
for algorithm, algo_specs in algorithms.iteritems():
    data_filename_to_use = input_video_data if 'video' in algorithm else input_questionnaire_data
    clinical_data_dfs[algorithm] = pd.read_csv(data_filename_to_use)
    filter_column = algo_specs['filter_key']
    filter_value = algo_specs['filter_value']
    clinical_data_dfs[algorithm] = clinical_data_dfs[algorithm][clinical_data_dfs[algorithm][filter_column]==filter_value]
    ### Convert relevant columns to strings:
    for column in clinical_data_dfs[algorithm].columns:
        if 'video_instrument' in column or 'guardian_qnnaire_instrument' in column:
            clinical_data_dfs[algorithm][column] = [convert_to_int_str(ele) for ele in clinical_data_dfs[algorithm][column].values]
            #.astype(str
            
    #### Filter to only IDs that are also in the clinical video dataset
    print 'algorithm ', algorithm, ', num children before video ID filter: ', len(clinical_data_dfs[algorithm].index)
    clinical_data_dfs[algorithm] = clinical_data_dfs[algorithm][clinical_data_dfs[algorithm]['Patient Id'].isin(video_df_ids)]
    print 'algorithm ', algorithm, ', num children after video ID filter: ', len(clinical_data_dfs[algorithm].index)


    
### Get models
for algorithm in algorithms.keys():
    for scenario, scenario_details in clinical_scenarios_dict.iteritems():
        if ages_dict[algorithm] == 'young':
            print 'scenario: ', scenario, ', details: ', scenario_details
            prefix = scenario_details['young_prefix']
        elif ages_dict[algorithm] == 'old':
            prefix = scenario_details['old_prefix']
        this_desc  = algorithm + '_' + scenario
        print 'Get and apply models for algorithm: ', algorithm, ', scenario: ', scenario
        suffix = '.model'
        
        model_filename = get_most_recent_filename_with_prefix_suffix_and_date(prefix=prefix, suffix=suffix,
                                            directory=model_directory, debug=True, max_date=datetime.datetime(2017,1,5))
        
        if scenario == 'scalar_encoding_all_features':
            print 'Cannot apply model to clinical data for scenario: ', scenario
            continue
        if model_filename is None:
            print 'Scenario: ', scenario, ', algorithm: ', algorithm, ', not defined. Skip.'
            continue
        #print 'load model ', model_filename
        models_structure_dict[algorithm][scenario] = load_model(model_filename)
        print 'Got model for ', this_desc
        
        

### Determine model responses and recalls

In [None]:
                    
def get_metric_val(metric_name, AUC, autism_recall, not_recall, autism_precision, not_precision):
    
    print 'metric_name: ', metric_name
    metric_value = None
    if metric_name == 'AUC':
        metric_value = AUC
    elif metric_name == 'average_recall':
        metric_value = (autism_recall + not_recall) / 2.
    elif metric_name == 'average_precision_dataset':
        metric_value = (autism_precision + not_precision) / 2.
    elif metric_name == 'autism_recall':
        metric_value = autism_recall
    elif metric_name == 'not_recall':
        metric_value = not_recall
    else:
        raise NotImplementedError('Metric: '+metric_name+' not implemented yet')   
    return metric_value

def get_optimized_metrics_from_model_responses(responses_2d, class_names, class_priors, truth_vals, desired_autism_recall, desc, find_optimal_dunno=True):
    best_delta_from_desired_so_far = None
    best_model_metrics_so_far = None
    best_threshold_so_far = None
    responses = [ele[0] for ele in responses_2d]
    threshold_vals = np.arange(0.3, 0.9, 0.01)
    #print 'threshold vals: ', threshold_vals
    for threshold in threshold_vals:
        print 'threshold: ', threshold
        print 'responses: ', responses
        y_predicted_without_dunno = np.array(['autism' if response > threshold else 'not' for response in responses])
        y_predicted_with_dunno = np.array(y_predicted_without_dunno)
        model_metrics = get_classifier_performance_metrics(class_names, class_priors, truth_vals, 
                    y_predicted_without_dunno, y_predicted_with_dunno, responses_2d)
        autism_recall = model_metrics['without_dunno']['dataset_recall_per_class']['autism']
        not_recall = model_metrics['without_dunno']['dataset_recall_per_class']['not']
        delta_from_desired = abs(autism_recall - desired_autism_recall)
        if best_delta_from_desired_so_far is None or best_delta_from_desired_so_far > delta_from_desired:
            best_model_metrics_so_far = cp.deepcopy(model_metrics)
            best_delta_from_desired_so_far = delta_from_desired
            best_threshold_so_far = threshold
            print 'best so far'
    
    print 'For ', desc, ', best threshold was: ', best_threshold_so_far, ', with autism recall: ',\
        best_model_metrics_so_far['without_dunno']['dataset_recall_per_class']['autism'], ', not recall: ',\
        best_model_metrics_so_far['without_dunno']['dataset_recall_per_class']['not']
        
    if find_optimal_dunno:
        def get_outcome_with_dunno(response, dunno_range):
            if response < dunno_range[0]: return 'not'
            elif response < dunno_range[1]: return 'dunno'
            else: return 'autism'
            
        low_offset_grid = np.arange(0., 0.2, 0.02)
        high_offset_grid = np.arange(0., 0.2, 0.02)
        coverage_limit = 0.75
#         print 'Try dunno loop'
        best_average_recall_so_far_with_dunno = None
        best_model_metrics_so_far_with_dunno = None
        for low_offset in low_offset_grid:
            low_threshold = best_threshold_so_far - low_offset
            for high_offset in high_offset_grid:
                high_threshold = best_threshold_so_far + high_offset
                dunno_range = [low_threshold, high_threshold]
                y_predicted_without_dunno = np.array(['autism' if response > best_threshold_so_far else 'not' for response in responses])
                y_predicted_with_dunno = np.array([get_outcome_with_dunno(response, dunno_range) for response in responses])
                #print 'zipy zip: ', zip(responses, y_predicted_without_dunno, y_predicted_with_dunno, )
                dunno_model_metrics = get_classifier_performance_metrics(class_names, class_priors, truth_vals,y_predicted_without_dunno, y_predicted_with_dunno, responses_2d)
                autism_recall = dunno_model_metrics['without_dunno']['dataset_recall_per_class']['autism']
                not_recall = dunno_model_metrics['without_dunno']['dataset_recall_per_class']['not']
                coverage = dunno_model_metrics['with_dunno']['dataset_classification_rate']
                autism_recall_with_dunno = dunno_model_metrics['with_dunno']['dataset_recall_per_class_where_classified']['autism']
                not_recall_with_dunno = dunno_model_metrics['with_dunno']['dataset_recall_per_class_where_classified']['not']
#                 print 'For desc ', desc, ', threshold: ', best_threshold_so_far, ', dunno: ', dunno_range, ', coverage: ', coverage
                if coverage < coverage_limit: continue
                average_recall = 0.5*(autism_recall + not_recall)
                average_recall_with_dunno = 0.5*(autism_recall_with_dunno + not_recall_with_dunno)
#                 print 'autism_recall with dunno now: ', autism_recall_with_dunno, ', improved from ', autism_recall
#                 print 'not_recall with dunno now: ', not_recall_with_dunno, ', improved from ', not_recall


                if best_average_recall_so_far_with_dunno is not None and best_average_recall_so_far_with_dunno >= average_recall_with_dunno: continue
                
                print 'get best with dunno'
                best_model_metrics_so_far_with_dunno  = dunno_model_metrics
                best_average_recall_so_far_with_dunno = average_recall_with_dunno
                print 'New best found'
        return best_model_metrics_so_far_with_dunno
    else:
        return best_model_metrics_so_far


def append_metric_results_to_dict_of_lists(clinical_optimized_metric_dict, model_metrics, scenario):
    ''' After appending to the results, results will get turned into a dataframe '''     
    autism_recall = model_metrics['without_dunno']['dataset_recall_per_class']['autism']
    not_recall = model_metrics['without_dunno']['dataset_recall_per_class']['not']
    autism_precision = model_metrics['without_dunno']['dataset_precision_per_class']['autism']
    not_precision = model_metrics['without_dunno']['dataset_precision_per_class']['not']
            
    print 'get recalls and preciions with dunno'
    autism_recall_with_dunno = model_metrics['with_dunno']['dataset_recall_per_class_where_classified']['autism']
    not_recall_with_dunno = model_metrics['with_dunno']['dataset_recall_per_class_where_classified']['not']
    autism_precision_with_dunno = model_metrics['with_dunno']['dataset_precision_per_class_where_classified']['autism']
    not_precision_with_dunno = model_metrics['with_dunno']['dataset_precision_per_class_where_classified']['not']


            
    print 'For this_desc: ', this_desc
    print 'precisions: ', autism_precision, ', ', not_precision
    AUC = model_metrics['without_dunno']['auc']
    for metric_name, metric_details in metrics_to_do.iteritems():
        metric_value = get_metric_val(metric_name, AUC, autism_recall, not_recall, autism_precision, not_precision)
        print 'Metric value for ', this_desc, ', metric: ', metric_name, ': ', metric_value
        #print 'clinical_optimized_avg_recall_dict[algorithm]: ', clinical_optimized_avg_recall_dict[algorithm]
        clinical_optimized_metric_dict[metric_name][algorithm].append(metric_value)
        if scenario != 'engineering_official': continue
        print 'get dunno version of metric:'
        metric_val_with_dunno = get_metric_val(metric_name, AUC, autism_recall_with_dunno, 
                            not_recall_with_dunno, autism_precision_with_dunno, not_precision_with_dunno)
        clinical_optimized_metric_dict[metric_name][algorithm].append(metric_val_with_dunno)
    return clinical_optimized_metric_dict

         AUC = model_metrics['without_dunno']['auc']
         not_recall = model_metrics['without_dunno']['dataset_recall_per_class']['not']
         autism_precision = model_metrics['without_dunno']['reallife_precision_per_class']['autism']
         not_precision = model_metrics['without_dunno']['reallife_precision_per_class']['not']



class_priors = [(1.0/2.0), (1.0/2.0)]

### This is what we will fill in 
old_clinical_optimized_metric_dict = collections.OrderedDict()
for metric_name, metric_details in metrics_to_do.iteritems():
    old_clinical_optimized_metric_dict[metric_name] = collections.OrderedDict(
        (algorithm, []) for algorithm in algorithms
    )

for metric_name, metric_details in metrics_to_do.iteritems():
    old_clinical_optimized_metric_dict[metric_name]['scenario'] = clinical_scenarios + ['engineering_official_dunno']
    old_clinical_optimized_metric_dict[metric_name]['latex_scenario'] = [value['latex_desc'] for value in clinical_scenarios_dict.values()]+['with 25\\% inconclusive allowance']

# metrics_to_do = collections.OrderedDict([
#     ('average_recall', {'desc': 'Average Recall'}),
#     ('autism_recall', {'desc': 'Autism Recall'}),
#     ('not_recall', {'desc': 'Not Recall'})
# ])
for algorithm in algorithms.keys():
    for scenario in clinical_scenarios:
        this_desc  = algorithm + '_' + scenario
        try:    
            print 'get prep fn for ', this_desc
            #print 'data prep fn: ', models_structure_dict[algorithm][scenario]['data_prep_function']
            X,y = models_structure_dict[algorithm][scenario]['data_prep_function'](clinical_data_dfs[algorithm], models_structure_dict[algorithm][scenario])
            if scenario == 'age_binned_official': 
                X = X[sorted(X.columns)]
            
            apply_output = models_structure_dict[algorithm][scenario]['apply_function'](X, y, models_structure_dict[algorithm][scenario])
            
            model_response = [x[0] for x in apply_output['model_response']]
            model_response_2d = apply_output['model_response']

            labels = clinical_data_dfs[algorithm][models_structure_dict[algorithm][scenario]['target']]
            clinical_data_dfs[algorithm]['model_response_'+scenario] = model_response
            clinical_data_dfs[algorithm]['outcome'] = labels
            clinical_data_dfs[algorithm]['classifier_variant'] = algorithm
            class_names = sorted(labels.unique())
            orig_model_metrics = get_classifier_performance_metrics(class_names, class_priors, labels, apply_output['y_predicted_without_dunno'], apply_output['y_predicted_with_dunno'], apply_output['model_response'])
            desired_autism_recall = 0.8
            
            if scenario == 'engineering_official':   ### Override to use our official results instead
                model_response_2d = [[official_response, 1-official_response] for official_response in clinical_data_dfs[algorithm]['Guardian Qnnaire Score'].values]
            find_optimal_dunno = True if scenario == 'engineering_official' else False
            model_metrics = get_optimized_metrics_from_model_responses(model_response_2d, class_names, class_priors, truth_vals=labels, desired_autism_recall=desired_autism_recall, desc=this_desc, find_optimal_dunno=find_optimal_dunno)
            old_clinical_optimized_metric_dict = append_metric_results_to_dict_of_lists(old_clinical_optimized_metric_dict, model_metrics, scenario)


        except Exception as this_message:
            print 'Problem running algorithm ', algorithm, ', scenario: ', scenario
            print 'Error message: ', this_message
            for metric_name, metric_details in metrics_to_do.iteritems():
                old_clinical_optimized_metric_dict[metric_name][algorithm].append(np.nan)
clinical_data_dfs['questionnaire'] = pd.concat([clinical_data_dfs['guardian.qnnaire.3-'], clinical_data_dfs['guardian.qnnaire.4+']], ignore_index=True)
clinical_data_dfs['video'] = pd.concat([clinical_data_dfs['video.module.1', 'video.module.2'], ignore_index=True)


##### Turn this back off when done testing above

# Build one DataFrame per metric from the dict-of-lists filled in by the loop above.
old_clinical_metric_dfs = collections.OrderedDict()
for metric_name, metric_details in metrics_to_do.iteritems():
    # NOTE(review): prints the complete dict on every iteration -- looks like leftover debug output.
    print old_clinical_optimized_metric_dict
    old_clinical_metric_dfs[metric_name] = pd.DataFrame(old_clinical_optimized_metric_dict[metric_name])
    # Name the index after the metric's 'desc' entry.
    old_clinical_metric_dfs[metric_name].index.names = [metric_details['desc']]
    # Combined questionnaire column: fixed 2:3 weighted mean of the '3-' and '4+' columns.
    # NOTE(review): the 2 and 3 weights presumably reflect the relative sizes of the
    # two age groups -- confirm against the data.
    old_clinical_metric_dfs[metric_name]['questionnaire'] = ((2.*old_clinical_metric_dfs[metric_name]['guardian.qnnaire.3-']) +\
                                                        (3.*old_clinical_metric_dfs[metric_name]['guardian.qnnaire.4+'])) / 5.
    


#clinical_avg_recall_df['scenario'] = clinical_avg_recall_df.index
print 'old style clinical_metric_dfs: ', old_clinical_metric_dfs

# LaTeX tables are only written when the run is configured 'for_tables'.
if purpose_of_run == 'for_tables':
    make_clinical_latex_tables(old_clinical_metric_dfs, desc='old')




### Determine training data recalls, merge with clinical results, and write output

Functions to explore classification thresholds

In [None]:
def get_optimized_metrics_dict_from_thresholds_df(thresholds_df, desired_autism_recall, min_coverage=0.7, desc=''):
    ### First restric to autism recall within 2% of desired value
    thresholds_df['autism_recall'] = thresholds_df['sensitivity']
    thresholds_df['not_recall'] = thresholds_df['specificity']
    thresholds_df['average_recall'] = 0.5*(thresholds_df['autism_recall']+thresholds_df['not_recall'])
    thresholds_df['average_precision_dataset'] = 0.5*(thresholds_df['autism_precision']+thresholds_df['not_precision'])
    print 'desc: ', desc, ', autism recalls: ', thresholds_df['autism_recall'].values
    restricted_df = thresholds_df[np.abs(thresholds_df['autism_recall'].values-desired_autism_recall)<0.01]
    print 'desc: ', desc, ', after requiring good match: ', restricted_df['autism_recall'].values

    if len(restricted_df.index)<1:
        restricted_df = thresholds_df[np.abs(thresholds_df['autism_recall'].values-desired_autism_recall)<0.02]
    if len(restricted_df.index)<1 and desired_specificity_bin_width>0.02:
        restricted_df = thresholds_df[np.abs(thresholds_df['autism_recall'].values-desired_autism_recall)<0.04]
    assert len(restricted_df.index)>=1


#     if len(restricted_df.index)<1:
#         restricted_df = thresholds_df[np.abs(thresholds_df['autism_recall'].values-desired_autism_recall)<0.08]
    
    ### If we are handling a "dunno" scenario then apply coverage restriction and return best average recall
    if 'coverage' in restricted_df.columns:
        restricted_df = restricted_df[restricted_df['coverage']>min_coverage]
        print 'After coverage req: ', restricted_df['autism_recall'].values
#         optimization_key = 'average_recall'
#     else:
#         optimization_key = 'AUC'
    optimization_key='not_recall'
    best_dict = restricted_df.sort_values(optimization_key, ascending=False).reset_index(drop=True).ix[0].to_dict()
#     best_results_dict['autism_recall'] = best_results_dict['sensitivity']
#     best_results_dict['not_recall'] = best_results_dict['specificity']
#     best_results_dict['average_recall'] = 0.5*(best_results_dict['autism_recall']+best_results_dict['not_recall'])
#     best_results_dict['average_precision'] = 0.5*(best_results_dict['autism_precision']+best_results_dict['not_precision'])
    print 'for ', desc, ', best results are: ', best_dict
    return best_dict


def explore_classification_thresholds(target_column, response_column, positive_class_name, negative_class_name,
                                      desc=''):
    
    #useless addons that get_classifier_performance_metrics expects but don't apply in this particular case
    y_predicted_probs = [ [x, 1.0-x] for x in response_column]
    outcome_class_priors = [0.5, 0.5] 

    thresholds_to_try = np.arange(np.min(response_column), np.max(response_column), (np.max(response_column) - np.min(response_column))/1000.0)
    
    
    results = []
    for threshold in thresholds_to_try:

        y_predicted_without_dunno = response_column.apply(lambda x: positive_class_name if x>threshold else negative_class_name)
        y_predicted_with_dunno = y_predicted_without_dunno

        metrics = get_classifier_performance_metrics([positive_class_name, negative_class_name], outcome_class_priors, target_column, y_predicted_without_dunno, y_predicted_with_dunno, y_predicted_probs)

        sensitivity = metrics['without_dunno']['dataset_recall_per_class'][positive_class_name]
        specificity = metrics['without_dunno']['dataset_recall_per_class'][negative_class_name]
        autism_precision = metrics['without_dunno']['dataset_precision_per_class'][positive_class_name]
        not_precision = metrics['without_dunno']['dataset_precision_per_class'][negative_class_name]
        AUC = cp.deepcopy(metrics['without_dunno']['auc'])
        if desc != '' and threshold == thresholds_to_try[0]:
            print 'Exploring classification thresholds. For ', desc, ', AUC: ', AUC, '.'

        if sensitivity>0.75 and sensitivity<0.85 and desc != '':
            print 'Old for ', desc, ', threshold: ', threshold, ', autism recall: ', sensitivity, ', not recall: ', specificity
        results+= [[threshold, sensitivity, specificity, autism_precision, not_precision, AUC]]

    output = pd.DataFrame(results, columns=['threshold', 'sensitivity', 'specificity', 'autism_precision', 'not_precision', 'AUC'])
    return output

def explore_dunno_ranges(target_column, response_column, positive_class_name, negative_class_name, desc=''):
    
    #useless addons that get_classifier_performance_metrics expects but don't apply in this particular case
    y_predicted_probs = [ [x, 1.0-x] for x in response_column]
    outcome_class_priors = [0.5, 0.5] 

    dunno_ranges_to_try = []
    for lower_value in np.arange(0.00, 1.01, 0.01):
        for upper_value in np.arange(lower_value, 1.01, 0.01):
            dunno_ranges_to_try += [(lower_value, upper_value)]

    results = []
    for dunno_range in dunno_ranges_to_try:

        y_predicted_with_dunno = response_column.apply(lambda x: positive_class_name if x>dunno_range[1] else (negative_class_name if x<dunno_range[0] else 'dunno' ))
        y_predicted_without_dunno = y_predicted_with_dunno
        
        conclusive_mask = np.array([False if ele=='dunno' else True for ele in y_predicted_with_dunno.values])
        conclusive_probs = response_column.values[conclusive_mask]
        conclusive_targets = target_column.values[conclusive_mask]
        
        
        metrics = get_classifier_performance_metrics([positive_class_name, negative_class_name], outcome_class_priors, target_column, y_predicted_without_dunno, y_predicted_with_dunno, y_predicted_probs)

#         print 'dunno_range: ', dunno_range
#         #print 'metrics: ', metrics
#         print 'positive_class_name: ', positive_class_name
#         print 'negative_class_name: ', negative_class_name

#         if 'with_dunno' not in metrics.keys():
#             continue
#         print 'metrics with dunno: ', metrics['with_dunno']
#         print 'classification rate: ', metrics['with_dunno']['dataset_classification_rate']
#         print 'recalls: ', metrics['with_dunno']['dataset_recall_per_class_where_classified']
        try:
            coverage = metrics['with_dunno']['dataset_classification_rate']
            sensitivity = metrics['with_dunno']['dataset_recall_per_class_where_classified'][positive_class_name]
            specificity = metrics['with_dunno']['dataset_recall_per_class_where_classified'][negative_class_name]
            autism_precision = metrics['with_dunno']['dataset_precision_per_class'][positive_class_name]
            not_precision = metrics['with_dunno']['dataset_precision_per_class'][negative_class_name]
            AUC = metrics['with_dunno']['auc']
        except:
            coverate = 0
            sensitivity = 0
            specificity = 0
            autism_precision = 0
            not_precision = 0
            AUC = 0

        results+= [[dunno_range[0], dunno_range[1], coverage, sensitivity, specificity, autism_precision, not_precision, AUC]]

    
        
        
    output = pd.DataFrame(results, columns=['dunno_from', 'dunno_to', 'coverage', 'sensitivity', 'specificity', 'autism_precision', 'not_precision', 'AUC'])
    #### peek at some of the best results:
    best_grid_search_output = output[(output['coverage']>0.75) & (output['sensitivity']>0.7) & (output['sensitivity']<0.9)].sort_values('AUC', ascending=False)
    print 'For explore_dunno_ranges, ', desc, ', best grid search results = '
    pd.set_option('display.max_colwidth', -1)
    print best_grid_search_output.head(60)
    pd.reset_option('display.max_colwidth')
    return output

def explore_composite_classification_thresholds(dataframe, target_column_name, response_column_name,
        classifier_variant_column_name, positive_class_name, negative_class_name, specificity_bin_width = 0.025,
        desc=''):
    print 'start explore compositve classification thresholds'
    determination_column_name = 'determination'
    
    AUC = metrics.roc_auc_score([ele == 'autism' for ele in dataframe[target_column_name].values], dataframe[response_column_name].values)

    variants = []
    groups = {}
    thresholds = {}
    results = []
    for variant, group in dataframe.groupby(classifier_variant_column_name):
        variants += [variant]
        groups[variant] = group
    
    for variant in variants:
        group = groups[variant]
        thresholds[variant] = np.arange(np.min(group[response_column_name]), np.max(group[response_column_name]), (np.max(group[response_column_name]) - np.min(group[response_column_name]))/100.0)
    threshold_combinations = get_combinations([thresholds[variant] for variant in variants])
    
    n_threshold_combinations = len(threshold_combinations)
    print_level = n_threshold_combinations / 100
    for tidx, threshold_combination in enumerate(threshold_combinations):
        if tidx % print_level == 0:
            print 'start threshold combination ', tidx, ' of ', n_threshold_combinations
        
        total_true_positives = 0
        total_true_negatives = 0
        total_false_positives = 0
        total_false_negatives = 0
        for i in range(0, len(variants)):
            variant = variants[i]
            threshold = threshold_combination[i]
            group = groups[variant]
            group[determination_column_name] = group.apply(lambda row: positive_class_name if row[response_column_name]>threshold else negative_class_name, axis=1)
            
            true_positives = len(group[(group[determination_column_name]==positive_class_name) & (group[target_column_name]==positive_class_name)])
            true_negatives = len(group[(group[determination_column_name]==negative_class_name) & (group[target_column_name]==negative_class_name)])
            false_positives = len(group[(group[determination_column_name]==positive_class_name) & (group[target_column_name]!=positive_class_name)])
            false_negatives = len(group[(group[determination_column_name]==negative_class_name) & (group[target_column_name]!=negative_class_name)])
            
            total_true_positives += true_positives
            total_true_negatives += true_negatives
            total_false_positives += false_positives
            total_false_negatives += false_negatives
            
        sensitivity = float(total_true_positives) / float(total_true_positives+total_false_negatives)
        specificity = float(total_true_negatives) / float(total_true_negatives+total_false_positives)
        autism_precision = float(total_true_positives) / float(total_true_positives+total_false_positives)
        not_precision = float(total_true_negatives) / float(total_true_negatives+total_false_negatives)

        results +=  [[dict(zip(variants, threshold_combination)), sensitivity, specificity, autism_precision, not_precision, AUC]]
        
    grid_search_output = pd.DataFrame(results, columns=['threshold_combination', 'sensitivity', 'specificity', 'autism_precision', 'not_precision', 'AUC'])
    grid_search_output

    grid_search_output['rounded_specificity'] = grid_search_output['specificity'].apply(lambda x: 0 if np.isnan(x) else specificity_bin_width*(int(x/specificity_bin_width)) )
#     grid_search_output['rounded_autism_precision'] = grid_search_output['autism_precision'].apply(lambda x: 0 if np.isnan(x) else specificity_bin_width*(int(x/specificity_bin_width)))
#     grid_search_output['rounded_not_precision'] = grid_search_output['not_precision'].apply(lambda x: 0 if np.isnan(x) else specificity_bin_width*(int(x/specificity_bin_width)))

    sensitivity = grid_search_output.groupby('rounded_specificity')['sensitivity'].max()
    specificity = grid_search_output.groupby('rounded_specificity')['rounded_specificity'].max()
    print 'sensitivity: ', sensitivity
    print 'specificity: ', specificity
    print 'grid_search_output of rounded specificity: ', grid_search_output['rounded_specificity']
    autism_precision = [grid_search_output[(grid_search_output['rounded_specificity']==this_specificity) & (grid_search_output['sensitivity']==this_sensitivity)]['autism_precision'].values[0] for this_specificity, this_sensitivity in zip(specificity.values, sensitivity.values)]
    print 'autism_precision: ', autism_precision
    not_precision = [grid_search_output[(grid_search_output['rounded_specificity']==this_specificity) & (grid_search_output['sensitivity']==this_sensitivity)]['not_precision'].values[0] for this_specificity, this_sensitivity in zip(specificity.values, sensitivity.values)]
    AUC_arr = [grid_search_output[(grid_search_output['rounded_specificity']==this_specificity) & (grid_search_output['sensitivity']==this_sensitivity)]['AUC'].values[0] for this_specificity, this_sensitivity in zip(specificity.values, sensitivity.values)]
    print 'not_precision: ', not_precision
#     autism_precision = grid_search_output.groupby('rounded_autism_precision')['rounded_autism_precision'].max()
#     autism_precision = grid_search_output.groupby('rounded_autism_precision')['rounded_autism_precision'].max()


    output = pd.DataFrame(zip(sensitivity, specificity, autism_precision, not_precision, AUC_arr), columns=['sensitivity', 'specificity', 'autism_precision', 'not_precision', 'AUC'])
    return output


def explore_composite_classification_dunno_ranges(dataframe, target_column_name, response_column_name,
        classifier_variant_column_name, positive_class_name, negative_class_name, specificity_bin_width = 0.025,
        coverage_bin_width=0.025, desc=''):
    def is_conclusive(response, dunno_range):
        if dunno_range[0] <= response <= dunno_range[1]: 
            return False
        return True

    
    print 'Begin exploring composite classification dunno ranges'
    determination_column_name = 'determination'

    AUC = metrics.roc_auc_score([ele == 'autism' for ele in dataframe[target_column_name].values], dataframe[response_column_name].values)

    variants = []
    groups = {}
    dunno_ranges = {}
    results = []
    for variant, group in dataframe.groupby(classifier_variant_column_name):
        variants += [variant]
        groups[variant] = group
    #print 'classifier_variant_column_name: ', classifier_variant_column_name
    #print 'values: ', dataframe[classifier_variant_column_name]
    #print 'variants: ', variants
    
    for variant in variants:
        group = groups[variant]
        dunno_ranges[variant] = []
        for lower_value in np.arange(0.00, 1.1, 0.1):
            for upper_value in np.arange(lower_value, 1.1, 0.1):
                dunno_ranges[variant] += [(lower_value, upper_value)]
    dunno_combinations = get_combinations([dunno_ranges[variant] for variant in variants])


    n_threshold_combinations = len(dunno_combinations)
    print 'n_threshold combinations: ', n_threshold_combinations
    print_level = n_threshold_combinations / 500
    for didx, dunno_combination in enumerate(dunno_combinations):
                
        do_debug = False
#         if didx % print_level == 0:
#             print 'start dunno combination ', didx, ' of ', n_threshold_combinations
#             do_debug = True
        total_inconclusives = 0
        total_true_positives = 0
        total_true_negatives = 0
        total_false_positives = 0
        total_false_negatives = 0

        #print 'Get conclusive mask for dunno_combination: ', dunno_combination
        #print 'responses: ', dataframe[response_column_name].values
        #print 'Variants: ', dataframe[classifier_variant_column_name].values
        variant_indices = [variants.index(classifier_variant) for classifier_variant in dataframe[classifier_variant_column_name].values]
        #print 'variant_indices: ', variant_indices
        conclusive_mask = np.array([True if is_conclusive(response, dunno_combination[variant_index]) else False for variant_index, response in zip(variant_indices, dataframe[response_column_name].values)])
        #print 'Conclusive mask: ', conclusive_mask
        target_where_classified = dataframe[target_column_name].values[conclusive_mask]
        #print 'target_where_classified: ', target_where_classified
        response_where_classified = dataframe[response_column_name].values[conclusive_mask]
        #print 'response_where_classified: ', response_where_classified
        try:
            dunno_AUC = metrics.roc_auc_score([ele == 'autism' for ele in target_where_classified], response_where_classified)
        except:
            dunno_AUC = 0
        #print 'dunno_AUC: ', dunno_AUC

        
        for i in range(0, len(variants)):
            variant = variants[i]
            dunno_range = dunno_combination[i]
            group = groups[variant]
            group[determination_column_name] = group.apply(lambda row: positive_class_name if row[response_column_name]>dunno_range[1] else negative_class_name if row[response_column_name]<dunno_range[0] else 'dunno', axis=1)
            
            inconclusives = len(group[(group[determination_column_name]=='dunno')])
            true_positives = len(group[(group[determination_column_name]==positive_class_name) & (group[target_column_name]==positive_class_name)])
            true_negatives = len(group[(group[determination_column_name]==negative_class_name) & (group[target_column_name]==negative_class_name)])
            false_positives = len(group[(group[determination_column_name]==positive_class_name) & (group[target_column_name]!=positive_class_name)])
            false_negatives = len(group[(group[determination_column_name]==negative_class_name) & (group[target_column_name]!=negative_class_name)])
            
            total_inconclusives += inconclusives
            total_true_positives += true_positives
            total_true_negatives += true_negatives
            total_false_positives += false_positives
            total_false_negatives += false_negatives
            
            if do_debug:
                print 'dunno_range: ', dunno_range
                print 'variant: ', variants[i]
                print 'group: ', list(group[determination_column_name].values)
                print 'total_inconclusives: ', total_inconclusives
                print 'total_true_positives: ', total_true_positives
                print 'total_true_negatives: ', total_true_negatives
                print 'total_false_positives: ', total_false_positives
                print 'total_false_negatives: ', total_false_negatives


        
        inconclusive_rate = float(inconclusives) / float(inconclusives+total_true_positives+total_false_negatives+total_true_negatives+total_false_positives)
        coverage = 1.0 - inconclusive_rate
        sensitivity = 0 if float(total_true_positives+total_false_negatives)==0 else float(total_true_positives) / float(total_true_positives+total_false_negatives)
        specificity = 0 if float(total_true_negatives+total_false_positives)==0 else float(total_true_negatives) / float(total_true_negatives+total_false_positives)
        autism_precision = 0 if float(total_true_positives+total_false_positives)==0 else float(total_true_positives) / float(total_true_positives+total_false_positives)
        not_precision = 0 if float(total_true_negatives+total_false_negatives)==0 else float(total_true_negatives) / float(total_true_negatives+total_false_negatives)

        if do_debug:
            print 'dunno_range: ', dunno_range
            print 'coverage: ', coverage
            print 'sensitivity: ', sensitivity
            print 'specificity: ', specificity
        results +=  [[dict(zip(variants, dunno_combination)), coverage, sensitivity, specificity, autism_precision, not_precision, dunno_AUC]]
        
    grid_search_output = pd.DataFrame(results, columns=['threshold_combination', 'coverage', 'sensitivity', 'specificity', 'autism_precision', 'not_precision', 'AUC'])
    pd.set_option('display.max_rows', 500)
    
    #### peek at some of the best results:
    best_grid_search_output = grid_search_output[(grid_search_output['coverage']>0.75) & (grid_search_output['sensitivity']>0.7) & (grid_search_output['sensitivity']<0.9)].sort_values('AUC', ascending=False)
    print 'For explore_composite_classification_dunno_ranges, ', desc, ', best grid search results = '
    pd.set_option('display.max_colwidth', -1)
    print best_grid_search_output.head(60)
    pd.reset_option('display.max_colwidth')
    
    grid_search_output['rounded_specificity'] = grid_search_output['specificity'].apply(lambda x: 0 if np.isnan(x) else specificity_bin_width*(int(x/specificity_bin_width)) )
    #print 'grid search coverage: ', list(grid_search_output['coverage'].values)
    grid_search_output['rounded_coverage'] = grid_search_output['coverage'].apply(lambda x: 0 if np.isnan(x) else coverage_bin_width*(int(x/coverage_bin_width)) )
    #print 'grid_search_output: ', grid_search_output
    #print 'rounded coverage pre grouping: ', list(grid_search_output['rounded_coverage'].values)
    
    sensitivity = grid_search_output.groupby([ 'rounded_coverage','rounded_specificity'])['sensitivity'].max()
    coverage = grid_search_output.groupby(['rounded_coverage', 'rounded_specificity' ])['rounded_coverage'].max()
    specificity = grid_search_output.groupby(['rounded_coverage', 'rounded_specificity'])['rounded_specificity'].max()
#     autism_precision = grid_search_output[grid_search_output['rounded_specificity']==specificity]['autism_precision'].values[0]
    autism_precision = [grid_search_output[(grid_search_output['rounded_specificity']==this_specificity) & (grid_search_output['sensitivity']==this_sensitivity)]['autism_precision'].values[0] for this_specificity, this_sensitivity in zip(specificity.values, sensitivity.values)]
#     not_precision = grid_search_output[grid_search_output['rounded_specificity']==specificity]['not_precision'].values[0]
    not_precision = [grid_search_output[(grid_search_output['rounded_specificity']==this_specificity) & (grid_search_output['sensitivity']==this_sensitivity)]['not_precision'].values[0] for this_specificity, this_sensitivity in zip(specificity.values, sensitivity.values)]
    AUC_arr = [grid_search_output[(grid_search_output['rounded_specificity']==this_specificity) & (grid_search_output['sensitivity']==this_sensitivity)]['AUC'].values[0] for this_specificity, this_sensitivity in zip(specificity.values, sensitivity.values)]

    #print 'coverage groups: '
    #for group, data in grid_search_output.groupby(['rounded_coverage', 'rounded_specificity' ])['rounded_coverage']:
    #    print 'group: ', group
    #    print 'data: ', data
    #print 'rounded_coverage: ', coverage
    #print 'rounded sensitivity: ', sensitivity
    #print 'rounded specificity: ', specificity
    output = pd.DataFrame(zip(coverage, sensitivity, specificity, autism_precision, not_precision, AUC_arr), columns=['coverage', 'sensitivity', 'specificity', 'autism_precision', 'not_precision', 'AUC'])
    
    return output

# def explore_composite_classification_dunno_ranges(dataframe, target_column_name, response_column_name, classifier_variant_column_name, positive_class_name, negative_class_name, specificity_bin_width = 0.025, coverage_bin_width=0.025):
    
#     determination_column_name = 'determination'


#     variants = []
#     groups = {}
#     dunno_ranges = {}
#     results = []
#     for variant, group in dataframe.groupby(classifier_variant_column_name):
#         variants += [variant]
#         groups[variant] = group
    
#     for variant in variants:
#         group = groups[variant]
#         dunno_ranges[variant] = []
#         for lower_value in np.arange(0.00, 1.1, 0.1):
#             for upper_value in np.arange(lower_value, 1.1, 0.1):
#                 dunno_ranges[variant] += [(lower_value, upper_value)]
#     dunno_combinations = get_combinations([dunno_ranges[variant] for variant in variants])


#     print_level = len(dunno_combinations) / 100


#     for didx, dunno_combination in enumerate(dunno_combinations):
#         do_debug = False
#         if didx %  print_level == 0:
#             print 'start dunno combination ', didx, ' of ', len(dunno_combinations)
#             do_debug = True

#         total_inconclusives = 0
#         total_true_positives = 0
#         total_true_negatives = 0
#         total_false_positives = 0
#         total_false_negatives = 0
#         for i in range(0, len(variants)):
#             variant = variants[i]
#             dunno_range = dunno_combination[i]
#             group = groups[variant]
#             group[determination_column_name] = group.apply(lambda row: positive_class_name if row[response_column_name]>dunno_range[1] else negative_class_name if row[response_column_name]<dunno_range[0] else 'dunno', axis=1)
            
#             inconclusives = len(group[(group[determination_column_name]=='dunno')])
#             true_positives = len(group[(group[determination_column_name]==positive_class_name) & (group[target_column_name]==positive_class_name)])
#             true_negatives = len(group[(group[determination_column_name]==negative_class_name) & (group[target_column_name]==negative_class_name)])
#             false_positives = len(group[(group[determination_column_name]==positive_class_name) & (group[target_column_name]!=positive_class_name)])
#             false_negatives = len(group[(group[determination_column_name]==negative_class_name) & (group[target_column_name]!=negative_class_name)])
            
#             total_inconclusives += inconclusives
#             total_true_positives += true_positives
#             total_true_negatives += true_negatives
#             total_false_positives += false_positives
#             total_false_negatives += false_negatives
            
#             if do_debug:
#                 print 'dunno_range: ', dunno_range
#                 print 'variant: ', variants[i]
#                 print 'group: ', list(group[determination_column_name].values)
#                 print 'total_inconclusives: ', total_inconclusives
#                 print 'total_true_positives: ', total_true_positives
#                 print 'total_true_negatives: ', total_true_negatives
#                 print 'total_false_positives: ', total_false_positives
#                 print 'total_false_negatives: ', total_false_negatives

        
#         inconclusive_rate = float(inconclusives) / float(inconclusives+total_true_positives+total_false_negatives+total_true_negatives+total_false_positives)
#         coverage = 1.0 - inconclusive_rate
#         sensitivity = 0 if float(total_true_positives+total_false_negatives)==0 else float(total_true_positives) / float(total_true_positives+total_false_negatives)
#         specificity = 0 if float(total_true_negatives+total_false_positives)==0 else float(total_true_negatives) / float(total_true_negatives+total_false_positives)
        
#         if do_debug:
#             print 'dunno_range: ', dunno_range
#             print 'coverage: ', coverage
#             print 'sensitivity: ', sensitivity
#             print 'specificity: ', specificity

#         results +=  [[dict(zip(variants, dunno_combination)), coverage, sensitivity, specificity]]
        
#     grid_search_output = pd.DataFrame(results, columns=['threshold_combination', 'coverage', 'sensitivity', 'specificity',])
#     grid_search_output
    
#     grid_search_output['rounded_specificity'] = grid_search_output['specificity'].apply(lambda x: 0 if np.isnan(x) else specificity_bin_width*(int(x/specificity_bin_width)) )
#     grid_search_output['rounded_coverage'] = grid_search_output['coverage'].apply(lambda x: 0 if np.isnan(x) else coverage_bin_width*(int(x/coverage_bin_width)) )

#     sensitivity = grid_search_output.groupby([ 'rounded_coverage','rounded_specificity'])['sensitivity'].max()
#     coverage = grid_search_output.groupby(['rounded_coverage', 'rounded_specificity' ])['rounded_coverage'].max()
#     specificity = grid_search_output.groupby(['rounded_coverage', 'rounded_specificity'])['rounded_specificity'].max()

#     output = pd.DataFrame(zip(coverage, sensitivity, specificity), columns=['coverage', 'sensitivity', 'specificity'])
#     return output


### Make questionnaire ROC curves

In [None]:

### Here we will fill in results for the clinical questionnaire performance tables in the paper
clinical_optimized_metric_dict = collections.OrderedDict()
for metric_name, metric_details in metrics_to_do.iteritems():
    clinical_optimized_metric_dict[metric_name] = collections.OrderedDict(
        (algorithm, []) for algorithm in algorithms.keys()+['questionnaire']
    )

for metric_name, metric_details in metrics_to_do.iteritems():
    clinical_optimized_metric_dict[metric_name]['scenario'] = clinical_scenarios + ['engineering_official_dunno']
    clinical_optimized_metric_dict[metric_name]['latex_scenario'] = [value['latex_desc'] for value in clinical_scenarios_dict.values()]+['with 75\\% coverage']



pd.set_option('display.max_rows', 300)
print 'relevant columns:'
print clinical_data_dfs['questionnaire'][['Patient Id', 'classifier_variant', 'Guardian Qnnaire Score', 'outcome']].sort_values(['Patient Id']).reset_index()
sys.stdout.flush()


print 'mchat_df: ', mchat_df['Patient Id']
print 'cdcl_df: ', cbcl_df['Patient Id']
print 'clinical_data_dfs keys: ', clinical_data_dfs.keys()



print 'before filter on ids, len(mchat df): ', len(mchat_df.index), ', cbcl: ', len(cbcl_df.index)
clinical_ids = clinical_data_dfs['questionnaire']['Patient Id'].values
mchat_df = mchat_df[mchat_df['Patient Id'].isin(clinical_ids)]
cbcl_df = cbcl_df[cbcl_df['Patient Id'].isin(clinical_ids)]
print 'after filter on ids, len(mchat df): ', len(mchat_df.index), ', cbcl: ', len(cbcl_df.index)


for algorithm in algorithms.keys():
    if algorithm in ['video.module.1', 'video.module.2', 'video']: 
        continue
    data_to_plot = []
    ages_desc = 'Age $< 4$ yr, ' if ages_dict[algorithm] == 'young' else 'Age $\\geq 4$ yr'


    for scenario in clinical_scenarios+['model']:
        this_desc = algorithm + '_' + scenario
        this_N = len(clinical_data_dfs[algorithm].index)
        if scenario == 'engineering_official':   ### We'll show official selection results in the ROC curves instead
            continue
        
        if scenario == 'model':
            scenario_desc = 'Questionnaire'
            response_str = 'Guardian Qnnaire Score'
        else:
            scenario_desc = clinical_scenarios_dict[scenario]['latex_desc']
            #this_desc  = algorithm + '_' + clinical_scenarios_dict[scenario]['latex_desc']
            #print 'make plots for ', this_desc
            response_str = 'model_response_'+scenario
        
        if scenario == 'model':
            color = 'orange'
            linestyle = 'dashdot'
            linewidth = 4
        else:
            color = clinical_scenarios_dict[scenario]['color']
            linestyle = clinical_scenarios_dict[scenario]['linestyle']
            linewidth = clinical_scenarios_dict[scenario]['linewidth']
        classifier_info = {'label': ages_desc+', '+scenario_desc, 'color': color,
                           'linestyle': linestyle, 'linewidth': linewidth}
        classifier_data = explore_classification_thresholds(clinical_data_dfs[algorithm]['outcome'],
                            clinical_data_dfs[algorithm][response_str], 'autism', 'not', desc=algorithm+'_'+scenario)
        print 'For desc: ', this_desc, ', classifier_data: ', classifier_data
        print 'For desc: ', this_desc, ', AUC details: ', classifier_data['AUC'].values
        best_results_dict = get_optimized_metrics_dict_from_thresholds_df(classifier_data, desired_autism_recall=0.8,
                                                                    min_coverage=0.75, desc=algorithm+'_'+scenario)
        for metric_name, metric_details in metrics_to_do.iteritems():
            metric_value = best_results_dict[metric_name]
            clinical_optimized_metric_dict[metric_name][algorithm].append(metric_value)
            print 'For ', this_desc, ', metric: ', metric_name, ', append ', metric_value
        print 'For ', this_desc, ', clinical_optimized_metric_dict is now ', clinical_optimized_metric_dict
            
   
        data_to_plot.append((classifier_info, classifier_data))



        
        if scenario == 'model':
            dunno_classifier_info = {'label': ages_desc+', '+scenario_desc, 'color': 'black',
                    'linewidth': linewidth, 'linestyle': 'solid', 'coverage': 0.75}
            dunno_classifier_plotting_data = explore_dunno_ranges(clinical_data_dfs[algorithm]['outcome'],
                    clinical_data_dfs[algorithm][response_str], 'autism', 'not', desc=algorithm+'_'+scenario)
            data_to_plot.append((dunno_classifier_info, dunno_classifier_plotting_data))
            best_results_dict = get_optimized_metrics_dict_from_thresholds_df(dunno_classifier_plotting_data, desired_autism_recall=0.8,
                                        min_coverage=0.75, desc=algorithm+'_'+scenario)
            print 'For model scenario, algorithm: ', algorithm, ', best_results_dict: ', best_results_dict
            for metric_name, metric_details in metrics_to_do.iteritems():
                metric_value = best_results_dict.get(metric_name, np.nan)
                print 'For modle scenario, dunno case, algorithm: ', algorithm, ', metric: ', metric_name, ', metric_value: ', metric_value
                clinical_optimized_metric_dict[metric_name][algorithm].append(metric_value)

    title = ''
    plt, fig = plot_classifier_profiles( data_to_plot , title, ylim=(0.4,1.), legend_font_size=14)
    if purpose_of_run == 'for_plotting':
        filename = 'images/'+algorithm+'_ROC_curves.png'
    else:
        filename = 'images/'+algorithm+'_ROC_curves_fine_binned.png'
    fig.savefig(filename, bbox_inches='tight', pad_inches=0)

    
    
print 'Now do combined plots. Pre-combination, optimized metrics dict looks like this: '
print clinical_optimized_metric_dict
classifier_variant_column_name = 'classifier_variant'


mchat_df_to_plot = mchat_df[mchat_df['Mchat Final Score']>=0]
### Merge to get the needed outcome results
mchat_df_to_plot = mchat_df_to_plot.merge(clinical_data_dfs['questionnaire'], on='Patient Id', how='left', suffixes=('','_dummy'))
mchat_info = {'label': "MCHAT", 'color': 'darkgrey', 'linestyle': 'dashed', 'linewidth': 3}
mchat_plotting_data = explore_classification_thresholds(mchat_df_to_plot['outcome'], mchat_df_to_plot['Mchat Final Score'], 'autism', 'not')
cbcl_df_to_plot = cbcl_df[cbcl_df['CBCL Score']>=0]
cbcl_df_to_plot = cbcl_df_to_plot.merge(clinical_data_dfs['questionnaire'], on='Patient Id', how='left', suffixes=('','_dummy'))
cbcl_info = {'label': "CBCL", 'color': 'lightgrey', 'linestyle': 'dotted', 'linewidth': 3}
cbcl_plotting_data = explore_classification_thresholds(cbcl_df_to_plot['outcome'], cbcl_df_to_plot['CBCL Score'], 'autism', 'not')

data_to_plot = [(mchat_info, mchat_plotting_data), (cbcl_info, cbcl_plotting_data)]


all_scenario_plotting_dfs = collections.OrderedDict()
for scenario in clinical_scenarios+['model']:
    if scenario == 'engineering_official':   ### We'll show official selection results in the ROC curves instead
        continue
        
    if scenario == 'model':
        scenario_desc = 'Aggregate features variant'
        response_str = 'Guardian Qnnaire Score'
    else:
        scenario_desc = clinical_scenarios_dict[scenario]['latex_desc']    
        response_str = 'model_response_'+scenario
    
    if scenario == 'model':
        color = 'orange'
        linestyle = 'dashdot'
        linewidth = 4
    else:
        color = clinical_scenarios_dict[scenario]['color']
        linestyle = clinical_scenarios_dict[scenario]['linestyle']
        linewidth = clinical_scenarios_dict[scenario]['linewidth']
    
    this_N = len(clinical_data_dfs['questionnaire'].index)
    classifier_info = {'label': scenario_desc, 'color': color,
                      'linestyle': linestyle, 'linewidth': linewidth}

    classifier_data = explore_composite_classification_thresholds(clinical_data_dfs['questionnaire'], 'outcome',
                response_str, classifier_variant_column_name, 'autism', 'not', specificity_bin_width=desired_specificity_bin_width)
    if scenario=='model':
        print 'For model, composite classifier info: ', classifier_info
        pd.set_option('display.max_rows', 50000) 
        pd.set_option('display.max_columns', 50000)                
        print 'For model, composite classifier data: ', classifier_data 
        print 'Arose from df ', clinical_data_dfs['questionnaire']
        pd.set_option('display.max_rows', 500)
        pd.set_option('display.max_columns', 50)                


    best_results_dict = get_optimized_metrics_dict_from_thresholds_df(classifier_data, desired_autism_recall=0.8,
                                                                    min_coverage=0.75, desc=algorithm+'_'+scenario)
    print 'combined optimized thresholds dict results: ', best_results_dict
    for metric_name, metric_details in metrics_to_do.iteritems():
        metric_value = best_results_dict.get(metric_name, np.nan)
        clinical_optimized_metric_dict[metric_name]['questionnaire'].append(metric_value)
    print 'for ', (algorithm+'_'+scenario), ', plot classifier_data: ', classifier_data
    data_to_plot.append((classifier_info, classifier_data))
    

    if scenario == 'model':
        
        dunno_classifier_info = {'label': 'Inconclusive results variant', 'color': 'black',
                'linestyle': 'solid', 'linewidth': 5}
        dunno_classifier_plotting_data = explore_composite_classification_dunno_ranges(clinical_data_dfs['questionnaire'],
                'outcome', 'Guardian Qnnaire Score', classifier_variant_column_name, 'autism', 'not', 
                specificity_bin_width=desired_specificity_bin_width, desc='questionnaire model')
        pd.set_option('display.max_rows', 500)
        print 'dunno_classifier_plotting_data: ', dunno_classifier_plotting_data
        data_to_plot.append((dunno_classifier_info, dunno_classifier_plotting_data))
        best_results_dict = get_optimized_metrics_dict_from_thresholds_df(dunno_classifier_plotting_data, desired_autism_recall=0.8,
                                        min_coverage=0.75, desc=algorithm+'_'+scenario)
        print 'For model scenario, best_results_dict: ', best_results_dict
        for metric_name, metric_details in metrics_to_do.iteritems():
            metric_value = best_results_dict.get(metric_name, np.nan)
            print 'for model scenario, append metric for questionairre, metric: ', metric_name, ', value: ', metric_value
            clinical_optimized_metric_dict[metric_name]['questionnaire'].append(metric_value)
        print 'After adding in model scenario, optimized metric dict is ', clinical_optimized_metric_dict


    #def explore_composite_classification_dunno_ranges(dataframe, target_column_name, response_column_name, classifier_variant_column_name, positive_class_name, negative_class_name, specificity_bin_width = 0.025, coverage_bin_width=0.025):

        print 'plot classifier profiles for scenario: ', scenario
        print 'len of data to plot: ', len(data_to_plot)
        print 'data_to_plot: ', data_to_plot
        #title = "Sensitivity-Specificity Tradeoff for questionnaire"
        title = ''
        plt, fig = plot_classifier_profiles( data_to_plot , title, ylim=(0.4,1.))
        if purpose_of_run == 'for_plotting':
            png_filename = 'images/'+scenario+'_questionnaire_ROC.png'
        else:
            png_filename = 'images/'+scenario+'_questionnaire_ROC_fine_binned.png'
        print 'save file with name: ', png_filename

fig.savefig(png_filename, bbox_inches='tight', pad_inches=0)
   

clinical_metric_dfs = collections.OrderedDict()
for metric_name, metric_details in metrics_to_do.iteritems():
    print 'Now make dataframes for use in tables from clinical_optimized_metric_dict: ', clinical_optimized_metric_dict
    clinical_metric_dfs[metric_name] = pd.DataFrame(clinical_optimized_metric_dict[metric_name])
    clinical_metric_dfs[metric_name].index.names = [metric_details['desc']]
if purpose_of_run == 'for_tables':
    pd.set_option('display.max_columns', 200)
    print 'make clinical latex tables for ', clinical_metric_dfs
    make_clinical_latex_tables(clinical_metric_dfs, desc='new')





### Now load the video data

In [None]:
f = open(input_video_data, 'rU')
dictReader = csv.DictReader(f, delimiter=',')
data = []
for row in dictReader:
    data.append(row)

clinical_video_df = pd.DataFrame(data)
clinical_video_df.shape

clinical_video_df['Video Score'] = clinical_video_df['response'].apply(float)
clinical_video_df['Patient Id'] = clinical_video_df['Patient Id'].astype(int)

print clinical_video_df.head(1)

print clinical_video_df['Video Version'].value_counts()

print 'Patient Ids in the video dfs: ', list(clinical_video_df['Patient Id'].values)

In [None]:
def is_missing_value(value):
    """Return True when *value* stands for "no data".

    Missing means: ``None``, a float NaN, or a string that is empty or reads
    'None' / 'NaN' (surrounding whitespace ignored). Any other value,
    including ordinary strings such as 'autism' or 'not', is not missing.

    The previous implementation called ``np.isnan`` on every value inside a
    bare ``except``; ``np.isnan`` raises ``TypeError`` for strings, so every
    string was reported as missing and the 'None' / 'NaN' comparisons were
    never reached.
    """
    if value is None:
        return True
    try:
        # Numeric values: missing exactly when they are NaN.
        return bool(np.isnan(value))
    except (TypeError, ValueError):
        # Not a numeric scalar (e.g. a text label): fall through to the
        # textual check below.
        pass
    try:
        text = value.strip()
    except AttributeError:
        # Neither numeric nor text: nothing identifies it as a missing marker.
        return False
    return text in ('', 'None', 'NaN')

combined_clinical_df = clinical_data_dfs['questionnaire'].merge(clinical_video_df, on='Patient Id', how='inner', suffixes=('_qnnaire','_video'))
print 'combined_clinical_df: ', combined_clinical_df

combined_clinical_df['outcome'] = [qnnaire_outcome if is_missing_value(video_outcome) else video_outcome for qnnaire_outcome,
        video_outcome in zip(combined_clinical_df['outcome_qnnaire'].values, combined_clinical_df['outcome_video'].values)]

print 'resulting outcome: ', list(combined_clinical_df['outcome'].values)

combined_clinical_df = combined_clinical_df.merge(mchat_df, left_on='Patient Id', right_on='Patient Id', how='left', suffixes=('','_mchat'))
combined_clinical_df = combined_clinical_df.merge(cbcl_df, left_on='Patient Id', right_on='Patient Id', how='left', suffixes=('','_cbcl'))



### Do logit combination

In [None]:
from sklearn import linear_model

features = ['Guardian Qnnaire Score', 'Video Score']
feature_encoding_map = dict.fromkeys(features, 'scalar')

outcome_classes = ['autism', 'not']
outcome_class_priors = [0.5, 0.5]

### Sample weights not defined for logistic regression in present version of sklearn.
### The class_weight='balanced' is the best we can do at the moment.

# One logistic regression per video module, each fitted on that module's rows.
# NOTE: `features` is rebound to the value returned by the first call, so the
# second call receives that returned value, exactly as before.
module1_rows = combined_clinical_df[combined_clinical_df['Video Version']=='module1']
(logit_mod1_model, features, y_predicted_without_dunno,
 y_predicted_with_dunno, y_predicted_probs) = all_data_model(
    module1_rows, features, feature_encoding_map,
    target_column='outcome', sample_weight=None, dunno_range=[0, 0],
    model_function=linear_model.LogisticRegression,
    C=100000.0, penalty='l2', class_weight='balanced')

module2_rows = combined_clinical_df[combined_clinical_df['Video Version']=='module2']
(logit_mod2_model, features, y_predicted_without_dunno,
 y_predicted_with_dunno, y_predicted_probs) = all_data_model(
    module2_rows, features, feature_encoding_map,
    target_column='outcome', sample_weight=None, dunno_range=[0, 0],
    model_function=linear_model.LogisticRegression,
    C=100000.0, penalty='l2', class_weight='balanced')

def apply_model(model, data_row):
    """Return one minus the logistic probability of *model* for *data_row*.

    With z = intercept + coef[0] * 'Guardian Qnnaire Score'
                       + coef[1] * 'Video Score'
    and p = exp(z) / (1 + exp(z)), the result is 1 - p = 1 / (1 + exp(z)).

    model    -- object exposing ``intercept_[0]`` and ``coef_[0][0..1]``
                (presumably a fitted sklearn LogisticRegression -- confirm
                against all_data_model).
    data_row -- mapping with 'Guardian Qnnaire Score' and 'Video Score'.

    The value is computed in a form that never passes a large positive
    argument to ``math.exp``; the direct formula raised OverflowError once
    z exceeded roughly 709.
    """
    linear_predictor = (model.intercept_[0]
                        + model.coef_[0][0]*data_row['Guardian Qnnaire Score']
                        + model.coef_[0][1]*data_row['Video Score'])
    if linear_predictor >= 0:
        # 1 / (1 + e^z) == e^-z / (1 + e^-z); e^-z lies in (0, 1] here.
        decay = math.exp(-linear_predictor)
        return float(decay) / float(1.0 + decay)
    # z < 0 (or NaN, which propagates as NaN): e^z cannot overflow.
    return 1.0 / float(1.0 + math.exp(linear_predictor))

combined_clinical_df['Logit Combinator Score']  = combined_clinical_df.apply(lambda row: apply_model(logit_mod1_model, row) if row['Video Version']=='module1' else apply_model(logit_mod2_model, row), axis=1)

print 'mchat and cbcl results: ', combined_clinical_df[['Mchat Final Score', 'CBCL Score']]

### Make ROC curves including video 

In [None]:
for this_algorithm in ['guardian.qnnaire.3-', 'guardian.qnnaire.4+', 'All ages']:
    #if this_algorithm != 'All ages': continue
    
    if this_algorithm == 'All ages':
        print 'len combined clinical df: ', len(combined_clinical_df.index)
        df_to_plot = cp.deepcopy(combined_clinical_df)
        do_composite_analysis = True
    else:
        filter_column = algorithms[this_algorithm]['filter_key']
        filter_value = algorithms[this_algorithm]['filter_value']
        df_to_plot = combined_clinical_df[combined_clinical_df[filter_column]==filter_value]
        do_composite_analysis = False
        
    
    this_N = len(df_to_plot.index)
    for method in ['normal', 'dunno']:
        print 'Do method ', method, ' for algorithm: ', this_algorithm
        
        questionnaire_info = {'label': 'Questionnaire', 'color': 'blue',
                            'linewidth': 5, 'linestyle': 'dashed'}
        video_info = {'label': 'Video', 'color': 'red',
                     'linewidth': 5, 'linestyle': 'dotted'}
        combined_info = {'label': 'Combined questionnaire and video', 'color': 'black', 'color': 'black',
                     'linewidth': 5, 'linestyle': 'solid'}
        if method == 'dunno':
            questionnaire_info['coverage'] = 0.75
            video_info['coverage'] = 0.75
            combined_info['coverage'] = 0.75
        if do_composite_analysis:
            questionnaire_classifier_variant_column_name = 'classifier_variant'
            video_classifier_variant_column_name = 'Video Version'
            sys.stdout.flush()
            
            if method == 'dunno':
                
                print 'Do composite analysis for questionnaire'
                questionnaire_plotting_data = explore_composite_classification_dunno_ranges(df_to_plot, 'outcome',
                        'Guardian Qnnaire Score', questionnaire_classifier_variant_column_name, 'autism', 'not',
                         specificity_bin_width=desired_specificity_bin_width, desc='questionnaire')
                print 'Do composite analysis for video'
                sys.stdout.flush()
                video_plotting_data = explore_composite_classification_dunno_ranges(df_to_plot, 'outcome',
                    'Video Score', video_classifier_variant_column_name, 'autism', 'not',
                    specificity_bin_width=desired_specificity_bin_width, desc='video')
                print 'Do composite analysis for combination'
                sys.stdout.flush()

                combined_plotting_data = explore_composite_classification_dunno_ranges(df_to_plot,
                        'outcome', 'Logit Combinator Score', questionnaire_classifier_variant_column_name,
                        'autism', 'not', specificity_bin_width=desired_specificity_bin_width, desc='combined')
            else:   ### method is not dunno
                
#                 def explore_composite_classification_thresholds(dataframe, target_column_name, response_column_name,
#         classifier_variant_column_name, positive_class_name, negative_class_name, specificity_bin_width = 0.025):
                questionnaire_plotting_data = explore_composite_classification_thresholds(df_to_plot, 'outcome',
                        'Guardian Qnnaire Score', questionnaire_classifier_variant_column_name, 'autism', 'not',
                         specificity_bin_width=desired_specificity_bin_width, desc='questionnaire')
                pd.set_option('display.max_rows', 50000) 
                pd.set_option('display.max_columns', 50000)                
                print 'Composite questionnaire plotting data: ', questionnaire_plotting_data
                print 'Arose from df ', df_to_plot
                pd.set_option('display.max_rows', 500)
                pd.set_option('display.max_columns', 50)                


                
                print 'Do composite analysis for video'
                sys.stdout.flush()
                video_plotting_data = explore_composite_classification_thresholds(df_to_plot, 'outcome',
                    'Video Score', video_classifier_variant_column_name, 'autism', 'not',
                    specificity_bin_width=desired_specificity_bin_width, desc='video')
                print 'Do composite analysis for combination'
                sys.stdout.flush()
                combined_plotting_data = explore_composite_classification_thresholds(df_to_plot,
                        'outcome', 'Logit Combinator Score', questionnaire_classifier_variant_column_name,
                        'autism', 'not', specificity_bin_width=desired_specificity_bin_width, desc='combined')
        else:    #### Do non-composite analysis (3- or 4+ instead of age combination)
            sys.stdout.flush()
            
            if method == 'dunno':
                print 'Do non composite analysis for questionnaire'
                questionnaire_plotting_data = explore_dunno_ranges(df_to_plot['outcome'],
                          df_to_plot['Guardian Qnnaire Score'], 'autism', 'not', desc=this_algorithm+' questionnaire')
                print 'Do non composite analysis for video'
                sys.stdout.flush()
                video_plotting_data = explore_dunno_ranges(df_to_plot['outcome'],
                        df_to_plot['Video Score'], 'autism', 'not', desc=this_algorithm +' video')
                print 'Do non composite analysis for combination'
                sys.stdout.flush()
          
                combined_plotting_data = explore_dunno_ranges(df_to_plot['outcome'],
                        df_to_plot['Logit Combinator Score'], 'autism', 'not', desc=this_algorithm+'combined')
            else:   ### method is not dunno
#                 def explore_classification_thresholds(target_column, response_column, positive_class_name, negative_class_name):

                print 'Do non composite analysis for questionnaire'
                questionnaire_plotting_data = explore_classification_thresholds(df_to_plot['outcome'],
                          df_to_plot['Guardian Qnnaire Score'], 'autism', 'not', desc=this_algorithm+' questionnaire')
                pd.set_option('display.max_rows', 50000) 
                pd.set_option('display.max_columns', 50000)  
                print 'for ', this_algorithm, ', plot questionnaire classifier_data: ', questionnaire_plotting_data
                

                print 'Do non composite analysis for video'
            
                sys.stdout.flush()
                video_plotting_data = explore_classification_thresholds(df_to_plot['outcome'],
                        df_to_plot['Video Score'], 'autism', 'not', desc=this_algorithm+' video')
                print 'Do non composite analysis for combination'
                sys.stdout.flush()
                print 'for ', this_algorithm, ', plot video classifier_data: ', video_plotting_data 


          
                combined_plotting_data = explore_classification_thresholds(df_to_plot['outcome'],
                        df_to_plot['Logit Combinator Score'], 'autism', 'not', desc=this_algorithm+' combined')
                print 'for ', this_algorithm, ', combined_plotting_data: ', combined_plotting_data

        title = ''
        plt, fig = plot_classifier_profiles([(mchat_info, mchat_plotting_data), (cbcl_info, cbcl_plotting_data), 
                (questionnaire_info, questionnaire_plotting_data),
                (video_info, video_plotting_data), (combined_info, combined_plotting_data)], 
                title, ylim=(0.4,1.))
        method_file_desc = '' if method == 'normal' else '_dunno'
        if purpose_of_run == 'for_plotting':
            filename = 'images/combined_with_video_ROC_curves_'+this_algorithm.replace(' ', '_')+method_file_desc+'.png'
        else:
            filename = 'images/combined_with_video_ROC_curves_'+this_algorithm.replace(' ', '_')+method_file_desc+'_fine_binned.png'

        fig.savefig(filename, bbox_inches='tight', pad_inches=0)