# Imports

In [209]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from datetime import date, timedelta 
from scipy import stats
from pandas import *
import re
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' (the old names
# were removed in 3.8), so fall back to the new name when the legacy one is unavailable
plt.style.use(['seaborn-darkgrid' if 'seaborn-darkgrid' in plt.style.available else 'seaborn-v0_8-darkgrid'])

import warnings
warnings.filterwarnings('ignore')

# Processing

In [210]:
# Combining all the dataframes
# This will be useful when plotting
def make_pretty(csv_list, ylist):
    """Load one CSV per year and stack them into a single DataFrame indexed by ``name``.

    For every ``(csv, year)`` pair the file is read, rows without a ``name`` are
    dropped, the ``Unnamed: 0`` column is removed when present, every ``-1`` is
    replaced by NaN, ``name`` becomes the index and an integer ``year`` column
    is added.

    Parameters
    ----------
    csv_list : iterable
        Paths (or anything ``pd.read_csv`` accepts), one per year.
    ylist : iterable of int
        Year of each file, paired positionally with ``csv_list``.

    Returns
    -------
    pandas.DataFrame
        The per-year frames concatenated in input order.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``csv_list`` / ``ylist`` yield no pairs.
    """
    df_list = []
    for csv, year in zip(csv_list, ylist):
        frame = (
            pd.read_csv(csv)
            .dropna(subset=['name'])
            # 'Unnamed: 0' only exists when the CSV was saved together with its index
            .drop(['Unnamed: 0'], axis=1, errors='ignore')
            # presumably -1 is the source's missing-value marker -- TODO confirm
            .replace(-1, np.nan)
            .set_index('name')
        )
        # Scalar assignment broadcasts the year to every row
        frame['year'] = int(year)
        df_list.append(frame)
    return pd.concat(df_list)

# Build the per-year file names from the years themselves, then combine all files
ylist = np.arange(2015,2019)
csv_list = ['data{}_c_d.csv'.format(year % 100) for year in ylist]
df = make_pretty(csv_list, ylist)

In [262]:
# Drop the variables that have star ratings but no data:
# c_health_plan_quality_improvement_star and d_drug_plan_quality_improvement_star
df = df.drop(['c_health_plan_quality_improvement_star', 'd_drug_plan_quality_improvement_star'], axis=1, errors='ignore')

# QDA and LDA
* QDA tends to work better so the functions are tuned to that model
* If you want to see LDA outputs just change the classifier (clf)

## QDA

**Cutpoint Dictionary**

In [250]:
# Predict cutpoints
def cutpoints(df, year):
    """Estimate star-rating cutpoints for every metric of one year with QDA.

    For each ``<metric>_star`` column a one-feature
    ``QuadraticDiscriminantAnalysis`` model is fit to predict the star rating
    from the metric value. The metric is then averaged per *predicted* star and
    each cutpoint is the midpoint between the means of two adjacent stars.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``year`` column plus ``<metric>`` / ``<metric>_star``
        column pairs.
    year : int
        Only rows with ``df['year'] == year`` are used.

    Returns
    -------
    dict
        Maps metric name to a one-column DataFrame indexed by predicted star;
        the first row is NaN because the lowest star has no lower neighbour.

    Notes
    -----
    Metrics that are skipped are reported on stdout: those with no usable rows
    (or no metric column at all) and those where some star class has a single
    sample, for which the covariance is ill defined.
    """
    df = df[df['year'] == year]
    suffix = '_star'
    star_cols = df.columns[df.columns.str.endswith(suffix)]

    cp_dict, no_data, no_cov = {}, [], []
    for met_s in star_cols:
        met = met_s[:-len(suffix)]
        # A star column without its metric column has nothing to fit on
        if met not in df.columns:
            no_data.append(met)
            continue
        met_df = df[[met, met_s]].dropna()
        if met_df.shape[0] == 0:
            no_data.append(met)
            continue
        # Same guard as training_validation_acc: a class with a single sample
        # has an ill defined covariance
        if met_df[met_s].value_counts().min() < 2:
            no_cov.append(met)
            continue
        clf = QuadraticDiscriminantAnalysis()
        # Labels are passed as a 1-D Series, the shape scikit-learn expects for y
        clf.fit(met_df[[met]], met_df[met_s])
        # Replace the observed stars by the model's predictions before averaging
        met_df = met_df.assign(**{met_s: clf.predict(met_df[[met]])})
        star_means = met_df.groupby(met_s).mean()
        # previous mean + half the gap to the current mean = midpoint between them
        cp_dict[met] = star_means.shift() + star_means.diff()/2
    print('The '+', '.join(no_data)+' fields are empty')
    if no_cov:
        print('The '+', '.join(no_cov)+' fields have ill defined covariance (ie. single sample in a given class)')
    return cp_dict

In [474]:
# Cutpoints for every metric of the 2018 data (skipped fields are printed below)
cp_dict = cutpoints(df, 2018)

The c_cardiovascular_cholesterol_screening, c_diabetes_cholesterol_controlled, c_diabetes_cholesterol_screening, d_diabetes_treatment, d_high_risk_medication fields are empty


In [265]:
# Example output
# Each row holds the lower cutpoint of that star level (NaN for 1 star, which has none):
# 1 star: up to 65, 2 star:  between 65 and 82.69,  etc.
cp_dict['c_adult_bmi_assessment']

Unnamed: 0_level_0,c_adult_bmi_assessment
c_adult_bmi_assessment_star,Unnamed: 1_level_1
1.0,
2.0,65.0
3.0,82.688462
4.0,92.796299
5.0,97.439571


**Investigation of Training and Validation Accuracy**

In [39]:
# For my own testing
# Not meant to be used in production
def training_validation_acc_printer(df):
    """Print QDA training and validation accuracy for every metric in ``df``.

    For each ``<metric>_star`` column a one-feature
    ``QuadraticDiscriminantAnalysis`` model is fit on an 80/20 split (fixed
    ``random_state``) and one line ``"<metric>: <train acc> <val acc>"`` is
    printed. Metrics with no usable rows, or without a metric column, are
    skipped silently. Nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``<metric>`` / ``<metric>_star`` column pairs; all rows are
        used (there is no year filter here).
    """
    suffix = '_star'
    star_cols = df.columns[df.columns.str.endswith(suffix)]

    for met_s in star_cols:
        met = met_s[:-len(suffix)]
        # A star column without its metric column has nothing to fit on
        if met not in df.columns:
            continue
        met_df = df[[met, met_s]].dropna()
        if met_df.shape[0] == 0:
            continue
        # shuffle must be a real boolean: the name `shuffle` is the function imported
        # from sklearn.utils, which only worked because it is truthy.
        # Labels are passed as a 1-D Series, the shape scikit-learn expects for y.
        mdf_train, mdf_val, mdf_train_labels, mdf_val_labels = train_test_split(
            met_df[[met]], met_df[met_s], test_size=0.2, shuffle=True, random_state=100)
        clf = QuadraticDiscriminantAnalysis()
        clf.fit(mdf_train, mdf_train_labels)
        print(met+':', clf.score(mdf_train, mdf_train_labels), clf.score(mdf_val, mdf_val_labels))

In [258]:
# How good is our classifier?
def training_validation_acc(df, year):
    """Score a one-feature QDA classifier per metric for a single year.

    For every ``<metric>_star`` column a ``QuadraticDiscriminantAnalysis``
    model predicting the star rating from the metric value is fit on an 80/20
    train/validation split (fixed ``random_state``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``year`` column plus ``<metric>`` / ``<metric>_star``
        column pairs.
    year : int
        Only rows with ``df['year'] == year`` are used.

    Returns
    -------
    pandas.DataFrame
        One row per scored metric with the columns ``'Training Accuracy'`` and
        ``'Validation Accuracy'``; empty (but with both columns) when no
        metric could be scored.

    Notes
    -----
    Skipped metrics are reported on stdout: those with no usable rows (or no
    metric column at all), and those whose training split leaves some star
    class with fewer than two samples.
    """
    df = df[df['year'] == year]
    suffix = '_star'
    star_cols = df.columns[df.columns.str.endswith(suffix)]

    train_val_dict, no_data, no_cov = {}, [], []
    for met_s in star_cols:
        met = met_s[:-len(suffix)]
        # A star column without its metric column has nothing to train on
        if met not in df.columns:
            no_data.append(met)
            continue
        met_df = df[[met, met_s]].dropna()
        if met_df.shape[0] == 0:
            no_data.append(met)
            continue
        if met_df.shape[0] < 2:
            # A single row cannot be split into a training and a validation set
            no_cov.append(met)
            continue
        # shuffle must be a real boolean: the name `shuffle` is the function imported
        # from sklearn.utils, which only worked because it is truthy.
        # Labels are passed as a 1-D Series, the shape scikit-learn expects for y.
        mdf_train, mdf_val, mdf_train_labels, mdf_val_labels = train_test_split(
            met_df[[met]], met_df[met_s], test_size=0.2, shuffle=True, random_state=100)
        if mdf_train_labels.value_counts().min() < 2:
            no_cov.append(met)
            continue
        clf = QuadraticDiscriminantAnalysis()
        clf.fit(mdf_train, mdf_train_labels)
        train_val_dict[met] = [clf.score(mdf_train, mdf_train_labels), clf.score(mdf_val, mdf_val_labels)]
    # Built with explicit columns so an empty result still has the expected layout
    train_val_df = pd.DataFrame(
        list(train_val_dict.values()),
        index=list(train_val_dict.keys()),
        columns=['Training Accuracy', 'Validation Accuracy'])
    print('The '+', '.join(no_data)+' fields are empty and the '+', '.join(no_cov)+' fields have ill defined covariance (ie. single sample in a given class)')
    return train_val_df

In [275]:
# Training and validation accuracy for 2018 (one row per scored field)
val_df18 = training_validation_acc(df, 2018)
val_df18

The c_cardiovascular_cholesterol_screening, c_diabetes_cholesterol_controlled, c_diabetes_cholesterol_screening, d_diabetes_treatment, d_high_risk_medication fields are empty and the c_older_adults_pain_assessment fields have ill defined covariance (ie. single sample in a given class)


Unnamed: 0,Training Accuracy,Validation Accuracy
c_adult_bmi_assessment,1.0,1.0
c_annual_flu_vaccine,0.952922,0.948052
c_beneficiary_access_and_performance_problems,0.071736,0.057143
c_breast_cancer_screening,1.0,1.0
c_call_center_foreign_language_interpreter_and_tty_availability,0.976296,0.964497
c_colorectal_cancer_screening,1.0,1.0
c_complaints_about_health_plan,0.991987,0.99359
c_controlling_blood_pressure,0.995161,0.99359
c_coordination,0.927987,0.947712
c_customer_service,0.946667,0.92


In [287]:
# Fields on which the model underperforms (2018): training accuracy below 0.75
val_df18 = training_validation_acc(df, 2018)
upf_18 = val_df18.loc[val_df18['Training Accuracy'].lt(0.75)]
upf_18

The c_cardiovascular_cholesterol_screening, c_diabetes_cholesterol_controlled, c_diabetes_cholesterol_screening, d_diabetes_treatment, d_high_risk_medication fields are empty and the c_older_adults_pain_assessment fields have ill defined covariance (ie. single sample in a given class)


Unnamed: 0,Training Accuracy,Validation Accuracy
c_beneficiary_access_and_performance_problems,0.071736,0.057143
c_improving_or_maintaining_mental_health,0.007937,0.0
c_plan_all_cause_readmissions,0.003466,0.0
d_beneficiary_access_and_performance_problems,0.064851,0.062176
d_mpf_price_accuracy,0.002747,0.0


In [288]:
# Fields on which the model underperforms (2017): training accuracy below 0.75
# c_beneficiary_access_and_performance_problems, d_beneficiary_access_and_performance_problems
# and d_mpf_price_accuracy underperform in both 2017 and 2018

val_df17 = training_validation_acc(df, 2017)
upf_17 = val_df17.loc[val_df17['Training Accuracy'].lt(0.75)]
upf_17

The c_cardiovascular_cholesterol_screening, c_diabetes_cholesterol_controlled, c_diabetes_cholesterol_screening, c_improving_bladder_control, c_medication_reconciliation_post_discharge, d_diabetes_treatment fields are empty and the d_appeals_upheld, d_complaints_about_drug_plan, d_high_risk_medication fields have ill defined covariance (ie. single sample in a given class)


Unnamed: 0,Training Accuracy,Validation Accuracy
c_adult_bmi_assessment,0.003263,0.0
c_beneficiary_access_and_performance_problems,0.067449,0.046784
c_complaints_about_health_plan,0.003322,0.0
c_members_choosing_to_leave_plan,0.003396,0.0
c_older_adults_medication_review,0.00738,0.0
c_rating_of_health_quality,0.136784,0.176471
d_beneficiary_access_and_performance_problems,0.060367,0.073298
d_members_choosing_to_leave_plan,0.003115,0.0
d_mpf_price_accuracy,0.002759,0.0


In [289]:
# Fields on which the model underperforms (2016): training accuracy below 0.75
# c_beneficiary_access_and_performance_problems, d_beneficiary_access_and_performance_problems
# and d_mpf_price_accuracy underperform in all three years (2016-2018)

val_df16 = training_validation_acc(df, 2016)
upf_16 = val_df16.loc[val_df16['Training Accuracy'].lt(0.75)]
upf_16

The c_cardiovascular_cholesterol_screening, c_diabetes_cholesterol_controlled, c_diabetes_cholesterol_screening, c_improving_bladder_control, c_medication_reconciliation_post_discharge, d_diabetes_treatment fields are empty and the  fields have ill defined covariance (ie. single sample in a given class)


Unnamed: 0,Training Accuracy,Validation Accuracy
c_beneficiary_access_and_performance_problems,0.039271,0.044693
c_plan_all_cause_readmissions,0.003497,0.0
d_beneficiary_access_and_performance_problems,0.046077,0.054726
d_mpf_price_accuracy,0.002618,0.0


In [290]:
# Fields on which the model underperforms (2015): training accuracy below 0.75
# c_rating_of_health_quality and c_improving_or_maintaining_mental_health also showed up in other years
# The three fields that underperformed from 2016-2018 were either empty or sparse (ill defined cov) in 2015

val_df15 = training_validation_acc(df, 2015)
upf_15 = val_df15.loc[val_df15['Training Accuracy'].lt(0.75)]
upf_15

The c_beneficiary_access_and_performance_problems, c_breast_cancer_screening, c_call_center_foreign_language_interpreter_and_tty_availability, c_medication_reconciliation_post_discharge, d_beneficiary_access_and_performance_problems, d_call_center_foreign_language_interpreter_and_tty_availability, d_mtm_program_completion_rate_cmr fields are empty and the c_improving_or_maintaining_physical_health, c_members_choosing_to_leave_plan, d_mpf_price_accuracy fields have ill defined covariance (ie. single sample in a given class)


Unnamed: 0,Training Accuracy,Validation Accuracy
c_diabetes_kidney_disease_monitoring,0.007825,0.00625
c_getting_appointments_and_quickly,0.03882,0.043478
c_improving_or_maintaining_mental_health,0.003378,0.0
c_rating_of_health_plan,0.01087,0.018634
c_rating_of_health_quality,0.00625,0.0125
d_rating_of_drug_plan,0.012363,0.016393


In [406]:
# Every field that underperformed in at least one year from 2015-2018
# This will be important moving forward
upf_tot = set().union(upf_18.index, upf_17.index, upf_16.index, upf_15.index)
upf_tot

{'c_adult_bmi_assessment',
 'c_beneficiary_access_and_performance_problems',
 'c_complaints_about_health_plan',
 'c_diabetes_kidney_disease_monitoring',
 'c_getting_appointments_and_quickly',
 'c_improving_or_maintaining_mental_health',
 'c_members_choosing_to_leave_plan',
 'c_older_adults_medication_review',
 'c_plan_all_cause_readmissions',
 'c_rating_of_health_plan',
 'c_rating_of_health_quality',
 'd_beneficiary_access_and_performance_problems',
 'd_members_choosing_to_leave_plan',
 'd_mpf_price_accuracy',
 'd_rating_of_drug_plan'}

**Are there systematic differences between all the fields and the underperforming ones?**

In [398]:
# Average number of observations per field for full df
# (count() gives the non-null entries of each column; the mean is taken over the columns)
df.count().mean()

2751.7314814814813

In [394]:
# Average number of non-null observations per underperforming field
# Doesn't appear significantly different
df.loc[:, list(upf_tot)].count().mean()

2967.4666666666667

In [402]:
# Share of observations per star rating, averaged over all star columns
fdf_star = df.loc[:, df.columns.str.contains('star')]
pd.DataFrame([fdf_star[col].value_counts(normalize=True) for col in fdf_star]).mean()

1.0    0.066985
2.0    0.134424
3.0    0.210293
4.0    0.291765
5.0    0.296532
dtype: float64

In [403]:
# Share of observations per star rating, averaged over the underperforming fields only
# Higher concentration in higher stars
# This relative sparsity in lower stars is likely causing problems with QDA
upf_star = df[[field + '_star' for field in upf_tot]]
pd.DataFrame([upf_star[col].value_counts(normalize=True) for col in upf_star]).mean()

1.0    0.041690
2.0    0.111524
3.0    0.182205
4.0    0.318562
5.0    0.346018
dtype: float64

## LDA

In [404]:
# Same function as above just using an LDA classifier
def training_validation_acc_LDA(df, year):
    """Score a one-feature LDA classifier per metric for a single year.

    For every ``<metric>_star`` column a ``LinearDiscriminantAnalysis`` model
    predicting the star rating from the metric value is fit on an 80/20
    train/validation split (fixed ``random_state``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``year`` column plus ``<metric>`` / ``<metric>_star``
        column pairs.
    year : int
        Only rows with ``df['year'] == year`` are used.

    Returns
    -------
    pandas.DataFrame
        One row per scored metric with the columns ``'Training Accuracy'`` and
        ``'Validation Accuracy'``; empty (but with both columns) when no
        metric could be scored.

    Notes
    -----
    Skipped metrics are reported on stdout: those with no usable rows (or no
    metric column at all), and those whose training split leaves some star
    class with fewer than two samples.
    """
    df = df[df['year'] == year]
    suffix = '_star'
    star_cols = df.columns[df.columns.str.endswith(suffix)]

    train_val_dict, no_data, no_cov = {}, [], []
    for met_s in star_cols:
        met = met_s[:-len(suffix)]
        # A star column without its metric column has nothing to train on
        if met not in df.columns:
            no_data.append(met)
            continue
        met_df = df[[met, met_s]].dropna()
        if met_df.shape[0] == 0:
            no_data.append(met)
            continue
        if met_df.shape[0] < 2:
            # A single row cannot be split into a training and a validation set
            no_cov.append(met)
            continue
        # shuffle must be a real boolean: the name `shuffle` is the function imported
        # from sklearn.utils, which only worked because it is truthy.
        # Labels are passed as a 1-D Series, the shape scikit-learn expects for y.
        mdf_train, mdf_val, mdf_train_labels, mdf_val_labels = train_test_split(
            met_df[[met]], met_df[met_s], test_size=0.2, shuffle=True, random_state=100)
        if mdf_train_labels.value_counts().min() < 2:
            no_cov.append(met)
            continue
        clf = LinearDiscriminantAnalysis()
        clf.fit(mdf_train, mdf_train_labels)
        train_val_dict[met] = [clf.score(mdf_train, mdf_train_labels), clf.score(mdf_val, mdf_val_labels)]
    # Built with explicit columns so an empty result still has the expected layout
    train_val_df = pd.DataFrame(
        list(train_val_dict.values()),
        index=list(train_val_dict.keys()),
        columns=['Training Accuracy', 'Validation Accuracy'])
    print('The '+', '.join(no_data)+' fields are empty and the '+', '.join(no_cov)+' fields have ill defined covariance (ie. single sample in a given class)')
    return train_val_df

In [441]:
# Now let's check how LDA performs on the fields on which QDA underperforms
# 2018: Does reasonably well
val_df18_LDA = training_validation_acc_LDA(df, 2018)
# reindex instead of .loc: fields that were not scored this year show up as NaN rows
# rather than raising a KeyError
val_df18_LDA.reindex(list(upf_tot))

The c_cardiovascular_cholesterol_screening, c_diabetes_cholesterol_controlled, c_diabetes_cholesterol_screening, d_diabetes_treatment, d_high_risk_medication fields are empty and the c_older_adults_pain_assessment fields have ill defined covariance (ie. single sample in a given class)


Unnamed: 0,Training Accuracy,Validation Accuracy
c_rating_of_health_plan,0.832792,0.88961
c_beneficiary_access_and_performance_problems,1.0,1.0
c_getting_appointments_and_quickly,0.863636,0.831169
c_complaints_about_health_plan,0.961538,0.974359
d_rating_of_drug_plan,0.795322,0.75
c_plan_all_cause_readmissions,0.920277,0.931034
d_members_choosing_to_leave_plan,0.880303,0.89759
c_diabetes_kidney_disease_monitoring,1.0,1.0
c_improving_or_maintaining_mental_health,0.968254,0.984127
c_older_adults_medication_review,0.918367,0.945946


In [442]:
# 2017: Does reasonably well
val_df17_LDA = training_validation_acc_LDA(df, 2017)
# reindex instead of .loc: fields that were not scored this year show up as NaN rows
# rather than raising a KeyError
val_df17_LDA.reindex(list(upf_tot))

The c_cardiovascular_cholesterol_screening, c_diabetes_cholesterol_controlled, c_diabetes_cholesterol_screening, c_improving_bladder_control, c_medication_reconciliation_post_discharge, d_diabetes_treatment fields are empty and the d_appeals_upheld, d_complaints_about_drug_plan, d_high_risk_medication fields have ill defined covariance (ie. single sample in a given class)


Unnamed: 0,Training Accuracy,Validation Accuracy
c_rating_of_health_plan,0.928819,0.937931
c_beneficiary_access_and_performance_problems,1.0,1.0
c_getting_appointments_and_quickly,0.911458,0.910345
c_complaints_about_health_plan,0.993355,0.986755
d_rating_of_drug_plan,0.845802,0.871951
c_plan_all_cause_readmissions,0.876091,0.881944
d_members_choosing_to_leave_plan,0.982866,0.993789
c_diabetes_kidney_disease_monitoring,0.991497,0.965986
c_improving_or_maintaining_mental_health,0.903614,0.888
c_older_adults_medication_review,0.98155,0.985294


In [443]:
# 2016: Does reasonably well
val_df16_LDA = training_validation_acc_LDA(df, 2016)
# reindex instead of .loc: fields that were not scored this year show up as NaN rows
# rather than raising a KeyError
val_df16_LDA.reindex(list(upf_tot))

The c_cardiovascular_cholesterol_screening, c_diabetes_cholesterol_controlled, c_diabetes_cholesterol_screening, c_improving_bladder_control, c_medication_reconciliation_post_discharge, d_diabetes_treatment fields are empty and the  fields have ill defined covariance (ie. single sample in a given class)


Unnamed: 0,Training Accuracy,Validation Accuracy
c_rating_of_health_plan,0.906355,0.92
c_beneficiary_access_and_performance_problems,1.0,1.0
c_getting_appointments_and_quickly,0.942568,0.945946
c_complaints_about_health_plan,0.937086,0.907895
d_rating_of_drug_plan,0.827637,0.83432
c_plan_all_cause_readmissions,0.986014,1.0
d_members_choosing_to_leave_plan,0.86627,0.905325
c_diabetes_kidney_disease_monitoring,1.0,1.0
c_improving_or_maintaining_mental_health,0.909789,0.961832
c_older_adults_medication_review,0.988971,0.985294


In [444]:
# 2015: Does reasonably well
val_df15_LDA = training_validation_acc_LDA(df, 2015)
# Some fields in upf_tot were empty or too sparse in 2015 and so were never scored;
# reindex shows them as NaN rows, whereas .loc with missing labels raises a KeyError
val_df15_LDA.reindex(list(upf_tot))

The c_beneficiary_access_and_performance_problems, c_breast_cancer_screening, c_call_center_foreign_language_interpreter_and_tty_availability, c_medication_reconciliation_post_discharge, d_beneficiary_access_and_performance_problems, d_call_center_foreign_language_interpreter_and_tty_availability, d_mtm_program_completion_rate_cmr fields are empty and the c_improving_or_maintaining_physical_health, c_members_choosing_to_leave_plan, d_mpf_price_accuracy fields have ill defined covariance (ie. single sample in a given class)


Unnamed: 0,Training Accuracy,Validation Accuracy
c_rating_of_health_plan,0.931677,0.913043
c_beneficiary_access_and_performance_problems,,
c_getting_appointments_and_quickly,0.93323,0.906832
c_complaints_about_health_plan,0.914286,0.908571
d_rating_of_drug_plan,0.751374,0.73224
c_plan_all_cause_readmissions,0.977636,0.987261
d_members_choosing_to_leave_plan,0.896552,0.934066
c_diabetes_kidney_disease_monitoring,0.992175,0.99375
c_improving_or_maintaining_mental_health,1.0,1.0
c_older_adults_medication_review,0.933333,0.946667


**Comparison between QDA and LDA**

In [452]:
# Mean training and validation accuracy for QDA, newest year first (2018 down to 2015)
for qda_acc in (val_df18, val_df17, val_df16, val_df15):
    print(qda_acc.mean())

Training Accuracy      0.850384
Validation Accuracy    0.848375
dtype: float64
Training Accuracy      0.757886
Validation Accuracy    0.752047
dtype: float64
Training Accuracy      0.866826
Validation Accuracy    0.866823
dtype: float64
Training Accuracy      0.820645
Validation Accuracy    0.817084
dtype: float64


In [450]:
# Mean training and validation accuracy for LDA, newest year first (2018 down to 2015)
# Is LDA categorically better? I don't think it is -- QDA's averages are being pulled down by the fields it fails on
for lda_acc in (val_df18_LDA, val_df17_LDA, val_df16_LDA, val_df15_LDA):
    print(lda_acc.mean())

Training Accuracy      0.928534
Validation Accuracy    0.929886
dtype: float64
Training Accuracy      0.937281
Validation Accuracy    0.935314
dtype: float64
Training Accuracy      0.926277
Validation Accuracy    0.930118
dtype: float64
Training Accuracy      0.918945
Validation Accuracy    0.914775
dtype: float64


In [464]:
# Mean training and validation accuracy for QDA from 2015-2018 (starting with 2018) EXCLUDING the failing fields
print(val_df18[~val_df18.index.isin(upf_tot)].mean())
print(val_df17[~val_df17.index.isin(upf_tot)].mean())
print(val_df16[~val_df16.index.isin(upf_tot)].mean())
print(val_df15[~val_df15.index.isin(upf_tot)].mean())

Training Accuracy      0.956351
Validation Accuracy    0.954438
dtype: float64
Training Accuracy      0.959035
Validation Accuracy    0.951524
dtype: float64
Training Accuracy      0.948387
Validation Accuracy    0.947004
dtype: float64
Training Accuracy      0.957512
Validation Accuracy    0.951740
dtype: float64


In [465]:
# Mean training and validation accuracy for LDA from 2015-2018 (starting with 2018) EXCLUDING the fields QDA failed
# It's clear that QDA is significantly better if we exclude the failing fields
# Each LDA frame is filtered by its own index, so a field missing from it cannot raise a KeyError
print(val_df18_LDA[~val_df18_LDA.index.isin(upf_tot)].mean())
print(val_df17_LDA[~val_df17_LDA.index.isin(upf_tot)].mean())
print(val_df16_LDA[~val_df16_LDA.index.isin(upf_tot)].mean())
print(val_df15_LDA[~val_df15_LDA.index.isin(upf_tot)].mean())

Training Accuracy      0.926523
Validation Accuracy    0.926819
dtype: float64
Training Accuracy      0.927775
Validation Accuracy    0.925337
dtype: float64
Training Accuracy      0.917857
Validation Accuracy    0.920395
dtype: float64
Training Accuracy      0.917617
Validation Accuracy    0.913639
dtype: float64


# Conclusion

In [472]:
# So let's use a composite approach
# QDA will be used for most of the fields and LDA will be used for the fields QDA historically underperforms

# Now this is good performance on the full set of fields!
# Boolean masks are used instead of .loc[list(upf_tot)]: a field that was not scored in a
# given year is simply left out instead of raising a KeyError (the mean ignores it either way)
print(pd.concat([val_df18[~val_df18.index.isin(upf_tot)], val_df18_LDA[val_df18_LDA.index.isin(upf_tot)]]).mean())
print(pd.concat([val_df17[~val_df17.index.isin(upf_tot)], val_df17_LDA[val_df17_LDA.index.isin(upf_tot)]]).mean())
print(pd.concat([val_df16[~val_df16.index.isin(upf_tot)], val_df16_LDA[val_df16_LDA.index.isin(upf_tot)]]).mean())
print(pd.concat([val_df15[~val_df15.index.isin(upf_tot)], val_df15_LDA[val_df15_LDA.index.isin(upf_tot)]]).mean())

Training Accuracy      0.948419
Validation Accuracy    0.948298
dtype: float64
Training Accuracy      0.957377
Validation Accuracy    0.952149
dtype: float64
Training Accuracy      0.946630
Validation Accuracy    0.947857
dtype: float64
Training Accuracy      0.948136
Validation Accuracy    0.942653
dtype: float64
