In [1]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from src.utils.general import get_db_conn, get_boto3_session, load_model_s3, load_matrix_s3
from src.utils import project_constants as constants

In [4]:
# Build the database connection and the boto3 session from the same local
# credentials file. The path is relative, so it resolves against the
# notebook's working directory.
cred_file = '../conf/local/credentials.yaml'
db_conn=get_db_conn(cred_file)
# NOTE(review): s3_session is not used by any cell visible in this section.
s3_session = get_boto3_session(cred_file)

#### mean ratios between bins

In [11]:
# Fetch the distinct values of `related_likelihood_bins` stored in the
# 3-month crosstabs table. `q` is a scratch name that the next query cell
# overwrites.
q = """
    select distinct related_likelihood_bins 
    from test_results.crosstabs_bins_3month
"""
bin_combos = pd.read_sql(q, db_conn)

In [31]:
# Load every row with metric 'mean_ratio' for model 16465 from the 3-month
# crosstabs table into `mean_ratios`.
# NOTE(review): the model id is hard-coded in the query text; edit it here to
# analyse a different model.
q = """select *
    from test_results.crosstabs_bins_3month
    where metric='mean_ratio' and
    model_id = 16465
"""

mean_ratios = pd.read_sql(q, db_conn)

#### Will pass vs Will not pass

In [35]:
def get_top_k_mean_ratio(bin_labels, k=20, ratios=None):
    """Return the k rows with the largest finite mean ratio for one bin pair.

    Rows are selected where ``related_likelihood_bins`` equals ``bin_labels``,
    ordered by ``value`` descending, and then filtered to drop:

    * rows whose ``value`` is NaN or +/-infinity, and
    * rows whose ``feature_name`` contains ``'_state_'`` or ``'_bill_type_'``.

    Parameters
    ----------
    bin_labels : str
        Exact value of ``related_likelihood_bins`` to select,
        e.g. ``'will_pass, will_not_pass'``.
    k : int, default 20
        Maximum number of rows to return.
    ratios : pandas.DataFrame, optional
        Frame with ``related_likelihood_bins``, ``feature_name`` and ``value``
        columns. Defaults to the notebook-level ``mean_ratios`` frame, so
        existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        At most ``k`` rows, highest ``value`` first. The input frame is not
        modified.
    """
    if ratios is None:
        # Backward-compatible fallback to the frame loaded earlier in the notebook.
        ratios = mean_ratios

    pair_rows = ratios[ratios['related_likelihood_bins'] == bin_labels]

    # Sort first, then blank out infinities: this keeps the relative order of
    # tied values identical to the previous implementation.
    ranked = (
        pair_rows
        .sort_values('value', ascending=False)
        .replace([np.inf, -np.inf], np.nan)
    )

    names = ranked['feature_name']
    # Literal substring match; na=False keeps a missing feature name from
    # turning the mask into an object Series that cannot be negated.
    is_excluded_feature = (
        names.str.contains('_state_', regex=False, na=False)
        | names.str.contains('_bill_type_', regex=False, na=False)
    )
    has_value = ranked['value'].notna()

    return ranked[has_value & ~is_excluded_feature][:k]

In [36]:
# Display up to 20 highest mean-ratio rows for the 'will_pass, will_not_pass' bin pair.
get_top_k_mean_ratio('will_pass, will_not_pass', 20)

Unnamed: 0,model_id,as_of_date,metric,related_likelihood_bins,feature_name,value
2972,16465,2018-03-08,mean_ratio,"will_pass, will_not_pass",bl_sp_entity_id_all_sponsors_frac_lc_max,177.100775
10112,16465,2018-03-29,mean_ratio,"will_pass, will_not_pass",evnt_cts_entity_id_1week_action_count,63.193472
7772,16465,2018-03-22,mean_ratio,"will_pass, will_not_pass",evnt_cts_entity_id_1week_action_count,60.103179
752,16465,2018-03-01,mean_ratio,"will_pass, will_not_pass",evnt_cts_entity_id_1week_action_count,49.25266
3092,16465,2018-03-08,mean_ratio,"will_pass, will_not_pass",evnt_cts_entity_id_1week_action_count,46.774227
3098,16465,2018-03-08,mean_ratio,"will_pass, will_not_pass",evnt_cts_entity_id_2week_action_count,37.318289
5432,16465,2018-03-15,mean_ratio,"will_pass, will_not_pass",evnt_cts_entity_id_1week_action_count,36.637289
7778,16465,2018-03-22,mean_ratio,"will_pass, will_not_pass",evnt_cts_entity_id_2week_action_count,36.445406
10118,16465,2018-03-29,mean_ratio,"will_pass, will_not_pass",evnt_cts_entity_id_2week_action_count,36.110555
10110,16465,2018-03-29,mean_ratio,"will_pass, will_not_pass",evnt_cts_entity_id_1week_DISTINCT chamber_count,29.903038


#### Very likely vs Will not pass

In [37]:
# Display up to 20 highest mean-ratio rows for the 'very_likely, will_not_pass' bin pair.
get_top_k_mean_ratio('very_likely, will_not_pass', 20)

Unnamed: 0,model_id,as_of_date,metric,related_likelihood_bins,feature_name,value
476,16465,2018-03-01,mean_ratio,"very_likely, will_not_pass",bl_sp_entity_id_all_sponsors_frac_lc_max,546.763712
9836,16465,2018-03-29,mean_ratio,"very_likely, will_not_pass",bl_sp_entity_id_all_sponsors_frac_lc_max,248.849456
2816,16465,2018-03-08,mean_ratio,"very_likely, will_not_pass",bl_sp_entity_id_all_sponsors_frac_lc_max,127.888043
5156,16465,2018-03-15,mean_ratio,"very_likely, will_not_pass",bl_sp_entity_id_all_sponsors_frac_lc_max,103.547477
9843,16465,2018-03-29,mean_ratio,"very_likely, will_not_pass",bl_sp_entity_id_all_uc_ctrl_prty_frac_imp,61.120919
9829,16465,2018-03-29,mean_ratio,"very_likely, will_not_pass",bl_sp_entity_id_all_dems_frac_imp,61.120919
9833,16465,2018-03-29,mean_ratio,"very_likely, will_not_pass",bl_sp_entity_id_all_other_frac_imp,61.120919
9841,16465,2018-03-29,mean_ratio,"very_likely, will_not_pass",bl_sp_entity_id_all_total_sponsors_imp,61.120919
9835,16465,2018-03-29,mean_ratio,"very_likely, will_not_pass",bl_sp_entity_id_all_reps_frac_imp,61.120919
596,16465,2018-03-01,mean_ratio,"very_likely, will_not_pass",evnt_cts_entity_id_1week_action_count,47.6991


#### Will pass vs Unlikely

In [40]:
# Display up to 20 highest mean-ratio rows for the 'will_pass, unlikely' bin pair.
get_top_k_mean_ratio('will_pass, unlikely', 20)

Unnamed: 0,model_id,as_of_date,metric,related_likelihood_bins,feature_name,value
8396,16465,2018-03-22,mean_ratio,"will_pass, unlikely",evnt_cts_entity_id_1week_action_count,18.932924
10736,16465,2018-03-29,mean_ratio,"will_pass, unlikely",evnt_cts_entity_id_1week_action_count,15.432126
3596,16465,2018-03-08,mean_ratio,"will_pass, unlikely",bl_sp_entity_id_all_sponsors_frac_lc_max,14.114618
3716,16465,2018-03-08,mean_ratio,"will_pass, unlikely",evnt_cts_entity_id_1week_action_count,12.943459
6056,16465,2018-03-15,mean_ratio,"will_pass, unlikely",evnt_cts_entity_id_1week_action_count,11.430082
8402,16465,2018-03-22,mean_ratio,"will_pass, unlikely",evnt_cts_entity_id_2week_action_count,11.145663
10734,16465,2018-03-29,mean_ratio,"will_pass, unlikely",evnt_cts_entity_id_1week_DISTINCT chamber_count,10.894903
3722,16465,2018-03-08,mean_ratio,"will_pass, unlikely",evnt_cts_entity_id_2week_action_count,10.703648
8398,16465,2018-03-22,mean_ratio,"will_pass, unlikely",evnt_cts_entity_id_1week_important_sum,10.362152
10742,16465,2018-03-29,mean_ratio,"will_pass, unlikely",evnt_cts_entity_id_2week_action_count,10.044503
