In [1]:
import pandas as pd
import numpy as np
import sys
from sklearn.metrics.pairwise import pairwise_distances



# class Pipeline_3():

#     def __init__(self):
#         pass


def find_area(path='../data/clean_files/area_FL.csv'):
    """Load the service-area table and reduce it to the rows/columns used downstream.

    Parameters
    ----------
    path : str, optional
        Location of the area CSV. Defaults to the Florida file this notebook
        was written against, so existing ``find_area()`` calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The CSV contents without the ``Unnamed: 0``, ``County``,
        ``County_Name`` and ``ServiceAreaId`` columns, without any row that
        has a missing value, and with ``IssuerId`` cast to ``int``. The
        original row index is kept (it is not reset after rows are dropped).

    Raises
    ------
    KeyError
        If any of the four dropped columns is absent from the file.
    """
    unused_columns = ['Unnamed: 0', 'County', 'County_Name', 'ServiceAreaId']
    return (
        pd.read_csv(path)
        .drop(columns=unused_columns)
        # Rows are removed *before* the cast: NaN cannot be converted to int.
        .dropna(how='any')
        .astype({'IssuerId': int})
    )

def find_benefits(path='../data/clean_files/benefits_covered_FL.csv'):
    """Load the covered-benefits table and collapse it to one row per issuer.

    The issuer id is taken from the first five characters of ``PlanId``.
    Only the first row encountered for each issuer is kept, so the benefit
    values of that row stand in for the whole issuer.

    Parameters
    ----------
    path : str, optional
        Location of the benefits CSV. Defaults to the Florida file this
        notebook was written against, so existing ``find_benefits()`` calls
        are unaffected.

    Returns
    -------
    pandas.DataFrame
        One row per issuer, without ``PlanId`` and with an integer
        ``IssuerId`` column.
    """
    benefits = pd.read_csv(path)
    issuer_prefix = benefits['PlanId'].str.slice(stop=5)
    return (
        benefits
        .assign(IssuerId=issuer_prefix)
        # De-duplicate on the string prefix first, then cast, so the set of
        # surviving rows is decided by the raw five-character key.
        .drop_duplicates(['IssuerId'])
        .drop(columns=['PlanId'])
        .astype({'IssuerId': int})
    )

def find_attributes(path='../data/clean_files/attr_FL.csv'):
    """Load the plan-attributes table, one row per issuer, plus an issuer-name lookup.

    Parameters
    ----------
    path : str, optional
        Location of the attributes CSV. Defaults to the Florida file this
        notebook was written against, so existing ``find_attributes()`` calls
        are unaffected.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``attributes`` - the first row seen for each ``IssuerId`` with the
        identifier/descriptive columns listed below removed.
        ``plan_names`` - the ``IssuerId`` and ``Issuer_Name`` columns of those
        same rows.

    Raises
    ------
    KeyError
        If ``Issuer_Name`` or any column in the drop list is absent.
    """
    attributes = pd.read_csv(path).drop_duplicates('IssuerId')

    # Issuer names are captured here, before Issuer_Name is dropped, instead
    # of importing network_FL.csv.
    plan_names = attributes[['IssuerId', 'Issuer_Name']]

    # Each label appears once; the previous list repeated the three
    # denial/disenrollment rate columns.
    unused_columns = [
        'PlanId', 'Unnamed: 0', 'Issuer_ID',
        'Issuer_Denial_rate', 'Plan_denial_rate', 'Disenrollment_Rate',
        'StandardComponentId', 'NetworkId', 'ServiceAreaId',
        'PlanType', 'MetalLevel', 'PlanVariantMarketingName',
        'Issuer_Name', 'No Management Program',
    ]
    attributes = attributes.drop(columns=unused_columns)

    return attributes, plan_names

def find_matrix(benefit_df,attribute_df):
    """Join benefits with attributes and return the integer comparison matrix.

    ``attribute_df`` is left-joined onto ``benefit_df`` by ``IssuerId``. The
    ``Osteoporosis`` and ``Osteoporosis Treatment`` columns are added together
    into a single ``Osteoporosis`` column (so a row with both set to 1 ends up
    with 2). The columns are then put in the fixed order below, which the
    Flask app expects according to the original author's note.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        The ordered matrix cast to ``int`` and the ``IssuerId`` column taken
        just before that cast. ``IssuerId`` is NOT removed from the matrix;
        it remains its first column.
    """
    merged = pd.merge(benefit_df, attribute_df, how='left', on='IssuerId')

    # Fold the two osteoporosis columns into one, reusing the shorter name.
    osteo_total = merged['Osteoporosis'] + merged['Osteoporosis Treatment']
    merged = (
        merged
        .drop(columns=['Osteoporosis', 'Osteoporosis Treatment'])
        .assign(Osteoporosis=osteo_total)
    )

    # Column order expected by the Flask app.
    column_order = [
        'IssuerId',
        'Bone Marrow Transplant',
        'Chemotherapy',
        'Radiation',
        'Cardiac and Pulmonary Rehabilitation',
        'Heart Disease Management Program',
        'High Blood Pressure & High Cholesterol Management Program',
        'Diabetes Care Management',
        'Diabetes Education',
        'Dialysis',
        'Infusion Therapy',
        'Genetic Testing Lab Services',
        'Imaging (CT/PET Scans, MRIs)',
        'Laboratory Outpatient and Professional Services',
        'X-rays and Diagnostic Imaging',
        'Eye Glasses for Adults',
        'Eye Glasses for Children',
        'Routine Eye Exam (Adult)',
        'Routine Eye Exam for Children',
        'Durable Medical Equipment',
        'Enteral/Parenteral and Oral Nutrition Therapy',
        'Habilitation Services',
        'Home Health Care Services',
        'Hospice Services',
        'Pain Management Program',
        'Osteoporosis',
        'Prosthetic Devices',
        'Skilled Nursing Facility',
        'Emergency Room Services',
        'Emergency Transportation/Ambulance',
        'Inpatient Hospital Services (e.g., Hospital Stay)',
        'Inpatient Physician and Surgical Services',
        'Allergy Injections',
        'Allergy Testing',
        'Asthma Management Program',
        'Preventive Care/Screening/Immunization',
        'Transplant',
        'Delivery and All Inpatient Services for Maternity Care',
        'Nutrition/Formulas',
        'Pregnancy Management Program',
        'Prenatal and Postnatal Care',
        'Well Baby Visits and Care',
        'Depression Management Program',
        'Mental Health Office Visit',
        'Mental/Behavioral Health Inpatient Services',
        'Mental/Behavioral Health Outpatient Services',
        'Other Practitioner Office Visit (Nurse, Physician Assistant)',
        'Outpatient Facility Fee (e.g., Ambulatory Surgery Center)',
        'Outpatient Surgery Physician/Surgical Services',
        'Primary Care Visit to Treat an Injury or Illness',
        'Routine Foot Care',
        'Specialist Visit',
        'Telehealth',
        'Urgent Care Centers or Facilities',
        'Low Back Pain Management Program',
        'Outpatient Observation',
        'Outpatient Rehabilitation Services',
        'Reconstructive Surgery',
        'Rehabilitative Occupational and Rehabilitative Physical Therapy',
        'Rehabilitative Speech Therapy',
        'Substance Abuse Disorder Inpatient Services',
        'Substance Abuse Disorder Outpatient Services',
        'Substance Abuse Office Visit',
        'Chiropractic Care',
        'Fitness Center Membership',
        'Gym Access',
        'Hyperbaric Oxygen Therapy',
        'Nutritional Counseling',
        'Treatment for Temporomandibular Joint Disorders',
        'Weight Loss Management Program',
    ]
    ordered = merged[column_order]

    # Issuer labels are read before the integer cast, for reference after the
    # comparison; the column itself stays in the returned matrix.
    issuer_ids = ordered['IssuerId']

    return (ordered.astype(int), issuer_ids)

 

In [2]:
# Load the three cleaned CSV inputs, then assemble the issuer comparison matrix.
area=find_area()
benefits=find_benefits()
# find_attributes returns the trimmed attribute frame and an IssuerId/Issuer_Name lookup.
attributes, plan_names = find_attributes()
# NOTE: despite its name, make_matrix is the resulting DataFrame, not a function;
# plan_indexes is the IssuerId Series returned alongside it.
make_matrix, plan_indexes=find_matrix(benefits, attributes)

In [3]:
# Display the assembled matrix; IssuerId is still present as its first column.
make_matrix

Unnamed: 0,IssuerId,Bone Marrow Transplant,Chemotherapy,Radiation,Cardiac and Pulmonary Rehabilitation,Heart Disease Management Program,High Blood Pressure & High Cholesterol Management Program,Diabetes Care Management,Diabetes Education,Dialysis,...,Substance Abuse Disorder Inpatient Services,Substance Abuse Disorder Outpatient Services,Substance Abuse Office Visit,Chiropractic Care,Fitness Center Membership,Gym Access,Hyperbaric Oxygen Therapy,Nutritional Counseling,Treatment for Temporomandibular Joint Disorders,Weight Loss Management Program
0,12379,0,1,1,0,1,0,0,1,1,...,1,1,0,1,0,0,0,1,1,0
1,16842,1,1,1,0,1,1,1,1,1,...,1,1,0,1,0,0,0,1,1,0
2,21663,0,1,1,0,1,1,0,1,1,...,1,1,0,1,0,0,0,1,1,0
3,30252,1,1,1,0,1,1,1,1,1,...,1,1,0,1,0,0,0,1,1,0
4,36194,0,1,1,1,1,0,0,1,1,...,1,1,1,1,1,0,1,1,1,0
5,40572,0,1,1,0,0,0,0,1,1,...,1,1,0,1,0,0,0,1,1,0
6,48121,0,1,1,0,0,0,0,1,1,...,1,1,0,1,0,0,0,1,1,0
7,54172,1,1,1,0,1,1,1,1,1,...,1,1,0,1,0,0,0,1,1,0
8,56503,1,1,1,0,1,1,1,1,1,...,1,1,0,1,0,1,0,1,1,1


In [5]:
# List the distinct issuers that remain in the area table after cleaning.
area.IssuerId.unique()

array([21663, 30252, 16842, 36194, 56503, 40572, 54172, 12379, 48121])

In [6]:
# Show the IssuerId -> Issuer_Name lookup returned by find_attributes.
plan_names

Unnamed: 0,IssuerId,Issuer_Name
0,12379,Bright Health Insurance Company of Florida
152,15833,Guardian Life Insurance Company of America
160,15980,Humana Insurance Company
161,16842,Blue Cross and Blue Shield of Florida
281,17121,BEST Life and Health Insurance Company
289,21663,Celtic Insurance Company
321,30115,Florida Combined Life Insurance Company
329,30252,"Health Options, Inc."
433,36194,"Health First Commercial Plans, Inc."
659,40572,Oscar Insurance Company of Florida
