In [1]:
import pandas as pd
import numpy as np
import sqlite3

# groups_test

In [2]:
%%time
# referrals from providers (entity 1) to metro nashville groups (entity 2) 
# having over 50 transactions and under 50 average wait time
query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT *
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT npi
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT 
            to_npi,
            referral_id,
            qf.entity_type_code,
            qf.taxonomy_code,
            classification,
            specialization,
            qf."provider_organization_name_(legal_business_name)" AS organization,
            qf.provider_first_line_business_practice_location_address AS address_line1,
            qf.provider_second_line_business_practice_location_address AS address_line2,
            qf.provider_business_practice_location_address_city_name AS city,
            qf.provider_business_practice_location_address_state_name AS state,
            qf.provider_business_practice_location_address_postal_code AS zip
        FROM qualifying_refs AS qf
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""
with sqlite3.connect('../data/hopteam.sqlite') as db:
    groups_test = pd.read_sql(query, db)
    
groups_test

Wall time: 45.8 s


Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
0,1013012616,18854880,2.0,207Q00000X,Family Medicine,,"CRIPPS, HOOPER & RHODY, PLLC",400 E PUBLIC SQUARE,,ALEXANDRIA,TN,37012
1,1013012616,131966153,2.0,207Q00000X,Family Medicine,,"CRIPPS, HOOPER & RHODY, PLLC",400 E PUBLIC SQUARE,,ALEXANDRIA,TN,37012
2,1013012616,146169993,2.0,207Q00000X,Family Medicine,,"CRIPPS, HOOPER & RHODY, PLLC",400 E PUBLIC SQUARE,,ALEXANDRIA,TN,37012
3,1902804271,320488,2.0,208VP0014X,Pain Medicine,Interventional Pain Medicine,"PAIN MANAGEMENT GROUP, P.C.",5801 CROSSINGS BLVD,,ANTIOCH,TN,37013
4,1902804271,6754370,2.0,208VP0014X,Pain Medicine,Interventional Pain Medicine,"PAIN MANAGEMENT GROUP, P.C.",5801 CROSSINGS BLVD,,ANTIOCH,TN,37013
...,...,...,...,...,...,...,...,...,...,...,...,...
69422,1669872735,30083148,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563
69423,1669872735,114150220,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563
69424,1669872735,178563129,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563
69425,1669872735,184096138,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563


# group_providers

In [31]:
%%time
# profiles of all providers
# with referrals to metro nashville groups (entity 2) having
# referrals of over 50 transactions and under 50 average wait time
query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT 
                referral_id
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT
            from_npi,
            referral_id,
            entity_type_code,
            "provider_last_name_(legal_name)" || ', ' || provider_first_name AS name,
            taxonomy_code,
            classification,
            specialization,
            "provider_organization_name_(legal_business_name)" AS organization,
            provider_first_line_business_practice_location_address AS address_line1,
            provider_second_line_business_practice_location_address AS address_line2,
            provider_business_practice_location_address_city_name AS city,
            provider_business_practice_location_address_state_name AS state,
            provider_business_practice_location_address_postal_code AS zip     
        FROM qualifying_refs
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""

with sqlite3.connect('../data/hopteam.sqlite') as db:
    providers_test = pd.read_sql(query, db)
    
providers_test

Wall time: 18.3 s


Unnamed: 0,from_npi,referral_id,entity_type_code,name,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
0,1821080961,18854880,1.0,"SHERWOOD, WILLIAM",207Q00000X,Family Medicine,,,302 N CONGRESS BLVD,,SMITHVILLE,TN,37166
1,1841282779,131966153,1.0,"RHODY, KEVIN",207Q00000X,Family Medicine,,,302 N CONGRESS BLVD,,SMITHVILLE,TN,37166
2,1770575607,146169993,1.0,"COOPER, STEVEN",207Q00000X,Family Medicine,,,302 N CONGRESS BLVD,,SMITHVILLE,TN,37166
3,1508804337,320488,1.0,"SHARMA, VINEET",2085R0202X,Radiology,Diagnostic Radiology,,3024 BUSINESS PARK CIR,,GOODLETTSVILLE,TN,37072
4,1538109103,6754370,1.0,"NAU, PAUL",2085R0202X,Radiology,Diagnostic Radiology,,3024 BUSINESS PARK CIR,,GOODLETTSVILLE,TN,37072
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69422,1558355941,30083148,1.0,"JOSEPH, JOJU",207RN0300X,Internal Medicine,Nephrology,,270 EAST MAIN STREET,SUITE 200,GALLATIN,TN,37066
69423,1023099074,114150220,1.0,"LAMBERT, HEATHER",363LW0102X,Nurse Practitioner,Women's Health,,507 GORDONSVILLE HWY,SUITE 203,GORDONSVILLE,TN,38563
69424,1184619124,178563129,1.0,"RUTHERFORD, RICHARD",207Q00000X,Family Medicine,,,133 HOSPITAL DR,SUITE 500,CARTHAGE,TN,37030
69425,1205931565,184096138,1.0,"NOBLE, MICHAEL",207Q00000X,Family Medicine,,,37 PALMER ST,,CALAIS,ME,04619


# Comparing test results with Alex's results

In [18]:
(groups_test.loc[(groups_test['organization'] == 'VANDERBILT UNIVERSITY MEDICAL CENTER') 
#          & 
         #~(groups_test['classification'].isin(['Internal Medicine', 'Pharmacy'])
#          &
#           (groups_test['specialization'].isnull()))
        ]
        .drop_duplicates(['classification','specialization'])
        .sort_values(['classification','specialization'])
)

Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
41143,1740319847,30227209,2.0,261QE0700X,Clinic/Center,End-Stage Renal Disease (ESRD) Treatment,VANDERBILT UNIVERSITY MEDICAL CENTER,2906 FOSTER CREIGHTON DR STE 100,,NASHVILLE,TN,37204
18185,1306889597,1003005,2.0,282N00000X,General Acute Care Hospital,,VANDERBILT UNIVERSITY MEDICAL CENTER,1411 W. BADDOUR PARKWAY,,LEBANON,TN,37087
61594,1104202761,690514,2.0,207R00000X,Internal Medicine,,VANDERBILT UNIVERSITY MEDICAL CENTER,1301 22ND AVE S,,NASHVILLE,TN,37232
65179,1285091330,962232,2.0,3336C0003X,Pharmacy,Community/Retail Pharmacy,VANDERBILT UNIVERSITY MEDICAL CENTER,1211 MEDICAL CENTER DR,TVC 1815,NASHVILLE,TN,37232
65173,1649637794,1530270,2.0,333600000X,Pharmacy,,VANDERBILT UNIVERSITY MEDICAL CENTER,1215 21ST AVE S,ROOM 1006,NASHVILLE,TN,37232
18178,1821030842,29056973,2.0,273R00000X,Psychiatric Unit,,VANDERBILT UNIVERSITY MEDICAL CENTER,500 PARK AVENUE,,LEBANON,TN,37087
18182,1215979190,117291950,2.0,273Y00000X,Rehabilitation Unit,,VANDERBILT UNIVERSITY MEDICAL CENTER,500 PARK AVENUE,,LEBANON,TN,37087


In [22]:
groups_test.loc[(groups_test['organization'] == 'VANDERBILT UNIVERSITY MEDICAL CENTER') 
         & 
         (groups_test['classification'].isin(['General Acute Care Hospital']))
         &
         (groups_test['specialization'] == 'None')].shape

(0, 12)

# Reconciling groups data with Alex's group data

In [23]:
groups_alex = pd.read_csv('../data/teammates/groups_alex.csv').drop(columns = ['Unnamed: 0'])

In [24]:
groups_alex.shape

(45595, 11)

In [25]:
groups_test.shape

(69427, 12)

In [26]:
checker = groups_test['referral_id'].to_frame().assign(df_label_joshua = 'Joshua').merge(groups_alex['referral_id'].to_frame().assign(df_label_alex = 'Alex'), how = 'outer')

test data has all the rows that Alex has

In [27]:
checker[checker['df_label_joshua'].isnull()]

Unnamed: 0,referral_id,df_label_joshua,df_label_alex


test data has 23,832 rows that Alex's data doesn't have

In [28]:
checker[checker['df_label_alex'].isnull()]

Unnamed: 0,referral_id,df_label_joshua,df_label_alex
102,20166848,Joshua,
103,45071264,Joshua,
104,148635690,Joshua,
117,99871103,Joshua,
118,123331410,Joshua,
...,...,...,...
69370,116773375,Joshua,
69371,131865100,Joshua,
69372,156111327,Joshua,
69373,184731114,Joshua,


In [32]:
providers_test.merge(checker.loc[checker['df_label_alex'].isnull(), ['referral_id']]).drop_duplicates(subset = ['from_npi'])

Unnamed: 0,from_npi,referral_id,entity_type_code,name,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
0,1043524283,20166848,1.0,"YI, ANNA",152W00000X,Optometrist,,,5323 MOUNT VIEW RD,,ANTIOCH,TN,37013
1,1437598992,45071264,1.0,"KRAGENBRINK, KYLE",152W00000X,Optometrist,,,5323 MOUNT VIEW RD,,ANTIOCH,TN,37013
2,1093753303,148635690,1.0,"WELCH, DEREK",207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,,5301 VIRGINIA WAY,SUITE 300,BRENTWOOD,TN,37027
3,1649584962,99871103,1.0,"POWERS, ALAN",208D00000X,General Practice,,,600 12TH AVE S,#709,NASHVILLE,TN,37203
4,1700274974,123331410,1.0,"JENKENS, KIETH",363LF0000X,Nurse Practitioner,Family,,843 SUMPTER RD # 424,,VAN BUREN TWP,MI,48111
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23736,1629116843,85619453,1.0,"SEEBACH, JENNIFER",207L00000X,Anesthesiology,,,677 CHURCH ST NE,,MARIETTA,GA,30060
23740,1750320453,142569086,1.0,"GORDON, TIMOTHY",207W00000X,Ophthalmology,,,1050 N JAMES CAMPBELL BLVD STE 100,,COLUMBIA,TN,38401
23747,1245343003,190906237,1.0,"ATNIP, CHARLES",207W00000X,Ophthalmology,,,1050 N JAMES CAMPBELL BLVD,SUITE 100,COLUMBIA,TN,38401
23751,1366777864,208046834,1.0,"MCCURRY, BRIE",367500000X,"Nurse Anesthetist, Certified Registered",,,4230 HARDING RD,SUITE 435,NASHVILLE,TN,37205
