# Import

In [1]:
import pandas as pd
import numpy as np
import sqlite3

# Query for broad communities
Write a query similar to those in hop_03, but do not restrict the entity type of either from_npi or to_npi.

In [5]:
%%time
# Referrals whose receiving NPI (to_npi) has a practice-location zip in the
# metro Nashville CBSA ('34980'), from any referring NPI (from_npi).
# No entity-type filter is applied to either side of the referral.
# Only referrals with transaction_count >= 50 and average_day_wait < 50 are kept.
query = """
        
        WITH metro_entity AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT *
            FROM metro_entity
            INNER JOIN referral_to
            ON metro_entity.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        profile_from AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
        )
        SELECT
            rf.from_npi,
            qf.referral_id,
            pf.entity_type_code AS entity_from,
            pf."provider_last_name_(legal_name)" || ', ' || pf.provider_first_name AS name_from,
            pf."provider_organization_name_(legal_business_name)" AS organization_from,
            pf.taxonomy_code AS taxonomy_code_from,
            pf.classification AS classification_from,
            pf.specialization AS specialization_from,
            pf.provider_first_line_business_practice_location_address AS address_line1_from,
            pf.provider_second_line_business_practice_location_address AS address_line2_from,
            pf.provider_business_practice_location_address_city_name AS city_from,
            pf.provider_business_practice_location_address_state_name AS state_from,
            pf.provider_business_practice_location_address_postal_code AS zip_from,
            to_npi,
            qf.entity_type_code AS entity_to,
            qf."provider_last_name_(legal_name)" || ', ' || qf.provider_first_name AS name_to,
            qf."provider_organization_name_(legal_business_name)" AS organization_to,
            qf.taxonomy_code AS taxonomy_code_to,
            qf.classification AS classification_to,
            qf.specialization AS specialization_to,
            qf.provider_first_line_business_practice_location_address AS address_line1_to,
            qf.provider_second_line_business_practice_location_address AS address_line2_to,
            qf.provider_business_practice_location_address_city_name AS city_to,
            qf.provider_business_practice_location_address_state_name AS state_to,
            qf.provider_business_practice_location_address_postal_code AS zip_to,
            patient_count,
            transaction_count,
            average_day_wait,
            std_day_wait
        FROM referral_from AS rf
        INNER JOIN profile_from AS pf
        ON rf.from_npi = pf.npi
        INNER JOIN qualifying_refs AS qf
        USING (referral_id);

"""

# Run the query and load the result into a DataFrame.
# sqlite3's connection context manager only commits or rolls back the
# transaction; it does not close the connection. Close it explicitly so the
# database file handle is released once the read is done.
db = sqlite3.connect('../data/hopteam.sqlite')
try:
    referrals_big = pd.read_sql(query, db)
finally:
    db.close()

Wall time: 37.3 s


In [8]:
# Number of distinct referring NPIs in the result (nulls are not counted).
referrals_big['from_npi'].nunique(dropna=True)

38950

In [9]:
# Number of distinct receiving NPIs in the result (nulls are not counted).
referrals_big['to_npi'].nunique(dropna=True)

9194

In [10]:
def build_address(df, to_from = 'from'):
    """Build a one-line street address for one side of each referral.

    Reads the columns ``address_line1_<to_from>``, ``address_line2_<to_from>``,
    ``city_<to_from>``, ``state_<to_from>`` and ``zip_<to_from>`` from ``df``
    and joins them as ``"line1[, line2], city, state zip"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the address component columns listed above.
    to_from : str, default 'from'
        Column-name suffix selecting which side of the referral to format
        (the notebook uses 'from' and 'to').

    Returns
    -------
    pandas.Series
        Formatted address per row, aligned to ``df``'s index. The second
        address line is omitted when it is null, empty, or whitespace-only.
        A null in any other component makes the whole address null, because
        string concatenation propagates missing values.

    Raises
    ------
    KeyError
        If any of the expected columns is absent from ``df``.
    """
    line1 = df[f'address_line1_{to_from}']
    line2 = df[f'address_line2_{to_from}']

    # Only append the second line when it carries real content; a blank
    # string would otherwise leave a dangling ", , " in the address.
    has_line2 = line2.notna() & line2.astype(str).str.strip().ne('')
    street = line1.where(~has_line2, line1 + ', ' + line2)

    return (
        street
        + ', '
        + df[f'city_{to_from}']
        + ', '
        + df[f'state_{to_from}']
        + ' '
        + df[f'zip_{to_from}']
    )

In [12]:
# Attach a formatted one-line address for each side of the referral.
referrals_big['address_from'], referrals_big['address_to'] = (
    build_address(referrals_big, 'from'),
    build_address(referrals_big, 'to'),
)

In [13]:
# Sanity check: list the distinct state values seen on the referring side.
pd.unique(referrals_big['state_from'])

array(['TN', 'AL', 'MS', 'WV', 'OK', 'SC', 'OH', 'KY', 'CO', 'FL', 'MI',
       'NC', 'KS', 'GA', 'MD', 'NY', 'WI', 'NJ', 'CA', 'MO', 'IL', 'IN',
       'NH', 'MN', 'WA', 'VA', 'AZ', 'DC', 'PA', 'DE', 'RI', 'MA', 'LA',
       'NE', 'NV', 'IA', 'ID', 'OR', 'CT', 'VI', 'AR', 'TX', 'UT', 'ME',
       'NM', 'AA', 'AK', 'HI', 'MT', 'ND', 'VT', 'SD', 'GU', 'PR', 'AE',
       'WY', 'ISTANBUL'], dtype=object)

In [16]:
# Export the result (without the DataFrame index) to the neo4j data folder.
referrals_big.to_csv(
    path_or_buf='../data/neo4j/referrals_big.csv',
    index=False,
)