In [1]:
import pandas as pd
import sqlite3

In [2]:
# Aggregated referral totals flowing into organization providers located in CBSA 34980.
#
# * The `hospitals_tn` CTE keeps npi_data rows with entity_type_code = 2 and derives a
#   5-character `zipcode` from the practice-location postal code.
#   NOTE(review): despite its name, the CTE itself filters on neither state nor
#   hospital taxonomy; the only geographic restriction is the `cbsa = 34980`
#   zipcode subquery in the outer WHERE.
# * doc_graph is INNER JOINed to the CTE on to_npi and to npi_data on from_npi, so
#   edges whose sender or receiver has no npi_data row are dropped.
# * taxonomy is LEFT JOINed once per side, so providers with an unmatched taxonomy
#   code are kept with NULL grouping/classification/specialization.
# * patient_count is summed per (from taxonomy, to_npi, to organization, to taxonomy)
#   and the result is sorted by that total, largest first.
#
# presumably entity_type_code = 2 marks organizations and 34980 is the Nashville
# metro CBSA code — TODO confirm against the data dictionary.
# NOTE(review): `query` is rebound by later cells; re-run this cell before the
# read cell that follows it.
query = '''
WITH hospitals_tn AS(
    SELECT 
        npi,
        entity_type_code AS entity,
        `provider_organization_name_(legal_business_name)` AS organization,
        SUBSTR(provider_business_practice_location_address_postal_code, 1, 5) AS zipcode,
        taxonomy_code
    FROM npi_data
    WHERE 
        entity_type_code = 2
)
SELECT
    from_tax.grouping AS from_grouping,
    from_tax.classification AS from_classification,
    from_tax.specialization AS from_specialization,
    to_npi,
    htn.organization AS to_organization,
    to_tax.grouping AS to_grouping,
    to_tax.classification AS to_classification,
    to_tax.specialization AS to_specialization,
    SUM(patient_count) AS total_patient
FROM doc_graph
INNER JOIN hospitals_tn AS htn
ON to_npi = htn.npi
INNER JOIN npi_data AS fro
ON from_npi = fro.npi
LEFT JOIN taxonomy AS from_tax
ON fro.taxonomy_code = from_tax.code
LEFT JOIN taxonomy AS to_tax
ON htn.taxonomy_code = to_tax.code
WHERE htn.zipcode IN (
    SELECT zip
    FROM zipcode
    WHERE cbsa = 34980
)
GROUP BY 
    from_tax.grouping,
    from_tax.specialization,
    from_tax.classification,
    to_npi,
    htn.organization,
    to_tax.grouping,
    to_tax.classification,
    to_tax.specialization
ORDER BY total_patient DESC
'''

In [3]:
# Run the current `query` against the referrals database and load the result
# into the `referrals` DataFrame.
#
# sqlite3's connection context manager (`with sqlite3.connect(...)`) only
# commits or rolls back the open transaction; it does NOT close the connection.
# Close it explicitly so the file handle is released even if the query fails.
db = sqlite3.connect('../data/referrals.sqlite')
try:
    referrals = pd.read_sql(query, db)
finally:
    db.close()

In [18]:
# Persist the aggregated referral totals; the DataFrame index is not written.
referrals.to_csv(
    '../data/referrals.csv',
    index=False,
)

In [5]:
# Row-level (un-aggregated) referral edges into providers in CBSA 34980 whose
# taxonomy grouping contains "hospital".
#
# * Same `hospitals_tn` CTE as the aggregated query: npi_data rows with
#   entity_type_code = 2 plus a 5-character `zipcode`.
# * Each output row is one doc_graph edge with its raw patient_count, the sender's
#   NPI / organization name / taxonomy and the receiver's NPI / organization / taxonomy.
# * NOTE(review): `lower(to_tax.grouping) LIKE '%hospital%'` is evaluated in WHERE on
#   a LEFT JOINed table, so receivers with no taxonomy match (NULL grouping) are
#   filtered out — the to_tax join behaves like an INNER JOIN here. The from_tax
#   LEFT JOIN still keeps senders with NULL taxonomy fields.
#
# presumably entity_type_code = 2 marks organizations and 34980 is the Nashville
# metro CBSA code — TODO confirm against the data dictionary.
# NOTE(review): this rebinds `query`; re-run this cell before the read cell that
# follows it.
query = '''
WITH hospitals_tn AS(
    SELECT 
        npi,
        entity_type_code AS entity,
        `provider_organization_name_(legal_business_name)` AS organization,
        SUBSTR(provider_business_practice_location_address_postal_code, 1, 5) AS zipcode,
        taxonomy_code
    FROM npi_data
    WHERE 
        entity_type_code = 2
)
SELECT
    from_npi,
    fro.`provider_organization_name_(legal_business_name)` AS from_organization,
    from_tax.grouping AS from_grouping,
    from_tax.classification AS from_classification,
    from_tax.specialization AS from_specialization,
    to_npi,
    htn.organization AS to_organization,
    to_tax.grouping AS to_grouping,
    to_tax.classification AS to_classification,
    to_tax.specialization AS to_specialization,
    patient_count
FROM doc_graph
INNER JOIN hospitals_tn AS htn
ON to_npi = htn.npi
INNER JOIN npi_data AS fro
ON from_npi = fro.npi
LEFT JOIN taxonomy AS from_tax
ON fro.taxonomy_code = from_tax.code
LEFT JOIN taxonomy AS to_tax
ON htn.taxonomy_code = to_tax.code
WHERE htn.zipcode IN (
    SELECT zip
    FROM zipcode
    WHERE cbsa = 34980
) AND
    lower(to_tax.grouping) LIKE '%hospital%'
'''

In [6]:
# Run the current `query` against the referrals database and load the result
# into the `hos_referrals` DataFrame.
#
# sqlite3's connection context manager (`with sqlite3.connect(...)`) only
# commits or rolls back the open transaction; it does NOT close the connection.
# Close it explicitly so the file handle is released even if the query fails.
db = sqlite3.connect('../data/referrals.sqlite')
try:
    hos_referrals = pd.read_sql(query, db)
finally:
    db.close()

In [11]:
# Persist the row-level hospital referral edges; the DataFrame index is not written.
hos_referrals.to_csv(
    '../data/hos_referrals.csv',
    index=False,
)

In [2]:
# Bare edge list (from_npi, to_npi, patient_count) for every doc_graph edge whose
# receiving provider has a practice-location zipcode in CBSA 34980.
#
# * Unlike the other `hospitals_tn` CTEs in this notebook, this one does NOT filter
#   on entity_type_code; it keeps every npi_data row whose 5-character zipcode is
#   in the CBSA, so the CTE name is broader than "hospitals" here.
# * Only `npi` from the CTE is used by the outer query (as the join key); the
#   entity, organization, zipcode and taxonomy_code columns are selected but unused.
# * The sender side (from_npi) is not joined to npi_data, so senders are unrestricted.
#
# presumably 34980 is the Nashville metro CBSA code — TODO confirm.
# NOTE(review): this rebinds `query`; re-run this cell before the read cell that
# follows it.
query = '''
WITH hospitals_tn AS(
    SELECT 
        npi,
        entity_type_code AS entity,
        `provider_organization_name_(legal_business_name)` AS organization,
        SUBSTR(provider_business_practice_location_address_postal_code, 1, 5) AS zipcode,
        taxonomy_code
    FROM npi_data
    WHERE SUBSTR(provider_business_practice_location_address_postal_code, 1, 5) IN (
        SELECT zip
        FROM zipcode
        WHERE cbsa = 34980
    )
)
SELECT
    from_npi,
    to_npi,
    patient_count
FROM doc_graph
INNER JOIN hospitals_tn AS htn
ON to_npi = htn.npi
'''

In [3]:
# Run the current `query` against the referrals database and load the result
# into the `communities` DataFrame.
#
# sqlite3's connection context manager (`with sqlite3.connect(...)`) only
# commits or rolls back the open transaction; it does NOT close the connection.
# Close it explicitly so the file handle is released even if the query fails.
db = sqlite3.connect('../data/referrals.sqlite')
try:
    communities = pd.read_sql(query, db)
finally:
    db.close()

In [5]:
# Persist the CBSA-wide edge list; the DataFrame index is not written.
communities.to_csv(
    '../data/communities_all.csv',
    index=False,
)

In [27]:
# NPIs of npi_data rows with entity_type_code = 2 whose practice-location city is
# exactly 'NASHVILLE' and state is exactly 'TN'.
#
# NOTE(review): the city/state tests use `=`, which is an exact string match under
# SQLite's default collation — differently-cased or padded values (e.g. 'Nashville')
# would not match; confirm the column values are normalized to upper case.
# presumably entity_type_code = 2 marks organizations — TODO confirm.
# NOTE(review): this rebinds `query`; re-run this cell before the read cell that
# follows it.
query = '''
SELECT npi
FROM npi_data
WHERE entity_type_code = 2 AND
    provider_business_practice_location_address_city_name = 'NASHVILLE' AND
    provider_business_practice_location_address_state_name = 'TN'
'''

In [28]:
# Run the current `query` against the referrals database and load the result
# into the `nashville` DataFrame.
#
# sqlite3's connection context manager (`with sqlite3.connect(...)`) only
# commits or rolls back the open transaction; it does NOT close the connection.
# Close it explicitly so the file handle is released even if the query fails.
db = sqlite3.connect('../data/referrals.sqlite')
try:
    nashville = pd.read_sql(query, db)
finally:
    db.close()

In [30]:
# Persist the Nashville organization NPIs; the DataFrame index is not written.
nashville.to_csv(
    '../data/nashville_npi.csv',
    index=False,
)