In [69]:
import pandas as pd
import json
import requests
import math
import urllib.request
from bs4 import BeautifulSoup as BS

## Design tree
### Patient inputs:
1. Procedure
2. Location
3. Insurance
4. Max distance allowed

### Displayed to the patient:
1. List sorted by price
2. Distances
3. Ratings (from a web-scraped source)


In [2]:
def extract_doctor_card(req):
    """Build a flat provider "card" from a decoded NPI Registry response.

    Only the first entry of ``req["results"]`` is used, together with the
    first entry of that result's ``taxonomies`` and ``addresses`` lists.

    Args:
        req: Decoded JSON (a dict) with a ``"results"`` list.

    Returns:
        dict with the keys ``name``, ``credential``, ``enumeration_date``,
        ``taxonomy``, ``zipcode`` (first five characters of the postal code),
        ``address``, ``address_purpose`` and ``phone``.

    Raises:
        KeyError: if a required field is absent from the response.
        IndexError: if ``results``, ``taxonomies`` or ``addresses`` is empty.
    """
    results = req["results"]
    if not results:
        # Same exception type as indexing an empty list, but with a message
        # that says what actually went wrong.
        raise IndexError("NPI registry response contains no results")
    result = results[0]
    basic = result["basic"]

    if result["enumeration_type"] == "NPI-1":
        name = basic["first_name"] + " " + basic["last_name"]
        # Tolerate individual records without a credential instead of
        # raising KeyError.
        credential = basic.get("credential", "")
    else:
        name = basic["organization_name"]
        credential = "organization"

    address = result["addresses"][0]
    return {
        "name": name,
        "credential": credential,
        "enumeration_date": basic["enumeration_date"],
        "taxonomy": result["taxonomies"][0]["desc"],
        # Keep only the 5-character prefix of the postal code.
        "zipcode": address["postal_code"][:5],
        "address": address["address_1"],
        "address_purpose": address["address_purpose"],
        # Tolerate addresses without a phone number instead of raising KeyError.
        "phone": address.get("telephone_number", ""),
    }

def configure_npi_request(npi_id):
    """Return the NPI Registry API (version 2.1) lookup URL for ``npi_id``."""
    return f"https://npiregistry.cms.hhs.gov/api/?version=2.1&number={npi_id}"

In [120]:
def getHaversineDistance(p1, p2, radius=6378137):
    """Great-circle distance between two points using the haversine formula.

    Args:
        p1: Sequence whose items 0 and 1 are latitude and longitude in degrees.
        p2: Same layout as ``p1``.
        radius: Sphere radius in meters. The default, 6378137, is kept for
            backward compatibility; it is the WGS-84 equatorial radius, not
            the mean radius (about 6371008.8 m). Pass the mean radius for
            the conventional haversine result.

    Returns:
        Distance in the same unit as ``radius`` (meters by default).
    """
    d_lat = math.radians(p2[0] - p1[0])
    d_long = math.radians(p2[1] - p1[1])
    sin_half_lat = math.sin(d_lat / 2)
    sin_half_long = math.sin(d_long / 2)
    a = (sin_half_lat * sin_half_lat
         + math.cos(math.radians(p1[0])) * math.cos(math.radians(p2[0]))
         * sin_half_long * sin_half_long)
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius * c

def meters2miles(meters):
    """Convert a length in meters to miles."""
    miles_per_meter = 0.000621371
    return meters * miles_per_meter

def rad(x):
    """Convert an angle from degrees to radians."""
    scaled = x * math.pi
    return scaled / 180.0

def get_lat_long(zipcode):
    """Look up the ``geo_point_2d`` of a ZIP code via the OpenDataSoft API.

    Args:
        zipcode: ZIP code as a string or integer. Values shorter than five
            characters are left-padded with zeros, so an integer such as
            2139 is queried as "02139".

    Returns:
        The ``geo_point_2d`` field of the first matching record.
        Presumably a ``[latitude, longitude]`` pair, since callers pass it
        to getHaversineDistance. TODO confirm against the API.

    Raises:
        requests.HTTPError: if the service answers with an error status.
        IndexError: if the query returns no records.
    """
    # Integer ZIP codes lose their leading zeros; restore them before querying.
    zip5 = str(zipcode).zfill(5)
    uri = f'https://public.opendatasoft.com/api/records/1.0/search/?q={zip5}&dataset=georef-united-states-of-america-zcta5'
    # Without a timeout requests can block indefinitely on a stalled server.
    response = requests.get(uri, timeout=10)
    response.raise_for_status()
    records = response.json()["records"]
    if not records:
        raise IndexError(f"no geographic record found for zipcode {zip5!r}")
    return records[0]["fields"]["geo_point_2d"]

def get_distance(doctor_card, patient_card):
    """Return the distance in miles between a patient's and a doctor's ZIP code.

    Both arguments are mappings with a ``"zipcode"`` entry. Each ZIP code is
    resolved with get_lat_long and the two points are compared with
    getHaversineDistance.
    """
    patient_point = get_lat_long(patient_card["zipcode"])
    doctor_point = get_lat_long(doctor_card["zipcode"])
    return meters2miles(getHaversineDistance(patient_point, doctor_point))
    
def find_matching_relations(procedure_name, price_df):
    """Select the rows of ``price_df`` whose ``name`` equals ``procedure_name``.

    Args:
        procedure_name: Exact procedure name to look for.
        price_df: DataFrame with a ``name`` column.

    Returns:
        The matching rows, in their original order and with their original
        index. Also prints how many rows matched.
    """
    # Vectorized equality test instead of building the mask in a Python loop.
    matches = price_df.loc[price_df.name == procedure_name]
    print(f"found {len(matches)} price points")
    return matches

def get_cpt_name(cpt_code):
    """Scrape the procedure name for a CPT code from its aapc.com page title.

    The page ``<title>`` is split on "-" and the second segment is returned
    with surrounding whitespace removed.

    Args:
        cpt_code: CPT code to look up; interpolated into the page URL.

    Returns:
        The second "-"-separated segment of the page title, stripped.

    Raises:
        ValueError: if the page has no ``<title>`` element.
        IndexError: if the title contains no "-" separator.
    """
    url = f"https://www.aapc.com/codes/cpt-codes/{cpt_code}"
    # Close the HTTP response deterministically and avoid hanging forever.
    with urllib.request.urlopen(url, timeout=10) as response:
        html = response.read()
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BS(html, "html.parser")
    if soup.title is None:
        raise ValueError(f"no <title> found on page for CPT code {cpt_code!r}")
    # `soup.title` is a Tag, not a string; extract its text before splitting.
    title_text = soup.title.get_text()
    cpt_procuedure = title_text.split("-")[1].strip()
    return cpt_procuedure
    
def get_icd10x_code(icd_code):
    """Scrape a description for ``icd_code`` from the icd10data.com search page.

    The result is the text that directly follows the opening tag of the
    first ``<h2>`` element on the page, up to the next "<".

    Args:
        icd_code: Code to search for; interpolated into the search URL.

    Returns:
        The extracted text, or an empty string (after printing
        "code failure") when the page has no ``<h2>`` element.
    """
    url = f"https://www.icd10data.com/search?s={icd_code}"
    # Close the HTTP response deterministically and avoid hanging forever.
    with urllib.request.urlopen(url, timeout=10) as response:
        html = response.read()
    soup = BS(html, "html.parser")
    headings = soup.find_all("h2")
    # Test the result list itself: its string form ("[]") is never empty,
    # so checking the string could not detect a page without headings.
    if not headings:
        print("code failure")
        return ""
    return str(headings[0]).split(">")[1].split("<")[0]

def get_icd9_code(code):
    """Scrape the first conversion entry for ``code`` from icd10data.com.

    Fetches the ``/Convert/<code>`` page, takes every ``<ul>`` with class
    ``ulConversion``, and returns the text that follows the first ``</a>``
    up to the next "<", with its first character dropped.

    Args:
        code: Code to convert; interpolated into the page URL.

    Returns:
        The extracted text.

    Raises:
        IndexError: if the page contains no such conversion entry.
    """
    url = f"https://www.icd10data.com/Convert/{code}"
    # Close the HTTP response deterministically and avoid hanging forever.
    with urllib.request.urlopen(url, timeout=10) as response:
        html = response.read()
    soup = BS(html, "html.parser")
    component = str(soup.find_all("ul", {"class": "ulConversion"}))
    parts = component.split("</a>")
    if len(parts) < 2:
        # Same exception type as the bare index lookup, with a usable message.
        raise IndexError(f"no conversion entry found for code {code!r}")
    diagnosis = parts[1].split("<")[0][1:]
    return diagnosis

In [130]:
# Provider-reference table.
providers = pd.read_csv("../../data/aetna_piecewise/provider_references.csv")

# Price table: keep the header row, skip file rows 1..39999, then read the
# next 40000 rows.
prices = pd.read_csv(
    "../../data/aetna_piecewise/procedure_costs_100000.csv",
    skiprows=range(1, 40000),
    nrows=40000,
)

In [132]:
# Number of distinct procedure names versus total rows, then the names themselves.
unique_names = prices.name.unique()
print(len(unique_names), len(prices))
unique_names

147 40000


array(['REVISION,FEMORAL ANASTOMOSIS',
       'DILATION OF AORTIC VALVE WITH INTRALUM DEV, PERC APPROACH',
       'REPAIR CAST PARTIAL FRAMEWORK, MANDIBULAR',
       '142/REV AND 75.8 ICD9', '110/REV AND 75.7 ICD9',
       'PART HOSP-PSYCH REV 912 &  ICD10DX  F93.8',
       'STER CEFUR SOD PR 750MG/12MX', 'ADULT SIZE DISP INCONT-XLARG',
       'REPAIR OF HIP DISLOCATION', '151/REV & 72.7/ICD9',
       'PROS REP VENTRC DEF-CLOS', 'IP Detox REV 128 & ICD10 F11.259',
       'BYPASS L COM CAROTID TO R EXTRACRAN ART W SYNTH SUB, OPEN',
       'BYPASS R POPLIT ART TO PERON ART W AUTOL ART, PERC ENDO',
       'DRUG TEST PRSMV DIR OPT OBS', 'ANESTH,CARDIAC ELECTROP',
       'DILATE R EXT CAROTID W DRUG-ELUT INTRA, PERC ENDO',
       'BYPASS PORTAL VEIN TO LOWER VEIN, OPEN APPROACH',
       'BYPASS L FEMOR ART TO POST TIB ART W AUTOL VN, PERC ENDO',
       'BYPASS L INT ILIAC ART TO R EXT ILIA W AUTOL ART, OPEN',
       'REMOVAL OF MONITORING DEVICE FROM UPPER VEIN, OPEN APPROACH',
       'INSE

In [133]:
# All price rows for one example procedure.
matches = find_matching_relations(
    procedure_name="HIB-MENCY VACCINE 4 DOSE IM",
    price_df=prices,
)

found 75 price points


In [135]:
# Order the matching rows by negotiated rate and show the rate next to the
# provider references.
sorted_matches = matches.sort_values(by="negotiated_rate")
sorted_matches.loc[:, ["negotiated_rate", "provider_references"]]

Unnamed: 0,negotiated_rate,provider_references
27166,24.95,[505156]
27201,24.95,[337195]
27239,24.95,[605142]
27204,24.95,[360648]
27205,24.95,"[305930, 762874]"
...,...,...
27186,60.90,"[393401, 433573, 469487, 499111, 524415, 679713]"
27234,60.90,[109952]
27191,60.90,[350168]
27237,60.90,[605009]


In [167]:
# Take the provider references of the row at position 5 of the sorted
# matches, decode them from JSON, and resolve them to NPI lists.
reference_json = sorted_matches["provider_references"].iloc[5]
provider_ids = json.loads(reference_json)
print(f"There are {len(provider_ids)} providers for this provider reference id")

is_referenced = providers.provider_id.isin(provider_ids)
npis = providers.loc[is_referenced, "npi_list"].tolist()
print(f"There are {len(npis)} providers for this NPI")

# First NPI of the first matching provider row.
npi = json.loads(npis[0])[0]
print(npi)


There are 1 providers for this provider reference id
There are 1 providers for this NPI
1225569643


In [168]:
# Query the NPI Registry for the selected NPI and build its provider card.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status surfaces HTTP errors before the body is parsed.
r = requests.get(configure_npi_request(npi), timeout=10)
r.raise_for_status()
doctor = extract_doctor_card(r.json())

In [169]:
# Display the provider card held in `doctor`.
doctor

{'name': 'ROUNDYS SUPERMARKETS INC',
 'credential': 'organization',
 'enumeration_date': '2017-03-27',
 'taxonomy': 'Durable Medical Equipment & Medical Supplies',
 'zipcode': '53089',
 'address': 'N65W24838 MAIN ST',
 'address_purpose': 'LOCATION',
 'phone': '414-231-5959'}

In [146]:
# Example patient query used for the distance lookup.
input_card = {
    "zipcode": 94086,
    "insurance": "Aetna",
    "procedure": "ELBOW",
}

In [121]:
# Distance in miles between the example patient and the current provider card.
get_distance(doctor_card=doctor, patient_card=input_card)

379.7142673439573

In [145]:
# NOTE(review): the output recorded for this cell includes a 'gender' key,
# which extract_doctor_card as defined in this notebook never sets. It looks
# like a result from an earlier version of the helper; re-run to refresh.
doctor

{'name': 'MONICA CECILE NICHOLS',
 'credential': 'MD',
 'enumeration_date': '2006-10-10',
 'gender': 'F',
 'taxonomy': 'Psychiatry & Neurology, Psychiatry',
 'zipcode': '92563',
 'address': '28078 BAXTER RD STE 230',
 'address_purpose': 'LOCATION',
 'phone': '951-824-6116'}

'Cocaine dependence with cocaine-induced mood disorder'

In [90]:
# NOTE(review): `soup` is not assigned at notebook level in any visible cell
# (it only appears as a local inside the scraping helpers), so this cell
# presumably relies on leftover kernel state. Confirm before re-running.
str(soup.get_text()).split("F14.24")

['\n\n\nSearch Page 1/1: F10.20\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nToggle navigation\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch All ICD-10\n\nToggle Dropdown\n\n\n\nSearch All ICD-10\n\n\nICD-10-CM Diagnosis Codes\nICD-10-PCS Procedure Codes\n\nICD-10-CM Diagnosis Index\nICD-10-CM External Causes Index\n\nICD-10-CM Table of Drugs\nICD-10-CM Table of Neoplasms\n\nHCPCS Codes\n\nICD-9-CM Diagnosis Codes\nICD-9-CM Procedure Codes\n\nSearch All Data\n\n\n\n\n\n\n\n\n\n2023/2022\n\n\n\nCodes \n\nICD-10-CM Codes\nICD-10-PCS Codes\nLegacy ICD-9-CM Codes\n\n\n\nIndexes \n\nICD-10-CM Index\nICD-10-CM External Causes Index\n\nTable of Drugs\nTable of Neoplasms\n\n\nConversion\nDRG\n\nRules \n\nICD-10-CM\nNewborn Codes\nPediatric Codes\nAdult Codes\nMaternity Codes\nFemale Only Diagnosis Codes\nMale Only Diagnosis Codes\nManifestation Codes\nPOA Exempt Codes\nQuestionable Admission Codes\nBillable/Specific Codes\nNon-Billable/Non-Specific Codes\nICD-10-PCS\nFemale Only Procedure Codes\nM

In [118]:
# NOTE(review): `diagnosis` is not assigned at notebook level in any visible
# cell (it only appears as a local inside the scraping helpers), so this cell
# presumably relies on leftover kernel state. Confirm before re-running.
diagnosis

'Delivery of Products of Conception, External Approach'