In [30]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import json
import requests
import math
import urllib.request
from bs4 import BeautifulSoup as BS
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from healthpricefinder.utils import extract_doctor_card, get_distance

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Design tree
### Patient inputs:
1. Procedure
2. Location
3. Insurance
4. Max distance allowed

### Patient displayed:
1. Sorted list by prices
2. Distances
3. Ratings on a web scraped version


In [31]:
providers = pd.read_csv("../../data/aetna_piecewise/provider_references.csv")
prices = pd.read_csv("../../data/aetna_piecewise/procedure_costs_1_330000.csv")

In [32]:
def find_procedures_from_substring(prices, substring):
    names = prices.name.map(lambda x: str(x).lower())
    nose = prices.loc[names.str.contains(substring, na=False)]
    return nose.name.value_counts()

In [33]:
find_procedures_from_substring(prices, "delivery")

VAGINAL DELIVERY ONLY-NO ANT    1399
DELIVERY OF PLACENTA             527
Name: name, dtype: int64

In [34]:
print(len(prices.name.unique()), len(prices))
prices.name.unique()

1522 628099


array(['REV 138 & ICD10PX HZ97ZZZ', 'NO DOC CUR FUNCT ASSESS', nan, ...,
       'N.GONORRHOEAE, DNA, AMP PROB', 'VERT CORP,TRANS;THOR, SINGLE',
       'DILATE OF L INT ILIAC ART WITH 2 INTRALUM DEV, OPEN APPROACH'],
      dtype=object)

In [35]:
prices.name.value_counts()

EXPLORATION OF A FOOT JOINT                                   6577
REMOVAL OF SKIN LESION                                        6526
SKULL BASE SURGERY                                            5154
REMOVE & GRAFT WRIST LESION                                   4292
RECONSTRUCTION OF EYELID                                      4246
                                                              ... 
BURN OF CORNEA AND CONJUNCTIVAL SAC, UNSP EYE, INIT ENCNTR       1
130/REV & 10E0XZZ/ICD10PX                                        1
DEEP 3 DEG BRN HEAD-MULT                                         1
112/REV & 74.91/ICD9                                             1
CORROSION OF UNSP DEGREE OF UNSPECIFIED LOWER LEG, SEQUELA       1
Name: name, Length: 1521, dtype: int64

In [36]:
def find_procedure_prices(prices, procedure_name, billing_code_modifier=None, plot=False):
    

    matches = find_matching_relations(procedure_name, prices)
    if billing_code_modifier is None:
        matches = matches.loc[matches.billing_code_modifier.isna()]
    sorted_matches = matches.sort_values("negotiated_rate")
    
    price_data = list(sorted_matches["negotiated_rate"])
    if plot:
        fig, ax = plt.subplots(figsize=(10, 5))
        plot = sns.histplot(price_data, bins=100, stat="probability", ax=ax)
        plot.set_xticks([1000, 5000, 10000, 50000])
    
    median = np.median(price_data)
    mean = np.mean(price_data)
    print(f"There are {len(sorted_matches)} providers offering care for {procedure_name}")
    print(f"The median price for care is: {median} while the average is: {mean}")
    print(f"Finding you the lowest cost providers")
    return sorted_matches

In [37]:
def find_provider(sorted_matches, idx):
    df = sorted_matches.groupby("provider_references").sample(1).reset_index(drop=True)
    provider_references = list(df["provider_references"])
    if idx >= len(provider_references):
        return None
    provider_ids = json.loads(provider_references[idx])
    npis = list(providers.loc[providers.provider_id.isin(provider_ids)]["npi_list"])
    # print(f"There are {len(npis)} providers for this provider_id")
    if len(npis) == 0:
        return None
    npi = json.loads(npis[0])[0]
    if len(str(npi)) != 10:
        return None
    print(f"Rate with provider with NPI id: {npi} is going to be: ", list(sorted_matches["negotiated_rate"])[idx])
    doctor = extract_doctor_card(npi)
    return doctor

In [38]:
sorted_matches = find_procedure_prices(prices, "VAGINAL DELIVERY ONLY-NO ANT")

found 1399 price points
There are 871 providers offering care for VAGINAL DELIVERY ONLY-NO ANT
The median price for care is: 1311.0 while the average is: 2922.42328358209
Finding you the lowest cost providers


In [39]:
# sorted_matches.head(10)

In [40]:
def providers_with_distances(sorted_matches, num_options, patient_zipcode):
    providers_near = pd.DataFrame()
    price_data = list(sorted_matches["negotiated_rate"])
    i = -1
    if len(price_data) < num_options:
        num_options = len(price_data)
    while i <= num_options:
        i += 1
        doc = find_provider(sorted_matches, i)
        if doc:
            distance = get_distance(doc["zipcode"], patient_zipcode)
            if distance is None:
                # doc_name = doc["name"]
                # print(f"skipped provider named: {doc_name} because the distance was: {distance}, price was: {price_data[i]}")
                continue
            doc["distance"] = distance
            doc["procedure_price"] = price_data[i]
            providers_near = pd.concat([providers_near, pd.DataFrame(doc, index=[i])])


    return providers_near

In [41]:
providers_dist = providers_with_distances(sorted_matches, 50, 94086)

Rate with provider with NPI id: 1275720377 is going to be:  147.0
Rate with provider with NPI id: 1417957473 is going to be:  298.0


KeyboardInterrupt: 

In [None]:
providers_dist

In [None]:
providers_dist.loc[providers_dist["distance"] < 50]

In [None]:
providers_dist

In [145]:
doctor

{'name': 'MONICA CECILE NICHOLS',
 'credential': 'MD',
 'enumeration_date': '2006-10-10',
 'gender': 'F',
 'taxonomy': 'Psychiatry & Neurology, Psychiatry',
 'zipcode': '92563',
 'address': '28078 BAXTER RD STE 230',
 'address_purpose': 'LOCATION',
 'phone': '951-824-6116'}

'Cocaine dependence with cocaine-induced mood disorder'

In [90]:
str(soup.get_text()).split("F14.24")

['\n\n\nSearch Page 1/1: F10.20\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nToggle navigation\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch All ICD-10\n\nToggle Dropdown\n\n\n\nSearch All ICD-10\n\n\nICD-10-CM Diagnosis Codes\nICD-10-PCS Procedure Codes\n\nICD-10-CM Diagnosis Index\nICD-10-CM External Causes Index\n\nICD-10-CM Table of Drugs\nICD-10-CM Table of Neoplasms\n\nHCPCS Codes\n\nICD-9-CM Diagnosis Codes\nICD-9-CM Procedure Codes\n\nSearch All Data\n\n\n\n\n\n\n\n\n\n2023/2022\n\n\n\nCodes \n\nICD-10-CM Codes\nICD-10-PCS Codes\nLegacy ICD-9-CM Codes\n\n\n\nIndexes \n\nICD-10-CM Index\nICD-10-CM External Causes Index\n\nTable of Drugs\nTable of Neoplasms\n\n\nConversion\nDRG\n\nRules \n\nICD-10-CM\nNewborn Codes\nPediatric Codes\nAdult Codes\nMaternity Codes\nFemale Only Diagnosis Codes\nMale Only Diagnosis Codes\nManifestation Codes\nPOA Exempt Codes\nQuestionable Admission Codes\nBillable/Specific Codes\nNon-Billable/Non-Specific Codes\nICD-10-PCS\nFemale Only Procedure Codes\nM

In [118]:
diagnosis

'Delivery of Products of Conception, External Approach'