In [1]:
import pandas as pd
import requests

# Constants and Configurations
BASE_URL = "http://clinicaltables.nlm.nih.gov/api/npi_org/v3/search"
MAX_RESULTS_PER_PAGE = 1000
MAX_PAGES = 19
SEARCH_TERMS = [
    "physician", "surgeon", "cardiologist", "neurologist", "dermatologist", "orthopedist",
    "urologist", "ophthalmologist", "gynecologist", "pediatrician", "anesthesiologist",
    "psychiatrist", "radiologist", "oncologist", "pharmacist", "nurse", "therapist",
    "audiologist", "dentist", "Rheumatologist", "Endocrinologist", "Pulmonologist",
    "Immunologist", "Infectious Disease Specialist", "Otolaryngologist", "Geriatrician",
    "Sleep Medicine Specialist", "Sports Medicine Physician", "Podiatrist"
]

# Initialize collected NPIs set
collected_npis = set()

# Function to fetch data for a specific page
def fetch_data_for_page(page):
    offset = (page - 1) * MAX_RESULTS_PER_PAGE
    search_term = SEARCH_TERMS[page - 1]
    params = {
        "terms": search_term,
        "maxList": MAX_RESULTS_PER_PAGE,
        "df": "NPI,name.full,provider_type,addr_practice.full",
        "sf": "NPI,name.full,provider_type,addr_practice.full",
        "cf": "NPI",
        "ef": "NPI,name.full,provider_type,addr_practice.full,addr_practice.phone,addr_practice.email",
        "ListStart": offset
    }

    response = requests.get(BASE_URL, params=params)

    if response.status_code == 200:
        data = response.json()
        total_results, codes, extra_data = data[0], data[1], data[2]

        # Create a DataFrame from the extra data
        df = pd.DataFrame(extra_data)

        # Filter out duplicates by checking NPIs
        df = df[~df['NPI'].isin(collected_npis)]

        # Update the set of collected NPIs
        collected_npis.update(df['NPI'])

        return df
    else:
        print(f"API request for page {page} failed with status code: {response.status_code}")
        return None

# Fetch data for all pages
results = []
for page in range(1, MAX_PAGES + 1):
    page_data = fetch_data_for_page(page)
    if page_data is not None:
        results.append(page_data)

# Concatenate all DataFrames from different pages into a single DataFrame
final_df = pd.concat(results, ignore_index=True)

# Remove duplicate rows
final_df = final_df.drop_duplicates()

# Filter data for 'AZ' in the practice address
final_df_AZ = final_df[final_df['addr_practice.full'].str.contains('AZ')]

# Print the first few rows of the final DataFrame
final_df

Unnamed: 0,NPI,name.full,provider_type,addr_practice.full,addr_practice.phone,addr_practice.email
0,1649399197,ST JOSEPH MERCY HOSPITAL,Physician/Internal Medicine,5301 E HURON RIVER DR MIXED SPECIALTY PHYSICIA...,(734) 712-3456,
1,1710918982,"AIKEN PULMONARY ASSOCIATES, LLC",Physician/Internal Medicine,"68 PHYSICIAN DR, AIKEN, SC 29801",(803) 643-1090,
2,1548358633,PHIPPS PHARMACY INC,Medical Supply Company with Pharmacist,"25 PHYSICIAN LANE, SOUTHAVEN, MS 38671",(662) 349-9481,
3,1295743631,AIKEN REGIONAL MEDICAL CENTERS LLC,Physician/Pediatric Medicine,"60 PHYSICIAN DR, AIKEN, SC 29801",(803) 641-5000,
4,1528150398,DESOTO FAMILY MEDICAL CENTER,Physician/Family Practice,"75 PHYSICIAN LANE, SOUTHAVEN, MS 38671",(662) 393-7722,
...,...,...,...,...,...,...
4504,1053715268,LAUREL PEDIATRIC DENTISTRY,Dentist,"13916 BALTIMORE AVE, LAUREL, MD 20707",(301) 498-6511,
4505,1306241948,NORTHVIEW FAMILY DENTISTRY,Dentist,"2700 5 MILE RD NE #202, GRAND RAPIDS, MI 49525",(616) 364-8769,
4506,1255736799,NOVI DENTISTS PLLC,Dentist,"40255 GRAND RIVER AVE SUITE 200, NOVI, MI 48375",(248) 442-0400,
4507,1649676354,CHALLGREN DENTISTRY PC,Dentist,"170 PROFESSIONAL CT, LAFAYETTE, IN 47905",(765) 446-9606,


In [6]:
#Visualize Arizona Specific Data Leads
final_df_AZ

Unnamed: 0,NPI,name.full,provider_type,addr_practice.full,addr_practice.phone,addr_practice.email
10,1992826325,CARRAWAY SURGICAL ASSOCIATES,Specialist,"1600 CARRAWAY BLVD PHYSICIAN PLAZA SUITE 200, ...",(205) 502-3600,
16,1396768362,STATE OF SOUTH DAKOTA DIVISION OF OASI,Physician/Pediatric Medicine,"201 S LLOYD ST PHYSICIAN PLAZA SUITE E202, ABE...",(605) 626-2630,
95,1275617474,WASHINGTON PHYSICIAN SERVICES,Physician/Pediatric Medicine,"2001 WATERDAM PLAZA DR STE 105, MCMURRAY, PA 1...",(724) 942-6499,
137,1487678736,PERSONAL HOME PHYSICIAN,Physician/Family Practice,"CARR. 31 JUNCOS PLAZA LOCAL D-2, JUNCOS, PR 00777",(787) 713-6505,
170,1184847873,COOPER PHYSICIAN OFFICES,Physician/Internal Medicine,"1 PLAZA DR SUITE 103, SEWELL, NJ 08080",(856) 270-4080,
...,...,...,...,...,...,...
4391,1336293877,DM FAMILY DENTISTRY,Dentist,"2028 N TREKELL RD #107 OR 108, CASA GRANDE, AZ...",(520) 426-0404,
4487,1386033629,FLAGSTAFF MODERN DENTISTRY,Dentist,"304 S REGENT ST, SUITE 101, FLAGSTAFF, AZ 86001",(928) 226-7494,
4492,1033503859,ARTISTIC DENTISTRY LLC,Dentist,"5639 E 5TH ST SUITE E-F, TUCSON, AZ 85711",(520) 745-0030,
4496,1548468044,HIREMATH FAMILY DENTISTRY,Dentist,"750 E PUSCH VIEW LN STE 100, ORO VALLEY, AZ 85737",(520) 797-9524,
