<a href="https://colab.research.google.com/github/holtnk/ASDLeads/blob/main/ASD_Lead_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

To Run:

Windows: Ctrl + F9

Mac: Command + Ctrl + F9

Note: You may need to also press the fn key if your function row is multipurpose

Enter your zip code when prompted

In [1]:
pip install requests




In [2]:
pip install geopy




In [3]:
pip install pandas



In [4]:
import requests
import csv
from geopy.geocoders import Nominatim
from geopy.distance import geodesic

taxonomy_search_list = [
    'Adolescent and Children Mental Health Clinic/Center',
    'Center',
    'Child & Adolescent Psychiatric/Mental Health Clinical Nurse Spe Child & Family Psychiatric/Mental Health Clinical Nurse Specialist',
    'Child & Adolescent Psychiatric/Mental Health Registered Nurse Behavioral Neurology & Neuropsychiatry Physician',
    'Child & Adolescent Psychiatry Physician',
    'Children\'s Hospital',
    'Clinic',
    'Clinic/Center',
    'Clinical Child & Adolescent Psychologist',
    'Clinical Neuropsychologist',
    'Clinical Psychologist',
    'Cognitive & Behavioral Psychologist',
    'Developmental',
    'Diagnostic Neuroimaging (Psychiatry & Neurology) Physician',
    'Family Care',
    'Family Medicine',
    'Family Psychologist',
    'General Practice',
    'Health Psychologist',
    'Health Service Psychologist',
    'Intellectual & Developmental Disabilities Psychologist',
    'Mental Health',
    'Neurology with Special Qualifications in Child Neurology Physici',
    'Pediatrics',
    'Prescribing (Medical) Psychologist',
    'Primary Care',
    'Psychiatry Physician',
    'Psychoanalysis Psychologist',
    'Psychologist',
    'School Psychologist'
]

VALID_STATE_LIST = [
    'AA','AE','AK','AL','AP','AR','AS','AZ',
    'CA','CO','CT','DC','DE','FL','FM','GA',
    'GU','HI','IA','ID','IL','IN','KS','KY',
    'LA','MA','MD','ME','MH','MI','MN','MO',
    'MP','MS','MT','NC','ND','NE','NH','NJ',
    'NM','NV','NY','OH','OK','OR','PA','PR',
    'RI','SC','SD','TN','TX','UT','VA','VI',
    'VT','WA','WI','WV','WY'
]

RADIUS = 100


def collect_leads(zip_code):
    all_leads = []  # List to store all leads
    invalid_taxonomies = []  # List to store taxonomy descriptions without valid leads

    # Load and accumulate leads for all taxonomy descriptions
    for taxonomy_desc in taxonomy_search_list:
        providers = search_providers_by_zip(zip_code, taxonomy_desc)
        if providers:
            for provider in providers:
                # Safely fetch provider details
                basic_info = provider.get('basic', {})
                addresses = provider.get('addresses', [{}])
                taxonomies = provider.get('taxonomies', [])

                # Safely retrieve first address details
                first_address = addresses[0] if addresses else {}

                # Safely retrieve taxonomy descriptions
                taxonomy_descs = [taxonomy.get('desc', '') for taxonomy in taxonomies if taxonomy.get('desc') is not None]

                lead = {
                    'Org Name': basic_info.get('organization_name', ''),
                    'NPI': provider.get('number', ''),
                    'Address': first_address.get('address_1', ''),
                    'City': first_address.get('city', ''),
                    'State': first_address.get('state', ''),
                    'Zip': first_address.get('postal_code', ''),
                    'Phone': first_address.get('telephone_number', ''),
                    'Taxonomy Codes': ', '.join([taxonomy.get('code', '') for taxonomy in taxonomies if taxonomy.get('code')]),
                    'Taxonomy Description': ', '.join(taxonomy_descs)
                }

                all_leads.append(lead)  # Append lead to all_leads list
        else:
            invalid_taxonomies.append(taxonomy_desc)

    # Write leads to CSV
    if all_leads:
        with open('Leads.csv', 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['Org Name', 'NPI', 'Address', 'City', 'State', 'Zip', 'Phone', 'Taxonomy Codes', 'Taxonomy Description']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_leads)
            print(f"Added {len(all_leads)} leads to Leads.csv")
    else:
        print("No leads found for any taxonomy description.")

    # Print or further process invalid taxonomy descriptions
    if invalid_taxonomies:
        print("\nTaxonomy descriptions without valid leads:")
        for desc in invalid_taxonomies:
            print(desc)
        print("\nPlease check these for proper spelling/classification and try again. Note that word order, abbreviations, and/or capitalizations may play a factor.")



def search_providers_by_zip(zip_code, taxonomy_desc):
    base_url = 'https://npiregistry.cms.hhs.gov/api/'
    url = f'{base_url}?state={state}&limit=200&taxonomy_description={taxonomy_desc}&version=2.1'
    try:
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            results = data.get('results', [])
            return results
        else:
            print(f'Error fetching data for {taxonomy_desc}. Status code: {response.status_code}')
            return None
    except requests.exceptions.RequestException as e:
        print(f'Error fetching data for {taxonomy_desc}: {str(e)}')
        return None


def is_within_radius(origin_zip_code, to_zip_code):
    geolocator = Nominatim(user_agent="zipcode_distance_calculator")

    # Isolate to the first 5 digits of the zip code
    origin_zip_code = origin_zip_code[:5]
    to_zip_code = to_zip_code[:5]

    origin = geolocator.geocode(f"{origin_zip_code}, USA")
    destination = geolocator.geocode(f"{to_zip_code}, USA")

    if not origin:
        raise ValueError(f"Cannot find location for zip code {origin_zip_code}")
    if not destination:
        raise ValueError(f"Cannot find location for zip code {to_zip_code}")

    distance = geodesic((origin.latitude, origin.longitude), (destination.latitude, destination.longitude)).miles
    print("Distance to ", to_zip_code,":", distance)
    return distance <= RADIUS


if __name__ == "__main__":
    zip_code = input("Enter your zip code: ")
    state = input("Enter your state abbreviation: ")
    while state not in VALID_STATE_LIST:
      state = input("Invalid entry. Please enter your two-character state abbreviation in all caps: ")
    print(f"Searching for providers within {RADIUS} miles of {zip_code}")
    collect_leads(zip_code)
    print("Search completed")


Enter your zip code: 98144
Enter your state abbreviation: TX
Searching for providers within 100 miles of 98144
Added 3172 leads to Leads.csv

Taxonomy descriptions without valid leads:
Adolescent and Children Mental Health Clinic/Center
Center
Children's Hospital
Clinical Psychologist
Diagnostic Neuroimaging (Psychiatry & Neurology) Physician
Family Care
Family Psychologist
Health Psychologist
Health Service Psychologist
Neurology with Special Qualifications in Child Neurology Physici
Prescribing (Medical) Psychologist
Psychiatry Physician
Psychoanalysis Psychologist
School Psychologist

Please check these for proper spelling/classification and try again. Note that word order, abbreviations, and/or capitalizations may play a factor.
Search completed
