In [1]:
import os
import requests
import time
import pandas as pd
from dotenv import load_dotenv

In [2]:
# Load API key from .env
load_dotenv()
API_KEY = os.getenv("GOOGLE_PLACES_API_KEY")

if API_KEY is None:
    raise ValueError("API_KEY not found in the environment")

In [3]:
parent_folder = 'webscraping outputs'

files = [
    f'{parent_folder}/Illinois_Adult_Day_Health_Providers.csv',
    f'{parent_folder}/Illinois_Behavior_Intervention_Providers.csv',
    f'{parent_folder}/Illinois_CILA_Providers.csv',
    f'{parent_folder}/Illinois_CLF_Providers.csv',
    f'{parent_folder}/Illinois_Community_Day_Providers.csv',
    f'{parent_folder}/Illinois_Home_Vehicle_Mod_Providers.csv',
    f'{parent_folder}/Illinois_ICF_Providers.csv'
]

In [4]:
# Read all CSV files with data we need to verify as dataframes, storing them in a single array
dfs = []

for file in files:
    df = pd.read_csv(file)
    dfs.append(df)

In [5]:
def find_provider_by_address_or_phone(address, phone_number):
    base_url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json'
    
    # Search by address
    address_params = {
        'input': address,
        'inputtype': 'textquery',
        'fields': 'place_id,name,formatted_address',
        'key': API_KEY
    }
    
    address_response = requests.get(base_url, params=address_params).json()
    
    # If a result is found by phone, return it
    if address_response.get('status') == 'OK':
        return address_response

    # If no result is found, search by phone number
    phone_params = {
        'input': phone_number,
        'inputtype': 'phonenumber',
        'fields': 'place_id,name,formatted_phone_number',
        'key': API_KEY
    }
    phone_response = requests.get(base_url, params=phone_params).json()

    # Return the address response (may also return an empty result)
    return address_response


In [6]:
def verify_provider(provider_name, address, phone_number):
    result = find_provider_by_address_or_phone(address, phone_number)
    status = result.get('status')

    if status == 'OK':
        candidates = result.get('candidates', [])
        for candidate in candidates:
            candidate_name = candidate.get('name')
            candidate_phone = candidate.get('formatted_phone_number')
            if provider_name.lower().strip() in candidate_name.lower().strip() or candidate_name.lower().strip() in provider_name.lower().strip():
                return True, candidate  # Place exists
        return False, None  # Place not found
    elif status == 'ZERO_RESULTS':
        return False, None  # No matching places
    else:
        # Handle other possible statuses
        print(f"Error: {status}")
        return False, None

In [7]:
# Illinois_Adult_Day_Health_Providers.csv
adult_day_health_df = dfs[0][dfs[0]['State'] == 'IL']
verified_providers = []

for index, row in adult_day_health_df.iterrows():
    provider_name = row['Provider Name']
    provider_address = row['Street Address']
    provider_phone_number = row['Phone Number']

    exists, data = verify_provider(provider_name, provider_address, provider_phone_number)

    if exists:
        verified_providers.append({
            'Provider Name': provider_name,
            'Address': provider_address,
            'Phone Number': provider_phone_number,
            'verified': exists,
        })
        print(f"Place Verified: {provider_name}")
    else:
        print(f"Place Not Found: {provider_name}")

    # Sleep to respect rate limits
    time.sleep(1)

# Save results
verified_df = pd.DataFrame(verified_providers)
verified_df.to_csv('Verified_Illinois_Adult_Day_Health_Providers.csv', index=False)

Place Not Found: ADVOCATE HEALTH AND HOSPITALS
Place Verified: CATHOLIC CHARITIES OF THE ARCHDIOCESE OF CHICAGO
Place Not Found: COMMUNITY ADULT DAY CENTER
Place Not Found: EUROPEAN SERVICES AT HOME, INC
Place Not Found: HEART TO HEART SERVICES INC
Place Not Found: HUMAN RESOURCES OF EDGAR AND CLARK COUNTIES
Place Not Found: PRESENCE LIFE CONNECTIONS


In [8]:
# Illinois_Adult_Day_Health_Providers.csv
clf_df = dfs[3][dfs[3]['State'] == 'IL']
verified_providers = []

for index, row in clf_df.iterrows():
    provider_name = row['Provider Name']
    provider_address = row['Street Address']
    provider_phone_number = row['Phone Number']

    exists, data = verify_provider(provider_name, provider_address, provider_phone_number)

    if exists:
        verified_providers.append({
            'Provider Name': provider_name,
            'Address': provider_address,
            'Phone Number': provider_phone_number,
            'verified': exists,
        })
        print(f"Place Verified: {provider_name}")
    else:
        print(f"Place Not Found: {provider_name}")

    # Sleep to respect rate limits
    time.sleep(1)

# Save results
verified_df = pd.DataFrame(verified_providers)
verified_df.to_csv('Verified_CLF_Providers.csv', index=False)

Place Not Found: AVENUES TO INDEPENDENCE
Place Not Found: COMMUNITY WORKSHOP & TRAINING
Place Not Found: CROSSPOINT HUMAN SERVICES
Place Verified: GLENKIRK
Place Not Found: LAMBS FARM
Place Not Found: LITTLE CITY FOUNDATION
Place Not Found: OPPORTUNITY HOUSE INC
Place Verified: SHORE COMMUNITY SERVICES, INC
Place Not Found: ST MARY OF PROVIDENCE SCHOOL
