# In [None]:
import pandas as pd
import requests
import logging

# Configure the root logger once at import time: INFO level and above, with a
# timestamp and severity prefix on every message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define a function to fetch NPI data
def fetch_npi_data(npi_number, timeout=10):
    """Fetch the first NPI registry record for a given NPI number.

    API description: https://npiregistry.cms.hhs.gov/api-page

    Args:
        npi_number: NPI number to look up; interpolated into the query string.
        timeout: Seconds to wait for the registry before giving up, passed to
            ``requests.get``. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        The first entry of the response's ``results`` list, or ``None`` when
        the request fails, the body is not valid JSON, or no results are
        present. Failures are logged rather than raised.
    """
    # Same query string as before, only split across lines for readability.
    url = (
        f"https://npiregistry.cms.hhs.gov/api/?number={npi_number}"
        "&enumeration_type=&taxonomy_description=&name_purpose="
        "&first_name=&use_first_name_alias=&last_name=&organization_name="
        "&address_purpose=&city=&state=&postal_code=&country_code="
        "&limit=&skip=&pretty=&version=2.1"
    )

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises HTTPError for 4xx/5xx responses
        data = response.json()
    except requests.exceptions.RequestException as e:
        logging.error(f"Request failed for NPI: {npi_number} with error: {e}")
        return None
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        logging.error(f"Invalid JSON in response for NPI: {npi_number}: {e}")
        return None

    # Guard against a JSON body that is not an object (e.g. a bare list).
    results = data.get('results') if isinstance(data, dict) else None
    if not results:
        logging.warning(f"No results found for NPI: {npi_number}")
        return None

    return results[0]

# Extract basic info from the NPI result
def extract_basic_info(result):
    """Pull the provider's name, credential and last-updated stamp.

    Reads the ``basic`` sub-dictionary of ``result`` and returns a dict with
    the keys ``first_name``, ``last_name``, ``credential`` and
    ``last_updated``; any field that is absent is returned as ``''``.
    """
    wanted = ('first_name', 'last_name', 'credential', 'last_updated')
    basic = result.get('basic', {})
    return {field: basic.get(field, '') for field in wanted}

# Extract address information
def extract_address_info(result):
    """Extract mailing and physical (practice location) address fields.

    Each entry in ``result['addresses']`` is matched by its
    ``address_purpose`` value: ``MAILING`` fills the ``Mailing_*`` keys and
    ``LOCATION`` fills the ``Physical_*`` keys. Entries without a recognised
    purpose fall back to the positional convention (first entry is mailing,
    second is physical), but never override an entry matched by purpose.

    Args:
        result: A single NPI registry result dictionary.

    Returns:
        A dict holding ``<Prefix>_address_1``, ``_city``, ``_state``,
        ``_postal_code`` and ``_telephone_number`` for each address kind that
        was found. Keys for a kind that is absent are omitted; missing fields
        within a found address are ``''``.
    """
    fields = ('address_1', 'city', 'state', 'postal_code', 'telephone_number')
    prefix_by_purpose = {'MAILING': 'Mailing', 'LOCATION': 'Physical'}
    positional_prefixes = ('Mailing', 'Physical')

    # ``or []`` also covers an explicit ``"addresses": null`` in the payload.
    addresses = result.get('addresses') or []

    # Pair every address with the prefix implied by its purpose (None if the
    # purpose is missing or unrecognised).
    labelled = []
    for address in addresses:
        purpose = str(address.get('address_purpose') or '').strip().upper()
        labelled.append((prefix_by_purpose.get(purpose), address))

    selected = {}
    # First pass: trust the explicit purpose; keep the first match per kind.
    for prefix, address in labelled:
        if prefix is not None:
            selected.setdefault(prefix, address)
    # Second pass: positional fallback for unlabelled entries only.
    for fallback_prefix, (prefix, address) in zip(positional_prefixes, labelled):
        if prefix is None:
            selected.setdefault(fallback_prefix, address)

    # Emit mailing keys before physical keys to keep a stable column order.
    address_data = {}
    for prefix in positional_prefixes:
        address = selected.get(prefix)
        if address is None:
            continue
        for field in fields:
            address_data[f'{prefix}_{field}'] = address.get(field, '')

    return address_data

# Extract taxonomy (specialties) information
def extract_taxonomy_info(result):
    """Extract the primary and secondary taxonomy (specialty) descriptions.

    The taxonomy entry whose ``primary`` flag is ``True`` is reported as the
    primary specialty even when it is not first in the list; the next entry
    in the original order becomes the secondary specialty. When no entry
    carries the flag, the original list order is used.

    Args:
        result: A single NPI registry result dictionary.

    Returns:
        A dict with ``primary_specialty`` and ``secondary_specialty``; each
        is ``''`` when the corresponding taxonomy or its ``desc`` is missing.
    """
    # ``or []`` also covers an explicit ``"taxonomies": null`` in the payload.
    taxonomies = result.get('taxonomies') or []

    # Stable sort: flagged-primary entries move to the front, everything else
    # keeps its original relative order.
    ordered = sorted(taxonomies, key=lambda taxonomy: taxonomy.get('primary') is not True)

    descriptions = [taxonomy.get('desc', '') for taxonomy in ordered[:2]]
    descriptions += [''] * (2 - len(descriptions))

    return {
        'primary_specialty': descriptions[0],
        'secondary_specialty': descriptions[1],
    }

# Process each NPI number and return a DataFrame of results
def process_npi_numbers(npi_numbers):
    """Look up every NPI number and collect the extracted fields.

    Each NPI is fetched in turn; NPIs for which the fetch returns ``None``
    are skipped. The remaining records are flattened into one row each
    (``npi_number`` followed by the basic, address and taxonomy fields) and
    returned as a pandas DataFrame.
    """
    rows = []

    for npi in npi_numbers:
        logging.info(f"Processing NPI: {npi}")

        record = fetch_npi_data(npi)
        if record is not None:
            # Build the row incrementally; later sections are appended after
            # the NPI number in the same order as they are extracted.
            row = {"npi_number": npi}
            row.update(extract_basic_info(record))
            row.update(extract_address_info(record))
            row.update(extract_taxonomy_info(record))
            rows.append(row)

    return pd.DataFrame(rows)

def main():
    """Read NPI numbers from the input CSV, look them up, and save the results.

    Reads the ``NPI Number`` column of ``NPRES NPI.csv``, processes every
    non-blank value through ``process_npi_numbers``, writes the resulting
    DataFrame to ``result.csv`` and prints it.
    """
    # Read the NPI column as text: letting pandas infer a numeric dtype turns
    # the column into floats as soon as one cell is blank, which would produce
    # query values such as "1234567890.0" or "nan".
    df = pd.read_csv("NPRES NPI.csv", dtype={"NPI Number": str})

    # Drop blank cells and surrounding whitespace before querying.
    cleaned = df["NPI Number"].dropna().str.strip()
    npi_numbers = [npi for npi in cleaned.to_list() if npi]

    # Process NPI numbers and get the result DataFrame
    result_df = process_npi_numbers(npi_numbers)

    # Save first, then report, so the log line is only emitted on success.
    result_df.to_csv("result.csv", index=False)
    logging.info(f"Processed {len(result_df)} records and saved to csv.")
    print(result_df)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
