In [2]:
import pandas as pd
import numpy as np
from faker import Faker
import random
import datetime

# Initialize Faker and seed every random source used in this cell so that
# repeated runs draw the same random sequence.
fake = Faker()
Faker.seed(0)
np.random.seed(0)
# Most fields below come from the stdlib `random` module, whose generator is
# separate from Faker's and NumPy's; without its own seed they change per run.
random.seed(0)

# Number of records to generate
num_records = 10000

# Sample data for random selection
genders = ['M', 'F']
# Repeated entries make a value more likely to be drawn by random.choice
# (OPD 3/7, IPD 2/7, OPT 2/7).
service_types = ['OPD', 'IPD', 'OPT', 'OPT', 'OPD', 'IPD', 'OPD']
provider_types = ['CON', 'MED', 'PROC', 'INV', 'SUN', 'CN']
claim_types = ['OP Claim', 'IP Claim', 'Pre-auth claim', 'Service billed Negative Amount']
statuses = ['S', 'P', 'D']  # Submitted, Pending, Denied
yes_no = ['Yes', 'No']

# Diagnosis codes and descriptions, stored as (code, description) pairs
diagnosis_codes = [
    ('J02', 'Acute pharyngitis'),
    ('H52.1', 'Myopia'),
    ('E11', 'Type 2 diabetes mellitus'),
    ('M54.5', 'Low back pain'),
    ('I10', 'Essential hypertension'),
    ('K02', 'Dental caries'),
    ('N39.0', 'Urinary tract infection'),
    ('J45.9', 'Asthma, unspecified'),
    ('E78.5', 'Hyperlipidemia, unspecified'),
    ('I21.9', 'Acute myocardial infarction, unspecified'),
    ('Z23', 'Encounter for immunization'),
    ('H52.4', 'Presbyopia'),
    ('A09', 'Infectious gastroenteritis'),
    ('N76.0', 'Acute vaginitis'),
    ('R10.4', 'Other and unspecified abdominal pain'),
    ('Z00.1', 'Routine child health examination'),
]

currency_codes = ['USD', 'EUR', 'KES', 'GBP']

# Generate synthetic data: one dict per claim, collected into a list and
# converted to a DataFrame once at the end.
data = []

for _ in range(num_records):
    # Draw the primary diagnosis once so that the code and its description
    # always belong to the same entry of `diagnosis_codes`.
    primary_code, primary_description = random.choice(diagnosis_codes)

    claim = {
        # `fake.unique` guarantees no repeated value across the whole run.
        'ClaimID': fake.unique.random_int(min=100000000, max=999999999),
        'PatientID': fake.unique.bothify(text='PID########'),
        'MemberID': fake.unique.bothify(text='MID########'),
        'ClaimCode': fake.bothify(text='CC########'),
        'ProviderCode': fake.bothify(text='PC######'),
        'PolicyNumber': fake.bothify(text='PN######'),
        'Age': random.randint(0, 90),
        'Gender': random.choice(genders),
        'AmountBilled': round(random.uniform(50.00, 20000.00), 2),
        'ReferenceNumber': fake.bothify(text='REF########'),
        # Date fields are relative to the day the cell is run.
        'DateOfService': fake.date_between(start_date='-1y', end_date='today'),
        'TotalAmount': round(random.uniform(50.00, 20000.00), 2),
        'Units': random.randint(1, 5),
        'ServiceDate': fake.date_between(start_date='-1y', end_date='today'),
        'ServiceType': random.choice(service_types),
        'ProviderID': fake.bothify(text='PRV########'),
        'ProviderType': random.choice(provider_types),
        'ProviderDescription': fake.job(),
        'ProcedureCode': fake.bothify(text='PROC########'),
        'ServiceDescription': fake.sentence(nb_words=5),
        'Quantity': random.randint(1, 10),
        'UnitPrice': round(random.uniform(10.00, 5000.00), 2),
        'DiagnosisCode': primary_code,
        'SecondaryDiagnosisCode': random.choice(diagnosis_codes)[0],
        'DiagnosisDescription': primary_description,
        'Source': 'SMART-EDI',
        'Currency': random.choice(currency_codes),
        'ClaimDate': fake.date_between(start_date='-1y', end_date='today'),
        'ClaimType': random.choice(claim_types),
        'PreAuthorization': random.choice(yes_no),
        # NOTE(review): the three timestamps are independent draws and are not
        # tied to ClaimDate (the saved data contains values back to 1970).
        'Timestamp1': fake.iso8601(),
        'Timestamp2': fake.iso8601(),
        'Status': random.choice(statuses),
        'Timestamp3': fake.iso8601(),
    }
    data.append(claim)

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV (index=False keeps the row index out of the file)
df.to_csv('synthetic_healthcare_claims.csv', index=False)

print("Synthetic dataset generated and saved as 'synthetic_healthcare_claims.csv'.")

Synthetic dataset generated and saved as 'synthetic_healthcare_claims.csv'.


In [4]:
# Read the CSV written by the generation cell back into a new DataFrame.
synth_data = pd.read_csv('synthetic_healthcare_claims.csv')

In [5]:
# Display the loaded frame; pandas elides the middle rows and columns in the rendered output.
synth_data

Unnamed: 0,ClaimID,PatientID,MemberID,ClaimCode,ProviderCode,PolicyNumber,Age,Gender,AmountBilled,ReferenceNumber,...,DiagnosisDescription,Source,Currency,ClaimDate,ClaimType,PreAuthorization,Timestamp1,Timestamp2,Status,Timestamp3
0,513653999,PID60487647,MID59382421,CC94892411,PC578156,PN593877,11,F,4791.84,REF84080160,...,Presbyopia,SMART-EDI,EUR,2024-06-16,Pre-auth claim,No,2022-12-08T12:37:20.554871,2003-02-16T20:33:27.909633,D,2002-04-10T13:15:27.340656
1,577803329,PID19659342,MID32094711,CC22018684,PC833969,PN477515,58,F,11290.87,REF91795330,...,Type 2 diabetes mellitus,SMART-EDI,GBP,2024-02-11,OP Claim,Yes,2023-04-25T00:05:40.811413,2009-06-10T07:08:00.341154,P,1996-04-27T09:21:06.412337
2,880776980,PID00869141,MID31456208,CC70916345,PC792302,PN258419,3,M,9207.18,REF72076984,...,Low back pain,SMART-EDI,EUR,2024-07-27,Service billed Negative Amount,No,1970-01-31T13:49:06.646020,1980-07-24T17:08:41.843209,P,1988-05-14T01:43:22.617785
3,357070932,PID37696069,MID60271427,CC87890075,PC470638,PN120665,42,M,13000.06,REF03008913,...,"Hyperlipidemia, unspecified",SMART-EDI,KES,2024-03-26,Service billed Negative Amount,Yes,1990-02-24T20:39:43.260367,2001-03-02T08:37:49.196373,P,2016-08-26T15:44:10.217660
4,904377386,PID97765823,MID69402245,CC55159004,PC229456,PN824173,82,M,18597.53,REF04281465,...,Acute vaginitis,SMART-EDI,KES,2023-12-24,OP Claim,No,1970-06-07T11:46:23.829754,1991-08-18T18:09:42.897156,P,1998-07-04T04:10:55.250316
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,193594019,PID54581615,MID52340698,CC69331229,PC489249,PN530458,29,M,2076.55,REF78728126,...,Myopia,SMART-EDI,GBP,2024-05-29,Service billed Negative Amount,No,1993-03-09T18:31:34.331411,2010-11-02T10:30:01.192543,D,1997-11-07T13:46:32.496979
9996,207192490,PID94356829,MID70286939,CC35360819,PC017045,PN410140,71,F,19256.66,REF83023286,...,Essential hypertension,SMART-EDI,KES,2024-03-27,Service billed Negative Amount,Yes,2006-03-19T17:38:25.412843,2018-08-11T03:57:43.799963,P,2008-12-15T17:26:19.192318
9997,392642245,PID67708100,MID56328441,CC24616438,PC319438,PN951133,77,F,1328.04,REF85602989,...,"Acute myocardial infarction, unspecified",SMART-EDI,KES,2024-01-26,IP Claim,Yes,2003-06-20T19:42:49.550597,2006-12-22T19:21:52.908336,S,2020-02-06T02:52:34.684478
9998,942548742,PID96040919,MID02292249,CC08765328,PC254983,PN911557,89,M,16821.09,REF72683580,...,Urinary tract infection,SMART-EDI,KES,2024-07-03,Pre-auth claim,No,1975-06-14T22:42:38.974580,2018-04-18T16:17:53.249590,S,1971-04-28T16:58:10.665248


In [15]:
import requests
import os

def fetch_icd_codes(timeout=30):
    """Fetch ICD codes and titles from the WHO ICD API.

    Reads OAuth2 client credentials from the ``WHO_API_KEY`` and
    ``WHO_API_SECRET`` environment variables, exchanges them for a bearer
    token, then requests the top-level ICD entity and extracts
    ``(code, title)`` pairs from it.

    Failures are reported with ``print`` and turned into an empty list instead
    of an exception, so a failed fetch does not stop the notebook.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.
            ``requests`` waits indefinitely when no timeout is passed.

    Returns:
        A list of ``(code, title)`` tuples, or ``[]`` if credentials are
        missing, a request fails, or the response cannot be parsed.
    """
    # WHO API credentials
    client_id = os.getenv("WHO_API_KEY")
    client_secret = os.getenv("WHO_API_SECRET")

    # Fail fast with an actionable message: when either variable is unset
    # (or empty) there is nothing valid to send to the token endpoint.
    missing = [
        name
        for name, value in (
            ("WHO_API_KEY", client_id),
            ("WHO_API_SECRET", client_secret),
        )
        if not value
    ]
    if missing:
        print(
            "Missing WHO API credentials: set "
            f"{', '.join(missing)} in the environment."
        )
        return []

    token_endpoint = 'https://icdaccessmanagement.who.int/connect/token'
    scope = 'icdapi_access'
    grant_type = 'client_credentials'

    # Obtain the OAUTH2 token
    payload = {
        'client_id': client_id,
        'client_secret': client_secret,
        'scope': scope,
        'grant_type': grant_type
    }

    try:
        token_response = requests.post(
            token_endpoint, data=payload, verify=True, timeout=timeout
        )
        token_response.raise_for_status()
        token = token_response.json().get('access_token')
        if not token:
            print("Failed to fetch access token. Response:", token_response.json())
            return []
    except requests.exceptions.HTTPError as e:
        print(f"Error fetching access token: {e}")
        print("Response content:", e.response.text)
        return []
    except Exception as e:
        # Covers connection errors, timeouts and malformed JSON bodies.
        print(f"Unexpected error: {e}")
        return []

    # Access ICD API to fetch ICD codes
    uri = 'https://id.who.int/icd/entity'
    headers = {
        'Authorization': f'Bearer {token}',
        'Accept': 'application/json',
        'Accept-Language': 'en',
        'API-Version': 'v2'
    }

    try:
        response = requests.get(uri, headers=headers, verify=True, timeout=timeout)
        response.raise_for_status()
        icd_data = response.json()

        # Extract codes and titles.
        # NOTE(review): this assumes the response carries a 'childEntities'
        # list whose items have 'theCode' and a 'title' dict with '@value';
        # confirm against the WHO ICD API reference. If the key is absent the
        # function returns an empty list without reporting an error.
        icd_codes = [
            (entity['theCode'], entity['title']['@value'])
            for entity in icd_data.get('childEntities', [])
            if 'theCode' in entity and 'title' in entity
        ]
        print(f"Fetched {len(icd_codes)} ICD codes.")
        return icd_codes
    except requests.exceptions.HTTPError as e:
        print(f"Error fetching ICD codes: {e}")
        print("Response content:", e.response.text)
        return []
    except Exception as e:
        # Covers connection errors, timeouts and unexpected response shapes.
        print(f"Unexpected error: {e}")
        return []

# Demo: fetch the codes and print a short sample, or a notice when nothing came back.
if __name__ == "__main__":
    codes = fetch_icd_codes()
    if not codes:
        print("No ICD codes fetched.")
    else:
        print("Sample ICD Codes:", codes[:5])

Error fetching access token: 400 Client Error: Bad Request for url: https://icdaccessmanagement.who.int/connect/token
Response content: {"error":"invalid_client"}
No ICD codes fetched.


In [12]:
# Call the fetcher directly so that its returned list is shown as the cell result.
fetch_icd_codes()

Error fetching access token: 400 Client Error: Bad Request for url: https://icdaccessmanagement.who.int/connect/token




[]