In [5]:
from difflib import get_close_matches
import os, json
import pandas as pd

# Reference tables. The code below reads PHY_NM/PHY_ID from the physician table
# and NAME/VC2 from the product table.
phyDB = pd.read_excel("../Physician Database 500 PRS.xlsx")
prodDB = pd.read_excel("../Product Database 500 PRS.xlsx")

# Predictions, keyed by prescription ID (the JSON file name up to its first ".").
pres_to_phyID = {}
pres_to_medID = {}


def _first_id_by_name(names, ids):
    """Map every string name to the ID paired with its first occurrence.

    Non-string names (such as NaN) are skipped: difflib cannot compare them
    against a query string and would raise a TypeError.
    """
    lookup = {}
    for name, ident in zip(names, ids):
        if isinstance(name, str):
            lookup.setdefault(name, ident)
    return lookup


def _closest_name(raw_name, candidates):
    """Return the candidate most similar to ``raw_name.upper()``, or None.

    None is returned when ``raw_name`` is not a string (missing/null field) or
    when no candidate clears difflib's default similarity cutoff.
    """
    if not isinstance(raw_name, str):
        return None
    # n=1 keeps only the best-scoring candidate, which is all that is used.
    matches = get_close_matches(raw_name.upper(), candidates, n=1)
    return matches[0] if matches else None


# Build the lookups once instead of re-filtering the DataFrames for every match.
phy_id_by_name = _first_id_by_name(phyDB["PHY_NM"], phyDB["PHY_ID"])
med_id_by_name = _first_id_by_name(prodDB["NAME"], prodDB["VC2"])
phy_names = list(phy_id_by_name)
med_names = list(med_id_by_name)

n_prescriptions = 0  # number of JSON files actually processed
for file in sorted(os.listdir()):  # sorted so dict order is reproducible across runs
    if not file.endswith(".json"):
        continue
    n_prescriptions += 1
    pres_id = file.split(".")[0]

    with open(file) as f:
        data = json.load(f)

    # Getting Doctor Names
    guess = _closest_name(data.get("doctor"), phy_names)
    if guess is not None:
        pres_to_phyID[pres_id] = phy_id_by_name[guess]

    # Getting Medication Names
    # `or []` tolerates an explicit null; non-dict entries are ignored.
    medications = [
        med.get("name")
        for med in (data.get("medication") or [])
        if isinstance(med, dict)
    ]
    medications_matches = []
    medications_match_IDs = []
    for med in medications:
        match = _closest_name(med, med_names)
        if match is not None:
            medications_matches.append(match)
            medications_match_IDs.append(med_id_by_name[match])
    # Only prescriptions with at least one matched medication are recorded.
    if medications_match_IDs:
        pres_to_medID[pres_id] = medications_match_IDs

print(f"Doctor detected in {len(pres_to_phyID)} out of {n_prescriptions} prescriptions.")
print(f"Medications detected in {len(pres_to_medID)} out of {n_prescriptions} prescriptions.")

Doctor detected in 71 out of 100 prescriptions.
Medications detected in 40 out of 100 prescriptions.


### Checking Doctor Name Predictions

In [6]:
# Load the labelled sheet and keep only the three columns used for scoring.
presDB = pd.read_excel('../500 PRS + Slip Data.xlsx', sheet_name='300 PRS Data')[
    ["PRS_ID", "PHY_ID", "VC2"]
]

# Ground truth keyed by the stringified PRS_ID.
actual_pres_to_phyID = {}
actual_pres_to_medID = {}

for presID, phyID, medID in presDB.values:
    key = str(presID)
    # Later rows with the same PRS_ID overwrite the physician ID...
    actual_pres_to_phyID[key] = phyID
    # ...while their medication IDs accumulate in row order.
    actual_pres_to_medID.setdefault(key, []).append(medID)

# Both dictionaries are filled from the same rows, so their key counts must agree.
assert len(actual_pres_to_phyID) == len(actual_pres_to_medID)
print(f"TOTAL PRESCRIPTIONS: {len(actual_pres_to_phyID)}")

TOTAL PRESCRIPTIONS: 300


In [7]:
# Compare the predicted physician IDs against the ground truth.
# A prediction whose prescription has no ground-truth entry is reported and
# left out of the accuracy figure instead of aborting the cell with a KeyError.
score = 0
evaluated = 0
for k, v in pres_to_phyID.items():
    if k not in actual_pres_to_phyID:
        print(f"Skipped: {k} - no ground-truth PHY_ID available")
        continue
    evaluated += 1
    expected = actual_pres_to_phyID[k]
    if v != expected:
        print(f"Error: {k} - {v} != {expected}")
    else:
        score += 1

# `evaluated` equals len(pres_to_phyID) whenever nothing was skipped.
print(f"Accuracy: {score}/{evaluated} doctors detected correctly!")

Error: PRS208C5013216 - JSR17625 != JSR33331
Error: PRS208C5012550 - BAR35550 != BOG12278
Error: PRS208C4023687 - RNG00957 != RNG23419
Error: PRS208C5009028 - CTG29217 != DHA01867
Error: PRS208C4018627 - COM00007 != DHK59212
Error: PRS208C4002621 - RAJ08508 != RNG25655
Error: PRS208C4026487 - DHK44896 != DHA08903
Error: PRS208C4027163 - MMS35214 != DHA05818
Accuracy: 63/71 doctors detected correctly!


### Checking Medication Predictions

In [8]:
def _mean_share_percent(stats, key):
    """Average, over prescriptions, of ``key``'s share of all counted medications (in %).

    ``stats`` is an iterable of dicts holding the three counters
    "detected_correctly", "detected_incorrectly" and "not_detected".
    Entries whose counters sum to zero are ignored; NaN is returned when
    nothing is left to average.
    """
    shares = []
    for s in stats:
        total = s["detected_correctly"] + s["detected_incorrectly"] + s["not_detected"]
        if total:
            shares.append(s[key] / total)
    if not shares:
        return float("nan")
    return sum(shares) / len(shares) * 100


pres_to_medStat = {}
for pres in pres_to_medID:
    if pres not in actual_pres_to_medID:
        # No ground truth for this prescription: it cannot be scored.
        print(f"Skipped: {pres} - no ground-truth medications available")
        continue

    # Match predictions to actual medications one-to-one. Each actual entry is
    # consumed by at most one prediction, so a duplicated predicted ID cannot be
    # counted as correct more often than it really occurs (a plain membership
    # test would drive "not_detected" negative and push a share above 100%).
    unmatched_actual = list(actual_pres_to_medID[pres])
    stat = {"detected_correctly": 0, "detected_incorrectly": 0, "not_detected": 0}
    for med in pres_to_medID[pres]:
        if med in unmatched_actual:
            unmatched_actual.remove(med)
            stat["detected_correctly"] += 1
        else:
            stat["detected_incorrectly"] += 1
    stat["not_detected"] = len(unmatched_actual)
    pres_to_medStat[pres] = stat

avg_detected_correctly = _mean_share_percent(pres_to_medStat.values(), "detected_correctly")
avg_detected_incorrectly = _mean_share_percent(pres_to_medStat.values(), "detected_incorrectly")
avg_not_detected = _mean_share_percent(pres_to_medStat.values(), "not_detected")
print(f"Of the {len(pres_to_medID)} out of 100 prescriptions with medications detected:")
print(f"Average % of medications detected correctly per pres: {avg_detected_correctly:.2f}%")
print(f"Average % of medications detected incorrectly per pres: {avg_detected_incorrectly:.2f}%")
print(f"Average % of medications not detected per pres: {avg_not_detected:.2f}%")

Of the 40 out of 100 prescriptions with medications detected:
Average % of medications detected correctly per pres: 13.41%
Average % of medications detected incorrectly per pres: 13.77%
Average % of medications not detected per pres: 72.81%
