In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Root directory of this analysis step; every other location below is derived from it.
projectDir = Path("/home/_shared/jscliu/project/2025/Flagship/analysis/secondary/PGx/03.eval_prescription")
dataDir = projectDir.joinpath("data")
summaryDir = projectDir.joinpath("summary")

# Input: per-drug prescription counts, and the per-participant genotype table
# written by the sibling "02.consolidate_genotypes" step.
prescription_counts = dataDir.joinpath("total_CPIC_prescription_count_Dec23_Nov24.csv")
all_records = projectDir.joinpath("../02.consolidate_genotypes/summary/pgx_allParticipants_allGenes.csv")

# Output: total vs. predicted-actionable prescription counts per drug.
prescription_count_comp_csv = summaryDir.joinpath("CPIC_total-vs-actionable_prescription_count_Dec23_Nov24.csv")

In [3]:
# Load the consolidated genotype table and keep only the rows whose
# founder_status is "Founder" and whose treated_ethnicity is "Chinese".
all_records_df = pd.read_csv(all_records)
is_founder = all_records_df['founder_status'] == "Founder"
is_chinese = all_records_df['treated_ethnicity'] == "Chinese"
# .copy() detaches the subset so later column assignments never touch a view.
all_records_df = all_records_df[is_founder & is_chinese].copy()

In [4]:
# Load the per-drug prescription counts. Later cells read the 'Drug',
# 'Prescription_count' and 'Remarks' columns of this table.
prescription_cnt_df = pd.read_csv(prescription_counts)

In [6]:
# Number of leading rows of the prescription table to consider.
cnt = 20
# Report the total prescription count across those leading rows (before filtering).
print(f"{prescription_cnt_df.head(cnt)['Prescription_count'].sum() = :,}")
# Restrict to the leading rows, then drop those remarked as "No significance".
# The mask is built from the same slice it filters, so the indexes match exactly.
top_prescriptions = prescription_cnt_df.head(cnt)
has_significance = top_prescriptions['Remarks'].ne("No significance")
prescription_cnt_df = top_prescriptions.loc[has_significance, :]

prescription_cnt_df.head(cnt)['Prescription_count'].sum() = 4,201,632


In [8]:
# Phenotype label groups that recur across several drugs.
SLCO1B1_ACTIONABLE = [
    "SLCO1B1 Possible Decreased Function",
    "SLCO1B1 Decreased Function",
    "SLCO1B1 Poor Function",
]
CYP2C19_IM_PM = [
    "CYP2C19 Likely Intermediate Metabolizer",
    "CYP2C19 Intermediate Metabolizer",
    "CYP2C19 Poor Metabolizer",
]

# drug -> {gene -> phenotype labels counted as actionable for that drug}.
# Labels are matched verbatim against the "<gene>_phenotype" columns.
# NOTE(review): lansoprazole lists "CYP2C19 Likely Poor Metabolizer" while pantoprazole,
# sertraline and clopidogrel do not - confirm whether that difference is intentional.
drug_actionable_d: dict = {
    "pantoprazole": {"CYP2C19": ["CYP2C19 Ultrarapid Metabolizer", *CYP2C19_IM_PM]},
    "atorvastatin": {"SLCO1B1": [*SLCO1B1_ACTIONABLE]},
    "simvastatin": {"SLCO1B1": [*SLCO1B1_ACTIONABLE]},
    "tramadol": {
        "CYP2D6": [
            "UM (ultrarapid metabolizer)",
            "IM (intermediate metabolizer)",
            "PM (poor metabolizer)",
        ]
    },
    "metoprolol": {"CYP2D6": ["PM (poor metabolizer)"]},
    "rosuvastatin": {
        "SLCO1B1": [*SLCO1B1_ACTIONABLE],
        "ABCG2": ["ABCG2 Poor Function"],
    },
    "sertraline": {"CYP2C19": [*CYP2C19_IM_PM]},
    "clopidogrel": {"CYP2C19": [*CYP2C19_IM_PM]},
    "quetiapine": {"CYP3A4": ["CYP3A4 Poor Metabolizer"]},
    "lansoprazole": {
        "CYP2C19": [
            "CYP2C19 Ultrarapid Metabolizer",
            "CYP2C19 Likely Intermediate Metabolizer",
            "CYP2C19 Intermediate Metabolizer",
            "CYP2C19 Likely Poor Metabolizer",
            "CYP2C19 Poor Metabolizer",
        ]
    },
}

# Drugs whose actionable rate is computed by dedicated cells instead of the generic lookup.
special_drugs = ["allopurinol", 'ibuprofen']

In [9]:
# Handle "regular" drugs
def get_actionable_rate(drug, drug_actionable_d, all_records_df):
    """Return the fraction of records with an actionable phenotype for ``drug``.

    Args:
        drug: Key into ``drug_actionable_d``.
        drug_actionable_d: Mapping ``drug -> {gene -> list of actionable phenotype labels}``.
        all_records_df: Frame with an ``sre_participant_id`` column and one
            ``<gene>_phenotype`` column for every gene listed under ``drug``.

    Returns:
        Number of distinct participant IDs that are actionable for at least one
        gene, divided by the number of rows in ``all_records_df``.

    Raises:
        KeyError: If ``drug`` is not in ``drug_actionable_d`` or a required column is absent.
        ZeroDivisionError: If ``all_records_df`` has no rows.
    """
    flagged_ids: set = set()
    for gene, phenotypes in drug_actionable_d[drug].items():
        carries_actionable = all_records_df[f'{gene}_phenotype'].isin(phenotypes)
        # A participant actionable on several genes is still counted once.
        flagged_ids.update(all_records_df.loc[carries_actionable, 'sre_participant_id'].tolist())
    return len(flagged_ids) / len(all_records_df)
# Compute the rate for every listed drug except those handled by dedicated cells.
regular_drugs = [name for name in prescription_cnt_df['Drug'] if name not in special_drugs]
drug_actionable_rate: dict = {
    name: get_actionable_rate(name, drug_actionable_d, all_records_df)
    for name in regular_drugs
}

In [11]:
# Special drugs - allopurinol
# A participant is flagged as a carrier when either HLA-B haplotype is exactly "HLA-B*58:01".
# NOTE(review): this is an exact string match; higher-resolution calls
# (more fields after "58:01") would not be flagged - confirm the haplotype format.
allopurinol_df = all_records_df.copy()
allopurinol_df['is_HLAB5801_carrier'] = (
    (allopurinol_df['HLA-B_haplotype_1'] == "HLA-B*58:01")
    | (allopurinol_df['HLA-B_haplotype_2'] == "HLA-B*58:01")
)

In [13]:
# Fraction of rows flagged as HLA-B*58:01 carriers.
n_hlab5801_carriers = int(allopurinol_df['is_HLAB5801_carrier'].sum())
allopurinol_actionable_rate = n_hlab5801_carriers / len(allopurinol_df)
# Build a new dict rather than mutating, so re-running the cell is harmless.
drug_actionable_rate = {**drug_actionable_rate, "allopurinol": allopurinol_actionable_rate}

In [15]:
# Special drugs - ibuprofen
# NOTE(review): neither name below is read by the ibuprofen cells visible in this
# notebook - confirm whether they are still needed.
actionable_phenotypes = ["CYP2C9 Intermediate Metabolizer", "CYP2C9 Poor Metabolizer"]
# Despite the "_df" suffix this is a plain list of participant IDs.
is_cyp2c9_im = all_records_df['CYP2C9_phenotype'].eq("CYP2C9 Intermediate Metabolizer")
ibuprofen_tmp1_df = all_records_df.loc[is_cyp2c9_im, 'sre_participant_id'].tolist()

In [16]:
# Per-haplotype value used to build the CYP2C9 score (the two haplotype values are summed).
# NOTE(review): presumably allele activity values from a published table - confirm the source.
AV_FULL = 1
AV_HALF = 0.5
AV_NONE = 0
AV_UNKNOWN = np.nan  # no numeric value assigned; propagates to a NaN score

cyp2c9_av_d: dict = {
    "*1": AV_FULL,
    "*11": AV_HALF,
    "*11+rs1057910": AV_UNKNOWN,
    "*13": AV_NONE,
    "*14": AV_HALF,
    "*16": AV_HALF,
    "*19": AV_UNKNOWN,
    "*2": AV_HALF,
    "*27": AV_UNKNOWN,
    "*29": AV_HALF,
    "*3": AV_NONE,
    "*31": AV_HALF,
    "*33": AV_NONE,
    "*34": AV_UNKNOWN,
    "*36": AV_UNKNOWN,
    "*37": AV_HALF,
    "*39": AV_NONE,
    "*4": AV_HALF,
    "*41": AV_UNKNOWN,
    "*42": AV_NONE,
    "*43": AV_NONE,
    "*44": AV_HALF,
    "*45": AV_NONE,
    "*46": AV_HALF,
    "*48": AV_UNKNOWN,
    "*5": AV_HALF,
    "*50": AV_HALF,
    "*51": AV_UNKNOWN,
    "*52": AV_NONE,
    "*55": AV_HALF,
    "*56": AV_UNKNOWN,
    "*59": AV_UNKNOWN,
    "*60": AV_UNKNOWN,
    "*63": AV_UNKNOWN,
    "*73": AV_UNKNOWN,
    "*77": AV_UNKNOWN,
    "*78": AV_UNKNOWN,
    "*79": AV_UNKNOWN,
    "*8": AV_HALF,
    "*82": AV_UNKNOWN,
}

In [17]:
# Keep the participant ID plus every column whose name mentions CYP2C9.
cyp2c9_columns = [col for col in all_records_df.columns.tolist() if 'CYP2C9' in col]
ibuprogen_records_df = all_records_df.loc[:, ['sre_participant_id'] + cyp2c9_columns].copy()

cyp2c9_hap_1 = ibuprogen_records_df['CYP2C9_haplotype_1']
cyp2c9_hap_2 = ibuprogen_records_df['CYP2C9_haplotype_2']
both_haplotypes_called = cyp2c9_hap_1.notna() & cyp2c9_hap_2.notna()

# Fail loudly, naming the alleles, when a fully-called diplotype contains a
# haplotype that the lookup table does not cover (rows with a missing haplotype
# are skipped and simply receive a NaN score).
unlisted_alleles = (
    set(cyp2c9_hap_1[both_haplotypes_called]) | set(cyp2c9_hap_2[both_haplotypes_called])
) - set(cyp2c9_av_d)
if unlisted_alleles:
    raise KeyError(
        f"CYP2C9 haplotype(s) missing from cyp2c9_av_d: {sorted(map(str, unlisted_alleles))}"
    )

# Score = sum of the two haplotype values. A missing haplotype or a NaN table
# entry maps to NaN, and NaN propagates through the addition.
ibuprogen_records_df['CYP2C9_AS'] = cyp2c9_hap_1.map(cyp2c9_av_d) + cyp2c9_hap_2.map(cyp2c9_av_d)

In [18]:
# Actionable for ibuprofen: Intermediate Metabolizers whose score equals 1, plus all Poor Metabolizers.
cyp2c9_phenotype = ibuprogen_records_df['CYP2C9_phenotype']
is_im_with_score_1 = (
    cyp2c9_phenotype.eq('CYP2C9 Intermediate Metabolizer')
    & ibuprogen_records_df['CYP2C9_AS'].eq(1)
)
is_pm = cyp2c9_phenotype.eq('CYP2C9 Poor Metabolizer')

tmp_1 = int(is_im_with_score_1.sum())  # Intermediate Metabolizers with score == 1
tmp_2 = int(is_pm.sum())               # Poor Metabolizers
ibuprofen_actionable_rate = (tmp_1 + tmp_2) / len(ibuprogen_records_df)

In [19]:
# Add the ibuprofen rate; a new dict is built so re-running the cell is harmless.
drug_actionable_rate = {**drug_actionable_rate, "ibuprofen": ibuprofen_actionable_rate}

In [20]:
# One row per drug (drug name as index) with a single 'actionable_rate' column.
drug_actionable_rate_df = pd.Series(drug_actionable_rate, name='actionable_rate').to_frame()

In [21]:
# Attach each drug's actionable rate to its prescription count.
# Any 'actionable_rate' column left by a previous run of this cell is dropped first;
# otherwise the merge would emit 'actionable_rate_x' / 'actionable_rate_y' and the
# next cell would fail to find 'actionable_rate'.
# Inner join: drugs without a computed rate are dropped.
prescription_cnt_df = (
    prescription_cnt_df
    .drop(columns=['actionable_rate'], errors='ignore')
    .merge(drug_actionable_rate_df, how='inner', left_on='Drug', right_index=True)
)

In [22]:
# Predicted actionable prescriptions = total prescriptions x actionable rate (column-wise).
prescription_cnt_df['actionable_prescription_count'] = (
    prescription_cnt_df['Prescription_count'] * prescription_cnt_df['actionable_rate']
)

## Export to CSV

In [25]:
# Give the count columns their export names and renumber the rows from 0.
export_column_names = {
    "Prescription_count": "total_prescription_count_2024",
    "actionable_prescription_count": "predicted_actionable_prescription_count_2024",
}
plot_df = prescription_cnt_df.reset_index(drop=True).rename(columns=export_column_names)

In [33]:
# Write the drug name plus the total and predicted-actionable counts, without the index.
export_columns = ['Drug', 'total_prescription_count_2024', 'predicted_actionable_prescription_count_2024']
plot_df[export_columns].to_csv(prescription_count_comp_csv, index=False)