In [1]:
# Libraries
import pandas as pd
pd.set_option('display.max_columns', 40)
pd.set_option('display.width', 2000)

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import re

### Import

In [2]:
# -----------------------------
# Import
# -----------------------------
# Read the five input CSV files in a fixed order and bind each resulting
# DataFrame to its own name. `path` is the loop variable, so after the loop it
# still holds the last file path.

source_files = (
    r'data/bnf_drug_indications.csv',
    r'data/prescribing_medications_morethan5.csv',
    r'data/snomed_diagnosis_codes_morethan5.csv',
    r'data/snomed_problem_codes_morethan5.csv',
    r'data/snomed_riskfactor_codes_morethan5.csv',
)

loaded_frames = []
for path in source_files:
    loaded_frames.append(pd.read_csv(path))

(
    bnf_drug_indications,
    prescribing_medications_count,
    snomed_diagnosis_codes,
    snomed_problem_codes,
    snomed_riskfactor_codes,
) = loaded_frames

In [3]:
# Display the medication-count table read from prescribing_medications_morethan5.csv.
prescribing_medications_count

Unnamed: 0,MEDICATION_NAME_SHORT,count
0,atorvastatin,13854
1,omeprazole,9658
2,paracetamol,9062
3,salbutamol,8049
4,lansoprazole,8022
...,...,...
1438,Naftidrofuryl,5
1439,oxytetracycline,5
1440,oxygen,5
1441,Almotriptan,5


## Analysis

In [111]:
# ICHT medication names as a plain Python list (one entry per row of the counts table)
prescribing_list = prescribing_medications_count['MEDICATION_NAME_SHORT'].tolist()

In [112]:
# BNF drug names as a plain Python list (one entry per row, so names can repeat)
bnf_list = bnf_drug_indications['drug_name'].tolist()

In [114]:
# Match BNF and ICHT drug/medication strings: single-word names first, then
# multi-word names. Extras that neither rule catches are added manually in a
# later cell.

# Kept from the original cell: silences pandas' SettingWithCopyWarning for the
# whole session. The matching code below does not rely on it.
pd.options.mode.chained_assignment = None

MATCH_COLUMNS = ['drug_name', 'indication', 'prescribing_medication']


def _significant_tokens(tokens):
    """Return the lower-cased tokens longer than 3 characters, as a set.

    NOTE(review): the original comment said "less than 3 characters", but the
    code has always dropped tokens of 3 characters or fewer; that threshold is
    kept unchanged here.
    """
    return {token.lower() for token in tokens if len(token) > 3}


def match_bnf_to_prescribing(prescribing_names, bnf_names, bnf_indications):
    """Pair prescribing medication names with BNF drug names and indications.

    Matching rules:
      * Single-word medication (no spaces) vs. single-word BNF drug (no
        hyphens): case-insensitive equality (``str.casefold``).
      * Multi-word medication (split on spaces) vs. BNF drug (split on
        hyphens) with the same number of tokens: every medication token longer
        than 3 characters must appear among the drug's tokens longer than 3
        characters (compared in lower case).

    Parameters
    ----------
    prescribing_names : iterable of str
        Medication names; non-string entries (e.g. NaN) are skipped.
    bnf_names : iterable of str
        BNF drug names; may contain repeats and non-string entries.
    bnf_indications : pandas.DataFrame
        Must have ``drug_name`` and ``indication`` columns.

    Returns
    -------
    (single_df, multi_df) : tuple of pandas.DataFrame
        De-duplicated frames with columns ``drug_name``, ``indication``,
        ``prescribing_medication``, one row per matched indication.
    """
    # Indications per BNF drug, in file order.
    indications_by_drug = {}
    for name, indication in zip(bnf_indications['drug_name'], bnf_indications['indication']):
        indications_by_drug.setdefault(name, []).append(indication)

    # Index the distinct BNF names once instead of rescanning the full list
    # for every medication. dict.fromkeys keeps first-appearance order.
    single_word_drugs = {}   # casefolded name -> [drug names]
    multi_word_drugs = {}    # token count     -> [(drug name, significant tokens)]
    for drug in dict.fromkeys(bnf_names):
        if not isinstance(drug, str):
            continue
        drug_tokens = drug.split("-")
        if len(drug_tokens) == 1:
            single_word_drugs.setdefault(drug.casefold(), []).append(drug)
        else:
            multi_word_drugs.setdefault(len(drug_tokens), []).append(
                (drug, _significant_tokens(drug_tokens))
            )

    single_rows = []
    multi_rows = []
    for medication in dict.fromkeys(prescribing_names):
        if not isinstance(medication, str):
            continue
        medication_tokens = medication.split(" ")

        if len(medication_tokens) == 1:
            for drug in single_word_drugs.get(medication.casefold(), []):
                for indication in indications_by_drug.get(drug, []):
                    single_rows.append((drug, indication, medication))
            continue

        wanted = _significant_tokens(medication_tokens)
        if not wanted:
            # Every token was 3 characters or shorter. An empty set is a subset
            # of anything, so without this guard the medication would match
            # every BNF drug with the same number of tokens.
            continue
        for drug, drug_tokens in multi_word_drugs.get(len(medication_tokens), []):
            if wanted <= drug_tokens:
                for indication in indications_by_drug.get(drug, []):
                    multi_rows.append((drug, indication, medication))

    # Build each frame once from the collected rows (no concat inside the loop).
    single_df = (
        pd.DataFrame(single_rows, columns=MATCH_COLUMNS)
        .drop_duplicates()
        .reset_index(drop=True)
    )
    multi_df = (
        pd.DataFrame(multi_rows, columns=MATCH_COLUMNS)
        .drop_duplicates()
        .reset_index(drop=True)
    )
    return single_df, multi_df


final_single_drug_df, final_multi_drug_df = match_bnf_to_prescribing(
    prescribing_list, bnf_list, bnf_drug_indications
)


In [None]:
# Other most commonly used drugs that need to be manually added (looked at top 12%)
#Emollients, topical
#emollients, topical
#Macrogol 3350 with
#Co-codamol
#budesonide-formoterol
#phenoxymethylpenicillin
#fluticasone-salmeterol
#hypromellose ophthalmic
#morphine sulfate
#Morphine sulfate
#co-trimoxazole
#emtricitabine-tenofovir
#latanoprost ophthalmic
#sodium alginate-potassium bicarbonate
#co-dydramol
#mometasone nasal - Mometasone furoate
#co-careldopa

In [176]:
# Manually map commonly prescribed medications to BNF drugs. Each rule below
# selects BNF rows by drug name and attaches one or more prescribing medication
# names to them.
OTHER_DRUG_COLUMNS = ['drug_name', 'indication', 'prescribing_medication']


def _bnf_rows_for(pattern, prescribing_medication, *, contains=False, case=True, drug_label=None):
    """Return the BNF indication rows for one manual mapping.

    Parameters
    ----------
    pattern : str
        BNF drug name to look up.
    prescribing_medication : str
        Value written to the ``prescribing_medication`` column of every row.
    contains : bool, default False
        If False, select rows whose ``drug_name`` equals ``pattern`` exactly.
        If True, select rows whose ``drug_name`` contains ``pattern``
        (``Series.str.contains``); rows with a missing name never match.
    case : bool, default True
        Case sensitivity for the ``contains`` match; ignored for exact matches.
    drug_label : str or None, default None
        If given, overwrite ``drug_name`` with this label; otherwise keep the
        names of the matched BNF rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``drug_name``, ``indication``, ``prescribing_medication``;
        empty when nothing matches.
    """
    names = bnf_drug_indications['drug_name']
    if contains:
        mask = names.str.contains(pattern, case=case, na=False)
    else:
        mask = names == pattern
    rows = bnf_drug_indications.loc[mask, ['drug_name', 'indication']].copy()
    if drug_label is not None:
        rows['drug_name'] = drug_label
    rows['prescribing_medication'] = prescribing_medication
    return rows[OTHER_DRUG_COLUMNS]


# (BNF pattern, match options, prescribing medication names).
# Order matters: it fixes the row order of final_other_drug_df.
MANUAL_DRUG_RULES = [
    ('Emollients-urea-containing', {}, ['Emollients, topical', 'emollients, topical']),
    ('macrogol-3350-with', {'contains': True, 'case': False}, ['macrogol 3350 with electrolytes']),
    ('Co-codamol', {}, ['co-codamol', 'Co-codamol']),
    ('Budesonide-with-formoterol', {}, ['budesonide-formoterol']),
    ('Phenoxymethylpenicillin', {}, [
        'phenoxymethylpenicillin potassium  (penicillin V) (contains penicillin)',
        'PhenoxymethylpeniciLLIN (Penicillin V) (contains penicillin)',
    ]),
    # Substring match, but every matched row is labelled with this one name.
    # NOTE(review): confirm no other BNF drug name contains this string.
    ('Fluticasone-with-salmeterol',
     {'contains': True, 'case': False, 'drug_label': 'Fluticasone-with-salmeterol'},
     ['fluticasone-salmeterol']),
    ('Hypromellose', {}, ['hypromellose ophthalmic', 'Hypromellose ophthalmic']),
    ('Morphine', {}, ['morphine sulfate', 'Morphine sulfate']),
    ('Co-trimoxazole', {}, ['co-trimoxazole', 'Co-trimoxazole']),
    ('Emtricitabine-with-tenofovir', {'contains': True}, ['emtricitabine-tenofovir']),
    ('Latanoprost', {}, ['latanoprost ophthalmic']),
    ('Latanoprost-with-timolol', {}, ['latanoprost-timolol ophthalmic']),
    ('Sodium-alginate-with-potassium-bicarbonate', {}, ['sodium alginate-potassium bicarbonate']),
    ('Dihydrocodeine-with-paracetamol', {}, ['co-dydramol', 'Co-dydramol']),
    ('Mometasone-furoate', {}, ['mometasone nasal', 'Mometasone nasal']),
    ('Co-careldopa', {}, ['co-careldopa', 'Co-careldopa']),
]

manual_frames = [
    _bnf_rows_for(pattern, medication, **options)
    for pattern, options, medications in MANUAL_DRUG_RULES
    for medication in medications
]

# Single concat over all rule results, then de-duplicate and renumber.
final_other_drug_df = (
    pd.concat(manual_frames, ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [178]:
# Stack the single-word, multi-word and manually mapped matches into one table
# with a fresh 0..n-1 index.
matched_frames = [final_single_drug_df, final_multi_drug_df, final_other_drug_df]
final_drug_df = pd.concat(matched_frames, ignore_index=True)

In [179]:
# Display the combined BNF drug / indication / prescribing medication table.
final_drug_df

Unnamed: 0,drug_name,indication,prescribing_medication
0,Atorvastatin,Primary hypercholesterolaemia in patients who ...,atorvastatin
1,Atorvastatin,Combined (mixed) hyperlipidaemia in patients w...,atorvastatin
2,Atorvastatin,Heterozygous familial hypercholesterolaemia in...,atorvastatin
3,Atorvastatin,Homozygous familial hypercholesterolaemia in ...,atorvastatin
4,Atorvastatin,Primary prevention of cardiovascular events in...,atorvastatin
...,...,...,...
4173,Co-careldopa,Parkinson's disease (patients not receiving le...,Co-careldopa
4174,Co-careldopa,Parkinson's disease (patients transferring fro...,Co-careldopa
4175,Co-careldopa,Severe Parkinson's disease inadequately contro...,Co-careldopa
4176,Co-careldopa,Parkinson's disease (for fine adjustment of Si...,Co-careldopa


In [182]:
# Distinct-name counts. Each bare expression is shown as its own output, in
# this order:
#   1. distinct BNF drug names
#   2. distinct prescribing medication names
#   3. distinct BNF drug names that were matched
#   4. distinct prescribing medication names that were matched
bnf_drug_indications.drug_name.nunique()
prescribing_medications_count.MEDICATION_NAME_SHORT.nunique()
final_drug_df.drug_name.nunique()
final_drug_df.prescribing_medication.nunique()

print('\n')
# Row counts (one row per indication): all BNF rows, then matched rows.
# The row count of prescribing_medications_count is intentionally not shown.
len(bnf_drug_indications.index)
len(final_drug_df.index)

1695

1443

555

902





5479

4178

In [195]:
# Save 
#final_drug_df.to_csv('bnf_drug_indications_and_prescribing_medications.csv', index=False)

In [206]:
# Save just indication - for input into metamap
#final_drug_df['indication'].to_csv('indication.csv', index=False)