# Combine curations of CSH and AJG and prepare PK template

In [1]:
import pandas
import sklearn.metrics

In [2]:
# Load each curator's sheet and relabel its generic 'classification' / 'notes'
# columns with the curator's initials, so the two sets of calls remain
# distinguishable once the frames are combined.
ajg_df = pandas.read_table('ajg/curation-AJG.tsv').rename(
    columns={'classification': 'AJG', 'notes': 'AJG_notes'},
)
csh_df = pandas.read_csv('csh/curation-CSH.csv').rename(
    columns={'classification': 'CSH', 'notes': 'CSH_notes'},
)

In [3]:
# With no explicit keys, pandas performs an inner join on every column name
# the two frames share; rows without a match in both frames are dropped.
template_df = pandas.merge(csh_df, ajg_df)

In [4]:
# Count the rows where the AJG and CSH classifications differ
template_df['AJG'].ne(template_df['CSH']).sum()

444

In [5]:
# Cohen's kappa: agreement between the AJG and CSH classifications,
# corrected for the agreement expected by chance
sklearn.metrics.cohen_kappa_score(template_df['AJG'], template_df['CSH'])

0.49861896320136934

In [6]:
# Preview the first five rows of the merged curations
template_df.head(n=5)

Unnamed: 0,drug,disease,CSH,CSH_notes,drug_url,disease_url,AJG,AJG_notes
0,Abacavir,acquired immunodeficiency syndrome,DM,,http://www.drugbank.ca/drugs/DB01048,http://www.disease-ontology.org/term/DOID%3A635,DM,
1,Abiraterone,prostate cancer,DM,,http://www.drugbank.ca/drugs/DB05812,http://www.disease-ontology.org/term/DOID%3A10283,DM,
2,Acamprosate,alcohol dependence,DM,(works as well as naltrexone),http://www.drugbank.ca/drugs/DB00659,http://www.disease-ontology.org/term/DOID%3A00...,DM,
3,Acarbose,type 2 diabetes mellitus,DM,(glucosidase inhibitor),http://www.drugbank.ca/drugs/DB00284,http://www.disease-ontology.org/term/DOID%3A9352,SYM,
4,Acebutolol,hypertension,DM,(cardioselective beta blocker,http://www.drugbank.ca/drugs/DB01193,http://www.disease-ontology.org/term/DOID%3A10763,SYM,


In [7]:
# Build the worksheet for PK (presumably a third curator - confirm): blank
# 'PK' / 'PK_notes' columns, plus a 'tie' flag that is True wherever the CSH
# and AJG classifications differ.
template_df = template_df.assign(
    PK='',
    PK_notes='',
    tie=template_df['CSH'].ne(template_df['AJG']),
)
# Keep only the worksheet columns: classifications side by side, then notes,
# then the reference links, then the flag.
template_df = template_df.loc[:, [
    'drug', 'disease',
    'CSH', 'AJG', 'PK',
    'CSH_notes', 'AJG_notes', 'PK_notes',
    'drug_url', 'disease_url',
    'tie',
]]
# Flagged rows (tie == True) come first; within each group rows are ordered
# by ascending drug, then disease.
template_df = template_df.sort_values(
    by=['tie', 'drug', 'disease'],
    ascending=[False, True, True],
)

In [8]:
# Preview the first five rows of the sorted template (flagged rows sort first)
template_df.head(n=5)

Unnamed: 0,drug,disease,CSH,AJG,PK,CSH_notes,AJG_notes,PK_notes,drug_url,disease_url,tie
3,Acarbose,type 2 diabetes mellitus,DM,SYM,,(glucosidase inhibitor),,,http://www.drugbank.ca/drugs/DB00284,http://www.disease-ontology.org/term/DOID%3A9352,True
4,Acebutolol,hypertension,DM,SYM,,(cardioselective beta blocker,,,http://www.drugbank.ca/drugs/DB01193,http://www.disease-ontology.org/term/DOID%3A10763,True
7,Acetazolamide,epilepsy syndrome,NOT,SYM,,hm. Can be used for seizures but not great drug,,,http://www.drugbank.ca/drugs/DB00819,http://www.disease-ontology.org/term/DOID%3A1826,True
13,Acetylsalicylic acid,gout,NOT,SYM,,not indicated. Asa can increase risk of gout,,,http://www.drugbank.ca/drugs/DB00945,http://www.disease-ontology.org/term/DOID%3A13189,True
16,Acetylsalicylic acid,osteoarthritis,SYM,DM,,,,,http://www.drugbank.ca/drugs/DB00945,http://www.disease-ontology.org/term/DOID%3A8398,True


In [9]:
# Write the template twice - tab-separated text and an Excel workbook with a
# single 'curation' sheet - leaving the DataFrame index out of both files.
template_df.to_csv('pk/template-pk.tsv', index=False, sep='\t')
template_df.to_excel('pk/template-pk.xlsx', index=False, sheet_name='curation')