### Author - Ajaya Kumar Sahoo

#### This code computes the weight of evidence (WOE) for AOPs using their KER evidence 

In [1]:
import pandas as pd
import numpy as np

In [2]:
# KER_details.tsv is a tab-separated file that contains, for each KER, the
# AOP identifier, KER identifier, upstream KE identifier, downstream KE
# identifier, and the weight of evidence.
KER_table = pd.read_csv('KER_details.tsv', sep='\t')
print(KER_table.shape)

# Replace missing values (NaN) with empty strings
KER_table = KER_table.replace(np.nan, '', regex=True)

# Preview the first rows of the table
KER_table.head()

In [3]:
# List of AOPs for which WOE needs to be calculated (one identifier per line).
# Iterating a file yields each line with its trailing newline, which would
# prevent the identifiers from matching the 'AOP Identifier' column, so strip
# it; blank lines are skipped. The `with` block closes the file afterwards.
# NOTE(review): identifiers are kept as strings -- confirm that the
# 'AOP Identifier' column of KER_details.tsv is also read as strings.
with open('AOP_list.tsv') as aop_file:
    AOP_list = [line.strip() for line in aop_file if line.strip()]


In [4]:
# Keep only the KER rows whose AOP identifier is in the requested AOP list
in_requested_aops = KER_table['AOP Identifier'].isin(AOP_list)
KER_table_AOPs = pd.DataFrame(KER_table[in_requested_aops])

# Report the size of the filtered table and the number of distinct KERs in it
print(KER_table_AOPs.shape)
distinct_kers = set(KER_table_AOPs['KER Identifier'])
print(len(distinct_kers))

KER_table_AOPs.head()

In [5]:
# Start the table with its header row; one data row per AOP is appended below
AOP_evidence = [['AOP_iden','No_of_KERs','F(high)','F(Moderate)','F(Low)','F(Not Specified)']]

# Weight-of-evidence labels, in the same order as the F(...) header columns
woe_levels = ['High', 'Moderate', 'Low', 'Not Specified']

# Loop through the different AOPs and get the corresponding information
for aop_iden in AOP_list:
    # rows of the KER table that belong to this AOP
    temp_df = KER_table_AOPs[KER_table_AOPs['AOP Identifier'] == aop_iden]

    # number of KER rows listed for this AOP (denominator of every fraction)
    no_of_kers = len(temp_df)

    woe_labels = temp_df['Weight of Evidence']

    # Fraction of this AOP's KER rows carrying each label. Rows whose label is
    # none of the four levels still count in the denominator.
    # An AOP without any KER rows would make the division raise
    # ZeroDivisionError and abort the whole run; report 0.0 for it instead.
    fractions = [
        int((woe_labels == level).sum()) / no_of_kers if no_of_kers else 0.0
        for level in woe_levels
    ]

    AOP_evidence.append([aop_iden, no_of_kers] + fractions)

# First entry is the header row, the remaining entries are the data rows
AOP_evidence_df = pd.DataFrame(AOP_evidence[1:], columns=AOP_evidence[0])
print(AOP_evidence_df.shape)
AOP_evidence_df.head()

In [6]:
def woe_score(row):
    """Return the overall weight-of-evidence label for one AOP row.

    The label is the first level at which the cumulative fraction of KERs
    (High, then High + Moderate, then High + Moderate + Low) reaches 0.5.

    Args:
        row: Mapping-like object indexable by 'F(high)', 'F(Moderate)' and
            'F(Low)', each giving a fraction of the AOP's KERs.

    Returns:
        'High', 'Moderate', 'Low' or 'Not Specified'.
    """
    high_share = row['F(high)']
    if high_share >= 0.5:
        return 'High'

    high_or_moderate = high_share + row['F(Moderate)']
    if high_or_moderate >= 0.5:
        return 'Moderate'

    if high_or_moderate + row['F(Low)'] >= 0.5:
        return 'Low'

    # Fewer than half of the KERs carry a High, Moderate or Low label
    return 'Not Specified'

In [7]:
# Classify every AOP row with woe_score and store the label in a new column
AOP_evidence_df['WoE_score'] = AOP_evidence_df.apply(woe_score, axis=1)

print(AOP_evidence_df.shape)
AOP_evidence_df.head()

In [8]:
# Output file: tab-separated table of per-AOP fractions and WoE score,
# written without the DataFrame index
AOP_evidence_df.to_csv(
    'AOPs_WoE.tsv',
    sep='\t',
    index=None,
    encoding='UTF-8',
)