# GO enrichment analysis

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from scipy.stats import fisher_exact

In [None]:
# Resistance mechanisms analysed in this notebook; later cells index into this
# list by position, so the order matters.
mechanism_list = [
    'antibiotic inactivation',
    'antibiotic target alteration',
    'antibiotic efflux',
    'antibiotic target replacement',
    'antibiotic target protection',
]

# Directory that holds the input CSV and receives the analysis output.
path = 'Results/'
results_dir_missing = not os.path.exists(path)
if results_dir_missing:
    print("make" + path)
    os.makedirs(path)

## Run statistical tests (one-sided Fisher's exact test per GO term)

In [None]:
def get_lists_of_GO(interpro_with_GO, mechanism):
    """Collect the GO IDs annotated on the rows of one resistance mechanism.

    Args:
        interpro_with_GO: DataFrame with at least a 'mechanism' and a 'GO'
            column; each 'GO' value is a '|'-separated string of GO IDs.
        mechanism: value of the 'mechanism' column to select.

    Returns:
        A tuple ``(GO_list, rows)`` where ``GO_list`` is the flat list of GO
        IDs (duplicates kept, in row order) and ``rows`` is the selected
        sub-frame with every missing value replaced by '-'.
    """
    is_mechanism = interpro_with_GO['mechanism'] == mechanism
    rows = interpro_with_GO[is_mechanism].fillna('-')

    collected = []
    for annotation in rows['GO']:
        if annotation == '-':
            # '-' marks "no GO annotation" (also what fillna put in for NaN).
            continue
        try:
            collected.extend(annotation.split('|'))
        except AttributeError:
            # Non-string annotation (no .split): skip it.
            continue
    return (collected, rows)

In [None]:
# Load the per-region table and keep one row per (ID, Accession) pair; the
# 'Start'/'End' columns are dropped before de-duplication.
interpro = pd.read_csv(path + 'result_attention-intensive_regions.csv')
interpro_unique = (
    interpro
    .drop(columns=['Start', 'End'])
    .groupby(['ID', 'Accession'], as_index=False)
    .first()
)

# Keep only rows that carry a GO annotation. Both conditions must be built
# from `interpro_unique`: after the groupby its index no longer matches
# `interpro`, so a mask taken from `interpro` would be aligned to the wrong rows.
has_GO = interpro_unique['GO'].notna() & (interpro_unique['GO'] != '-')
interpro_with_GO_all = interpro_unique[has_GO]

# NOTE(review): assumes 'Significance' is read as a boolean column - confirm.
interpro_with_GO_significant = interpro_with_GO_all[interpro_with_GO_all['Significance']]

In [None]:
# For every resistance mechanism, test each GO ID for over-representation
# among the significant regions with a one-sided Fisher's exact test on the
# 2x2 table (with/without GO) x (significant/not significant).
result_columns = [
    'p-value',
    'w/ GO and significant',
    'w/ GO and NOT significant',
    'w/o GO and significant',
    'w/o GO and NOT significant',
]
per_mechanism_results = []
num_of_test = 0  # total number of tests, used later for multiple-testing correction
for mechanism in mechanism_list:
    significant_GO_list, significant_interpro_per_mechanism = get_lists_of_GO(interpro_with_GO_significant, mechanism)
    all_GO_list, all_interpro_per_mechanism = get_lists_of_GO(interpro_with_GO_all, mechanism)

    # Sorted so the row order of the output is reproducible between runs.
    GO_list = sorted(set(all_GO_list))
    result_dict = {}
    num_significant_regions = len(significant_interpro_per_mechanism)
    num_all_regions = len(all_interpro_per_mechanism)
    for GO in GO_list:
        # regex=False: the GO ID is matched as a literal substring, not as a
        # regular expression.
        num_withGO_significant = significant_interpro_per_mechanism['GO'].str.contains(GO, regex=False).sum()
        num_withGO_all = all_interpro_per_mechanism['GO'].str.contains(GO, regex=False).sum()
        num_withGO_ns = num_withGO_all - num_withGO_significant
        num_withoutGO_significant = num_significant_regions - num_withGO_significant
        num_withoutGO_ns = num_all_regions - num_withGO_significant - num_withGO_ns - num_withoutGO_significant

        data = np.array([
            [num_withGO_significant, num_withGO_ns],
            [num_withoutGO_significant, num_withoutGO_ns],
        ])
        p_value = fisher_exact(data, alternative='greater')[1]
        result_dict[GO] = [p_value] + data.flatten().tolist()

    result_df = pd.DataFrame(result_dict, index=result_columns).T
    result_df['mechanism'] = mechanism
    per_mechanism_results.append(result_df)
    num_of_test += len(GO_list)

# Concatenate once at the end instead of growing a frame inside the loop.
result_all = pd.concat(per_mechanism_results) if per_mechanism_results else pd.DataFrame()
    

## Get GO terms from QuickGO

In [None]:
def get_go_term(go_id, timeout=30):
    """Look up the name of a GO term through the QuickGO REST API.

    Args:
        go_id: GO identifier inserted into the request URL.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The 'name' of the first entry in the response's 'results' list, or
        None when the request fails, the status code is not 200, or no
        result is returned.
    """
    api_url = f"https://www.ebi.ac.uk/QuickGO/services/ontology/go/terms/{go_id}"
    try:
        response = requests.get(api_url, timeout=timeout)
    except requests.RequestException:
        # Network failure or timeout: report "not found", like a non-200 reply.
        return None
    if response.status_code != 200:
        return None

    results = response.json().get('results') or []
    if not results:
        # A 200 reply with an empty result list would otherwise raise IndexError.
        return None
    return results[0].get('name')

# The same GO ID can appear once per mechanism in the index, so query each
# distinct ID only once and reuse the answer for the repeated rows.
go_term_by_id = {go_id: get_go_term(go_id) for go_id in result_all.index.unique()}
GO_list = [go_term_by_id[go_id] for go_id in result_all.index]
result_all['GO term'] = GO_list

In [None]:
# Bonferroni-corrected threshold: 0.05 divided by the number of tests run.
bonferroni_threshold = 0.05 / num_of_test
p_values = result_all['p-value']

# Write the rows below the threshold first, followed by the remaining rows.
result_true = result_all[p_values < bonferroni_threshold]
result_false = result_all[p_values >= bonferroni_threshold]
result_all = pd.concat([result_true, result_false])
result_all.to_csv(path + 'result_GO_analysis.csv')

## Visualization: Fig5

In [None]:
# Plot only the significant GO terms found on more than 150 significant
# regions. `.copy()` makes the filtered frame independent, so adding a column
# does not write into a slice of `result_true`.
result_viz = result_true[result_true['w/ GO and significant'] > 150].copy()

# Base-10 logarithm; the column is named '-log(p-value)' because that is the
# name selected below and read by the plotting cell.
result_viz['-log(p-value)'] = -np.log10(result_viz['p-value'])
result_viz = result_viz.rename(columns={'mechanism': 'Resistance mechanism'})
result_viz = result_viz.loc[:, ['GO term', 'Resistance mechanism', '-log(p-value)']]

In [None]:
# Horizontal bar chart of -log10(p-value) per GO term, coloured by mechanism.
sns.set(style="whitegrid")
plt.figure(figsize=(15, 15))

# Legend order: every mechanism except index 3 ('antibiotic target replacement').
# NOTE(review): presumably no GO term of that mechanism passes the filter - confirm.
mechanism_list_new = mechanism_list[:3] + [mechanism_list[4]]

# Columns are passed by keyword against `data`; current seaborn releases do
# not accept x/y as positional arguments.
sns.barplot(
    data=result_viz,
    x='-log(p-value)',
    y='GO term',
    hue='Resistance mechanism',
    hue_order=mechanism_list_new,
    palette='Set2',
    dodge=False,
)
plt.legend(title="Resistance mechanism", title_fontsize="xx-large", fontsize="xx-large")
plt.yticks(fontsize="xx-large")
plt.xticks(fontsize="xx-large")
plt.xlabel('-log10(p-value)', fontsize="xx-large")
plt.ylabel('')