In [None]:
import matplotlib.pyplot as plt
import pandas as pd
# Notebook-wide pandas display options: show every column, never truncate
# the text inside a cell, and render up to 500 rows per table.
pd.set_option('display.max_columns', None)
# `None` means "no width limit". The old sentinel -1 was deprecated in
# pandas 1.0 and is no longer accepted by newer releases.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 500)

# PLOTTING SETTINGS: three font sizes applied to matplotlib's global rc state
SMALL_SIZE = 16
MEDIUM_SIZE = 20
BIGGER_SIZE = 24

# One entry per rc group; the keywords are the font-size settings of that group.
for rc_group, rc_sizes in (
    ('font',   {'size': SMALL_SIZE}),            # default text size
    ('axes',   {'titlesize': BIGGER_SIZE,        # axes title
                'labelsize': BIGGER_SIZE}),      # x and y axis labels
    ('xtick',  {'labelsize': MEDIUM_SIZE}),      # x tick labels
    ('ytick',  {'labelsize': MEDIUM_SIZE}),      # y tick labels
    ('legend', {'fontsize': SMALL_SIZE}),        # legend entries
    ('figure', {'titlesize': BIGGER_SIZE}),      # figure-level title
):
    plt.rc(rc_group, **rc_sizes)

# Build dataframes and dictionaries with GO terms

In [None]:
# Experiment labels; they are part of the PANTHER export file names and are
# used as keys of the two dictionaries filled below.
experiments = ['Fkh1 log', 'Fkh1 stat', 'Fkh2 log', 'Fkh2 stat']
GO_slim_terms = {}
GO_complete_terms = {}

# Once the term label has been moved into the index, the remaining columns of
# both table flavours are relabelled with these seven short names.
panther_columns = ["# genome-wide","#","Expected",
                   "over/under","enrichment","raw P-value", "FDR"]

for exp in experiments:

    ### GO-slim table (tab separated; header=5 selects the row with the column names)
    df_slim = pd.read_csv(
        "../Tables/Panther/"+exp+" GO-slim biological process.txt",
        header=5, sep="\t",
    ).set_index("PANTHER GO-Slim Biological Process")
    df_slim.columns = list(panther_columns)

    ### GO complete table, same layout
    df_complete = pd.read_csv(
        "../Tables/Panther/"+exp+" GO biological process complete.txt",
        header=5, sep="\t",
    ).set_index("GO biological process complete")
    df_complete.columns = list(panther_columns)

    ### Keep both tables, keyed by experiment
    GO_slim_terms[exp] = df_slim
    GO_complete_terms[exp] = df_complete

# Significant GO-slim terms

In [None]:
# For every experiment, show the GO-slim rows with FDR <= 0.05,
# smallest FDR first.
for exp in experiments:
    slim_table = GO_slim_terms[exp]
    is_significant = slim_table['FDR'] <= 0.05

    print("##### Significant GO-slim terms:",exp,"#####")
    display(slim_table.loc[is_significant].sort_values(by='FDR'))

# Significant GO complete terms

In [None]:
# For every experiment, show the GO complete rows with FDR <= 0.05,
# smallest FDR first.
for exp in experiments:
    complete_table = GO_complete_terms[exp]
    is_significant = complete_table['FDR'] <= 0.05

    print("##### Significant GO complete terms:",exp,"#####")
    display(complete_table.loc[is_significant].sort_values(by='FDR'))

# Group enriched terms per experiment

## GO-slim terms

In [None]:
### Gather the data: per experiment, the GO-slim terms with FDR <= 0.05 (ordered by FDR)
enriched_terms = {}
all_enriched_slim_terms = []
for exp in experiments:
    slim_table = GO_slim_terms[exp]
    significant = slim_table.loc[slim_table['FDR'] <= 0.05].sort_values(by='FDR')
    enriched_terms[exp] = significant.index.tolist()
    all_enriched_slim_terms += enriched_terms[exp]

# Unique terms over all experiments, in sorted order
all_enriched_slim_terms = sorted(set(all_enriched_slim_terms))

### Divide in groups
# Each key is a tuple of presence flags in the order of `experiments`
# (Fkh1 log, Fkh1 stat, Fkh2 log, Fkh2 stat). Any pattern that is not listed
# here (e.g. enriched in three of the four experiments) goes to 'other'.
pattern_to_group = {
    (True,  True,  True,  True):  'all',
    # fkh specific
    (True,  True,  False, False): 'fkh1',
    (False, False, True,  True):  'fkh2',
    # phase specific
    (True,  False, True,  False): 'log',
    (False, True,  False, True):  'stat',
    # experiment specific
    (True,  False, False, False): 'fkh1log',
    (False, True,  False, False): 'fkh1stat',
    (False, False, True,  False): 'fkh2log',
    (False, False, False, True):  'fkh2stat',
}
d_enriched = {'all':[], 'fkh1':[], 'fkh2':[],
              'log':[], 'stat':[],
              'fkh1log':[], 'fkh1stat':[], 'fkh2log':[], 'fkh2stat':[],
              'other':[]}
for term in all_enriched_slim_terms:
    presence = tuple(term in enriched_terms[exp] for exp in experiments)
    d_enriched[pattern_to_group.get(presence, 'other')].append(term)

display(pd.Series(d_enriched).to_frame())

## GO complete terms

In [None]:
### Gather the data: per experiment, the GO complete terms with FDR <= 0.05 (ordered by FDR)
enriched_terms = {}
all_enriched_complete_terms = []
for exp in experiments:
    complete_table = GO_complete_terms[exp]
    significant = complete_table.loc[complete_table['FDR'] <= 0.05].sort_values(by='FDR')
    enriched_terms[exp] = significant.index.tolist()
    all_enriched_complete_terms += enriched_terms[exp]

# Unique terms over all experiments, in sorted order
all_enriched_complete_terms = sorted(set(all_enriched_complete_terms))

### Divide in groups
# Each key is a tuple of presence flags in the order of `experiments`
# (Fkh1 log, Fkh1 stat, Fkh2 log, Fkh2 stat). Any pattern that is not listed
# here (e.g. enriched in three of the four experiments) goes to 'other'.
group_by_pattern = {
    (True,  True,  True,  True):  'all',
    # fkh specific
    (True,  True,  False, False): 'fkh1',
    (False, False, True,  True):  'fkh2',
    # phase specific
    (True,  False, True,  False): 'log',
    (False, True,  False, True):  'stat',
    # experiment specific
    (True,  False, False, False): 'fkh1log',
    (False, True,  False, False): 'fkh1stat',
    (False, False, True,  False): 'fkh2log',
    (False, False, False, True):  'fkh2stat',
}
d_enriched = {'all':[], 'fkh1':[], 'fkh2':[],
              'log':[], 'stat':[],
              'fkh1log':[], 'fkh1stat':[], 'fkh2log':[], 'fkh2stat':[],
              'other':[]}
for term in all_enriched_complete_terms:
    presence = tuple(term in enriched_terms[exp] for exp in experiments)
    d_enriched[group_by_pattern.get(presence, 'other')].append(term)

display(pd.Series(d_enriched).to_frame())

# Barplot of gene counts for selected GO-slim terms

In [None]:
### Gather the number of occurrences ('#' column) for each process of interest
index_interest = ['cell cycle (GO:0007049)','mitotic cell cycle (GO:0000278)',
                  'chromosome segregation (GO:0007059)','nuclear chromosome segregation (GO:0098813)',
                  'DNA metabolic process (GO:0006259)',
                  'cell wall organization (GO:0071555)',
                  'phospholipid metabolic process (GO:0006644)']

# NOTE(review): the saved figure is named "GO-slim", but the counts are read
# from GO_complete_terms - confirm which table is intended.
# One array of counts per experiment, in the order of `experiments`.
target_counts = [GO_complete_terms[exp].loc[index_interest, '#'].values
                 for exp in experiments]

print(target_counts)

### Make the plot
# Descending positions, so the first process is drawn at the top of the axis.
y = list(range(len(index_interest) - 1, -1, -1))

fig, ax = plt.subplots()

a = 0.75  # bar opacity
# (vertical offset inside a process slot, colour): one pair per experiment,
# matched by position with target_counts.
bar_styles = [(0.3, 'b'), (0.1, 'g'), (-0.1, 'r'), (-0.3, 'k')]
legend_handles = []
for counts, (offset, colour) in zip(target_counts, bar_styles):
    for position, count in zip(y, counts):
        bars = ax.barh(position + offset, width=count, height=0.2,
                       color=colour, align='center', alpha=a)
    # the last bar drawn for this experiment stands in for it in the legend
    legend_handles.append(bars)

ax.set_xlabel('# of target genes')
ax.set_yticks(y)
ax.set_yticklabels(index_interest)
ax.legend(legend_handles,['Fkh1 log','Fkh1 stat','Fkh2 log','Fkh2 stat'],loc=4)

fig.set_size_inches(10,6)
fig.savefig('../Figures/GO-slim_term_target_counts.png',bbox_inches='tight', dpi=300)

plt.show()

# Summary table for Supplementary information

In [None]:
# Build one wide table: rows are GO terms, one FDR column per experiment.
df_tot = pd.DataFrame()
for exp in experiments:
    # only the FDR column, labelled with the experiment it belongs to
    fdr_column = GO_complete_terms[exp]['FDR'].to_frame(name=exp+" FDR")

    # outer join on the index = union of the GO terms seen so far
    df_tot = pd.merge(df_tot, fdr_column, how='outer',
                      left_index=True, right_index=True)

df_tot.to_excel("../Tables/Panther/Panther_summary.xlsx")