In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import seaborn as sns
from scipy.stats import fisher_exact

In [15]:
# Load the annotation/expression table, index it by the 'Unnamed: 0' column,
# and keep only the rows that carry a 'Beyhan_exp' value.
exp = (
    pd.read_csv('../ref/CpSilv_gb_annotations_and_expression_data.csv')
    .set_index('Unnamed: 0')
    .dropna(subset=['Beyhan_exp'])
)

# Index labels of the rows tagged as upregulated in each condition.
spherule_genes = exp.loc[exp['Beyhan_exp'] == 'spherule_upreg'].index.tolist()
hyphae_genes = exp.loc[exp['Beyhan_exp'] == 'hyphae_upreg'].index.tolist()

# Displayed as the cell output: (n spherule-up, n hyphae-up, n rows kept).
len(spherule_genes), len(hyphae_genes), len(exp)

(1082, 1200, 7301)

### Transmembrane transporters

In [16]:
### Are spherule upregulated genes enriched for transmembrane transporters? - NO
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'spherule_upreg'  vs. every other row
#   columns -> 'TransMembrane' is non-null       vs. 'TransMembrane' is null

is_spherule = exp['Beyhan_exp'] == 'spherule_upreg'
has_tm = exp['TransMembrane'].notna()

# int(...) keeps the counts as plain Python ints.
spherule_tm = int((is_spherule & has_tm).sum())
spherule_not_tm = int((is_spherule & ~has_tm).sum())

not_spherule_tm = int((~is_spherule & has_tm).sum())
not_spherule_not_tm = int((~is_spherule & ~has_tm).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[spherule_tm, spherule_not_tm],
                        [not_spherule_tm, not_spherule_not_tm]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('spherule prop in category: {}, spherule prop outside category: {}'.format(
    round(spherule_tm / (spherule_tm + not_spherule_tm), 4),
    round(spherule_not_tm / (spherule_not_tm + not_spherule_not_tm), 4)))
print('p = {}'.format(p))

[[ 218  864]
 [1301 4918]]
spherule prop in category: 0.1435, spherule prop in genome: 0.1494
p = 0.5978615580893053


In [17]:
### Are hyphae upregulated genes enriched for transmembrane transporters? - YES
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'hyphae_upreg'  vs. every other row
#   columns -> 'TransMembrane' is non-null     vs. 'TransMembrane' is null

is_hyphae = exp['Beyhan_exp'] == 'hyphae_upreg'
has_tm = exp['TransMembrane'].notna()

# int(...) keeps the counts as plain Python ints.
hyphae_tm = int((is_hyphae & has_tm).sum())
hyphae_not_tm = int((is_hyphae & ~has_tm).sum())

not_hyphae_tm = int((~is_hyphae & has_tm).sum())
not_hyphae_not_tm = int((~is_hyphae & ~has_tm).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[hyphae_tm, hyphae_not_tm],
                        [not_hyphae_tm, not_hyphae_not_tm]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('hyphae prop in category: {}, hyphae prop outside category: {}'.format(
    round(hyphae_tm / (hyphae_tm + not_hyphae_tm), 4),
    round(hyphae_not_tm / (hyphae_not_tm + not_hyphae_not_tm), 4)))
print('p = {}'.format(p))

[[ 319  881]
 [1200 4901]]
hyphae prop in category: 0.21, hyphae prop in genome: 0.1524
p = 1.4206006361531886e-07


### Signal peptides

In [18]:
### Are spherule upregulated genes enriched for signal peptides? - NO
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'spherule_upreg'  vs. every other row
#   columns -> 'SECRETED' is non-null            vs. 'SECRETED' is null

is_spherule = exp['Beyhan_exp'] == 'spherule_upreg'
has_secreted = exp['SECRETED'].notna()

# int(...) keeps the counts as plain Python ints.
spherule_secreted = int((is_spherule & has_secreted).sum())
spherule_not_secreted = int((is_spherule & ~has_secreted).sum())

not_spherule_secreted = int((~is_spherule & has_secreted).sum())
not_spherule_not_secreted = int((~is_spherule & ~has_secreted).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[spherule_secreted, spherule_not_secreted],
                        [not_spherule_secreted, not_spherule_not_secreted]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('spherule prop in category: {}, spherule prop outside category: {}'.format(
    round(spherule_secreted / (spherule_secreted + not_spherule_secreted), 4),
    round(spherule_not_secreted / (spherule_not_secreted + not_spherule_not_secreted), 4)))
print('p = {}'.format(p))

[[  67 1015]
 [ 334 5885]]
spherule prop in category: 0.1671, spherule prop in genome: 0.1471
p = 0.2780842400949015


In [19]:
### Are hyphae upregulated genes enriched for signal peptides? - YES
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'hyphae_upreg'  vs. every other row
#   columns -> 'SECRETED' is non-null          vs. 'SECRETED' is null

is_hyphae = exp['Beyhan_exp'] == 'hyphae_upreg'
has_secreted = exp['SECRETED'].notna()

# int(...) keeps the counts as plain Python ints.
hyphae_secreted = int((is_hyphae & has_secreted).sum())
hyphae_not_secreted = int((is_hyphae & ~has_secreted).sum())

not_hyphae_secreted = int((~is_hyphae & has_secreted).sum())
not_hyphae_not_secreted = int((~is_hyphae & ~has_secreted).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[hyphae_secreted, hyphae_not_secreted],
                        [not_hyphae_secreted, not_hyphae_not_secreted]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('hyphae prop in category: {}, hyphae prop outside category: {}'.format(
    round(hyphae_secreted / (hyphae_secreted + not_hyphae_secreted), 4),
    round(hyphae_not_secreted / (hyphae_not_secreted + not_hyphae_not_secreted), 4)))
print('p = {}'.format(p))

[[ 130 1070]
 [ 271 5830]]
hyphae prop in category: 0.3242, hyphae prop in genome: 0.1551
p = 3.9569576713280115e-16


### Carbohydrate active enzymes

In [20]:
### Are spherule upregulated genes enriched for carbohydrate active enzymes? - NO (borderline)
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'spherule_upreg'  vs. every other row
#   columns -> 'CAZy' is non-null                vs. 'CAZy' is null

is_spherule = exp['Beyhan_exp'] == 'spherule_upreg'
has_cazy = exp['CAZy'].notna()

# int(...) keeps the counts as plain Python ints.
spherule_CAZy = int((is_spherule & has_cazy).sum())
spherule_not_CAZy = int((is_spherule & ~has_cazy).sum())

not_spherule_CAZy = int((~is_spherule & has_cazy).sum())
not_spherule_not_CAZy = int((~is_spherule & ~has_cazy).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[spherule_CAZy, spherule_not_CAZy],
                        [not_spherule_CAZy, not_spherule_not_CAZy]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('spherule prop in category: {}, spherule prop outside category: {}'.format(
    round(spherule_CAZy / (spherule_CAZy + not_spherule_CAZy), 4),
    round(spherule_not_CAZy / (spherule_not_CAZy + not_spherule_not_CAZy), 4)))
print('p = {}'.format(p))

[[  32 1050]
 [ 124 6095]]
spherule prop in category: 0.2051, spherule prop in genome: 0.147
p = 0.05196333601426406


In [21]:
### Are hyphae upregulated genes enriched for carbohydrate active enzymes? - YES
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'hyphae_upreg'  vs. every other row
#   columns -> 'CAZy' is non-null              vs. 'CAZy' is null

is_hyphae = exp['Beyhan_exp'] == 'hyphae_upreg'
has_cazy = exp['CAZy'].notna()

# int(...) keeps the counts as plain Python ints.
hyphae_CAZy = int((is_hyphae & has_cazy).sum())
hyphae_not_CAZy = int((is_hyphae & ~has_cazy).sum())

not_hyphae_CAZy = int((~is_hyphae & has_cazy).sum())
not_hyphae_not_CAZy = int((~is_hyphae & ~has_cazy).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[hyphae_CAZy, hyphae_not_CAZy],
                        [not_hyphae_CAZy, not_hyphae_not_CAZy]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('hyphae prop in category: {}, hyphae prop outside category: {}'.format(
    round(hyphae_CAZy / (hyphae_CAZy + not_hyphae_CAZy), 4),
    round(hyphae_not_CAZy / (hyphae_not_CAZy + not_hyphae_not_CAZy), 4)))
print('p = {}'.format(p))

[[  42 1158]
 [ 114 5987]]
hyphae prop in category: 0.2692, hyphae prop in genome: 0.1621
p = 0.0009312903706935388


### antiSMASH clusters

In [22]:
### Are spherule upregulated genes enriched in antiSMASH cluster? - YES
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'spherule_upreg'  vs. every other row
#   columns -> 'antiSMASH' is non-null           vs. 'antiSMASH' is null

is_spherule = exp['Beyhan_exp'] == 'spherule_upreg'
has_antismash = exp['antiSMASH'].notna()

# int(...) keeps the counts as plain Python ints.
spherule_antiSMASH = int((is_spherule & has_antismash).sum())
spherule_not_antiSMASH = int((is_spherule & ~has_antismash).sum())

not_spherule_antiSMASH = int((~is_spherule & has_antismash).sum())
not_spherule_not_antiSMASH = int((~is_spherule & ~has_antismash).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[spherule_antiSMASH, spherule_not_antiSMASH],
                        [not_spherule_antiSMASH, not_spherule_not_antiSMASH]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('spherule prop in category: {}, spherule prop outside category: {}'.format(
    round(spherule_antiSMASH / (spherule_antiSMASH + not_spherule_antiSMASH), 4),
    round(spherule_not_antiSMASH / (spherule_not_antiSMASH + not_spherule_not_antiSMASH), 4)))
print('p = {}'.format(p))

[[  63 1019]
 [ 208 6011]]
spherule prop in category: 0.2325, spherule prop in genome: 0.145
p = 0.00016468276244570513


In [23]:
### Are hyphae upregulated genes enriched in antiSMASH cluster? - NO
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'hyphae_upreg'  vs. every other row
#   columns -> 'antiSMASH' is non-null         vs. 'antiSMASH' is null

is_hyphae = exp['Beyhan_exp'] == 'hyphae_upreg'
has_antismash = exp['antiSMASH'].notna()

# int(...) keeps the counts as plain Python ints.
hyphae_antiSMASH = int((is_hyphae & has_antismash).sum())
hyphae_not_antiSMASH = int((is_hyphae & ~has_antismash).sum())

not_hyphae_antiSMASH = int((~is_hyphae & has_antismash).sum())
not_hyphae_not_antiSMASH = int((~is_hyphae & ~has_antismash).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[hyphae_antiSMASH, hyphae_not_antiSMASH],
                        [not_hyphae_antiSMASH, not_hyphae_not_antiSMASH]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('hyphae prop in category: {}, hyphae prop outside category: {}'.format(
    round(hyphae_antiSMASH / (hyphae_antiSMASH + not_hyphae_antiSMASH), 4),
    round(hyphae_not_antiSMASH / (hyphae_not_antiSMASH + not_hyphae_not_antiSMASH), 4)))
print('p = {}'.format(p))

[[  53 1147]
 [ 218 5883]]
hyphae prop in category: 0.1956, hyphae prop in genome: 0.1632
p = 0.15613653223219995


### Secondary metabolism gene families

In [24]:
### Are spherule upregulated genes enriched for secondary metabolism gene families? - YES
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'spherule_upreg'  vs. every other row
#   columns -> 'SMCOG' is non-null               vs. 'SMCOG' is null

is_spherule = exp['Beyhan_exp'] == 'spherule_upreg'
has_smcog = exp['SMCOG'].notna()

# int(...) keeps the counts as plain Python ints.
spherule_SMCOG = int((is_spherule & has_smcog).sum())
spherule_not_SMCOG = int((is_spherule & ~has_smcog).sum())

not_spherule_SMCOG = int((~is_spherule & has_smcog).sum())
not_spherule_not_SMCOG = int((~is_spherule & ~has_smcog).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[spherule_SMCOG, spherule_not_SMCOG],
                        [not_spherule_SMCOG, not_spherule_not_SMCOG]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('spherule prop in category: {}, spherule prop outside category: {}'.format(
    round(spherule_SMCOG / (spherule_SMCOG + not_spherule_SMCOG), 4),
    round(spherule_not_SMCOG / (spherule_not_SMCOG + not_spherule_not_SMCOG), 4)))
print('p = {}'.format(p))

[[  30 1052]
 [  51 6168]]
spherule prop in category: 0.3704, spherule prop in genome: 0.1457
p = 6.156172599690421e-07


In [25]:
### Are hyphae upregulated genes enriched for secondary metabolism gene families? - YES
#
# Fisher's exact test on a 2x2 table built from `exp`:
#   rows    -> 'Beyhan_exp' == 'hyphae_upreg'  vs. every other row
#   columns -> 'SMCOG' is non-null             vs. 'SMCOG' is null

is_hyphae = exp['Beyhan_exp'] == 'hyphae_upreg'
has_smcog = exp['SMCOG'].notna()

# int(...) keeps the counts as plain Python ints.
hyphae_SMCOG = int((is_hyphae & has_smcog).sum())
hyphae_not_SMCOG = int((is_hyphae & ~has_smcog).sum())

not_hyphae_SMCOG = int((~is_hyphae & has_smcog).sum())
not_hyphae_not_SMCOG = int((~is_hyphae & ~has_smcog).sum())

# Build the table once so the tested and the printed table cannot drift apart.
contingency = np.array([[hyphae_SMCOG, hyphae_not_SMCOG],
                        [not_hyphae_SMCOG, not_hyphae_not_SMCOG]])

# Two-sided: a small p means association in either direction (enrichment or
# depletion); the direction is read from the two proportions printed below.
p = fisher_exact(contingency, alternative='two-sided')[1]

print(contingency)
# The second proportion is taken over genes WITHOUT the annotation (the second
# table column), so it is labelled "outside category" rather than "in genome".
print('hyphae prop in category: {}, hyphae prop outside category: {}'.format(
    round(hyphae_SMCOG / (hyphae_SMCOG + not_hyphae_SMCOG), 4),
    round(hyphae_not_SMCOG / (hyphae_not_SMCOG + not_hyphae_not_SMCOG), 4)))
print('p = {}'.format(p))

[[  20 1180]
 [  61 6040]]
hyphae prop in category: 0.2469, hyphae prop in genome: 0.1634
p = 0.049834429405231286
