In [231]:
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix
import os
import random
from statsmodels.stats.proportion import binom_test
from statsmodels.stats.multitest import multipletests

def parse_genome(df):
    genome_id = df['#query'][0].split('_')[0]
    keggs = df['KEGG_ko'].replace('-', None).dropna()
    keggs = list(map(lambda x: x.split(','), keggs.values))
    keggs = sum(keggs, [])
    keggs = pd.DataFrame({'KEGG_ko': keggs})
    keggs['genome_id'] = genome_id
    return keggs

def to_sparse_matrix(func_df, genome_id='genome_id', kegg_id='KEGG_ko'):
    # create genome-specific index
    ogus = list(set(func_df[genome_id]))
    ogu_lookup = pd.Series(np.arange(0, len(ogus)), ogus)
    # create KEGG-specific index
    keggs = list(set(func_df[kegg_id]))
    kegg_lookup = pd.Series(np.arange(0, len(keggs)), keggs)
    # rename names as numbers
    ogu_id = func_df[genome_id].apply(lambda x: ogu_lookup.loc[x]).astype(np.int64)
    kegg_id = func_df[kegg_id].apply(lambda x: kegg_lookup.loc[x]).astype(np.int64)
    # assign the presence / absence of a gene
    func_df['count'] = 1
    c = func_df['count'].values
    # format into a matrix
    data = coo_matrix((c, (ogu_id, kegg_id)))
    ko_ogu = pd.DataFrame(data.todense(), index=ogus, columns=keggs)
    return ko_ogu

def btest(pa1, pa2, seed=0, return_proportions=False):
    """ Performs genome wide binomial test between two groups of taxa
    Parameters
    ----------
    df1 : pd.DataFrame
        Rows are taxa, columns are genes
    df2 : pd.DataFrame
        Rows are taxa, columns are genes
    Returns
    -------
    pd.Series : list of genes associated with df1
    pd.Series : list of genes associated with df2
    """
    np.random.seed(seed)
    random.seed(seed)
    #pa1 = df1 > 0
    #pa2 = df2 > 0
    idx = list(set(pa1.columns) | set(pa2.columns))
    idx.sort()
    pa1 = pa1.sum(axis=0).reindex(idx).fillna(0)
    pa2 = pa2.sum(axis=0).reindex(idx).fillna(0)
    n = pa1 + pa2
    obs = list(zip(list(pa1.values), list((pa2.values + 1) / (pa2 + 1).sum()), list(n.values)))
    pvals = pd.Series([binom_test(a, n, b, 'two-sided') for (a, b, n) in obs],
                      index=n.index)
    if return_proportions:
        res = pd.DataFrame({'groupA': pa1, 'groupB': pa2, 'pval': pvals})
        def relabel_f(x):
            if x['groupA'] < x['groupB']:
                return 'groupB'
            else:
                return 'groupA'
        res['side'] = res.apply(relabel_f, axis=1)
        return res

    return pvals

def log_pvalue(lr, alpha=0.1, filter=True):
    """ Converts pvalues to -log(pvalue)
    Also performs Boniferroni correction.
    """
    lr = lr.reset_index()
    # lr.columns = ['KEGG', 'pvalue']
    lr['-log(pvalue)'] = -np.log(lr['pvalue'] + 1e-200)
    res = multipletests(lr['pvalue'], method='fdr_bh', alpha=alpha)
    lr['pvalue_corrected'] = res[1]
    if filter:
        lr = lr.loc[res[0]]
        return lr
    return lr

In [47]:
#run only once to generate two columns file metadata_new 
# #parse genomes-all_metadata.tsv: get species names from Lineage and Species_rep
# metadata = pd.read_table('../../../../Downloads/genomes-all_metadata.tsv')
# metadata_new = metadata[['Lineage','Species_rep']]
# metadata_new['species'] = metadata_new['Lineage'].str.split(';').str[6]
# metadata_new['Species'] = metadata_new['species'].str.split('__').str[1]
# metadata_new = metadata_new[['Species','Species_rep']]
# metadata_new.to_csv('../table/eggNOG_species_rep.txt', sep = '\t', index = False)

In [95]:
metadata_new = pd.read_table('../table/eggNOG_species_rep.txt')

In [140]:
#get the species_rep for specific species
#  'CAG-170 sp002404795',
#  'CAG-83 sp003487665',
#  'UBA11452 sp003526375',
#  'Victivallis vadensis'
asd_cd_positive = ['CAG-170 sp002404795',
 'CAG-83 sp003487665',
 'UBA11452 sp003526375',
 'Victivallis vadensis']

In [141]:
type(asd_cd_positive)

list

In [139]:
#load tables
table_t2d = pd.read_table('../table/QinT2D_deseq2_all.txt')
table_cd = pd.read_table('../table/Franzosa_CD_deseq2_all.txt')
table_asd = pd.read_table('../table/Dan_n_Wang_ASD_deseq2_all.txt')

#negative: more abundant in cases
table_t2d_negative = table_t2d.sort_values(by=['CI_95'],ascending=True).head(100)
table_cd_negative = table_cd.sort_values(by=['CI_95'],ascending=True).head(100)
table_asd_negative = table_asd.sort_values(by=['CI_95'],ascending=True).head(100)

#positive: more abundant in controls
table_t2d_positive = table_t2d.sort_values(by=['CI_5'],ascending=False).head(100)
table_cd_positive = table_cd.sort_values(by=['CI_5'],ascending=False).head(100)
table_asd_positive = table_asd.sort_values(by=['CI_5'],ascending=False).head(100)


In [145]:
t2d_positive = list(table_t2d_positive['Unnamed: 0'])
t2d_negative = list(table_t2d_negative['Unnamed: 0'])

In [168]:
asd_cd_positive_rep = metadata_new[metadata_new['Species'].isin(asd_cd_positive)]
Species_rep_ids = asd_cd_positive_rep['Species_rep'].drop_duplicates()
t2d_positive_rep = metadata_new[metadata_new['Species'].isin(t2d_positive)]
Species_rep_ids_t2d_positive = t2d_positive_rep['Species_rep'].drop_duplicates()
t2d_negative_rep = metadata_new[metadata_new['Species'].isin(t2d_negative)]
Species_rep_ids_t2d_negative = t2d_negative_rep['Species_rep'].drop_duplicates()

In [175]:
Species_rep_ids_t2d_positive
Species_rep_ids_t2d_negative
#why length = 101?

6       MGYG000000007
7       MGYG000000008
20      MGYG000000021
45      MGYG000000046
79      MGYG000000080
            ...      
4395    MGYG000004424
4588    MGYG000004618
4775    MGYG000004805
4791    MGYG000004821
4831    MGYG000004861
Name: Species_rep, Length: 101, dtype: object

In [165]:
eggNOG_dir

'http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue'

In [177]:
mkdir T2D_negative

In [178]:
for i in Species_rep_ids_t2d_negative:
    os.system("wget '{}/{}/{}/genome/{}_eggNOG.tsv' -O T2D_negative/{}_eggNOG.tsv".format(eggNOG_dir, i[:-2], i, i, i))

--2022-12-06 21:18:16--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000000/MGYG000000007/genome/MGYG000000007_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 734141 (717K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000000007_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  276K 2s
    50K .......... .......... .......... .......... .......... 13%  568K 2s
   100K .......... .......... .......... .......... .......... 20% 46.1M 1s
   150K .......... .......... .......... .......... .......... 27% 27.2M 1s
   200K .......... .......... .......... .......... .......... 34%  569K 1s
   250K .......... .......... .......... .......... .......... 41%  157M 1s
   300K .......... .......... .......... .......... .......... 48% 38.2M 0s
   35

   850K .......... .......... .......... .......... .......... 93%  578K 0s
   900K .......... .......... .......... .......... .......... 98% 65.2M 0s
   950K .......... ...                                        100% 56.4M=0.5s

2022-12-06 21:18:22 (1.73 MB/s) - ‘T2D_negative/MGYG000000080_eggNOG.tsv’ saved [987065/987065]

--2022-12-06 21:18:22--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000001/MGYG000000106/genome/MGYG000000106_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1149230 (1.1M) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000000106_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  4%  248K 4s
    50K .......... .......... .......... .......... ..........  8%  557K 3s
   100K .......... .......... .......... .......... ....

   500K .......... .......... .......... .......... .......... 71%  554K 0s
   550K .......... .......... .......... .......... .......... 77%  564K 0s
   600K .......... .......... .......... .......... .......... 83% 34.8M 0s
   650K .......... .......... .......... .......... .......... 90%  565K 0s
   700K .......... .......... .......... .......... .......... 96%  151M 0s
   750K .......... .......... ....                            100%  271K=1.2s

2022-12-06 21:18:30 (651 KB/s) - ‘T2D_negative/MGYG000000161_eggNOG.tsv’ saved [792993/792993]

--2022-12-06 21:18:30--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000002/MGYG000000218/genome/MGYG000000218_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 654192 (639K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG00000

--2022-12-06 21:18:37--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000007/MGYG000000742/genome/MGYG000000742_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 577559 (564K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000000742_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  8%  273K 2s
    50K .......... .......... .......... .......... .......... 17%  573K 1s
   100K .......... .......... .......... .......... .......... 26%  277K 1s
   150K .......... .......... .......... .......... .......... 35%  552K 1s
   200K .......... .......... .......... .......... .......... 44%  424K 1s
   250K .......... .......... .......... .......... .......... 53%  280K 1s
   300K .......... .......... .......... .......... .......... 62%  554K 1s
   35

   100K .......... .......... .......... .......... ..........  9%  530K 3s
   150K .......... .......... .......... .......... .......... 13%  564K 3s
   200K .......... .......... .......... .......... .......... 16%  563K 3s
   250K .......... .......... .......... .......... .......... 19%  557K 3s
   300K .......... .......... .......... .......... .......... 22%  558K 2s
   350K .......... .......... .......... .......... .......... 26%  559K 2s
   400K .......... .......... .......... .......... .......... 29%  550K 2s
   450K .......... .......... .......... .......... .......... 32%  547K 2s
   500K .......... .......... .......... .......... .......... 35%  563K 2s
   550K .......... .......... .......... .......... .......... 39%  550K 2s
   600K .......... .......... .......... .......... .......... 42% 39.5M 2s
   650K .......... .......... .......... .......... .......... 45%  550K 1s
   700K .......... .......... .......... .......... .......... 49%  550K 1s
   750K ....

--2022-12-06 21:18:53--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000011/MGYG000001156/genome/MGYG000001156_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 777532 (759K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000001156_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  278K 3s
    50K .......... .......... .......... .......... .......... 13%  277K 2s
   100K .......... .......... .......... .......... .......... 19%  276K 2s
   150K .......... .......... .......... .......... .......... 26%  279K 2s
   200K .......... .......... .......... .......... .......... 32%  280K 2s
   250K .......... .......... .......... .......... .......... 39%  555K 2s
   300K .......... .......... .......... .......... .......... 46%  281K 1s
   35

HTTP request sent, awaiting response... 200 OK
Length: 811502 (792K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000001334_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  280K 3s
    50K .......... .......... .......... .......... .......... 12%  185K 3s
   100K .......... .......... .......... .......... .......... 18%  274K 3s
   150K .......... .......... .......... .......... .......... 25%  286K 2s
   200K .......... .......... .......... .......... .......... 31%  279K 2s
   250K .......... .......... .......... .......... .......... 37%  552K 2s
   300K .......... .......... .......... .......... .......... 44%  251K 2s
   350K .......... .......... .......... .......... .......... 50%  542K 1s
   400K .......... .......... .......... .......... .......... 56%  562K 1s
   450K .......... .......... .......... .......... .......... 63%  273K 1s
   500K .......... .......... .......... .......... .......... 69%  553K 1s
   550K .......

   600K .......... .......... .......... .......... .......... 86%  562K 0s
   650K .......... .......... .......... .......... .......... 93%  551K 0s
   700K .......... .......... .......... .......... .......... 99% 76.1M 0s
   750K ..                                                    100% 53.7K=1.2s

2022-12-06 21:19:09 (639 KB/s) - ‘T2D_negative/MGYG000001362_eggNOG.tsv’ saved [770749/770749]

--2022-12-06 21:19:09--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000013/MGYG000001369/genome/MGYG000001369_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 757680 (740K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000001369_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  282K 2s
    50K .......... .......... .......... .......... ......

    50K .......... .......... .......... .......... ..........  9%  552K 3s
   100K .......... .......... .......... .......... .......... 14% 22.6M 2s
   150K .......... .......... .......... .......... .......... 19%  540K 2s
   200K .......... .......... .......... .......... .......... 24%  547K 1s
   250K .......... .......... .......... .......... .......... 29%  596K 1s
   300K .......... .......... .......... .......... .......... 34%  552K 1s
   350K .......... .......... .......... .......... .......... 38% 64.5M 1s
   400K .......... .......... .......... .......... .......... 43%  545K 1s
   450K .......... .......... .......... .......... .......... 48%  567K 1s
   500K .......... .......... .......... .......... .......... 53%  107M 1s
   550K .......... .......... .......... .......... .......... 58%  432K 1s
   600K .......... .......... .......... .......... .......... 63%  552K 1s
   650K .......... .......... .......... .......... .......... 68%  228M 0s
   700K ....

   650K .......... .......... .......... .......... .......... 55%  557K 1s
   700K .......... .......... .......... .......... .......... 59% 82.5M 1s
   750K .......... .......... .......... .......... .......... 63%  546K 1s
   800K .......... .......... .......... .......... .......... 67%  550K 1s
   850K .......... .......... .......... .......... .......... 71%  525K 1s
   900K .......... .......... .......... .......... .......... 75%  551K 1s
   950K .......... .......... .......... .......... .......... 79%  104M 0s
  1000K .......... .......... .......... .......... .......... 83%  549K 0s
  1050K .......... .......... .......... .......... .......... 87%  556K 0s
  1100K .......... .......... .......... .......... .......... 91%  555K 0s
  1150K .......... .......... .......... .......... .......... 95%  126M 0s
  1200K .......... .......... .......... .......... .......... 99%  553K 0s
  1250K ....                                                  100% 87.3M=2.3s

2022-12-0

   250K .......... .......... .......... .......... .......... 29%  566K 1s
   300K .......... .......... .......... .......... .......... 34%  553K 1s
   350K .......... .......... .......... .......... .......... 39%  552K 1s
   400K .......... .......... .......... .......... .......... 44%  548K 1s
   450K .......... .......... .......... .......... .......... 49%  264M 1s
   500K .......... .......... .......... .......... .......... 54%  555K 1s
   550K .......... .......... .......... .......... .......... 58%  551K 1s
   600K .......... .......... .......... .......... .......... 63%  549K 1s
   650K .......... .......... .......... .......... .......... 68%  556K 1s
   700K .......... .......... .......... .......... .......... 73% 90.3M 0s
   750K .......... .......... .......... .......... .......... 78%  548K 0s
   800K .......... .......... .......... .......... .......... 83%  553K 0s
   850K .......... .......... .......... .......... .......... 88%  557K 0s
   900K ....

--2022-12-06 21:19:39--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000022/MGYG000002228/genome/MGYG000002228_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 783806 (765K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000002228_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  273K 3s
    50K .......... .......... .......... .......... .......... 13%  570K 2s
   100K .......... .......... .......... .......... .......... 19%  277K 2s
   150K .......... .......... .......... .......... .......... 26%  558K 2s
   200K .......... .......... .......... .......... .......... 32%  553K 1s
   250K .......... .......... .......... .......... .......... 39%  552K 1s
   300K .......... .......... .......... .......... .......... 45%  276K 1s
   35

HTTP request sent, awaiting response... 200 OK
Length: 760162 (742K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000002336_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  279K 2s
    50K .......... .......... .......... .......... .......... 13%  539K 2s
   100K .......... .......... .......... .......... .......... 20%  553K 1s
   150K .......... .......... .......... .......... .......... 26%  561K 1s
   200K .......... .......... .......... .......... .......... 33%  555K 1s
   250K .......... .......... .......... .......... .......... 40%  550K 1s
   300K .......... .......... .......... .......... .......... 47%  541K 1s
   350K .......... .......... .......... .......... .......... 53% 6.64M 1s
   400K .......... .......... .......... .......... .......... 60%  609K 1s
   450K .......... .......... .......... .......... .......... 67%  576K 0s
   500K .......... .......... .......... .......... .......... 74%  553K 0s
   550K .......

   550K .......... .......... .......... .......... .......... 65%  552K 1s
   600K .......... .......... .......... .......... .......... 70%  254M 0s
   650K .......... .......... .......... .......... .......... 75%  512K 0s
   700K .......... .......... .......... .......... .......... 81%  599K 0s
   750K .......... .......... .......... .......... .......... 86%  561K 0s
   800K .......... .......... .......... .......... .......... 92%  561K 0s
   850K .......... .......... .......... .......... .......... 97%  131M 0s
   900K .......... .......... ..                              100%  254K=1.5s

2022-12-06 21:19:59 (601 KB/s) - ‘T2D_negative/MGYG000002380_eggNOG.tsv’ saved [945073/945073]

--2022-12-06 21:19:59--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000023/MGYG000002384/genome/MGYG000002384_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.1

    50K .......... .......... .......... .......... .......... 15%  554K 2s
   100K .......... .......... .......... .......... .......... 22%  277K 2s
   150K .......... .......... .......... .......... .......... 30%  557K 1s
   200K .......... .......... .......... .......... .......... 37%  549K 1s
   250K .......... .......... .......... .......... .......... 45%  240K 1s
   300K .......... .......... .......... .......... .......... 53%  551K 1s
   350K .......... .......... .......... .......... .......... 60%  555K 1s
   400K .......... .......... .......... .......... .......... 68%  549K 1s
   450K .......... .......... .......... .......... .......... 75%  571K 0s
   500K .......... .......... .......... .......... .......... 83%  559K 0s
   550K .......... .......... .......... .......... .......... 91%  562K 0s
   600K .......... .......... .......... .......... .......... 98% 27.6M 0s
   650K .........                                             100%  104K=1.5s

2022-12-0

Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 474758 (464K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000002739_eggNOG.tsv’

     0K .......... .......... .......... .......... .......... 10%  279K 1s
    50K .......... .......... .......... .......... .......... 21%  550K 1s
   100K .......... .......... .......... .......... .......... 32%  276K 1s
   150K .......... .......... .......... .......... .......... 43%  534K 1s
   200K .......... .......... .......... .......... .......... 53%  571K 1s
   250K .......... .......... .......... .......... .......... 64%  278K 0s
   300K .......... .......... .......... .......... .......... 75%  545K 0s
   350K .......... .......... .......... .......... .......... 86%  556K 0s
   400K .......... .......... .......... .......... .......... 97%  557K 0s
   450K .......... ...                                        100%  116M=1.1s

2022-12-06 

--2022-12-06 21:20:24--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000029/MGYG000002940/genome/MGYG000002940_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 468054 (457K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000002940_eggNOG.tsv’

     0K .......... .......... .......... .......... .......... 10%  269K 2s
    50K .......... .......... .......... .......... .......... 21%  558K 1s
   100K .......... .......... .......... .......... .......... 32%  273K 1s
   150K .......... .......... .......... .......... .......... 43%  550K 1s
   200K .......... .......... .......... .......... .......... 54%  552K 1s
   250K .......... .......... .......... .......... .......... 65%  548K 0s
   300K .......... .......... .......... .......... .......... 76%  566K 0s
   35

   250K .......... .......... .......... .......... .......... 58%  548K 0s
   300K .......... .......... .......... .......... .......... 68%  555K 0s
   350K .......... .......... .......... .......... .......... 78%  551K 0s
   400K .......... .......... .......... .......... .......... 87%  561K 0s
   450K .......... .......... .......... .......... .......... 97%  478K 0s
   500K .......... ..                                         100% 93.0M=1.0s

2022-12-06 21:20:33 (509 KB/s) - ‘T2D_negative/MGYG000003228_eggNOG.tsv’ saved [524975/524975]

--2022-12-06 21:20:33--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000048/MGYG000004801/genome/MGYG000004801_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 695115 (679K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG00000

   750K .......... .......... .......... .......... .......... 86%  559K 0s
   800K .......... .......... .......... .......... .......... 92% 21.8M 0s
   850K .......... .......... .......... .......... .......... 97%  564K 0s
   900K .......... .......... ..                              100%  246K=1.7s

2022-12-06 21:20:41 (537 KB/s) - ‘T2D_negative/MGYG000003383_eggNOG.tsv’ saved [944550/944550]

--2022-12-06 21:20:41--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000034/MGYG000003452/genome/MGYG000003452_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 747299 (730K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000003452_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  277K 2s
    50K .......... .......... .......... .......... ......

   300K .......... .......... .......... .......... .......... 48%  286K 1s
   350K .......... .......... .......... .......... .......... 55%  564K 1s
   400K .......... .......... .......... .......... .......... 62%  524K 1s
   450K .......... .......... .......... .......... .......... 69%  586K 1s
   500K .......... .......... .......... .......... .......... 76%  552K 0s
   550K .......... .......... .......... .......... .......... 82% 95.4M 0s
   600K .......... .......... .......... .......... .......... 89%  544K 0s
   650K .......... .......... .......... .......... .......... 96%  567K 0s
   700K .......... .......... ..                              100%  170M=1.5s

2022-12-06 21:20:50 (494 KB/s) - ‘T2D_negative/MGYG000003690_eggNOG.tsv’ saved [740262/740262]

--2022-12-06 21:20:50--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000040/MGYG000004022/genome/MGYG000004022_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk

   100K .......... .......... .......... .......... .......... 26%  276K 1s
   150K .......... .......... .......... .......... .......... 34%  519K 1s
   200K .......... .......... .......... .......... .......... 43%  535K 1s
   250K .......... .......... .......... .......... .......... 52%  541K 1s
   300K .......... .......... .......... .......... .......... 60%  564K 1s
   350K .......... .......... .......... .......... .......... 69%  527K 0s
   400K .......... .......... .......... .......... .......... 78%  596K 0s
   450K .......... .......... .......... .......... .......... 86%  563K 0s
   500K .......... .......... .......... .......... .......... 95%  556K 0s
   550K .......... .......... ......                          100%  326K=1.3s

2022-12-06 21:20:58 (457 KB/s) - ‘T2D_negative/MGYG000004288_eggNOG.tsv’ saved [589874/589874]

--2022-12-06 21:20:58--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000043/MGYG0000

   400K .......... .......... .......... .......... .......... 88%  578K 0s
   450K .......... .......... .......... .......... .......... 97%  575K 0s
   500K ..........                                            100% 6.88M=1.0s

2022-12-06 21:21:05 (516 KB/s) - ‘T2D_negative/MGYG000004821_eggNOG.tsv’ saved [522490/522490]

--2022-12-06 21:21:05--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000048/MGYG000004861/genome/MGYG000004861_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 804833 (786K) [text/tab-separated-values]
Saving to: ‘T2D_negative/MGYG000004861_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  278K 3s
    50K .......... .......... .......... .......... .......... 12%  552K 2s
   100K .......... .......... .......... .......... ......

In [166]:
mkdir T2D_positive

In [176]:
#download the eggNOG tsv files
for i in Species_rep_ids_t2d_positive:
    os.system("wget '{}/{}/{}/genome/{}_eggNOG.tsv' -O T2D_positive/{}_eggNOG.tsv".format(eggNOG_dir, i[:-2], i, i, i))

--2022-12-06 21:15:02--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000000/MGYG000000028/genome/MGYG000000028_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 775912 (758K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000000028_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  266K 3s
    50K .......... .......... .......... .......... .......... 13%  639K 2s
   100K .......... .......... .......... .......... .......... 19% 5.61M 1s
   150K .......... .......... .......... .......... .......... 26% 8.10M 1s
   200K .......... .......... .......... .......... .......... 32%  598K 1s
   250K .......... .......... .......... .......... .......... 39% 63.1M 1s
   300K .......... .......... .......... .......... .......... 46% 10.2M 0s
   35

HTTP request sent, awaiting response... 200 OK
Length: 721533 (705K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000000204_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  7%  277K 2s
    50K .......... .......... .......... .......... .......... 14%  551K 2s
   100K .......... .......... .......... .......... .......... 21% 88.8M 1s
   150K .......... .......... .......... .......... .......... 28%  557K 1s
   200K .......... .......... .......... .......... .......... 35% 58.2M 1s
   250K .......... .......... .......... .......... .......... 42%  559K 1s
   300K .......... .......... .......... .......... .......... 49% 36.2M 0s
   350K .......... .......... .......... .......... .......... 56% 71.6M 0s
   400K .......... .......... .......... .......... .......... 63%  552K 0s
   450K .......... .......... .......... .......... .......... 70%  145M 0s
   500K .......... .......... .......... .......... .......... 78%  555K 0s
   550K .......

   850K .......... .......... .......... .......... .......... 97%  582K 0s
   900K .......... .........                                  100% 60.3M=0.5s

2022-12-06 21:15:12 (1.65 MB/s) - ‘T2D_positive/MGYG000000250_eggNOG.tsv’ saved [941083/941083]

--2022-12-06 21:15:12--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000002/MGYG000000255/genome/MGYG000000255_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 937493 (916K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000000255_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  5%  276K 3s
    50K .......... .......... .......... .......... .......... 10%  560K 2s
   100K .......... .......... .......... .......... .......... 16% 48.0M 1s
   150K .......... .......... .......... .......... .....

    50K .......... .......... .......... .......... .......... 13%  543K 2s
   100K .......... .......... .......... .......... .......... 19% 16.5M 1s
   150K .......... .......... .......... .......... .......... 26%  566K 1s
   200K .......... .......... .......... .......... .......... 33%  555K 1s
   250K .......... .......... .......... .......... .......... 39%  558K 1s
   300K .......... .......... .......... .......... .......... 46%  600K 1s
   350K .......... .......... .......... .......... .......... 53% 6.40M 1s
   400K .......... .......... .......... .......... .......... 59%  547K 0s
   450K .......... .......... .......... .......... .......... 66%  564K 0s
   500K .......... .......... .......... .......... .......... 73%  553K 0s
   550K .......... .......... .......... .......... .......... 79%  687K 0s
   600K .......... .......... .......... .......... .......... 86% 2.61M 0s
   650K .......... .......... .......... .......... .......... 93%  557K 0s
   700K ....

HTTP request sent, awaiting response... 200 OK
Length: 714203 (697K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000000692_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  7%  270K 2s
    50K .......... .......... .......... .......... .......... 14%  585K 2s
   100K .......... .......... .......... .......... .......... 21% 54.0M 1s
   150K .......... .......... .......... .......... .......... 28%  554K 1s
   200K .......... .......... .......... .......... .......... 35% 26.7M 1s
   250K .......... .......... .......... .......... .......... 43%  570K 1s
   300K .......... .......... .......... .......... .......... 50% 27.4M 0s
   350K .......... .......... .......... .......... .......... 57% 2.66M 0s
   400K .......... .......... .......... .......... .......... 64%  689K 0s
   450K .......... .......... .......... .......... .......... 71% 2.84M 0s
   500K .......... .......... .......... .......... .......... 78%  693K 0s
   550K .......

     0K .......... .......... .......... .......... ..........  5%  272K 3s
    50K .......... .......... .......... .......... .......... 11%  549K 2s
   100K .......... .......... .......... .......... .......... 17%  120M 1s
   150K .......... .......... .......... .......... .......... 22%  554K 1s
   200K .......... .......... .......... .......... .......... 28%  554K 1s
   250K .......... .......... .......... .......... .......... 34%  541K 1s
   300K .......... .......... .......... .......... .......... 39%  542K 1s
   350K .......... .......... .......... .......... .......... 45% 61.2M 1s
   400K .......... .......... .......... .......... .......... 51%  568K 1s
   450K .......... .......... .......... .......... .......... 56%  554K 1s
   500K .......... .......... .......... .......... .......... 62%  558K 1s
   550K .......... .......... .......... .......... .......... 68%  556K 0s
   600K .......... .......... .......... .......... .......... 73% 60.0M 0s
   650K ....

HTTP request sent, awaiting response... 200 OK
Length: 750069 (732K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000004691_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  6%  277K 2s
    50K .......... .......... .......... .......... .......... 13%  563K 2s
   100K .......... .......... .......... .......... .......... 20% 81.1M 1s
   150K .......... .......... .......... .......... .......... 27% 85.1M 1s
   200K .......... .......... .......... .......... .......... 34%  552K 1s
   250K .......... .......... .......... .......... .......... 40%  157M 1s
   300K .......... .......... .......... .......... .......... 47% 67.2M 0s
   350K .......... .......... .......... .......... .......... 54% 60.6M 0s
   400K .......... .......... .......... .......... .......... 61%  568K 0s
   450K .......... .......... .......... .......... .......... 68% 54.7M 0s
   500K .......... .......... .......... .......... .......... 75%  156M 0s
   550K .......

   300K .......... .......... .......... .......... .......... 58%  550K 1s
   350K .......... .......... .......... .......... .......... 66%  557K 1s
   400K .......... .......... .......... .......... .......... 74%  546K 0s
   450K .......... .......... .......... .......... .......... 82%  551K 0s
   500K .......... .......... .......... .......... .......... 91%  557K 0s
   550K .......... .......... .......... .......... .......... 99%  554K 0s
   600K ..                                                    100% 48.2K=1.4s

2022-12-06 21:15:45 (446 KB/s) - ‘T2D_positive/MGYG000001365_eggNOG.tsv’ saved [616869/616869]

--2022-12-06 21:15:45--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000013/MGYG000001392/genome/MGYG000001392_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 

   500K .......... .......... .......... .......... .......... 76%  542K 0s
   550K .......... .......... .......... .......... .......... 83%  144M 0s
   600K .......... .......... .......... .......... .......... 90%  572K 0s
   650K .......... .......... .......... .......... .......... 97% 36.4M 0s
   700K .......... .......                                    100% 80.9M=0.7s

2022-12-06 21:15:51 (990 KB/s) - ‘T2D_positive/MGYG000001756_eggNOG.tsv’ saved [734351/734351]

--2022-12-06 21:15:51--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000018/MGYG000001814/genome/MGYG000001814_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1085368 (1.0M) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000001814_eggNOG.tsv’

     0K .......... .......... .......... .......... .....

   450K .......... .......... .......... .......... .......... 71%  534K 0s
   500K .......... .......... .......... .......... .......... 78%  588K 0s
   550K .......... .......... .......... .......... .......... 85%  554K 0s
   600K .......... .......... .......... .......... .......... 93% 18.4M 0s
   650K .......... .......... .......... .......... .......   100%  549K=1.1s

2022-12-06 21:15:58 (642 KB/s) - ‘T2D_positive/MGYG000001933_eggNOG.tsv’ saved [714526/714526]

--2022-12-06 21:15:58--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000020/MGYG000002056/genome/MGYG000002056_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 748112 (731K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000002056_eggNOG.tsv’

     0K .......... .......... .......... .......... ......

   150K .......... .......... .......... .......... .......... 39%  558K 1s
   200K .......... .......... .......... .......... .......... 49%  302K 1s
   250K .......... .......... .......... .......... .......... 58%  556K 1s
   300K .......... .......... .......... .......... .......... 68%  553K 0s
   350K .......... .......... .......... .......... .......... 78%  552K 0s
   400K .......... .......... .......... .......... .......... 88%  560K 0s
   450K .......... .......... .......... .......... .......... 98% 4.28M 0s
   500K .........                                             100%  120K=1.1s

2022-12-06 21:16:06 (472 KB/s) - ‘T2D_positive/MGYG000002236_eggNOG.tsv’ saved [521538/521538]

--2022-12-06 21:16:06--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000022/MGYG000002287/genome/MGYG000002287_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.1

--2022-12-06 21:16:12--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000024/MGYG000002486/genome/MGYG000002486_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 856698 (837K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000002486_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  5%  271K 3s
    50K .......... .......... .......... .......... .......... 11%  553K 2s
   100K .......... .......... .......... .......... .......... 17%  277K 2s
   150K .......... .......... .......... .......... .......... 23%  544K 2s
   200K .......... .......... .......... .......... .......... 29%  565K 1s
   250K .......... .......... .......... .......... .......... 35%  548K 1s
   300K .......... .......... .......... .......... .......... 41%  561K 1s
   35

   500K .......... .......... .......... .......... .......... 78%  555K 0s
   550K .......... .......... .......... .......... .......... 85%  546K 0s
   600K .......... .......... .......... .......... .......... 93% 80.4M 0s
   650K .......... .......... .......... .......... ........  100%  538K=1.1s

2022-12-06 21:16:20 (641 KB/s) - ‘T2D_positive/MGYG000002559_eggNOG.tsv’ saved [715476/715476]

--2022-12-06 21:16:20--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000025/MGYG000002570/genome/MGYG000002570_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 636082 (621K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000002570_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  8%  281K 2s
    50K .......... .......... .......... .......... ......

   450K .......... .......... .......... .......... .......... 56%  559K 1s
   500K .......... .......... .......... .......... .......... 62%  551K 1s
   550K .......... .......... .......... .......... .......... 68%  547K 0s
   600K .......... .......... .......... .......... .......... 73% 75.0M 0s
   650K .......... .......... .......... .......... .......... 79%  548K 0s
   700K .......... .......... .......... .......... .......... 85%  557K 0s
   750K .......... .......... .......... .......... .......... 90%  551K 0s
   800K .......... .......... .......... .......... .......... 96%  245M 0s
   850K .......... .......... .......... .                    100%  349K=1.4s

2022-12-06 21:16:27 (648 KB/s) - ‘T2D_positive/MGYG000004687_eggNOG.tsv’ saved [902538/902538]

--2022-12-06 21:16:27--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000026/MGYG000002634/genome/MGYG000002634_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk

    50K .......... .......... .......... .......... .......... 12%  552K 2s
   100K .......... .......... .......... .......... .......... 18%  153M 1s
   150K .......... .......... .......... .......... .......... 24%  552K 1s
   200K .......... .......... .......... .......... .......... 30%  549K 1s
   250K .......... .......... .......... .......... .......... 36%  552K 1s
   300K .......... .......... .......... .......... .......... 43%  558K 1s
   350K .......... .......... .......... .......... .......... 49% 32.9M 1s
   400K .......... .......... .......... .......... .......... 55%  562K 1s
   450K .......... .......... .......... .......... .......... 61%  550K 1s
   500K .......... .......... .......... .......... .......... 67%  550K 0s
   550K .......... .......... .......... .......... .......... 73%  547K 0s
   600K .......... .......... .......... .......... .......... 80% 69.2M 0s
   650K .......... .......... .......... .......... .......... 86%  554K 0s
   700K ....

   650K .......... .......... .......... .......... .......... 95%  556K 0s
   700K .......... .......... .......... .                    100%  360K=1.7s

2022-12-06 21:16:43 (428 KB/s) - ‘T2D_positive/MGYG000003074_eggNOG.tsv’ saved [749486/749486]

--2022-12-06 21:16:43--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000031/MGYG000003126/genome/MGYG000003126_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 916016 (895K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000003126_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  5%  272K 3s
    50K .......... .......... .......... .......... .......... 11%  578K 2s
   100K .......... .......... .......... .......... .......... 16%  552K 2s
   150K .......... .......... .......... .......... ......

   250K .......... .......... .......... .......... .......... 41%  558K 1s
   300K .......... .......... .......... .......... .......... 48%  559K 1s
   350K .......... .......... .......... .......... .......... 55% 69.9M 1s
   400K .......... .......... .......... .......... .......... 62%  553K 0s
   450K .......... .......... .......... .......... .......... 69%  559K 0s
   500K .......... .......... .......... .......... .......... 76%  552K 0s
   550K .......... .......... .......... .......... .......... 83%  557K 0s
   600K .......... .......... .......... .......... .......... 90% 61.7M 0s
   650K .......... .......... .......... .......... .......... 97%  561K 0s
   700K .......... ..........                                 100% 85.6M=1.1s

2022-12-06 21:16:50 (662 KB/s) - ‘T2D_positive/MGYG000003458_eggNOG.tsv’ saved [737529/737529]

--2022-12-06 21:16:50--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000034/MGYG0000

   750K .......... .                                          100% 99.8K=1.7s

2022-12-06 21:16:58 (460 KB/s) - ‘T2D_positive/MGYG000003552_eggNOG.tsv’ saved [780148/780148]

--2022-12-06 21:16:58--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000036/MGYG000003653/genome/MGYG000003653_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 662182 (647K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000003653_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  7%  295K 2s
    50K .......... .......... .......... .......... .......... 15%  560K 1s
   100K .......... .......... .......... .......... .......... 23%  559K 1s
   150K .......... .......... .......... .......... .......... 30%  555K 1s
   200K .......... .......... .......... .......... ......

Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 649441 (634K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000003990_eggNOG.tsv’

     0K .......... .......... .......... .......... ..........  7%  278K 2s
    50K .......... .......... .......... .......... .......... 15%  551K 1s
   100K .......... .......... .......... .......... .......... 23% 82.9M 1s
   150K .......... .......... .......... .......... .......... 31%  553K 1s
   200K .......... .......... .......... .......... .......... 39%  548K 1s
   250K .......... .......... .......... .......... .......... 47%  553K 1s
   300K .......... .......... .......... .......... .......... 55%  551K 1s
   350K .......... .......... .......... .......... .......... 63% 48.2M 0s
   400K .......... .......... .......... .......... .......... 70%  554K 0s
   450K .......... .......... .......... .......... .......... 78%  551K 0s
   500K ......

   650K .......... .......... .......... .......... .......... 81%  544K 0s
   700K .......... .......... .......... .......... .......... 87% 46.8M 0s
   750K .......... .......... .......... .......... .......... 93%  569K 0s
   800K .......... .......... .......... .......... .......... 99%  561K 0s
   850K ....                                                  100% 81.1K=1.3s

2022-12-06 21:17:13 (677 KB/s) - ‘T2D_positive/MGYG000004313_eggNOG.tsv’ saved [874551/874551]

--2022-12-06 21:17:13--  http://ftp.ebi.ac.uk/pub/databases/metagenomics/mgnify_genomes/human-gut/v2.0/species_catalogue/MGYG0000043/MGYG000004329/genome/MGYG000004329_eggNOG.tsv
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.138
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.138|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 696045 (680K) [text/tab-separated-values]
Saving to: ‘T2D_positive/MGYG000004329_eggNOG.tsv’

     0K .......... .......... .......... .......... ......

    50K .......... .......... .......... .......... .......... 11%  562K 2s
   100K .......... .......... .......... .......... .......... 17%  554K 2s
   150K .......... .......... .......... .......... .......... 23%  553K 1s
   200K .......... .......... .......... .......... .......... 29%  557K 1s
   250K .......... .......... .......... .......... .......... 35%  552K 1s
   300K .......... .......... .......... .......... .......... 41%  558K 1s
   350K .......... .......... .......... .......... .......... 47% 69.3M 1s
   400K .......... .......... .......... .......... .......... 53%  550K 1s
   450K .......... .......... .......... .......... .......... 59%  559K 1s
   500K .......... .......... .......... .......... .......... 65%  537K 1s
   550K .......... .......... .......... .......... .......... 71%  570K 0s
   600K .......... .......... .......... .......... .......... 76%  115M 0s
   650K .......... .......... .......... .......... .......... 82%  549K 0s
   700K ....

In [179]:
# df1 = pd.read_table('../../../../Downloads/MGYG000000001_eggNOG.tsv')
# df2 = pd.read_table('../../../../Downloads/MGYG000002834_eggNOG.tsv')

In [207]:
df_list_p = []
for i in Species_rep_ids_t2d_positive:
    f_name = 'T2D_positive/{}_eggNOG.tsv'.format(i)
    df_parsed = parse_genome(pd.read_table(f_name))
    df_list_p.append(df_parsed)
    
df_cat_p = pd.concat(df_list_p, axis=0)
genome_kegg_counts_p = to_sparse_matrix(df_cat_p)


df_list_n = []
for i in Species_rep_ids_t2d_negative:
    f_name = 'T2D_negative/{}_eggNOG.tsv'.format(i)
    df_parsed = parse_genome(pd.read_table(f_name))
    df_list_n.append(df_parsed)
    
df_cat_n = pd.concat(df_list_n, axis=0)
genome_kegg_counts_n = to_sparse_matrix(df_cat_n)

In [254]:
!rm -r T2D_negative/
!rm -r T2D_positive/

In [224]:
#compare controls and cases: for top 100 microbes
kegg_t2d = btest(genome_kegg_counts_p, genome_kegg_counts_n, return_proportions=False)
kegg_t2d = pd.DataFrame({'pvalue': kegg_t2d})
kegg_t2d = kegg_t2d.reset_index().rename(columns={'index': 'KEGG'})
kegg_t2d = log_pvalue(kegg_t2d, alpha=0.1)

In [249]:
kegg_t2d.sort_values(by = ['pvalue'])

Unnamed: 0,index,pvalue,-log(pvalue),pvalue_corrected
1155,ko:K02078,0.000000,460.517019,0.000000
2786,ko:K06921,0.000000,460.517019,0.000000
397,ko:K00791,0.000000,460.517019,0.000000
1221,ko:K02238,0.000000,460.517019,0.000000
2859,ko:K07011,0.000000,460.517019,0.000000
...,...,...,...,...
1897,ko:K03492,0.023240,3.761895,0.032526
3713,ko:K09963,0.024038,3.728110,0.033636
3332,ko:K08161,0.028225,3.567542,0.039484
3606,ko:K09773,0.029101,3.536978,0.040700


In [252]:
kegg_t2d.sort_values(by = ['pvalue']).head(50)

Unnamed: 0,index,pvalue,-log(pvalue),pvalue_corrected
1155,ko:K02078,0.0,460.517019,0.0
2786,ko:K06921,0.0,460.517019,0.0
397,ko:K00791,0.0,460.517019,0.0
1221,ko:K02238,0.0,460.517019,0.0
2859,ko:K07011,0.0,460.517019,0.0
2868,ko:K07025,0.0,460.517019,0.0
2913,ko:K07085,0.0,460.517019,0.0
394,ko:K00788,0.0,460.517019,0.0
2922,ko:K07098,0.0,460.517019,0.0
579,ko:K01091,0.0,460.517019,0.0
