In [None]:
import pandas as pd
import numpy as np
import csv 

In [None]:
# Folder holding the DisGeNET downloads; every DisGeNET file path is built from it.
PATH = "./disgenet/"
# Curated gene-disease association table inside that folder.
filename = PATH + "curated_gene_disease_associations.tsv"

In [None]:
df = pd.read_csv(filename, sep = '\t')
df.head()

In [None]:
print(len(df), df.columns)

#find row associated to disease Malignant Mesothelioma with id C0345967

The columns in the files are:
* geneId 		-> NCBI Entrez Gene Identifier
* geneSymbol	-> Official Gene Symbol
* DSI		-> The Disease Specificity Index for the gene
* DPI		-> The Disease Pleiotropy Index for the gene
* diseaseId 	-> UMLS concept unique identifier
* diseaseName 	-> Name of the disease	
* diseaseType  	-> The DisGeNET disease type: disease, phenotype and group
* diseaseClass	-> The MeSH disease class(es)
* diseaseSemanticType	-> The UMLS Semantic Type(s) of the disease
* score		-> DisGeNET score for the Gene-Disease association
* EI		-> The Evidence Index for the Gene-Disease association
* YearInitial	-> First time that the Gene-Disease association was reported
* YearFinal	-> Last time that the Gene-Disease association was reported
* NofPmids	-> Total number of publications reporting the Gene-Disease association
* NofSnps		-> Total number of SNPs associated to the Gene-Disease association
* source		-> Original source reporting the Gene-Disease association

In [None]:
# Keep only the associations whose diseaseName is exactly 'Malignant mesothelioma'.
target = df.loc[df['diseaseName'] == 'Malignant mesothelioma']
# Save the subset as TSV next to the DisGeNET data. The row index is written too
# (index=False is not passed), so reading the file back yields an 'Unnamed: 0' column.
target.to_csv(PATH+"malignant_mesothelioma_curated_genes.tsv", sep = '\t')

In [None]:
t = pd.read_csv(PATH+"malignant_mesothelioma_curated_genes.tsv", sep = '\t')

In [None]:
t = t.drop('Unnamed: 0', axis = 1)
t.head()

### Explore the DisGeNet dataset, find the disease of interest and get the list of human genes involved.

In [None]:
curated = pd.read_csv("./disgenet/browser_source_genes_summary_CURATED.tsv", sep = '\t')
curated.head()            

In [None]:
# Save the gene symbols, Entrez IDs and full names of the seed genes as plain lists.
# The columns are taken directly instead of being indexed row by row with
# range(len(curated)), so this does not depend on `curated` having a 0..n-1 index.
geneSymbol = curated['Gene'].tolist()
geneID = curated['Gene_id'].tolist()
geneName = curated['Gene_Full_Name'].tolist()
# The symbols are then checked on HGNC to see whether any gene name needs changing.

b) For all genes in the seed gene list, collect the following basic information from the Uniprot:

* official (primary) gene symbol (check if the symbols are updated and approved on the HGNC website; report any issue/lack of data/potential misinterpretation)

* Uniprot AC, alphanumeric ‘accession number’ (a.k.a. ’Uniprot entry’)

* protein name (the main one only, do not report the aliases)

* Entrez Gene ID (a.k.a. ‘GeneID’)

* very brief description of its function (keep it very short, i.e. max 20 words)

* notes related to the above information, if any and if relevant

Store the data gathered in a table in an easily accessible format of your choice (csv, tab,
excel, etc).

In [None]:
print(geneSymbol)

In [None]:
# approved up to COL12A1

In [None]:
# printing original list  
print("The original list is : " + str(geneSymbol))
  
# using join() 
# avoiding printing last comma 
print("The formatted output is : ") 
print(', '.join(geneSymbol)) 

# deleted quotes to pass the list into site
#https://www.genenames.org/tools/multi-symbol-checker/

# all gene symbols were approved by HGNC 

In [None]:
# UniProt accession of every seed gene, in the same row order as `curated`.
# Reading the column directly avoids the label-based per-row lookup, which only
# works while `curated` keeps its default 0..n-1 index.
uniprotAC = curated['UniProt'].tolist()

In [None]:
results = pd.DataFrame(list(zip(geneSymbol,geneName,geneID,uniprotAC)), columns=['Symbol','Name','ID','UniprotAC'])

In [None]:
results.to_csv('mesothelioma-curated-genes.csv')
results.head()

### Exercise 1.2

For each seed gene, collect all binary protein interactions from two different PPI sources:
* Biogrid Human, latest release available
* IID Integrated Interactions Database (experimental data only, all tissues, unless stated otherwise in further instruction)

Note: once you got the list of the proteins interacting with at least one seed gene, you must
also retrieve and include in your interactome the interactions among these non-seed
proteins

In [None]:
#open biogrid DB
biogrid=pd.read_csv('BIOGRID-ALL-3.5.179.tab2.txt', sep='\t')

In [None]:
biogrid.head()

In [None]:
len(biogrid)

In [None]:
#let's select what we want from the biogrid dataset.
#select only human PPI
biogrid.columns

In [None]:
#I took the interactions for NAT2 gene in humans 
example = pd.read_csv('./biogrid/BIOGRID-GENE-106528-3.5.179.tab2.txt', sep='\t')
example.head()

In [None]:
#select only human genes
biogrid_human=biogrid.loc[(biogrid['Organism Interactor A']==9606) & (biogrid['Organism Interactor B']==9606)]
biogrid_human

In [None]:
# look for the genes which interacts with at least one seed genes
biogrid_seed_genes = biogrid_human.loc[(biogrid_human['Official Symbol Interactor A'].isin(geneSymbol)) | (biogrid_human['Official Symbol Interactor B'].isin(geneSymbol))]
biogrid_seed_genes

## From the seed interactions, find the non-seed genes that interact with at least one seed gene

In [None]:
#Interactor A is not a seed gene but Interactor B is a seed gene
non_seed_df_A = biogrid_seed_genes.loc[~(biogrid_seed_genes['Official Symbol Interactor A'].isin(geneSymbol))]
seed_B = non_seed_df_A.loc[(non_seed_df_A ['Official Symbol Interactor B'].isin(geneSymbol))]

In [None]:
# interactor B is not a seed genes but Interactor A is a seed gene
non_seed_df_B = biogrid_seed_genes.loc[~(biogrid_seed_genes['Official Symbol Interactor B'].isin(geneSymbol))]
non_seed_df_B

In [None]:
# Reset Index, otherwise they don't work with list
non_seed_df_A=non_seed_df_A.reset_index(drop=True)
non_seed_df_B=non_seed_df_B.reset_index(drop=True)

In [None]:
non_seed_df_B

In [None]:
# Build a list of the non-seed genes that interact with at least one seed gene.
# Start with the interactor-A symbols of the rows whose interactor A is not a seed gene.
# The column is read directly, so the result does not depend on the frame's index.
non_seed_list = non_seed_df_A['Official Symbol Interactor A'].tolist()

In [None]:
# Add the interactor-B symbols of the rows whose interactor B is not a seed gene.
non_seed_list.extend(non_seed_df_B['Official Symbol Interactor B'].tolist())

In [None]:
#drop duplicates
non_seed_list = list(dict.fromkeys(non_seed_list))

In [None]:
# Verify correctness: no seed gene may appear in the list of non-seed genes.
# "Ok" is printed only when the two collections are really disjoint; otherwise the
# offending symbols are reported.
seed_in_non_seed = set(geneSymbol).intersection(non_seed_list)
if seed_in_non_seed:
    print("Something is wrong")
    print("Seed genes found among the non-seed genes:", sorted(seed_in_non_seed, key=str))
else:
    print("Ok")

In [None]:
print(len(non_seed_list))

In [None]:
# Now search the human BioGRID table for interactions among non-seed genes:
# rows in which BOTH interactors are non-seed genes (each of which interacts with
# at least one seed gene).
biogrid_human = biogrid_human.reset_index(drop=True)
# Vectorised membership test on both symbol columns. This replaces a per-row Python
# loop that searched `non_seed_list` twice for every row of the table.
both_non_seed = (
    biogrid_human['Official Symbol Interactor A'].isin(non_seed_list)
    & biogrid_human['Official Symbol Interactor B'].isin(non_seed_list)
)
# After the reset the index labels are 0..n-1, so these labels are also row positions.
list_of_idx = biogrid_human.index[both_non_seed].tolist()

In [None]:
#biogrid_seed_genes
biogrid_non_seed = biogrid_human.loc[list_of_idx]
biogrid_non_seed = biogrid_non_seed.reset_index(drop=True)
biogrid_non_seed

In [None]:
# Rows in which interactor A is a seed gene (interactor B can be seed or non-seed).
# NOTE(review): rows where only interactor B is a seed gene are not selected here;
# confirm that leaving them out of the interactome is intended.
biogrid_seed_df = biogrid_seed_genes.loc[(biogrid_seed_genes['Official Symbol Interactor A'].isin(geneSymbol))]
# Seed rows followed by the non-seed/non-seed rows, renumbered from 0.
interactome = pd.concat([biogrid_seed_df, biogrid_non_seed], ignore_index=True)
# Persist the table: the next cell reads "interactome-biogrid.txt" back, so the file
# has to be written for the notebook to run from a fresh kernel.
interactome.to_csv("interactome-biogrid.txt", sep='\t')

In [None]:
interactome = pd.read_csv("interactome-biogrid.txt", sep= '\t')
interactome = interactome.drop(['Unnamed: 0'], axis = 1)
interactome

In [None]:
iid = pd.read_csv('human_annotated_PPIs.txt', sep='\t')
iid

In [None]:
# Columns kept from the IID table.
icols = ['uniprot1', 'uniprot2', 'symbol1', 'symbol2' , 'evidence type', 'cancer']
# Experimental evidence only ('exp'), renumbered from 0 and reduced to the columns above.
iid = iid[iid['evidence type'] == 'exp'].reset_index(drop=True)[icols]

In [None]:
iid

In [None]:
iid.to_csv('iid.txt', sep = '\t')

In [None]:
#SAME PROCEDURE. 

In [None]:
iid_seed_genes = iid.loc[(iid['symbol1'].isin(geneSymbol)) | (iid['symbol2'].isin(geneSymbol))]
iid_seed_genes

In [None]:
non_seed_1 = iid_seed_genes.loc[~(iid_seed_genes['symbol1'].isin(geneSymbol))]
# interactor 1 is not a seed genes and interacts with a seed gene
#B is a seed gene
non_seed_1

In [None]:
# Rows whose interactor 2 (symbol2) is NOT a seed gene. iid_seed_genes only holds
# rows with at least one seed gene, so in these rows interactor 1 (symbol1) is the seed.
non_seed_2 = iid_seed_genes.loc[~(iid_seed_genes['symbol2'].isin(geneSymbol))]
non_seed_2

In [None]:
non_seed_1=non_seed_1.reset_index(drop=True)
non_seed_2=non_seed_2.reset_index(drop=True)

In [None]:
# Build a list of the non-seed genes that interact with at least one seed gene:
# symbol1 of the rows where interactor 1 is not a seed, then symbol2 of the rows
# where interactor 2 is not a seed. The columns are read directly, so this does not
# rely on the frames having a 0..n-1 index.
non_seed = non_seed_1['symbol1'].tolist() + non_seed_2['symbol2'].tolist()
# Drop duplicates while keeping first-appearance order.
non_seed = list(dict.fromkeys(non_seed))

In [None]:
len(non_seed)

In [None]:
iid = iid.reset_index(drop=True)
# Rows of the whole IID table in which BOTH symbols are non-seed genes.
# Every row of `iid` is tested. The scan must not be bounded by len(iid_seed_genes):
# that is a different, shorter table, and using its length would silently skip the
# remaining rows of `iid`.
both_non_seed_iid = iid['symbol1'].isin(non_seed) & iid['symbol2'].isin(non_seed)
# After the reset the index labels are 0..n-1, so these labels are also row positions.
list_of_idx = iid.index[both_non_seed_iid].tolist()

In [None]:
#biogrid_seed_genes
iid_non_seed = iid.loc[list_of_idx]
iid_non_seed = iid_non_seed.reset_index(drop=True)
iid_non_seed

In [None]:
iid_seed_df = iid_seed_genes.loc[(iid_seed_genes['symbol1'].isin(geneSymbol))]
interactome2 = pd.concat([iid_seed_df, iid_non_seed], ignore_index=True)
interactome2.to_csv("interactome-iid.txt", sep='\t')

In [None]:
interactome2 = pd.read_csv("interactome-iid.txt", sep='\t')

In [None]:
interactome2 = interactome2.drop(['Unnamed: 0'], axis = 1)

In [None]:
interactome2

In [None]:
interactome

### Summarize the main results in a table reporting:
* no. of seed genes found in each different DBs (some seed genes may be missing in the DBs);
* total no. of interacting proteins, including seed genes, for each DB;
* total no. of interactions found in each DB.

In [None]:
len(geneSymbol)

In [None]:
seed_B = non_seed_df_A.loc[(non_seed_df_A ['Official Symbol Interactor B'].isin(geneSymbol))]
seed_A = non_seed_df_B.loc[(non_seed_df_B ['Official Symbol Interactor A'].isin(geneSymbol))]

In [None]:
def find_unique_genes(dataframe, column_name):
    """Return the distinct values of one column, in order of first appearance.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table to read from; any index is accepted (it does not have to be 0..n-1).
    column_name : str
        Name of the column whose distinct values are wanted.

    Returns
    -------
    list
        Each distinct value once, ordered by first occurrence. Empty if the frame
        has no rows.
    """
    # drop_duplicates keeps the first occurrence of every value, which matches a
    # manual "append if not seen yet" loop without the repeated list searches.
    return dataframe[column_name].drop_duplicates().tolist()

In [None]:
# Seed genes found in BioGRID, and the ones that are missing from it.

# Seed symbols seen as interactor B and as interactor A respectively.
la = find_unique_genes(seed_B, 'Official Symbol Interactor B')
lb = find_unique_genes(seed_A, 'Official Symbol Interactor A')
l_tot = la + lb
# Drop duplicates while keeping first-appearance order.
total_genes = list(dict.fromkeys(l_tot))
# Seed genes that never show up in either list.
missing_gene = [symbol for symbol in geneSymbol if symbol not in total_genes]
print("Genes missing in Biogrid\n", missing_gene)

In [None]:
# Seed genes found in IID, and the ones that are missing from it.
# seed1: rows where symbol2 is not a seed gene and symbol1 is.
seed1 = non_seed_2.loc[(non_seed_2['symbol1'].isin(geneSymbol))]
# seed2: rows where symbol1 is not a seed gene and symbol2 is.
seed2 = non_seed_1.loc[(non_seed_1['symbol2'].isin(geneSymbol))]
l1 = find_unique_genes(seed1, 'symbol1')
l2 = find_unique_genes(seed2, 'symbol2')
l = l1+l2
# Drop duplicates while keeping first-appearance order.
i_genes = list(dict.fromkeys(l))
# Seed genes that never show up in either list.
iid_missing = [symbol for symbol in geneSymbol if symbol not in i_genes]
print("Genes missing from IID DB \n", iid_missing)

total no. of interacting proteins, including seed genes, for each DB;

In [None]:
# Distinct gene symbols appearing as interactor A or B in the human BioGRID table.
# The two columns are flattened row by row (A0, B0, A1, B1, ...), so the order of
# first appearance matches a row-wise scan, but without searching a growing
# Python list for every row.
interactors = pd.unique(
    biogrid_human[['Official Symbol Interactor A', 'Official Symbol Interactor B']]
    .to_numpy()
    .ravel()
).tolist()

In [None]:
len(interactors)

In [None]:
def count_interactors(dataframe, column1, column2):
    """Count the distinct values found across two columns of a table.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table to read from; any index is accepted (it does not have to be 0..n-1).
    column1, column2 : str
        Names of the two columns whose values are pooled before counting.

    Returns
    -------
    int
        Number of distinct values in the pooled columns; a value present in both
        columns is counted once. A missing value, if present, counts as one value.
    """
    pooled = pd.concat([dataframe[column1], dataframe[column2]], ignore_index=True)
    # dropna=False keeps a missing symbol as one distinct entry, matching a manual
    # "append if not already in the list" scan.
    return int(pooled.nunique(dropna=False))

In [None]:
iid_len = count_interactors(iid, 'symbol1', 'symbol2')

In [None]:
print(iid_len)

In [None]:
len(iid)

In [None]:
# total no. of interactions found in each DB.

In [None]:
countbio = biogrid_human['#BioGRID Interaction ID'].nunique()
print(countbio)

In [None]:
# One (symbol1, symbol2) pair per IID row.
# Pairs are kept as tuples instead of concatenated strings: without a separator two
# different pairs can produce the same text (e.g. "AB"+"C" and "A"+"BC"), and a row
# with a missing symbol would make the string concatenation fail.
uniquesyms = list(zip(iid['symbol1'], iid['symbol2']))
# duplicates are dropped in the next step
#drop duplicates

In [None]:
uniquesyms = list(dict.fromkeys(uniquesyms))
print(len(uniquesyms))

### Build and store three tables:

* seed genes interactome: interactions that involve seed genes only, from all DBs, in the format:
  interactor A gene symbol, interactor B gene symbol, interactor A Uniprot AC, interactor B
  Uniprot AC, database source
* union interactome: all proteins interacting with at least one seed gene, from all DBs, same format as above.
* intersection interactome: all proteins interacting with at least one seed gene confirmed by both DBs, in the       format: interactor A gene symbol, interactor B gene symbol, interactor A Uniprot AC, interactor B Uniprot AC

Always check that interactors are both human (i.e. organism ID is always 9606, Homo
Sapiens)

In [None]:
# The UniProt idmapping .dat file has no header line: every line is
# <UniProtKB accession> <ID type> <ID>, tab separated. Without header=None the
# first record would be consumed as column names.
uniprot_human = pd.read_csv(
    "./uniprot/HUMAN_9606_idmapping.dat",
    sep = '\t',
    header = None,
    names = ['UniProtKB_AC', 'ID_type', 'ID'],
)

In [None]:
uniprot_human

In [None]:
#save a list of all the symbols in order to search their uniprot
sym_to_fix=[]
sym_to_fix.extend(biogrid_human['Official Symbol Interactor A'])
sym_to_fix.extend(biogrid_human['Official Symbol Interactor B'])

#remove duplicates
sym_to_fix=list(set(sym_to_fix))

# using join() 
# avoiding printing last comma 
print("The formatted output is : ") 
print(', '.join(sym_to_fix)) 
##print in order to search on uniprot.com

In [None]:
#upload the uniprot fixing file
unigene = pd.read_csv("./uniprot/uniprot-geneid-mapping.tab", sep = '\t')

In [None]:
# Create a dictionary that maps each gene symbol to its UniProt entry.
# The first column of the mapping file carries a "yourlist:..." header (presumably
# generated by the UniProt ID-mapping tool - confirm); rename it to "symbol".
unigene=unigene.rename(columns={"yourlist:M202001056746803381A1F0E0DB47453E0216320D5454DDB": "symbol"})
# Series indexed by symbol -> dict. If a symbol occurs more than once, the last Entry wins.
# NOTE: `unigene` is rebound from a DataFrame to a dict here, so this cell cannot be
# re-run without reloading the mapping file first.
unigene=pd.Series(unigene.Entry.values, index=unigene.symbol).to_dict()
# Add the UniProt accession of both interactors; symbols absent from the mapping get NaN.
biogrid_human['UniprotAC interactor A']= biogrid_human['Official Symbol Interactor A'].map(unigene)
biogrid_human['UniprotAC interactor B']= biogrid_human['Official Symbol Interactor B'].map(unigene)
biogrid_human

In [None]:
# Reload the filtered IID table saved earlier.
iid_human = pd.read_csv('iid.txt', sep = '\t')
# drop() returns a new frame; assign it so the saved index column ('Unnamed: 0')
# is really removed from iid_human and not only from the displayed output.
iid_human = iid_human.drop(['Unnamed: 0'], axis = 1)
iid_human

In [None]:
#SEED GENES INTERACTOME
def build_first_table(biogrid_human, iid_human):
    """Write the seed-genes interactome to "seed_genes_interactome.tsv".

    An interaction is kept only when BOTH interactors are seed genes, i.e. both
    symbols are in the module-level list `geneSymbol`. BioGRID rows come first,
    followed by IID rows.

    Parameters
    ----------
    biogrid_human : pandas.DataFrame
        Must provide 'Official Symbol Interactor A', 'Official Symbol Interactor B',
        'UniprotAC interactor A' and 'UniprotAC interactor B'.
    iid_human : pandas.DataFrame
        Must provide 'symbol1', 'symbol2', 'uniprot1' and 'uniprot2'.

    Returns
    -------
    None
        The table is written as a tab-separated file (row index included) with the
        columns interactorA, interactorB, interactorA_Uniprot_AC,
        interactorB_Uniprot_AC, db_source.
    """
    db1 = 'Biogrid Human'
    db2 = 'Integrated Interactions Database experimental data'
    out_columns = ['interactorA', 'interactorB',
                   'interactorA_Uniprot_AC', 'interactorB_Uniprot_AC', 'db_source']

    def _seed_only(frame, source_columns, db_source):
        # Rows of `frame` whose two symbols are both seed genes, in the output layout.
        symbol_a, symbol_b = source_columns[0], source_columns[1]
        keep = frame[symbol_a].isin(geneSymbol) & frame[symbol_b].isin(geneSymbol)
        part = frame.loc[keep, source_columns].copy()
        part.columns = out_columns[:4]
        part['db_source'] = db_source
        return part

    # The table is built in one go: DataFrame.append no longer exists in pandas >= 2.0,
    # and growing a frame row by row is quadratic in the number of rows.
    t = pd.concat(
        [
            _seed_only(biogrid_human,
                       ['Official Symbol Interactor A', 'Official Symbol Interactor B',
                        'UniprotAC interactor A', 'UniprotAC interactor B'],
                       db1),
            _seed_only(iid_human, ['symbol1', 'symbol2', 'uniprot1', 'uniprot2'], db2),
        ],
        ignore_index=True,
    )
    t = t[out_columns]
    t.to_csv("seed_genes_interactome.tsv", sep = '\t')

In [None]:
biogrid_human=biogrid_human.reset_index(drop=True)
iid_human=iid_human.reset_index(drop=True)
build_first_table(biogrid_human, iid_human)

In [None]:
# Reload the seed-genes interactome written above.
interactome_seed = pd.read_csv("seed_genes_interactome.tsv", sep = '\t')
# drop() returns a new frame; assign it so the saved index column ('Unnamed: 0')
# is really removed from interactome_seed and not only from the displayed output.
interactome_seed = interactome_seed.drop(['Unnamed: 0'], axis = 1)
interactome_seed

union interactome: all proteins interacting with at least one seed gene, from all DBs, same format as above.

In [None]:
biogrid_human

In [None]:
def build_union_interactome(biogrid_human, iid_human):
    """Write the union interactome to "union_interactome.tsv".

    An interaction is kept when AT LEAST ONE of its interactors is a seed gene,
    i.e. one of the two symbols is in the module-level list `geneSymbol`. BioGRID
    rows come first, followed by IID rows.

    Parameters
    ----------
    biogrid_human : pandas.DataFrame
        Must provide 'Official Symbol Interactor A', 'Official Symbol Interactor B',
        'UniprotAC interactor A' and 'UniprotAC interactor B'.
    iid_human : pandas.DataFrame
        Must provide 'symbol1', 'symbol2', 'uniprot1' and 'uniprot2'.

    Returns
    -------
    None
        The table is written as a tab-separated file (row index included) with the
        columns interactorA, interactorB, interactorA_Uniprot_AC,
        interactorB_Uniprot_AC, db_source.
    """
    db1 = 'Biogrid Human'
    db2 = 'Integrated Interactions Database experimental data'
    out_columns = ['interactorA', 'interactorB',
                   'interactorA_Uniprot_AC', 'interactorB_Uniprot_AC', 'db_source']

    def _touching_seed(frame, source_columns, db_source):
        # Rows of `frame` with a seed gene on either side, in the output layout.
        symbol_a, symbol_b = source_columns[0], source_columns[1]
        keep = frame[symbol_a].isin(geneSymbol) | frame[symbol_b].isin(geneSymbol)
        part = frame.loc[keep, source_columns].copy()
        part.columns = out_columns[:4]
        part['db_source'] = db_source
        return part

    # The table is built in one go: DataFrame.append no longer exists in pandas >= 2.0,
    # and growing a frame row by row is quadratic in the number of rows.
    t = pd.concat(
        [
            _touching_seed(biogrid_human,
                           ['Official Symbol Interactor A', 'Official Symbol Interactor B',
                            'UniprotAC interactor A', 'UniprotAC interactor B'],
                           db1),
            _touching_seed(iid_human, ['symbol1', 'symbol2', 'uniprot1', 'uniprot2'], db2),
        ],
        ignore_index=True,
    )
    t = t[out_columns]
    t.to_csv("union_interactome.tsv", sep = '\t')

In [None]:
biogrid_human=biogrid_human.reset_index(drop=True)
iid_human=iid_human.reset_index(drop=True)
build_union_interactome(biogrid_human, iid_human)

In [None]:
unionint = pd.read_csv("union_interactome.tsv", sep = '\t')

In [None]:
unionint

### intersection interactome: all proteins interacting with at least one seed gene confirmed by both DBs, in the       format: interactor A gene symbol, interactor B gene symbol, interactor A Uniprot AC, interactor B Uniprot AC


In [None]:
def build_intersection_interactome(biogrid_human, iid_human):
    """Write the interactions reported by both databases to "intersection_interactome.tsv".

    The function reads "union_interactome.tsv" from the working directory, splits it
    by `db_source`, and keeps the rows whose two symbols and two UniProt accessions
    are identical in the BioGRID part and in the IID part. Rows with a missing value
    in any of those four columns are discarded.

    Parameters
    ----------
    biogrid_human, iid_human : pandas.DataFrame
        Not used by the computation (everything is read from the union file); kept
        so existing calls keep working.

    Returns
    -------
    None
        The result is written as a tab-separated file (row index included) with the
        columns interactorA, interactorB, interactorA_Uniprot_AC, interactorB_Uniprot_AC.

    Notes
    -----
    NOTE(review): the comparison is direction sensitive - A/B in one database does not
    match B/A in the other. Confirm whether interactions should be treated as undirected.
    """
    db1 = 'Biogrid Human'
    db2 = 'Integrated Interactions Database experimental data'
    pair_columns = ['interactorA', 'interactorB',
                    'interactorA_Uniprot_AC', 'interactorB_Uniprot_AC']
    union = pd.read_csv("union_interactome.tsv", sep = '\t')

    # One row per distinct interaction and database. Without this, an interaction
    # listed k times in one source and m times in the other would come out of the
    # merge k*m times.
    union_biogrid = union.loc[union['db_source'] == db1, pair_columns].drop_duplicates()
    union_iid = union.loc[union['db_source'] == db2, pair_columns].drop_duplicates()

    intersect = pd.merge(union_biogrid, union_iid, on=pair_columns)
    # Discard interactions with a missing symbol or accession, then renumber the rows.
    intersect = intersect.dropna().reset_index(drop=True)
    intersect.to_csv("intersection_interactome.tsv", sep = '\t')

In [None]:
biogrid_human=biogrid_human.reset_index(drop=True)
iid_human=iid_human.reset_index(drop=True)
build_intersection_interactome(biogrid_human, iid_human)

In [None]:
intersect = pd.read_csv("intersection_interactome.tsv", sep = '\t')

In [None]:
intersect

### Enrichment analysis
* Using the service Enrichr, find, report in tables and save related charts (8 charts in total) of the overrepresented GO categories (limit to the first 10 for each main category: BP, MF, CC) and the overrepresented pathways (KEGG 2019 Human) for:
    * the seed genes,
    * the union interactome genes

In [None]:
unionint = pd.read_csv("union_interactome.tsv", sep = '\t')
unionint = unionint.drop(['Unnamed: 0'], axis = 1)

interactome_seed = pd.read_csv("seed_genes_interactome.tsv", sep = '\t')
interactome_seed = interactome_seed.drop(['Unnamed: 0'], axis = 1)

In [None]:
interactome_seed

In [None]:
#save a list of all the symbols in order to search their uniprot
def print_list(dataframe, column1, column2):
    """Print every distinct value found in two columns of a table, one per line.

    The values of `column1` and `column2` are pooled and de-duplicated through a
    set, so the printing order is whatever order the set iterates in.
    Nothing is returned.
    """
    distinct_symbols = set([*dataframe[column1], *dataframe[column2]])
    for symbol in distinct_symbols:
        print(symbol)

In [None]:
print_list(interactome_seed, 'interactorA', 'interactorB')
#Print this list and put the list to https://amp.pharm.mssm.edu/Enrichr Enrichr. Than download KEGG HUMAN 2019 and Ontologies tables

In [None]:
sym_to_fix=[]
sym_to_fix.extend(unionint['interactorA'])
sym_to_fix.extend(unionint['interactorB'])
#remove duplicates
sym_to_fix=list(set(sym_to_fix))
# using join() 
# avoiding printing last comma 
#print(', '.join(sym_to_fix)) 

# deleted quotes to pass the list into site
#https://www.genenames.org/tools/multi-symbol-checker/

In [None]:
approvedsym = pd.read_csv("approved-symbols.csv", sep = ',')

In [None]:
approvedsym.head()

Now, let's open the downloaded KEGG and GO tables and save only the first 10 entries of each.

In [None]:
# Top-10 KEGG 2019 Human pathways for the seed genes.
kegg_seed = pd.read_table("enrichr/kegg_human/seed/KEGG_2019_Human_table.txt")
kegg_seed = kegg_seed[:10]
# Write the subset to its own "_top10" file (same naming as the union KEGG table)
# rather than overwriting the Enrichr download this cell reads. Overwriting the
# input with a comma-separated top-10 makes the cell give a different result when re-run.
kegg_seed.to_csv("enrichr/kegg_human/seed/KEGG_2019_Human_table_top10.txt")

In [None]:
kegg_union = pd.read_table("enrichr/kegg_human/union/KEGG_2019_Human_table.txt")
kegg_union = kegg_union[:10]
kegg_union.to_csv("enrichr/kegg_human/union/KEGG_2019_Human_table_top10.txt")

In [None]:
# Top-10 GO terms (biological process, molecular function, cellular component) for
# the seed genes. Each subset goes to its own "_top10" file so the Enrichr downloads
# read here are never overwritten and the cell gives the same result when re-run.
go_bp_seed = pd.read_table("enrichr/ontologies/seed/GO_Biological_Process_2018_table.txt")
go_bp_seed = go_bp_seed[:10]
go_bp_seed.to_csv("enrichr/ontologies/seed/GO_Biological_Process_2018_table_top10.txt")

go_mf_seed = pd.read_table("enrichr/ontologies/seed/GO_Molecular_Function_2018_table.txt")
go_mf_seed = go_mf_seed[:10]
go_mf_seed.to_csv("enrichr/ontologies/seed/GO_Molecular_Function_2018_table_top10.txt")

# read_table (tab separated), like the two tables above; read_csv would split the
# tab-separated download on commas.
go_cc_seed = pd.read_table("enrichr/ontologies/seed/GO_Cellular_Component_2018_table.txt")
go_cc_seed = go_cc_seed[:10]
go_cc_seed.to_csv("enrichr/ontologies/seed/GO_Cellular_Component_2018_table_top10.txt")

In [None]:
go_cc_seed = pd.read_csv("enrichr/ontologies/seed/GO_Cellular_Component_2018_table.csv")

In [None]:
go_cc_seed

In [None]:
# Top-10 GO terms (biological process, molecular function) for the union interactome.
# Each subset goes to its own "_top10" file so the Enrichr downloads read here are
# never overwritten and the cell gives the same result when re-run.
go_bp_union = pd.read_table("enrichr/ontologies/union/GO_Biological_Process_2018_table.txt")
go_bp_union = go_bp_union[:10]
go_bp_union.to_csv("enrichr/ontologies/union/GO_Biological_Process_2018_table_top10.txt")

go_mf_union = pd.read_table("enrichr/ontologies/union/GO_Molecular_Function_2018_table.txt")
go_mf_union = go_mf_union[:10]
go_mf_union.to_csv("enrichr/ontologies/union/GO_Molecular_Function_2018_table_top10.txt")

In [None]:
go_cc_union = pd.read_csv("enrichr/ontologies/union/GO_Cellular_Component_2018_table.csv")

In [None]:
go_cc_union