In [1]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import MuxVizPy as mxp
import graph_tool as gt
from operator import itemgetter
from tqdm import tqdm
import os

import optuna
from optuna.samplers import TPESampler, RandomSampler

In [2]:
#dataset with all links from Virus STRING
full_links = pd.read_csv("../Data/data_STRING/9606.protein.links.full.v10.5.txt", sep=" ")

# HUMAN PPI

In [3]:
full_links = full_links[(full_links["protein2"].map(lambda x: "9606"in x))]
print("Human net edges:", len(full_links))
print("Human net nodes:", np.unique(np.concatenate([full_links["protein1"].unique(), full_links["protein2"].unique()])).shape[0])

Human net edges: 11353056
Human net nodes: 19576


In [125]:
15247/19576full_links

0.7788618716796076

In [124]:
np.savetxt(X=np.unique(np.concatenate([hum_links["protein1"].unique(), hum_links["protein2"].unique()])),
           fname="../Data/data_STRING/human_prots.txt",
           fmt="%s")

In [16]:
coexpr_hum_links = hum_links[(hum_links["coexpression"]>400) | (hum_links["coexpression_transferred"]>400)]
print("Coexpression net edges:", len(coexpr_hum_links))
print("Coexpression net nodes:", np.unique(np.concatenate([coexpr_hum_links["protein1"], coexpr_hum_links["protein2"]])).shape[0])

Coexpression net edges: 112776
Coexpression net nodes: 4499


In [4]:
highConf_hum_links = hum_links[hum_links["combined_score"]>700]
print("HighConfidence links net edges:", len(highConf_hum_links))
print("HighConfidence links net nodes:", np.unique(np.concatenate([highConf_hum_links["protein1"], highConf_hum_links["protein2"]])).shape[0])

HighConfidence links net edges: 719552
HighConfidence links net nodes: 15131


In [11]:
pd.merge(coexpr_hum_links, highConf_hum_links, how='inner', on=['protein1', 'protein2'])

Unnamed: 0,protein1,protein2,neighborhood_x,neighborhood_transferred_x,fusion_x,cooccurence_x,homology_x,coexpression_x,coexpression_transferred_x,experiments_x,...,homology_y,coexpression_y,coexpression_transferred_y,experiments_y,experiments_transferred_y,database_y,database_transferred_y,textmining_y,textmining_transferred_y,combined_score_y
0,9606.ENSP00000003100,9606.ENSP00000261507,0,0,0,0,0,331,729,0,...,0,331,729,0,717,0,0,493,503,984
1,9606.ENSP00000003100,9606.ENSP00000322706,0,0,0,0,0,174,464,0,...,0,174,464,0,99,0,0,439,287,811
2,9606.ENSP00000003100,9606.ENSP00000320604,0,0,0,0,0,0,417,0,...,0,0,417,0,507,0,0,0,469,834
3,9606.ENSP00000003100,9606.ENSP00000265896,0,0,0,0,0,172,918,0,...,0,172,918,0,276,0,0,504,504,985
4,9606.ENSP00000003100,9606.ENSP00000360918,0,0,0,0,0,0,417,0,...,0,0,417,0,507,0,0,112,469,846
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61769,9606.ENSP00000473233,9606.ENSP00000262105,0,0,0,0,0,0,720,0,...,0,0,720,0,0,0,0,0,504,855
61770,9606.ENSP00000473233,9606.ENSP00000229854,0,0,0,0,0,0,667,0,...,0,0,667,0,0,0,0,0,498,825
61771,9606.ENSP00000473233,9606.ENSP00000307288,0,0,0,0,0,0,667,0,...,0,0,667,0,53,0,0,0,503,829
61772,9606.ENSP00000473233,9606.ENSP00000233146,0,0,0,0,0,0,595,0,...,0,0,595,0,295,0,0,0,321,789


In [7]:
def create_human_g(filt_df, mapper_pos):
    un_p = np.unique(np.concatenate([filt_df["protein1"].to_numpy(), 
                                     filt_df["protein2"].to_numpy()]))
    pd.DataFrame({"ProgNumber": np.arange(len(un_p)), "Prot": un_p}).to_csv("../Data/data_STRING/"+mapper_pos, index=False)
    number_mapper = pd.read_csv("../Data/data_STRING/"+mapper_pos)
    number_dict = dict(zip(number_mapper["Prot"], number_mapper["ProgNumber"]))
    edge_df = pd.DataFrame({"source": filt_df["protein1"].map(number_dict).values, 
                            "target": filt_df["protein2"].map(number_dict).values})
    human_g = gt.Graph(directed=False)
    human_g.add_edge_list(edge_df.values)
    
    return human_g

In [118]:
def node_mapper(df):
    #mapping between node ID and name
    mapper = pd.read_csv("../Data/data_STRING/9606.protein.info.v11.5.txt", sep="\t")
    mapper_dict = dict(zip(mapper["#string_protein_id"], mapper["preferred_name"]))

    #load ncbi database
    ncbi = pd.read_csv("../Data/data_BIOGRID/Homo_sapiens.gene_info", sep="\t")
    symbols = ncbi["Symbol"]
    synonyms = [ncbi["Synonyms"][i].split("|") for i in range(len(ncbi))]
    unique_synonims = np.unique(np.concatenate(synonyms))

    df["p1name"] = df["protein1"].map(mapper_dict)
    df["p2name"] = df["protein2"].map(mapper_dict)

    df_conv1 = df[["p1name", "p2name"]].dropna().reset_index(drop=True)
    df_conv1_nodes = np.unique(df_conv1.to_numpy()) #pd.concat([df_conv1["p1name"], df_conv1["p2name"]]).unique()

    chg_idx = np.where(np.logical_not(np.isin(df_conv1_nodes, symbols)))[0]
    but_synon = np.isin(df_conv1_nodes[chg_idx], unique_synonims)

    new_names = []
    for i in tqdm(range(len(chg_idx))):
        if but_synon[i]:
            new_names.append(symbols[([df_conv1_nodes[chg_idx[i]] in s for s in synonyms])].values[0])
        else:
            new_names.append(df_conv1_nodes[chg_idx[i]])

    df_conv2_nodes = np.copy(df_conv1_nodes)
    df_conv2_nodes[chg_idx] = new_names
    df_conv2_nodes[chg_idx[np.logical_not(but_synon)]] = np.nan

    mapper2_dict= dict(zip(df_conv1_nodes, df_conv2_nodes))

    df_conv2 = df_conv1.copy()
    df_conv2["p1name"] = df_conv2["p1name"].map(mapper2_dict)
    df_conv2["p2name"] = df_conv2["p2name"].map(mapper2_dict)
    df_conv2 = df_conv2.dropna().reset_index(drop=True)

    df_conv2_nodes = np.unique(df_conv1.to_numpy())
    
    return {"mapping_1": [df_conv1, df_conv1_nodes, mapper_dict], "mapping_2": [df_conv2, df_conv2_nodes, mapper2_dict]}

In [119]:
maps = node_mapper(coexpr_hum_links)

100%|█████████████████████████████████████████| 240/240 [00:10<00:00, 22.58it/s]


In [122]:
maps["mapping_1"][0]

Unnamed: 0,p1name,p2name
0,FKBP4,RRP1
1,FKBP4,PES1
2,FKBP4,GMPS
3,FKBP4,RRS1
4,FKBP4,RRP1B
...,...,...
73565,ENSG00000142539,TTK
73566,ENSG00000142539,MCM7
73567,ENSG00000142539,MSH2
73568,ENSG00000142539,TUBGCP6


In [5]:
unip_su = pd.read_csv("../Data/data_STRING/uniprot_string_to_uniprot.tsv", sep="\t")

In [7]:
unip_su_dict = dict(zip(unip_su["From"], unip_su["Entry"]))

In [10]:
full_links["p1"] = full_links["protein1"].map(unip_su_dict)
full_links["p2"] = full_links["protein2"].map(unip_su_dict)

In [14]:
np.unique(full_links.dropna()[["p1","p2"]].to_numpy()).shape

(15245,)

In [16]:
7059546/11353056

0.6218190062658019

In [60]:
human_g_coex = create_human_g(filt_df=coexpr_hum_links, mapper_pos="unique_prot_coex.csv")
human_g_coex

<Graph object, undirected, with 4499 vertices and 112776 edges, at 0x7ff442b4e050>

In [8]:
human_highConf = create_human_g(filt_df=highConf_hum_links, mapper_pos="unique_prot_highConf.csv")
human_highConf

<Graph object, undirected, with 15131 vertices and 719552 edges, at 0x7fb2cf95bee0>

In [None]:
#mapping between node ID and name
mapper = pd.read_csv("../Data/data_STRING/9606.protein.info.v11.5.txt", sep="\t")
mapper_dict = dict(zip(mapper["#string_protein_id"], mapper["preferred_name"]))

co_res = coexpr_hum_links[(coexpr_hum_links["protein1"].isin(mapper["#string_protein_id"])) & coexpr_hum_links["protein2"].isin(mapper["#string_protein_id"])]
co_res["protein1_name"]=itemgetter(*co_res["protein1"].to_list())(mapper_dict)
co_res["protein2_name"]=itemgetter(*co_res["protein2"].to_list())(mapper_dict)

# From STRING.viruses dataset exctract human proteins directly targeted by viruses and uniform names to NCBI

In [2]:
#only interactions between virus and human
links = pd.read_csv("../Data/data_STRING/9606.protein.links.full.v10.5.txt", sep=" ")


NameError: name 'full_links' is not defined

In [3]:
links = links[(links["protein2"].map(lambda x: not("9606"in x)))]
links = links.reset_index(drop=True)
viruses_code = np.unique(links["protein2"].map(lambda x: x.split(".")[0]).to_numpy())

In [4]:
links

Unnamed: 0,protein1,protein2,neighborhood,neighborhood_transferred,fusion,cooccurence,homology,coexpression,coexpression_transferred,experiments,experiments_transferred,database,database_transferred,textmining,textmining_transferred,combined_score
0,9606.ENSP00000000233,10258.Q6TVW1_ORFSA,0,0,0,0,0,0,0,0,0,0,0,0,188,188
1,9606.ENSP00000000233,10279.Q98187_MCV1,0,0,0,0,0,0,0,0,0,0,0,0,188,188
2,9606.ENSP00000000233,10298.GM_HHV11,0,0,0,0,0,0,0,0,0,0,0,17,336,336
3,9606.ENSP00000000233,37296.ORF4_HHV8P,0,0,0,0,0,0,0,0,0,0,0,91,109,155
4,9606.ENSP00000000233,195054.PRO_0000039759,0,0,0,0,0,0,0,0,0,0,0,0,329,329
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84004,9606.ENSP00000473243,10310.GB_HHV2H,0,0,0,0,0,0,0,0,0,0,0,0,227,227
84005,9606.ENSP00000473243,10298.GB_HHV11,0,0,0,0,0,0,0,0,0,0,0,0,227,227
84006,9606.ENSP00000473243,10335.GB_VZVD,0,0,0,0,0,0,0,0,0,0,0,0,227,227
84007,9606.ENSP00000473243,10359.GB_HCMVM,0,0,0,0,0,0,0,0,0,0,0,0,227,227


In [19]:
np.savetxt(X=links["protein1"].unique(), fname="string_prot.txt", fmt="%s")

In [6]:
#correspondance between name of virus and taxonomic code
codevir = pd.read_csv("../Data/data_STRING/viruses_codes.txt", sep="\t")

virus_name = []
virus_name_len = []
for s in viruses_code:
    if len(codevir[codevir["## taxon_id"]==int(s)]["official_name_NCBI"].to_list())==0:
        virus_name.append("")
        virus_name_len.append(0)
    else:
        virus_name.append(codevir[codevir["## taxon_id"]==int(s)]["official_name_NCBI"].to_list()[0])
        virus_name_len.append(1)

virus_names_df = pd.DataFrame({"name": virus_name, "ID": viruses_code})
virus_names_dict = dict(zip(viruses_code, virus_name))
ID_nonzeronames = np.array(virus_names_df[virus_names_df["name"]!=""].ID)

In [7]:
#filter only links with protein2 corresponding to known virus
links_pul = links[(links["protein2"].map(lambda x: np.any(x.split(".")[0] in ID_nonzeronames)))].reset_index(drop=True)

#links_pul["protein1"]=links_pul["protein1"].map(lambda x: x.split(".")[1])
links_pul["virus"]=links_pul["protein2"].map(lambda x: x.split(".")[0])
#links_pul["protein2"]=links_pul["protein2"].map(lambda x: x.split(".")[1])
links_pul["virus_name"]=itemgetter(*links_pul["virus"].to_list())(virus_names_dict)

In [8]:
links_pul

Unnamed: 0,protein1,protein2,neighborhood,neighborhood_transferred,fusion,cooccurence,homology,coexpression,coexpression_transferred,experiments,experiments_transferred,database,database_transferred,textmining,textmining_transferred,combined_score,virus,virus_name
0,9606.ENSP00000000233,10258.Q6TVW1_ORFSA,0,0,0,0,0,0,0,0,0,0,0,0,188,188,10258,Orf virus (strain Goat/Texas/SA00/2000)
1,9606.ENSP00000000233,10279.Q98187_MCV1,0,0,0,0,0,0,0,0,0,0,0,0,188,188,10279,Molluscum contagiosum virus subtype 1
2,9606.ENSP00000000233,10298.GM_HHV11,0,0,0,0,0,0,0,0,0,0,0,17,336,336,10298,Human herpesvirus 1 (strain 17)
3,9606.ENSP00000000233,37296.ORF4_HHV8P,0,0,0,0,0,0,0,0,0,0,0,91,109,155,37296,Human herpesvirus 8 type P (isolate GK18)
4,9606.ENSP00000000233,195054.PRO_0000039759,0,0,0,0,0,0,0,0,0,0,0,0,329,329,195054,Human parechovirus 2 (strain Williamson)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80770,9606.ENSP00000473243,10310.GB_HHV2H,0,0,0,0,0,0,0,0,0,0,0,0,227,227,10310,Human herpesvirus 2 (strain HG52)
80771,9606.ENSP00000473243,10298.GB_HHV11,0,0,0,0,0,0,0,0,0,0,0,0,227,227,10298,Human herpesvirus 1 (strain 17)
80772,9606.ENSP00000473243,10335.GB_VZVD,0,0,0,0,0,0,0,0,0,0,0,0,227,227,10335,Varicella-zoster virus (strain Dumas)
80773,9606.ENSP00000473243,10359.GB_HCMVM,0,0,0,0,0,0,0,0,0,0,0,0,227,227,10359,Human cytomegalovirus (strain Merlin)


In [27]:
#list of unique viruses names
unique_viruses_df = links_pul["virus_name"].unique()
unique_viruses = pd.Series(links_pul["virus_name"].unique())

unique_viruses = unique_viruses.map(lambda x: x.replace("(", ""))
unique_viruses = unique_viruses.map(lambda x: x.replace(")", ""))
unique_viruses = unique_viruses.map(lambda x: x.replace("/", "_"))
unique_viruses = unique_viruses.map(lambda x: x.replace(" ", "_"))

unique_viruses = unique_viruses.to_numpy()

In [36]:
#proteins for each virus which are in a link with text mining of high confidence 
##r experiment with mean confidence
type0_list = []
type1_list = []

for nv, un in zip(unique_viruses, unique_viruses_df):
    if not os.path.isdir("../Data/data_STRING/STRING_to_VIRPROT/"+nv):
        os.mkdir("../Data/data_STRING/STRING_to_VIRPROT/"+nv)
    provv_link = links_pul[links_pul["virus_name"]==un]
    type0_list.append(provv_link["protein2"].unique())
    type1_list.append(provv_link[(provv_link["textmining"]>700) | (provv_link["experiments"]>400)]["protein1"].unique())
    np.savetxt(X=type0_list[-1], fname="../Data/data_STRING/STRING_to_VIRPROT/"+nv+"/type0_bio.txt", fmt="%s")
    np.savetxt(X=type1_list[-1], fname="../Data/data_STRING/STRING_to_VIRPROT/"+nv+"/type1_bio.txt", fmt="%s")
    provv_link[["protein1","protein2"]].to_csv("../Data/data_STRING/STRING_to_VIRPROT/"+nv+"/edges_bio.txt", index=False)
res_df = pd.DataFrame({"name":unique_viruses, 
                       "len_vir":[len(l) for l in type0_list],
                       "len_hum":[len(l) for l in type1_list], 
                       "virus": type0_list, 
                       "human": type1_list})

In [30]:
res_df[res_df["len_hum"]>10]

Unnamed: 0,name,len_vir,len_hum,virus,human
2,Human_herpesvirus_1_strain_17,73,377,"[10298.GM_HHV11, 10298.NP04_HHV11, 10298.UL07_...","[9606.ENSP00000000412, 9606.ENSP00000002596, 9..."
3,Human_herpesvirus_8_type_P_isolate_GK18,69,200,"[37296.ORF4_HHV8P, 37296.GM_HHV8P, 37296.VMI2_...","[9606.ENSP00000012443, 9606.ENSP00000071281, 9..."
5,Varicella-zoster_virus_strain_Dumas,68,16,"[10335.NP04_VZVD, 10335.GM_VZVD, 10335.GI_VZVD...","[9606.ENSP00000078445, 9606.ENSP00000217244, 9..."
10,Human_herpesvirus_2_strain_HG52,56,20,"[10310.NP04_HHV2H, 10310.GM_HHV2H, 10310.GN_HH...","[9606.ENSP00000216274, 9606.ENSP00000245907, 9..."
15,Epstein-Barr_virus_strain_B95-8,86,1110,"[10376.GM_EBVB9, 10376.EBNA5_EBVB9, 10376.GL_E...","[9606.ENSP00000000442, 9606.ENSP00000005340, 9..."
18,Human_cytomegalovirus_strain_Merlin,163,160,"[10359.GM_HCMVM, 10359.GL_HCMVM, 10359.VP26_HC...","[9606.ENSP00000011653, 9606.ENSP00000046087, 9..."
20,Human_immunodeficiency_virus_type_1_group_M_su...,18,913,"[11676.PRO_0000038428, 11676.VPU_HV1H2, 11676....","[9606.ENSP00000009530, 9606.ENSP00000011653, 9..."
21,Measles_virus_strain_Ichinose-B95a,8,459,"[11234.PHOSP_MEASC, 11234.L_MEASC, 11234.FUS_M...","[9606.ENSP00000000412, 9606.ENSP00000006777, 9..."
22,Influenza_A_virus_strain_A_Puerto_Rico_8_1934_...,12,486,"[11320.HEMA_I34A1, 11320.M2_I34A1, 11320.NRAM_...","[9606.ENSP00000005340, 9606.ENSP00000009530, 9..."
23,Hepatitis_C_virus_genotype_1a_isolate_H,11,129,"[11103.PRO_0000037576, 11103.PRO_0000037575, 1...","[9606.ENSP00000001008, 9606.ENSP00000169298, 9..."


In [19]:
#mapping between node ID and name
mapper = pd.read_csv("data_STRING/9606.protein.info.v11.5.txt", sep="\t")
mapper["ID"] = mapper["#string_protein_id"].map(lambda x: x.split(".")[1])
mapper_dict = dict(zip(mapper["ID"], mapper["preferred_name"]))

In [29]:
mapper

Unnamed: 0,#string_protein_id,preferred_name,protein_size,annotation,ID
0,9606.ENSP00000000233,ARF5,180,ADP-ribosylation factor 5; GTP-binding protein...,ENSP00000000233
1,9606.ENSP00000000412,M6PR,277,Cation-dependent mannose-6-phosphate receptor;...,ENSP00000000412
2,9606.ENSP00000001008,FKBP4,459,Peptidyl-prolyl cis-trans isomerase FKBP4; Imm...,ENSP00000001008
3,9606.ENSP00000001146,CYP26B1,512,Cytochrome P450 26B1; Involved in the metaboli...,ENSP00000001146
4,9606.ENSP00000002125,NDUFAF7,441,"Protein arginine methyltransferase NDUFAF7, mi...",ENSP00000002125
...,...,...,...,...,...
19561,9606.ENSP00000485671,ENSG00000280273,120,HCG1991042,ENSP00000485671
19562,9606.ENSP00000485672,ENSG00000279458,86,annotation not available,ENSP00000485672
19563,9606.ENSP00000485673,ENSG00000279988,243,annotation not available,ENSP00000485673
19564,9606.ENSP00000485675,ENSG00000280116,84,annotation not available,ENSP00000485675


In [25]:
uga=np.isin(links_pul["protein1"].unique(),mapper["#string_protein_id"])

In [28]:
links_pul["protein1"].unique().shape

(8575,)

In [66]:
#filter again with proteins1 which code is mapped to a name
links_res = links_pul[links_pul["protein1"].isin(mapper["ID"])]
links_res["protein1_name"]=itemgetter(*links_res["protein1"].to_list())(mapper_dict)

In [74]:
#list of unique viruses names
unique_viruses = links_res["virus_name"].unique()

In [251]:
#proteins for each virus which are in a link with text mining of high confidence 
##r experiment with mean confidence
type0_list = []
type1_list = []

for un in unique_viruses:
    provv_link = links_res[links_res["virus_name"]==un]
    type0_list.append(provv_link["protein2"].unique())
    type1_list.append(provv_link[(provv_link["textmining"]>700) | (provv_link["experiments"]>400)]["protein1_name"].unique())

res_df = pd.DataFrame({"name":unique_viruses, 
                       "len_vir":[len(l) for l in type0_list],
                       "len_hum":[len(l) for l in type1_list], 
                       "virus": type0_list, 
                       "human": type1_list})
res_df["name"] = res_df["name"].map(lambda x: x.replace("(", ""))
res_df["name"] = res_df["name"].map(lambda x: x.replace(")", ""))
res_df["name"] = res_df["name"].map(lambda x: x.replace("/", "_"))
res_df["name"] = res_df["name"].map(lambda x: x.replace(" ", "_"))

In [121]:
#load ncbi database
ncbi = pd.read_csv("BIOGRID_data/Homo_sapiens.gene_info", sep="\t")
symbols = ncbi["Symbol"]
synonyms = [ncbi["Synonyms"][i].split("|") for i in range(len(ncbi))]
unique_synonims = np.unique(np.concatenate(synonyms))



In [252]:
resss = res_df[res_df["len_hum"]>0].sort_values("name").reset_index(drop=True)

In [255]:
np.any(np.isin(resss.loc[18,"human"], symbols))

True

In [257]:
new_list_human = []
for j in range(len(resss)):
    print(f"{j}/{len(resss)}")
    if not np.all(np.isin(resss.loc[j,"human"], symbols)):
        chg_idx = np.where(np.logical_not(np.isin(resss.loc[j,"human"], symbols)))[0]
        but_synon = np.isin(resss.loc[j,"human"][chg_idx], unique_synonims)

        new_names = []
        for i in tqdm(range(len(chg_idx))):
            if but_synon[i]:
                new_names.append(symbols[([resss.loc[j,"human"][chg_idx[i]] in s for s in synonyms])].values[0])
            else:
                new_names.append(resss.loc[j,"human"][chg_idx[i]])

        new_list_human.append(np.delete(resss.loc[j,"human"], chg_idx[np.where(np.logical_not(but_synon))[0]]))
    else:
        new_list_human.append(resss.loc[j,"human"])

0/83
1/83
2/83
3/83
4/83
5/83
6/83
7/83
8/83
9/83
10/83
11/83
12/83


100%|█████████████████████████████████████████████| 1/1 [00:02<00:00,  2.16s/it]


13/83
14/83
15/83
16/83
17/83
18/83


100%|███████████████████████████████████████████| 48/48 [01:37<00:00,  2.04s/it]


19/83
20/83
21/83
22/83
23/83
24/83
25/83


100%|███████████████████████████████████████████| 15/15 [00:30<00:00,  2.04s/it]


26/83


100%|█████████████████████████████████████████████| 1/1 [00:02<00:00,  2.09s/it]


27/83


100%|█████████████████████████████████████████████| 1/1 [00:02<00:00,  2.06s/it]


28/83
29/83
30/83
31/83


100%|█████████████████████████████████████████████| 2/2 [00:04<00:00,  2.13s/it]


32/83


100%|█████████████████████████████████████████████| 6/6 [00:12<00:00,  2.12s/it]


33/83


100%|█████████████████████████████████████████████| 3/3 [00:06<00:00,  2.12s/it]


34/83


100%|███████████████████████████████████████████| 26/26 [00:54<00:00,  2.10s/it]


35/83
36/83
37/83


100%|█████████████████████████████████████████████| 5/5 [00:10<00:00,  2.11s/it]


38/83


100%|███████████████████████████████████████████| 48/48 [01:33<00:00,  1.96s/it]


39/83


100%|█████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it]


40/83
41/83


100%|███████████████████████████████████████████| 16/16 [00:29<00:00,  1.85s/it]


42/83


100%|███████████████████████████████████████████| 20/20 [00:40<00:00,  2.00s/it]


43/83


100%|█████████████████████████████████████████████| 9/9 [00:19<00:00,  2.12s/it]


44/83


100%|███████████████████████████████████████████| 11/11 [00:23<00:00,  2.13s/it]


45/83


100%|███████████████████████████████████████████| 26/26 [00:52<00:00,  2.03s/it]


46/83
47/83
48/83


100%|█████████████████████████████████████████████| 4/4 [00:07<00:00,  1.96s/it]


49/83


100%|███████████████████████████████████████████| 29/29 [00:54<00:00,  1.89s/it]


50/83
51/83
52/83
53/83
54/83
55/83
56/83


100%|█████████████████████████████████████████████| 2/2 [00:04<00:00,  2.01s/it]


57/83
58/83
59/83
60/83
61/83
62/83
63/83
64/83
65/83
66/83


100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 6721.64it/s]


67/83
68/83


100%|█████████████████████████████████████████████| 8/8 [00:16<00:00,  2.06s/it]


69/83
70/83
71/83


100%|█████████████████████████████████████████████| 1/1 [00:02<00:00,  2.08s/it]


72/83


100%|█████████████████████████████████████████████| 1/1 [00:02<00:00,  2.12s/it]


73/83
74/83
75/83
76/83


100%|███████████████████████████████████████████| 21/21 [00:43<00:00,  2.08s/it]


77/83
78/83
79/83
80/83
81/83
82/83


100%|███████████████████████████████████████████| 25/25 [00:52<00:00,  2.11s/it]


In [264]:
resss["human_new"] = new_list_human

In [285]:
for i in range(len(resss)):
    if not os.path.isdir(os.path.isdir("STRING_data/STRING_to_VIRPROT/"+resss.loc[i,"name"])):
        os.mkdir("STRING_data/STRING_to_VIRPROT/"+resss.loc[i,"name"])
    np.savetxt(X=resss.loc[i,"human_new"], fname="STRING_data/STRING_to_VIRPROT/"+resss.loc[i,"name"]+"/type1.txt", fmt="%s")
    np.savetxt(X=resss.loc[i,"virus"], fname="STRING_data/STRING_to_VIRPROT/"+resss.loc[i,"name"]+"/type0.txt", fmt="%s")