# Libraries

In [15]:
import sys
sys.path.append('..')

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import pandas as pd
import graph_tool as gt
from tqdm import tqdm
from operator import itemgetter
from functools import reduce
import itertools
import scipy.sparse as sps
import random
import os
#from pymnet import *

import MuxVizPy as mxp

import gseapy as gp

import warnings
warnings.filterwarnings("ignore")


#set.seed(1)

# input data settings
NEIGH_ORDER = 1  # order of neighbors: 0 = only connected proteins, 1 = also first neighbors
CUT_THR = 0.7  # don't change this one

# Folder holding the per-virus data for the chosen threshold / neighbor order.
target_folder = f"../Data/Virus_data_Enriched_{CUT_THR}_Neigh_{NEIGH_ORDER}/"

# multilayer settings
layerCouplingStrength = 1
networkOfLayersType = "categorical"  # = all to all

# virus metadata (semicolon-separated, first row is the header)
virus_metadata = pd.read_csv("../Data/Files/viruses_metadata.csv", header=0, sep=";")

# Oncogenic / non-oncogenic subsets; reset_index() keeps the original row label in an "index" column.
_is_onco = virus_metadata["isOncogenic"].eq(True)
_is_nonco = virus_metadata["isOncogenic"].eq(False)
virus_metadata_onco = virus_metadata.loc[_is_onco].reset_index()
virus_metadata_nonco = virus_metadata.loc[_is_nonco].reset_index()

# Dictionary containing a unique mapping between the name of a protein and its index
# (the "Index" column of the file is shifted down by one).
node_map_df = pd.read_csv("../Data/Files/node_map.csv")
node_map_dict = dict(zip(node_map_df["Prot"], (idx - 1 for idx in node_map_df["Index"])))

# Function to create a list of n_iter combinations of non-oncogenic virus indexes, using a fixed random seed for repeatability
def SamplingForNoco(n_iter, start=0, group_dim=8, random_seed=1234,
                    metadata=None, neigh_order=None):
    """Draw reproducible groups of non-oncogenic virus row positions.

    Eligible rows are those that are not "Human_SARS_coronavirus_2" (column
    "virus"), not "Lymphocytic_choriomeningitis_virus" (column "virus_short"),
    have the requested "neigh_order" and have "isOncogenic" equal to False.

    Parameters
    ----------
    n_iter : int
        Number of groups to return.
    start : int
        Number of leading groups to skip. ``start + n_iter`` groups are drawn
        and the first ``start`` are discarded, so different windows of the
        same seeded sequence can be requested.
    group_dim : int
        Number of distinct positions in each group (drawn without replacement).
    random_seed : int
        Seed passed to ``np.random.seed``. NOTE: this re-seeds NumPy's *global*
        generator, so later ``np.random`` calls made by the caller are
        deterministic as well.
    metadata : pandas.DataFrame, optional
        Table with the columns listed above. Defaults to the module-level
        ``virus_metadata``.
    neigh_order : int, optional
        Neighbor order to select. Defaults to the module-level ``NEIGH_ORDER``.

    Returns
    -------
    numpy.ndarray
        Array with one row per group, each holding ``group_dim`` row positions.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when ``group_dim`` exceeds the number
        of eligible rows.
    """
    if metadata is None:
        metadata = virus_metadata
    if neigh_order is None:
        neigh_order = NEIGH_ORDER

    eligible_mask = np.all(
        [
            np.array(metadata["virus"] != "Human_SARS_coronavirus_2"),
            np.array(metadata["virus_short"] != "Lymphocytic_choriomeningitis_virus"),
            np.array(metadata["neigh_order"] == neigh_order),
            np.array(metadata["isOncogenic"] == False),  # noqa: E712 - element-wise comparison
        ],
        axis=0,
    )
    eligible = np.flatnonzero(eligible_mask)

    np.random.seed(random_seed)
    draws = [np.random.choice(eligible, group_dim, replace=False)
             for _ in range(n_iter + start)]
    return np.array(draws)[start:(n_iter + start)]

# No restrictions

## Index Lists

In [53]:
# For every oncogenic virus index, write the positions of the N1O samples that contain it
# to "<index>_index.txt" in the current working directory.
for o in onco_virus_indexes:
    sample_positions = [pos for pos, sample in enumerate(n1o_virus_indexes) if o in sample]
    np.savetxt(fname=f"{o}_index.txt", X=np.array(sample_positions, dtype=np.intp), fmt="%d")

In [4]:
#np.savetxt(X = np.where([67 in v for v in n1o_virus_indexes])[0], fname="67_index.txt")

In [46]:
# Display the first N1O sample: the sampled non-oncogenic indexes followed by the picked oncogenic index.
n1o_virus_indexes[0]

array([39,  1, 87, 28])

In [49]:
# Builds, for every group composition, the list of metadata row positions that
# make up each 4-virus multiplex. Naming: N = non-oncogenic, O = oncogenic,
# S = SARS-CoV-2; a digit is the number of viruses of the following kind.
n_iters = 3000
np.random.seed(100)

# Row positions (at the selected neighbor order) of SARS-CoV-2 and of the oncogenic viruses.
Sars_pos = np.where(np.array(np.all([virus_metadata["neigh_order"]==NEIGH_ORDER, virus_metadata["virus"]=="Human_SARS_coronavirus_2"], axis=0)))[0][0]
onco_virus_indexes = np.where(np.array(np.all([virus_metadata["neigh_order"]==NEIGH_ORDER, virus_metadata["isOncogenic"] == True], axis=0)))[0]
# Derived from the data so the combinations below stay valid if the metadata changes.
n_onco = len(onco_virus_indexes)


#N: 4 non-oncogenic
n_virus_indexes = SamplingForNoco(n_iters, group_dim=4, random_seed=41252145)


#N1O: 3 non-oncogenic + 1 oncogenic
# (the oncogenic picks use NumPy's global generator, which SamplingForNoco has just re-seeded)
n1o_virus_indexes = []
n1o_sampling = SamplingForNoco(n_iters, group_dim=3, random_seed=456)
for i in range(len(n1o_sampling)):
    onco_pick = np.random.choice(onco_virus_indexes)
    n1o_virus_indexes.append(np.concatenate([n1o_sampling[i], [onco_pick]]))

#N2O: 2 non-oncogenic + 2 oncogenic
n2o_virus_indexes = []
n2o_sampling = SamplingForNoco(n_iters, group_dim=2, random_seed=17521)
for i in range(len(n2o_sampling)):
    # replace=False: the two oncogenic viruses of a sample must be distinct
    onco_pick = np.random.choice(onco_virus_indexes, 2, replace=False)
    n2o_virus_indexes.append(np.concatenate([n2o_sampling[i], onco_pick]))

#N3O: every triple of oncogenic viruses paired with one eligible non-oncogenic virus
comb = list(itertools.combinations(range(n_onco), 3))
o1n_onco_comb = [list(onco_virus_indexes[list(c)]) for c in comb]

nonco_positions = np.where(np.all([np.array(virus_metadata["virus"]!="Human_SARS_coronavirus_2"),
                                  np.array(virus_metadata["virus_short"]!="Lymphocytic_choriomeningitis_virus"),
                                  np.array(virus_metadata["neigh_order"]==NEIGH_ORDER),
                                  np.array(virus_metadata["isOncogenic"]==False)],
                                  axis=0))[0]
# itertools.product already yields unique pairs in a fixed order; sampling from a list
# with a dedicated seeded generator keeps the draw reproducible (random.sample no longer
# accepts a set on Python >= 3.11). Raises ValueError if fewer than n_iters pairs exist.
n3o_pairs = list(itertools.product(range(len(o1n_onco_comb)), nonco_positions))
n3o_virus_indexes = [np.concatenate([list(o1n_onco_comb[triple_pos]), [nonco_pos]])
                     for triple_pos, nonco_pos in random.Random(100).sample(n3o_pairs, n_iters)]

#O: every 4-combination of oncogenic viruses (exhaustive, not sampled)
comb = list(itertools.combinations(range(n_onco), 4))
o_virus_indexes = np.array([list(onco_virus_indexes[list(c)]) for c in comb])


########################################################################
#N1S: 3 non-oncogenic + SARS-CoV-2
Snonco_nonco_samples = SamplingForNoco(n_iters, group_dim=3, random_seed=4563)
n1s_virus_indexes = np.concatenate([Snonco_nonco_samples, np.repeat(Sars_pos,n_iters).reshape([n_iters,1])], axis=1)

#N1O1S: 2 non-oncogenic + 1 oncogenic + SARS-CoV-2
n1o1s_virus_indexes = []
n1o1s_sampling = SamplingForNoco(n_iters, group_dim=2, random_seed=783)
for i in range(len(n1o1s_sampling)):
    onco_pick = np.random.choice(onco_virus_indexes)
    n1o1s_virus_indexes.append(np.concatenate([n1o1s_sampling[i], [onco_pick], [Sars_pos]]))

#N2O1S: 1 non-oncogenic + 2 distinct oncogenic + SARS-CoV-2
n2o1s_virus_indexes = []
n2o1s_sampling = SamplingForNoco(n_iters, group_dim=1, random_seed=154)
for i in range(len(n2o1s_sampling)):
    onco_pick = np.random.choice(onco_virus_indexes, 2, replace=False)
    n2o1s_virus_indexes.append(np.concatenate([n2o1s_sampling[i], onco_pick, [Sars_pos]]))

#O1S: every triple of oncogenic viruses + SARS-CoV-2
comb = list(itertools.combinations(range(n_onco), 3))
o1s_virus_indexes = np.array([list(onco_virus_indexes[list(c)])+[Sars_pos] for c in comb])

In [50]:
# Groups processed by the cells below. Name and samples are kept side by side so the two
# derived lists cannot get out of step; comment a pair out to skip that group.
_selected_groups = [
    ("n", n_virus_indexes),
    ("n1o", n1o_virus_indexes),
    # ("n2o", n2o_virus_indexes),
    # ("n3o", n3o_virus_indexes),
    # ("o", o_virus_indexes),
    # ("n1s", n1s_virus_indexes),
    # ("n1o1s", n1o1s_virus_indexes),
    # ("n2o1s", n2o1s_virus_indexes),
    # ("o1s", o1s_virus_indexes),
]

index_lists = [samples for _, samples in _selected_groups]
names_lists = [name for name, _ in _selected_groups]

# Number of samples available per selected group.
print({name: len(samples) for name, samples in zip(names_lists, index_lists)})

{'n': 3000, 'n1o': 3000}


## Percolation

In [None]:
########################################PERC CRITP#######################################################

# For samples 70..999 of every selected group: build the multiplex, rank the nodes by the
# "vers" column of the random-walk centrality table, run graph-tool's vertex percolation on
# the aggregate network in that order, and append the normalised position of the peak of
# the returned curve to "<group>.txt".
perc_dir = "../Data/ComponentsNew/Perc/"
# Create the directory the results are actually written to.
os.makedirs(perc_dir, exist_ok=True)

for i in tqdm(range(70,1000)):
    for nam, lst in zip(names_lists, index_lists):
        net = mxp.VirusMultiplex(lst[i], target_folder=target_folder, virus_metadata=virus_metadata)

        tensor = mxp.build.get_node_tensor_from_network_list(net.g_list)

        order = mxp.versatility.get_multi_RW_centrality_edge_colored(tensor)
        # node ids sorted by ascending "vers" value
        order = order.sort_values("vers")["phy nodes"].to_numpy()
        g_agg = mxp.build.get_aggregate_network(tensor)

        # second=True: per the graph-tool docs this tracks the second-largest component
        perc_agg_2 = gt.topology.vertex_percolation(g_agg, order, second=True)[0]
        max_perc = np.argmax(perc_agg_2)/len(perc_agg_2)

        # Append one value per line. This yields the same file as reloading and rewriting
        # the whole history on every iteration, without the quadratic cost.
        with open(perc_dir+nam+".txt", "a") as perc_file:
            perc_file.write("%.5f\n" % max_perc)
        

 26%|█████████▊                           | 246/930 [1:19:27<3:43:36, 19.61s/it]

## Components

In [None]:
############################COMPONENTS##################################################################

# For samples 1000.. of every selected group: compute the mesoscale result of the multiplex
# with mxp.mesoscale.get_mod and append its two outputs to "<group>_mods.txt" (integers)
# and "<group>_mody.txt" (floats).
mod_dir = "../Data/Mod/"
# Never index past the shortest sample list (the lists built above hold n_iters entries).
mod_stop = min([4000] + [len(samples) for samples in index_lists])

for i in range(1000, mod_stop):
    for nam, lst in zip(names_lists, index_lists):
        net = mxp.VirusMultiplex(lst[i], target_folder=target_folder, virus_metadata=virus_metadata)

        # NOTE(review): assumes mod_res[0] / mod_res[1] are array-likes with one entry per
        # n_iter run - confirm against mxp.mesoscale.get_mod.
        mod_res = mxp.mesoscale.get_mod(g_multi=net.g_multi, n_iter=1)

        # Append the new values, one per line, instead of reloading and rewriting the file.
        with open(mod_dir+nam+"_mods.txt", "a") as mods_file:
            np.savetxt(mods_file, np.ravel(mod_res[0]), fmt="%d")

        with open(mod_dir+nam+"_mody.txt", "a") as mody_file:
            np.savetxt(mody_file, np.ravel(mod_res[1]), fmt="%.5f")
        

## LCC only

In [None]:
########################################LCC ONLY#######################################################

# For the first 2000 samples of every selected group: append the size of the multilayer
# largest connected component (length of mxp.topology.get_multi_LCC's result) to
# "<group>_lcc.txt".
lcc_dir = "../Data/ComponentsNew/LCC/"
# Create the directory the results are actually written to.
os.makedirs(lcc_dir, exist_ok=True)

for i in tqdm(range(2000)):
    for nam, lst in zip(names_lists, index_lists):
        net = mxp.VirusMultiplex(lst[i], target_folder=target_folder, virus_metadata=virus_metadata)

        #components
        lcc_curr = len(mxp.topology.get_multi_LCC(net.g_list))

        # Append one value per line. This yields the same file as reloading and rewriting
        # the whole history on every iteration, without the quadratic cost.
        with open(lcc_dir+nam+"_lcc.txt", "a") as lcc_file:
            lcc_file.write("%d\n" % lcc_curr)
        

 49%|███████████████████▌                    | 980/2000 [26:35<23:39,  1.39s/it]

## Centrality

In [None]:
##############################CENTR ONLY###########################################################

# For samples 1000..2999 of every selected group: compute the random-walk centrality of the
# multiplex, scatter the "vers" values into a vector laid out by BIOGRID node id, scale it
# by its maximum and save it as "<group>/<sample>.txt".
biostr_df = pd.read_csv("../Data/data_BIOGRID/BIOGRID_homo_sapiens.nodes", sep=" ")
biostr_map = dict(zip(biostr_df["nodeSymbol"], biostr_df["nodeID"]))

centr_dir = "../Data/ClassificationDataALL_4vir/topology_p/"
# Output folders are created once, parents included, instead of being checked per sample.
for nam in names_lists:
    os.makedirs(centr_dir+nam, exist_ok=True)

for i in tqdm(range(1000,3000)):
    for nam, lst in zip(names_lists, index_lists):
        net = mxp.VirusMultiplex(lst[i], target_folder=target_folder, virus_metadata=virus_metadata)
        tensor = mxp.build.get_node_tensor_from_network_list(net.g_list)

        res_df = mxp.versatility.get_multi_RW_centrality_edge_colored(node_tensor=tensor, cval=0.15)

        # Position of every node of this multiplex in the global vector.
        # NOTE(review): assumes "nodeID" values are valid 0-based positions and that res_df
        # rows follow the order of net.node_map - confirm.
        node_ids = np.array([biostr_map[prot] for prot in net.node_map.keys()])

        centr_norm = np.zeros(len(biostr_map))
        centr_norm[node_ids] = res_df["vers"].to_numpy()
        centr_norm = centr_norm/centr_norm.max()

        np.savetxt(X=centr_norm, fname=centr_dir+nam+"/"+str(i)+".txt", fmt="%.4e")
        # The disabled LCC/LIC/LVC size bookkeeping that used to live here inside a string
        # literal (dead code re-evaluated on every iteration) has been removed.

  1%|▏                                      | 11/2000 [00:37<1:33:09,  2.81s/it]

In [None]:
##################################COMPONENTS########################################################

# For samples 70..1999 of every selected group: compute the component returned by
# mxp.topology.get_multi_LVC, translate its members to global protein indexes through
# node_map_dict and add it to the ensemble stored in "<group>.txt".
lvc_dir = "../Data/ComponentsNew/LVC_all/"
# Create the directory the results are actually written to.
os.makedirs(lvc_dir, exist_ok=True)

for i in tqdm(range(70,2000)):
    for nam, lst in zip(names_lists, index_lists):
        net = mxp.VirusMultiplex(lst[i], target_folder=target_folder, virus_metadata=virus_metadata)

        #components
        #lic_curr = mxp.topology.get_multi_LIC(net.g_list)
        #if len(lic_curr)!=0:
        #    lic_curr = itemgetter(*np.array(list(net.node_map.keys()))[lic_curr])(node_map_dict)
        #if type(lic_curr)==int:
        #    lic_curr = [lic_curr]
        lvc_curr = mxp.topology.get_multi_LVC(net.g_list, printt=False)
        if len(lvc_curr)!=0:
            # local node positions -> protein names -> global indexes
            lvc_curr = itemgetter(*np.array(list(net.node_map.keys()))[lvc_curr])(node_map_dict)
        # itemgetter returns a bare scalar for a single member; wrap it so every component
        # is a sequence (np.integer covers NumPy-typed indexes as well).
        if isinstance(lvc_curr, (int, np.integer)):
            lvc_curr = [lvc_curr]

        #if not os.path.isfile("../Data/ComponentsNew/LIC_all/"+nam+".txt"):
        #    mxp.utils.writeComponent(fname="../Data/ComponentsNew/LIC_all/"+nam+".txt", ensemble=[lic_curr])
        #else:
        #    lic_list = mxp.utils.readComponent("../Data/ComponentsNew/LIC_all/"+nam+".txt")
        #    lic_list.append(lic_curr)
        #    mxp.utils.writeComponent(ensemble=lic_list, fname="../Data/ComponentsNew/LIC_all/"+nam+".txt")

        lvc_file = lvc_dir+nam+".txt"
        if not os.path.isfile(lvc_file):
            mxp.utils.writeComponent(fname=lvc_file, ensemble=[lvc_curr])
        else:
            lvc_list = mxp.utils.readComponent(lvc_file)
            lvc_list.append(lvc_curr)
            mxp.utils.writeComponent(ensemble=lvc_list, fname=lvc_file)
        

 25%|████████▉                          | 490/1930 [3:48:30<15:24:19, 38.51s/it]

# Deleting onco virus 67

## Index Lists

In [None]:
# Row positions of the oncogenic viruses at the selected neighbor order.
onco_virus_indexes = np.where(np.array(np.all([virus_metadata["neigh_order"]==NEIGH_ORDER, virus_metadata["isOncogenic"] == True], axis=0)))[0]
# Same list without metadata row 67. The row is removed by value rather than by its
# position in the array, so the right virus is dropped even if the metadata order changes.
onco_virus_indexes_del = onco_virus_indexes[onco_virus_indexes != 67]

In [233]:
# Same group compositions as in the unrestricted section, but oncogenic viruses are taken
# from onco_virus_indexes_del. Naming: N = non-oncogenic, O = oncogenic, S = SARS-CoV-2.
n_iters = 2000

# Computed up front because the N1S / N1O1S / N2O1S groups below need it.
Sars_pos = np.where(np.array(np.all([virus_metadata["neigh_order"]==NEIGH_ORDER, virus_metadata["virus"]=="Human_SARS_coronavirus_2"], axis=0)))[0][0]
# Derived from the data so the combinations below stay valid if the metadata changes.
n_onco_del = len(onco_virus_indexes_del)

#N: 4 non-oncogenic
n_virus_indexes = SamplingForNoco(n_iters, group_dim=4, random_seed=41252145)


#N1O: 3 non-oncogenic + 1 oncogenic
# (the oncogenic picks use NumPy's global generator, which SamplingForNoco has just re-seeded)
n1o_virus_indexes = []
n1o_sampling = SamplingForNoco(n_iters, group_dim=3, random_seed=456)
for i in range(len(n1o_sampling)):
    onco_pick = np.random.choice(onco_virus_indexes_del)
    n1o_virus_indexes.append(np.concatenate([n1o_sampling[i], [onco_pick]]))

#N2O: 2 non-oncogenic + 2 oncogenic
n2o_virus_indexes = []
n2o_sampling = SamplingForNoco(n_iters, group_dim=2, random_seed=17521)
for i in range(len(n2o_sampling)):
    # replace=False: the two oncogenic viruses of a sample must be distinct
    onco_pick = np.random.choice(onco_virus_indexes_del, 2, replace=False)
    n2o_virus_indexes.append(np.concatenate([n2o_sampling[i], onco_pick]))

#N3O: every triple of oncogenic viruses paired with one eligible non-oncogenic virus
comb = list(itertools.combinations(range(n_onco_del), 3))
o1n_onco_comb = [list(onco_virus_indexes_del[list(c)]) for c in comb]

nonco_positions = np.where(np.all([np.array(virus_metadata["virus"]!="Human_SARS_coronavirus_2"),
                                  np.array(virus_metadata["virus_short"]!="Lymphocytic_choriomeningitis_virus"),
                                  np.array(virus_metadata["neigh_order"]==NEIGH_ORDER),
                                  np.array(virus_metadata["isOncogenic"]==False)],
                                  axis=0))[0]
# itertools.product already yields unique pairs in a fixed order; sampling from a list
# with a dedicated seeded generator keeps the draw reproducible (random.sample no longer
# accepts a set on Python >= 3.11). Raises ValueError if fewer than n_iters pairs exist.
n3o_pairs = list(itertools.product(range(len(o1n_onco_comb)), nonco_positions))
n3o_virus_indexes = [np.concatenate([list(o1n_onco_comb[triple_pos]), [nonco_pos]])
                     for triple_pos, nonco_pos in random.Random(100).sample(n3o_pairs, n_iters)]

#O: every 4-combination of oncogenic viruses (exhaustive, not sampled)
comb = list(itertools.combinations(range(n_onco_del), 4))
o_virus_indexes = np.array([list(onco_virus_indexes_del[list(c)]) for c in comb])


########################################################################
#N1S: 3 non-oncogenic + SARS-CoV-2
Snonco_nonco_samples = SamplingForNoco(n_iters, group_dim=3, random_seed=4563)
n1s_virus_indexes = np.concatenate([Snonco_nonco_samples, np.repeat(Sars_pos,n_iters).reshape([n_iters,1])], axis=1)

#N1O1S: 2 non-oncogenic + 1 oncogenic + SARS-CoV-2
n1o1s_virus_indexes = []
n1o1s_sampling = SamplingForNoco(n_iters, group_dim=2, random_seed=783)
for i in range(len(n1o1s_sampling)):
    onco_pick = np.random.choice(onco_virus_indexes_del)
    n1o1s_virus_indexes.append(np.concatenate([n1o1s_sampling[i], [onco_pick], [Sars_pos]]))

#N2O1S: 1 non-oncogenic + 2 distinct oncogenic + SARS-CoV-2
n2o1s_virus_indexes = []
n2o1s_sampling = SamplingForNoco(n_iters, group_dim=1, random_seed=154)
for i in range(len(n2o1s_sampling)):
    onco_pick = np.random.choice(onco_virus_indexes_del, 2, replace=False)
    n2o1s_virus_indexes.append(np.concatenate([n2o1s_sampling[i], onco_pick, [Sars_pos]]))

#O1S: every triple of oncogenic viruses + SARS-CoV-2
comb = list(itertools.combinations(range(n_onco_del), 3))
o1s_virus_indexes = np.array([list(onco_virus_indexes_del[list(c)])+[Sars_pos] for c in comb])

In [250]:
# Groups processed by the cells below. Name and samples are kept side by side so the two
# derived lists cannot get out of step; comment a pair out to skip that group.
_selected_groups = [
    # ("n", n_virus_indexes),
    ("n1o", n1o_virus_indexes),
    ("n2o", n2o_virus_indexes),
    ("n3o", n3o_virus_indexes),
    # ("o", o_virus_indexes),
    # ("n1s", n1s_virus_indexes),
    # ("n1o1s", n1o1s_virus_indexes),
    # ("n2o1s", n2o1s_virus_indexes),
    # ("o1s", o1s_virus_indexes),
]

index_lists = [samples for _, samples in _selected_groups]
names_lists = [name for name, _ in _selected_groups]

# Number of samples available per selected group.
print({name: len(samples) for name, samples in zip(names_lists, index_lists)})

## Data

In [None]:
# For the first 2000 samples of every selected group: compute the random-walk centrality of
# the multiplex, scatter the "vers" values into a vector laid out by node_map_dict index,
# scale it by its maximum and save it as "<group>/<sample>.txt".
# (The former "net = net_onco" assignment was dropped: net is rebuilt before every use.)
train_dir = "../Data/ClassificationData_4vir/train/"
# Output folders are created once, parents included, instead of being checked per sample.
for nam in names_lists:
    os.makedirs(train_dir+nam, exist_ok=True)

# One progress bar over the samples (the group zip has no length for tqdm to show).
for i in tqdm(range(2000)):
    for nam, lst in zip(names_lists, index_lists):
        net = mxp.VirusMultiplex(lst[i], target_folder=target_folder, virus_metadata=virus_metadata)
        tensor = mxp.build.get_node_tensor_from_network_list(net.g_list)

        res_df = mxp.versatility.get_multi_RW_centrality_edge_colored(node_tensor=tensor, cval=0.15)

        # Position of every node of this multiplex in the global vector.
        # NOTE(review): assumes res_df rows follow the order of net.node_map - confirm.
        node_ids = np.array([node_map_dict[prot] for prot in net.node_map.keys()])

        centr_norm = np.zeros(len(node_map_dict))
        centr_norm[node_ids] = res_df["vers"].to_numpy()
        centr_norm = centr_norm/centr_norm.max()

        np.savetxt(X=centr_norm, fname=train_dir+nam+"/"+str(i)+".txt", fmt="%.6f")
        

# Always with virus 67

In [262]:
# Group compositions in which metadata row 67 is always the (or one of the) oncogenic
# virus(es); any further oncogenic virus comes from onco_virus_indexes_del.
n_iters=100
ALWAYS_ONCO = 67

#N1O: 3 non-oncogenic + virus 67
n1o_virus_indexes = []
n1o_sampling = SamplingForNoco(n_iters, group_dim=3, random_seed=456)
for i in range(len(n1o_sampling)):
    n1o_virus_indexes.append(np.concatenate([n1o_sampling[i], [ALWAYS_ONCO]]))

#N2O: 2 non-oncogenic + 1 other oncogenic + virus 67
# (the oncogenic pick uses NumPy's global generator, which SamplingForNoco has just re-seeded)
n2o_virus_indexes = []
n2o_sampling = SamplingForNoco(n_iters, group_dim=2, random_seed=17521)
for i in range(len(n2o_sampling)):
    onco_pick = np.random.choice(onco_virus_indexes_del, 1)
    n2o_virus_indexes.append(np.concatenate([n2o_sampling[i], onco_pick,[ALWAYS_ONCO]]))

#N3O: every pair of other oncogenic viruses + 1 eligible non-oncogenic + virus 67
comb = list(itertools.combinations(range(len(onco_virus_indexes_del)), 2))
o1n_onco_comb = [list(onco_virus_indexes_del[list(c)]) for c in comb]

nonco_positions = np.where(np.all([np.array(virus_metadata["virus"]!="Human_SARS_coronavirus_2"),
                                  np.array(virus_metadata["virus_short"]!="Lymphocytic_choriomeningitis_virus"),
                                  np.array(virus_metadata["neigh_order"]==NEIGH_ORDER),
                                  np.array(virus_metadata["isOncogenic"]==False)],
                                  axis=0))[0]
# itertools.product already yields unique pairs in a fixed order, so no set() is needed.
n3o_virus_indexes = [np.concatenate([list(o1n_onco_comb[pair_pos]), [nonco_pos],[ALWAYS_ONCO]])
                     for pair_pos, nonco_pos in itertools.product(range(len(o1n_onco_comb)), nonco_positions)]
# Dedicated seeded generator so the selected subset is reproducible.
# Raises ValueError if fewer than n_iters candidates exist.
n3o_virus_indexes = random.Random(100).sample(n3o_virus_indexes, n_iters)

In [263]:
# Groups processed by the cells below. Name and samples are kept side by side so the two
# derived lists cannot get out of step; comment a pair out to skip that group.
_selected_groups = [
    # ("n", n_virus_indexes),
    ("n1o", n1o_virus_indexes),
    ("n2o", n2o_virus_indexes),
    ("n3o", n3o_virus_indexes),
    # ("o", o_virus_indexes),
    # ("n1s", n1s_virus_indexes),
    # ("n1o1s", n1o1s_virus_indexes),
    # ("n2o1s", n2o1s_virus_indexes),
    # ("o1s", o1s_virus_indexes),
]

index_lists = [samples for _, samples in _selected_groups]
names_lists = [name for name, _ in _selected_groups]

# Number of samples available per selected group.
print({name: len(samples) for name, samples in zip(names_lists, index_lists)})

{'n1o': 100, 'n2o': 100, 'n3o': 100}


In [264]:
# For samples 50..99 of every selected group: compute the random-walk centrality of the
# multiplex, scatter the "vers" values into a vector laid out by node_map_dict index,
# scale it by its maximum and save it as "<group>/<sample>.txt".
# (The former "net = net_onco" assignment was dropped: net is rebuilt before every use.)
val_dir = "../Data/ClassificationData_4vir/val/"
# Output folders are created once, parents included, instead of being checked per sample.
for nam in names_lists:
    os.makedirs(val_dir+nam, exist_ok=True)

for i in tqdm(range(50,100)):
    for nam, lst in zip(names_lists, index_lists):
        net = mxp.VirusMultiplex(lst[i], target_folder=target_folder, virus_metadata=virus_metadata)
        tensor = mxp.build.get_node_tensor_from_network_list(net.g_list)

        res_df = mxp.versatility.get_multi_RW_centrality_edge_colored(node_tensor=tensor, cval=0.15)

        # Position of every node of this multiplex in the global vector.
        # NOTE(review): assumes res_df rows follow the order of net.node_map - confirm.
        node_ids = np.array([node_map_dict[prot] for prot in net.node_map.keys()])

        centr_norm = np.zeros(len(node_map_dict))
        centr_norm[node_ids] = res_df["vers"].to_numpy()
        centr_norm = centr_norm/centr_norm.max()

        np.savetxt(X=centr_norm, fname=val_dir+nam+"/"+str(i)+".txt", fmt="%.6f")
        

100%|███████████████████████████████████████████| 50/50 [06:38<00:00,  7.97s/it]


# With Synt Viruses

## 330 - Nonco

In [77]:
# NOTE(review): SamplingForNoco re-seeds NumPy's global generator, so this seed does not
# influence the random draws made after the sampling calls below.
np.random.seed(123451)
n_iters = 100

original_dir = "../Data/SynteticViruses/Original/"
synthetic_dir = "../Data/SynteticViruses/330_Nonco/"

# N1YN: 3 sampled non-oncogenic viruses + one synthetic virus folder numbered 0..49
n1yn_sampling = SamplingForNoco(n_iters, group_dim=3, random_seed=456)
n1yn_virus_dir = []
for sampled_rows in n1yn_sampling:
    sample_dirs = [original_dir + str(virus_metadata.loc[row, "virus_short"]) for row in sampled_rows]
    sample_dirs.append(synthetic_dir + str(np.random.randint(50)))
    n1yn_virus_dir.append(sample_dirs)

# N1O1YN: 2 sampled non-oncogenic viruses + one synthetic virus folder + one oncogenic virus
n1o1yn_sampling = SamplingForNoco(n_iters, group_dim=2, random_seed=456)
n1o1yn_virus_dir = []
for sampled_rows in n1o1yn_sampling:
    sample_dirs = [original_dir + str(virus_metadata.loc[row, "virus_short"]) for row in sampled_rows]
    sample_dirs.append(synthetic_dir + str(np.random.randint(50)))
    onco_row = np.random.choice(onco_virus_indexes)
    sample_dirs.append(original_dir + str(virus_metadata.loc[onco_row, "virus_short"]))
    n1o1yn_virus_dir.append(sample_dirs)


In [78]:
# Groups processed by the cell below; name and directory lists are kept side by side so
# the two derived lists cannot get out of step.
_selected_groups = [
    ("n1yn", n1yn_virus_dir),
    ("n1o1yn", n1o1yn_virus_dir),
]

index_lists = [samples for _, samples in _selected_groups]
names_lists = [name for name, _ in _selected_groups]

# Number of samples available per selected group.
print({name: len(samples) for name, samples in zip(names_lists, index_lists)})

{'n1yn': 100, 'n1o1yn': 100}


In [9]:
# BIOGRID node table (space-separated) and the lookup from node symbol to node id built from it.
biostr_df = pd.read_csv("../Data/data_BIOGRID/BIOGRID_homo_sapiens.nodes", sep=" ")

_node_symbols = biostr_df["nodeSymbol"].tolist()
_node_ids = biostr_df["nodeID"].tolist()
biostr_map = dict(zip(_node_symbols, _node_ids))

In [None]:
# For the 100 samples of every selected group: build the multiplex from its directory list,
# compute the random-walk centrality, scatter the "vers" values into a vector laid out by
# BIOGRID node id, scale it by its maximum and save it as "<group>/<sample>.txt".
synt_out_dir = "../Data/ClassificationDataALL_4vir/Test_Synt_330Nonco/"
# Output folders are created once, parents included, instead of being checked per sample.
for nam in names_lists:
    os.makedirs(synt_out_dir+nam, exist_ok=True)

for i in tqdm(range(100)):
    for nam, lst in zip(names_lists, index_lists):
        net = mxp.VirusMultiplex_from_dirlist(lst[i])
        tensor = mxp.build.get_node_tensor_from_network_list(net.g_list)

        res_df = mxp.versatility.get_multi_RW_centrality_edge_colored(node_tensor=tensor, cval=0.15)

        # Position of every node of this multiplex in the global vector.
        # NOTE(review): assumes "nodeID" values are valid 0-based positions and that res_df
        # rows follow the order of net.node_map - confirm.
        node_ids = np.array([biostr_map[prot] for prot in net.node_map.keys()])

        centr_norm = np.zeros(len(biostr_map))
        centr_norm[node_ids] = res_df["vers"].to_numpy()
        centr_norm = centr_norm/centr_norm.max()

        np.savetxt(X=centr_norm, fname=synt_out_dir+nam+"/"+str(i)+".txt", fmt="%.3e")
        

## Distr - Sars

In [6]:
# NOTE(review): SamplingForNoco re-seeds NumPy's global generator, so this seed does not
# influence the random draws made after the sampling calls below.
np.random.seed(123451)
n_iters = 100

original_dir = "../Data/SynteticViruses/Original/"
synthetic_dir = "../Data/SynteticViruses/Distr_Sars/"

# N1YN: 3 sampled non-oncogenic viruses + one synthetic virus folder numbered 0..49
n1yn_sampling = SamplingForNoco(n_iters, group_dim=3, random_seed=456)
n1yn_virus_dir = []
for sampled_rows in n1yn_sampling:
    sample_dirs = [original_dir + str(virus_metadata.loc[row, "virus_short"]) for row in sampled_rows]
    sample_dirs.append(synthetic_dir + str(np.random.randint(50)))
    n1yn_virus_dir.append(sample_dirs)

# N1O1YN: 2 sampled non-oncogenic viruses + one synthetic virus folder + one oncogenic virus
n1o1yn_sampling = SamplingForNoco(n_iters, group_dim=2, random_seed=456)
n1o1yn_virus_dir = []
for sampled_rows in n1o1yn_sampling:
    sample_dirs = [original_dir + str(virus_metadata.loc[row, "virus_short"]) for row in sampled_rows]
    sample_dirs.append(synthetic_dir + str(np.random.randint(50)))
    onco_row = np.random.choice(onco_virus_indexes)
    sample_dirs.append(original_dir + str(virus_metadata.loc[onco_row, "virus_short"]))
    n1o1yn_virus_dir.append(sample_dirs)


In [7]:
# Groups processed by the cell below; name and directory lists are kept side by side so
# the two derived lists cannot get out of step.
_selected_groups = [
    ("n1yn", n1yn_virus_dir),
    ("n1o1yn", n1o1yn_virus_dir),
]

index_lists = [samples for _, samples in _selected_groups]
names_lists = [name for name, _ in _selected_groups]

# Number of samples available per selected group.
print({name: len(samples) for name, samples in zip(names_lists, index_lists)})

{'n1yn': 100, 'n1o1yn': 100}


In [10]:
# For the 100 samples of every selected group: build the multiplex from its directory list,
# compute the random-walk centrality, scatter the "vers" values into a vector laid out by
# BIOGRID node id, scale it by its maximum and save it as "<group>/<sample>.txt".
synt_out_dir = "../Data/ClassificationDataALL_4vir/Test_Synt_DistrSars/"
# Output folders are created once, parents included, instead of being checked per sample.
for nam in names_lists:
    os.makedirs(synt_out_dir+nam, exist_ok=True)

for i in tqdm(range(100)):
    for nam, lst in zip(names_lists, index_lists):
        net = mxp.VirusMultiplex_from_dirlist(lst[i])
        tensor = mxp.build.get_node_tensor_from_network_list(net.g_list)

        res_df = mxp.versatility.get_multi_RW_centrality_edge_colored(node_tensor=tensor, cval=0.15)

        # Position of every node of this multiplex in the global vector.
        # NOTE(review): assumes "nodeID" values are valid 0-based positions and that res_df
        # rows follow the order of net.node_map - confirm.
        node_ids = np.array([biostr_map[prot] for prot in net.node_map.keys()])

        centr_norm = np.zeros(len(biostr_map))
        centr_norm[node_ids] = res_df["vers"].to_numpy()
        centr_norm = centr_norm/centr_norm.max()

        np.savetxt(X=centr_norm, fname=synt_out_dir+nam+"/"+str(i)+".txt", fmt="%.3e")
        

100%|█████████████████████████████████████████| 100/100 [04:00<00:00,  2.40s/it]


# COEXPR

In [81]:
# Scratch expression: evaluates a two-element list literal and assigns nothing.
["a","b"]

TypeError: can only concatenate str (not "list") to str

In [84]:
# 3000 random virus-directory groups for the coexpression data:
#   n_combs   - 4 distinct non-oncogenic directories per group (plain lists)
#   n1o_combs - 3 distinct non-oncogenic directories + 1 oncogenic directory (string arrays)
np.random.seed(12345)

nonco_prefix = "../Data/data_STRING/coexpr_vir/nonco/"
onco_prefix = "../Data/data_STRING/coexpr_vir/onco/"
# os.listdir returns entries in arbitrary order, so the listings are sorted to make the
# seeded draws reproducible; they are also read once instead of once per draw.
nonco_names = sorted(os.listdir("../Data/data_STRING/coexpr_vir/nonco"))
onco_names = sorted(os.listdir("../Data/data_STRING/coexpr_vir/onco"))

n_combs = [[nonco_prefix+a for a in np.random.choice(nonco_names, 4, replace=False)]
           for i in range(3000)]
n1o_combs = [np.concatenate([[nonco_prefix+a for a in np.random.choice(nonco_names, 3, replace=False)],
                             [onco_prefix+a for a in np.random.choice(onco_names, 1)]])
             for i in range(3000)]

In [86]:
# Display the first N1O coexpression group (non-oncogenic directories followed by one oncogenic directory).
n1o_combs[0]

array(['../Data/data_STRING/coexpr_vir/nonco/Reovirus_type_1_strain_Lang',
       '../Data/data_STRING/coexpr_vir/nonco/Avian_leukosis_virus_RSA',
       '../Data/data_STRING/coexpr_vir/nonco/Norwalk_virus_strain_GI_Human_United_States_Norwalk_1968',
       '../Data/data_STRING/coexpr_vir/onco/Human_T-cell_leukemia_virus_1_isolate_Caribbea_HS-35_subtype_A'],
      dtype='<U98')

In [None]:
# For each of the 3000 coexpression groups ("n" and "n1o"): build the multiplex from its
# directory list, compute the random-walk centrality, scatter the "vers" values into a
# vector laid out by coexpression node id, scale it by its maximum and save it as
# "<group>/<sample>.txt".
coexpr_df = pd.read_csv("../Data/data_STRING/coexpr.nodes", sep=" ")
coexpr_map = dict(zip(coexpr_df["Prot"], coexpr_df["ID"]))

coex_out_dir = "../Data/ClassificationDataALL_4vir/topology_coex/"
coex_groups = [("n", n_combs), ("n1o", n1o_combs)]
# Output folders are created once, parents included, instead of being checked per sample.
for nam, _ in coex_groups:
    os.makedirs(coex_out_dir+nam, exist_ok=True)

for i in tqdm(range(3000)):
    for nam, lst in coex_groups:
        net = mxp.VirusMultiplex_from_dirlist(lst[i])
        tensor = mxp.build.get_node_tensor_from_network_list(net.g_list)

        res_df = mxp.versatility.get_multi_RW_centrality_edge_colored(node_tensor=tensor, cval=0.15)

        # Position of every node of this multiplex in the global vector.
        # NOTE(review): assumes "ID" values are valid 0-based positions and that res_df
        # rows follow the order of net.node_map - confirm.
        node_ids = np.array([coexpr_map[prot] for prot in net.node_map.keys()])

        # The vector is sized from the same map that provides the positions (coexpr_map),
        # not from the unrelated BIOGRID map.
        centr_norm = np.zeros(len(coexpr_map))
        centr_norm[node_ids] = res_df["vers"].to_numpy()
        centr_norm = centr_norm/centr_norm.max()

        np.savetxt(X=centr_norm, fname=coex_out_dir+nam+"/"+str(i)+".txt", fmt="%.4e")

  7%|██▉                                     | 222/3000 [02:26<26:28,  1.75it/s]