In [None]:
import networkx as nx
import pandas as pd
import numpy as np
import sys
import random
import functools
import pylab as plt

import math
from statsmodels.nonparametric.smoothers_lowess import lowess
from scipy.ndimage import gaussian_filter1d
import copy
import csv

In [None]:

def sliding_hole_highest_mutation_robustness(graph, mg, num_skipping_nodes=1, position=0, num_points=-1):
    '''
    Targeted-attack robustness curve with a "hole" in the attack ranking.

    Nodes are removed one per step in decreasing order of mutation count, but
    the ``num_skipping_nodes`` consecutive genes that start at rank
    ``position`` are taken out of the ranking before the attack begins. After
    every removal the relative size of the largest connected component and the
    normalised entropy of the component sizes are recorded.

    Parameters
    ----------
    graph : networkx graph
        Network to attack. A deep copy is attacked; the argument is not modified.
    mg : pandas.DataFrame
        Table with the columns 'gene' and 'mutations'. Genes that appear in at
        least one edge of ``graph`` but not in ``mg`` are appended with 0
        mutations. ``mg`` itself is not modified.
    num_skipping_nodes : int
        Width of the hole (number of consecutive ranks left out).
    position : int
        Rank (0-based, in the sorted table) of the first gene left out.
        Ranks always refer to the ranking *before* anything is left out.
    num_points : int
        Number of rows of the returned curve (initial state included).
        A negative value means ``N + 1``, i.e. remove every node.

    Returns
    -------
    (pandas.DataFrame, list)
        The DataFrame has the columns 'f' (fraction of removed nodes), 'S'
        (normalised entropy) and 'P_inf' (largest component size / N). The
        list holds one ``[gene, rank]`` pair per gene left out of the ranking.

    Raises
    ------
    ValueError
        If ``num_points`` is 0.
    IndexError
        If the hole reaches past the end of the ranking.

    Notes
    -----
    * Once the ranking is used up (this includes the case where its remaining
      entries are not nodes of the network) the remaining nodes, among them
      the genes left out by the hole, are removed in random order.
    * Nodes that are neither in ``mg`` nor in any edge are only reached by
      that random phase.
    '''
    network = copy.deepcopy(graph)

    # Initial number of nodes: every fraction below is relative to it.
    N = network.number_of_nodes()

    if num_points < 0:
        num_points = N + 1
    if num_points == 0:
        raise ValueError("num_points must be a positive integer (or negative for N + 1)")

    # Fixed-length curves. Building them with np.arange(0, 1, 1/num_points)
    # can produce one element too many because of floating-point rounding.
    f = [0.0] * num_points
    P_inf = [1.0] + [0.0] * (num_points - 1)
    S = [0.0] * num_points  # entropy resilience

    # Genes found in the edge list but absent from mg get 0 mutations. They are
    # sorted so that their order does not depend on set/hash iteration order.
    network_df = nx.to_pandas_edgelist(network)
    missing_gene_set = set(network_df['source']).union(network_df['target']) - set(mg['gene'])

    ranking = mg[['gene', 'mutations']]
    if missing_gene_set:
        padding = pd.DataFrame({'gene': sorted(missing_gene_set, key=str),
                                'mutations': [0] * len(missing_gene_set)})
        ranking = pd.concat([ranking, padding], ignore_index=True)

    # Rank the genes by decreasing number of mutations.
    ranked_genes = ranking.sort_values('mutations', ascending=False)['gene'].tolist()

    # Open the hole. Ranks are read from the untouched ranking, so the hole is
    # a contiguous window even when it is wider than one gene.
    mutated_nodes_skipped = [
        [ranked_genes[k], k] for k in range(position, position + num_skipping_nodes)
    ]
    skipped_genes = {gene for gene, _ in mutated_nodes_skipped}
    attack_order = [gene for gene in ranked_genes if gene not in skipped_genes]

    cursor = 0  # index of the next candidate in attack_order
    # An empty network has nothing to remove (and N == 0 cannot be a divisor).
    last_step = num_points if N > 0 else 1

    for step in range(1, last_step):

        # Skip ranked genes that are not (or no longer) nodes of the network.
        while cursor < len(attack_order) and attack_order[cursor] not in network:
            cursor += 1

        if cursor < len(attack_order):
            node_to_remove = attack_order[cursor]
            cursor += 1
        else:
            # Ranking used up: continue with uniformly random removals.
            node_to_remove = random.sample(list(network.nodes()), 1)[0]

        network.remove_node(node_to_remove)

        # Component sizes, largest first. Only the sizes are needed, so there
        # is no need to sort the components themselves or to break ties.
        sizes = sorted((len(c) for c in nx.connected_components(network)), reverse=True)

        # Stop as soon as the network is empty.
        if not sizes:
            for i in range(step, num_points):
                P_inf[i] = 0
                S[i] = 0
                # NOTE(review): f is held at its previous value, so the removal
                # that empties the network is not counted in f - confirm intent.
                f[i] = f[step - 1]
            print('Final P_inf = ', P_inf[step])
            break

        P_inf[step] = sizes[0] / N
        f[step] = f[step - 1] + 1 / N

        sum_comp = 0
        for size in sizes:
            p_i = size / N
            sum_comp = sum_comp + p_i * math.log(p_i)

        # N >= 2 here: a one-node network is emptied by its first removal.
        S[step] = -1 / math.log(N) * sum_comp

    hi_mutation_removal_dataframe = pd.DataFrame({'f': f, 'S': S, 'P_inf': P_inf})

    return hi_mutation_removal_dataframe, mutated_nodes_skipped

In [None]:
from datetime import datetime
import multiprocessing as mp
import os

def _sliding_hole_worker(graph, mg, num_nodes_per_step, positions, num_points, output_root):
    """Run the sliding-hole attack for every rank in ``positions`` and save each result.

    For each position a folder ``<output_root>/position_<pos>`` is created
    containing ``removal_dataframe.txt`` (the robustness curve) and
    ``genes_removed_from_mg.txt`` (the genes left out of the ranking).
    Positions whose folder already exists are skipped.
    """
    for pos in positions:
        sub_path = f"{output_root}/position_{pos}"
        if os.path.exists(sub_path):
            continue  # already computed (possibly by a previous run)

        results, skipped = sliding_hole_highest_mutation_robustness(
            graph, mg, num_nodes_per_step, pos, num_points
        )

        # The folder is created only after the computation has succeeded.
        os.mkdir(sub_path)
        results.to_csv(f"{sub_path}/removal_dataframe.txt")
        with open(f"{sub_path}/genes_removed_from_mg.txt", mode='w', newline='') as file_csv:
            writer = csv.writer(file_csv)
            writer.writerow(['Gene', 'Posizione'])
            writer.writerows(skipped)


def sliding_hole_highest_mutation_robustness_all_positions_parallel(
    graph,
    mutation_map_filename,
    num_nodes_per_step=1,
    init_pos=0,
    fin_pos=None,
    num_points=-1,
    num_core=1,
    path=''
):
    """
    Run sliding_hole_highest_mutation_robustness for every position from
    ``init_pos`` to ``fin_pos`` (excluded), spread over several processes.

    Parameters
    ----------
    graph : networkx graph
        Network passed unchanged to every run.
    mutation_map_filename : str
        Whitespace-separated file with a header line followed by
        ``gene mutations`` rows.
    num_nodes_per_step : int
        Width of the hole and, at the same time, the stride between
        consecutive positions.
    init_pos, fin_pos : int
        Range of ranks to scan. ``fin_pos=None`` means the number of genes of
        the mutation map that are nodes of ``graph``.
    num_points : int
        Forwarded to sliding_hole_highest_mutation_robustness.
    num_core : int
        Maximum number of worker processes.
    path : str
        Output folder. An empty string selects a time-stamped folder in the
        current directory. Positions that already have a ``position_<pos>``
        sub-folder are not recomputed, so an interrupted scan can be resumed.

    Returns
    -------
    None
        Results are written to disk. A RuntimeWarning is issued if any worker
        process ends with a non-zero exit code.
    """
    import warnings

    # Load the mutation map; the first line of the file is a header.
    mg = pd.read_csv(
        mutation_map_filename,
        sep=r'\s+',
        header=0,
        names=['gene', 'mutations'],
        dtype={'gene': str},
    )
    mg['mutations'] = mg['mutations'].astype('float64')

    # Keep only the genes that are nodes of the graph.
    graph_nodes = set(graph.nodes())
    mg = mg[mg['gene'].isin(graph_nodes)]

    N_mut = len(graph_nodes.intersection(mg['gene']))

    if fin_pos is None:
        fin_pos = N_mut

    if path == '':
        path = './Sliding_hole_all_positions_' + datetime.now().strftime("%d-%m-%Y_%H:%M:%S")

    # makedirs also creates missing parent folders and tolerates an existing one.
    os.makedirs(path, exist_ok=True)

    # Leave out the positions whose folder already exists.
    positions_list = [
        pos for pos in range(init_pos, fin_pos, num_nodes_per_step)
        if not os.path.exists(f"{path}/position_{pos}")
    ]

    # Nothing left to do: return instead of asking numpy for zero chunks.
    if not positions_list:
        return

    num_core = max(1, min(num_core, len(positions_list)))

    # Contiguous chunks, converted back from numpy integers to plain ints.
    positions_chunks = [
        [int(pos) for pos in chunk]
        for chunk in np.array_split(positions_list, num_core)
    ]

    processes = []
    for chunk in positions_chunks:
        process = mp.Process(
            target=_sliding_hole_worker,
            args=(graph, mg, num_nodes_per_step, chunk, num_points, path)
        )
        process.start()
        processes.append(process)

    for process in processes:
        process.join()

    # A crashed worker would otherwise go unnoticed: report it.
    failed_exit_codes = [process.exitcode for process in processes if process.exitcode != 0]
    if failed_exit_codes:
        warnings.warn(
            f"{len(failed_exit_codes)} of {len(processes)} worker processes failed "
            f"(exit codes: {failed_exit_codes}); some positions were not computed. "
            f"Run again with the same path to resume.",
            RuntimeWarning,
        )

    return

In [None]:
# Mutation map used to rank the genes.
mg_path = "/home/PERSONALE/nicolas.biondini2/Interactomes/Info_Tumori/variable_mg_PANCAN_10000.txt"

# Range of ranks to scan; per the original note, 16750 is the number of
# mutated nodes in graph_full.
init_pos, fin_pos = 0, 16750

# graph_full is not defined in this cell - presumably built in an earlier one.
sliding_hole_highest_mutation_robustness_all_positions_parallel(
    graph_full,
    mg_path,
    num_nodes_per_step=1,
    init_pos=init_pos,
    fin_pos=fin_pos,
    num_core=30,
    path='/home/PERSONALE/nicolas.biondini2/Interactomes/NEW_LIST-Sliding_hole_high_mutations_FULL',
)