In [1]:
import networkx as nx
from matplotlib import pyplot, patches
from scipy import io
import numpy as np
from collections import defaultdict
import community
import pickle
from tqdm import tqdm
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt 
import glob, os
import csv

In [14]:

# %%
def structural_network_analysis(fname):
    '''
    function to perform structural anaylsis in binary unipartite and bipartite networks
    by means of nestedness (as defined in ASR et al, PRE 2018), in-block nested and modularity.

    The optimization of modularity and in-block nestedness is done employing the extremal optimization
    algorithm.

    Inputs:
    ----------
       fname: list

           fname[0]: name of the network file to read
           fname[1]: boolean to indicate if "filename" is a bipartite (True) or
           unipartite (False) network
           fname[2]: boolean indicating the format of the data file. Three-column
           or edge list (True) or matrix format (False)
           fname[3]: int for repeat
           fname[4]: matrix input


    '''
    name = "results_" + str(os.path.basename(fname[0]).split('.csv')[0]) + ".npz"
    if len(glob.glob(name)) == 0:
       # if fname[2] == True:  # wheter the data is edge list or adajcency matrix
           # M = from_edges_to_matrix(fname[0], fname[1])
       # else:
           # M = np.loadtxt(fname[0], dtype='int', delimiter=',')
        M=fname[4]

        '''starting the structural analysis '''
        print('starting the structural analysis', str(os.path.basename(fname[0]).split('.csv')[0]))

        if fname[1] == True:  # if the network is bipartite or not
            cols_degr = M.sum(axis=0)
            row_degr = M.sum(axis=1)
            R, C = M.shape  # rows and cols
            # Nestednes
            # In-block nestedness with B=1
            Cn_ = [np.repeat(1, R), np.repeat(1, C)]
            max_blockN = max(max(Cn_[0]), max(Cn_[1])) + 1
            lambdasN = extremal_bi.call_lambda_i(M, cols_degr, row_degr, Cn_[1], Cn_[0], max_blockN, True)
            nestedness_ = extremal_bi.calculate_Fitness(M, cols_degr, row_degr, lambdasN[0], lambdasN[1], True)

            # Modularity Extremal
            C_ = extremal_bi.recursive_step(M, cols_degr, row_degr, .7, 3, False)
            max_blockQ = max(max(C_[0]), max(C_[1])) + 1
            lambdasQ = extremal_bi.call_lambda_i(M, cols_degr, row_degr, C_[1], C_[0], max_blockQ, False)
            Q_ = extremal_bi.calculate_Fitness(M, cols_degr, row_degr, lambdasQ[0], lambdasQ[1], False)

            # Inblock nestedness extremal
            Ci_ = extremal_bi.recursive_step(M, cols_degr, row_degr, .7, 3, True)
            max_blockI = max(max(Ci_[0]), max(Ci_[1])) + 1
            lambdasI = extremal_bi.call_lambda_i(M, cols_degr, row_degr, Ci_[1], Ci_[0], max_blockI, True)
            I_ = extremal_bi.calculate_Fitness(M, cols_degr, row_degr, lambdasI[0], lambdasI[1], True)

            ''' Saving results of analysis'''
            print('saving results for', str(os.path.basename(fname[0]).split('.csv')[0]))
            dfq = pd.DataFrame({'rows': pd.Series(C_[0]), 'cols': pd.Series(C_[1])})
            dfi = pd.DataFrame({'rows': pd.Series(Ci_[0]), 'cols': pd.Series(Ci_[1])})
            dfq.to_csv(str(os.path.basename(fname[0]).split('.csv')[0]) + "/modularity_partitions_" + str(
                os.path.basename(fname[0]).split('.csv')[0]) + ".csv", index=False, float_format='%.0f')
            dfi.to_csv(str(os.path.basename(fname[0]).split('.csv')[0]) + "/in-block_partitions_" + str(
                os.path.basename(fname[0]).split('.csv')[0]) + ".csv", index=False, float_format='%.0f')
            np.savez_compressed("results_" + str(os.path.basename(fname[0]).split('.csv')[0]) + ".npz", N=nestedness_,
                                Q=Q_, I=I_)

        else:
            cols_degr = M.sum(axis=0)
            row_degr = M.sum(axis=1)
            R, C = M.shape  # rows and cols
            # Nestednes
            # IBN with B=1
            Cn_ = np.repeat(1, C).tolist()
            max_blockN = max(Cn_) + 1
            lambdasN = extremal_uni.call_lambda_i(M, cols_degr, Cn_, max_blockN, True)
            nestedness_ = extremal_uni.calculate_Fitness(M, cols_degr, lambdasN, True)  # in-block nestedness value

            # Modularity
            C_ = extremal_uni.recursive_step(M, cols_degr, .7, 3, False)  # vector with labels of partitions
            max_blockQ = max(C_) + 1
            lambdasQ = extremal_uni.call_lambda_i(M, cols_degr, C_, max_blockQ, False)
            Q_ = extremal_uni.calculate_Fitness(M, cols_degr, lambdasQ, False)  # modularity value
            
            lambdasI_Q = extremal_uni.call_lambda_i(M, cols_degr, C_, max_blockQ, True)
            I_maxq_ = extremal_uni.calculate_Fitness(M, cols_degr, lambdasI_Q, True)  # in-block nestedness value ON MAXmodularity

            # Inblock nestedness
            Ci_ = extremal_uni.recursive_step(M, cols_degr, .7, 3, True)  # vector with labels of partitions
            max_blockI = max(Ci_) + 1
            lambdasI = extremal_uni.call_lambda_i(M, cols_degr, Ci_, max_blockI, True)
            I_ = extremal_uni.calculate_Fitness(M, cols_degr, lambdasI, True)  # in-block nestedness value

            ''' Saving results of analysis'''
            print('saving results for', str(os.path.basename(fname[0]).split('.csv')[0]))
            dfq = pd.DataFrame({'rows': pd.Series(C_), 'cols': pd.Series(C_)})
            dfi = pd.DataFrame({'rows': pd.Series(Ci_), 'cols': pd.Series(Ci_)})
            dfq.to_csv(str(os.path.basename(fname[0]).split('.csv')[0]) + "/" + str(
                fname[3]) + "/modularity_partitions_" + str(os.path.basename(fname[0]).split('.csv')[0]) + ".csv",
                       index=False, float_format='%.0f')
            dfi.to_csv(
                str(os.path.basename(fname[0]).split('.csv')[0]) + "/" + str(fname[3]) + "/in-block_partitions_" + str(
                    os.path.basename(fname[0]).split('.csv')[0]) + ".csv", index=False, float_format='%.0f')
            np.savez_compressed("results_" + str(os.path.basename(fname[0]).split('.csv')[0]) + ".npz", N=nestedness_,
                                Q=Q_, I=I_, I_MQ=I_maxq_)


# %%
def str_to_bool(s):
    if s == 'True':
        return True
    elif s == 'False':
        return False
    else:
        raise ValueError


# %%
def arguments_list_to_pool(argv0, argv1, argv3, repeat,adjmat):
    '''
    Function that generate the list of lists with the arguments needed to perfomr structural
    analysis of different networks in parallel processes.

    inputs:
    ----------
    args[0]:
        directory where the network file to read are
   args[1]:
       boolean to indicate if "filename" is a bipartite (True) or
       unipartite (False) network
   args[2]:
       boolean indicating the format of the data file. Three-column
       or edge list (True) or matrix format (False)
    '''
    path = str(argv0)
    filenames = sorted(glob.glob(path + "*.csv"))
    bipartite = bool(argv1)
    edges = bool(argv3)
    parameters = list()
    [parameters.append([filenames[i], bipartite, edges, repeat, adjmat]) for i in range(len(filenames))]
    return parameters


In [None]:
if __name__ == '__main__':
    #    print('parameters')
    # repeat
    for repeat in range(10):
        # find the subfile in folder(4 for each)
        address=str(sys.argv[1])
        for root, _, files in os.walk(address):
            for file in files:
                class_address=address+"/"+file
       
               # load matrix and calculate
                with open(class_address, 'rb') as fp:
                    pkl_file= pickle.load(fp)
                for i in pkl_file.keys():
                    adjacency_matrix = nx.to_numpy_array(pkl_file[i],weight=None).tolist()
        
                    parameters = arguments_list_to_pool(sys.argv[1], str_to_bool(sys.argv[2]), str_to_bool(sys.argv[3]), repeat, adjacency_matrix)

                    if not os.path.exists(str(sys.argv[1]) + "/" + str(repeat) + "/"):
                        os.makedirs(str(sys.argv[1]) + "/" + str(repeat) + "/")


                    n_cpus = multi.cpu_count()
                    if n_cpus > 3:
                        nc = n_cpus - 1
                    else:
                        nc = 1
                    pool = multi.Pool(processes=nc)
                    pool.map(structural_network_analysis, parameters)
                    pool.terminate()
                    del (pool)

                    filenames = sorted(glob.glob("results_*.npz"))
                    N = []
                    Q = []
                    I = []
                    I_MQ=[]
                    fi = []
                    for f in filenames:
                        ff = (os.path.basename(f).split('.npz')[0])
                        fi.append((os.path.basename(ff).split('results_')[1]))
                        data = np.load(f)
                        N.append((data['N']))
                        Q.append((data['Q']))
                        I.append((data['I']))
                        I_MQ.append((data['I_MQ']))

                    df = pd.DataFrame()
                    df['name'] = fi
                    df['N'] = N
                    df['Q'] = Q
                    df['I'] = I
                    df['I_MQ'] = I_MQ
                    df.to_csv(str(sys.argv[1]) + "/" + str(repeat) + "/data_structures_NQI_results" + str(sys.argv[1]) + ".csv",
                              index=False, sep=',')
                    for f in filenames:
                        os.remove(f)

In [9]:
def list_pkl_files(folder_path):
    pkl_files = []

    for root, _, files in os.walk(folder_path):
        for file in files:
            if file.endswith(".pkl"):
                print(file)
                pkl_files.append(os.path.join(root, file))
    
    return pkl_files



In [11]:
folder_path = 'null_model_generate/2018-01-12'  # Replace with the path to your folder
pkl_files = list_pkl_files(folder_path)

print("Pickle (.pkl) Files:")
for pkl_file in pkl_files:
    print(pkl_file)

EE.pkl
FF.pkl
PEEP.pkl
PP.pkl
Pickle (.pkl) Files:
null_model_generate/2018-01-12\EE.pkl
null_model_generate/2018-01-12\FF.pkl
null_model_generate/2018-01-12\PEEP.pkl
null_model_generate/2018-01-12\PP.pkl


In [13]:
        address='null_model_generate/2018-01-12' 
        for root, _, files in os.walk(address):
            for file in files:
                class_address=address+"/"+file
                print(class_address)

null_model_generate/2018-01-12/EE.pkl
null_model_generate/2018-01-12/FF.pkl
null_model_generate/2018-01-12/PEEP.pkl
null_model_generate/2018-01-12/PP.pkl


In [24]:
for repeat in range(3):
        # find the subfile in folder(4 for each)
        address='null_model_generate/2018-01-12'
        for root, _, files in os.walk(address):
            for file in files:
                class_address=address+"/"+file
                print(file.split('.pkl')[0])
       
               # load matrix and calculate
                with open(class_address, 'rb') as fp:
                    pkl_file= pickle.load(fp)
                for i in pkl_file.keys():
                    adjacency_matrix = nx.to_numpy_array(pkl_file[i],weight=None).tolist()
                    print(adjacency_matrix)
        
                    parameters = arguments_list_to_pool('null_model_generate/2018-01-12', False, False, repeat)


EE
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0

[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]


[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 0.0], [0.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]


[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]
[[0.0, 1.0], [1.0, 0.0]]


In [18]:
parameters

[]

In [20]:
adjacency_matrix 

[[0.0, 0.0], [0.0, 0.0]]

In [39]:
def arguments_list_to_pool(argv0, argv1, argv3, repeat,adjmat,nulltype,nullindex):
    '''
    Function that generate the list of lists with the arguments needed to perfomr structural
    analysis of different networks in parallel processes.

    inputs:
    ----------
    args[0]:
        directory where the network file to read are
   args[1]:
       boolean to indicate if "filename" is a bipartite (True) or
       unipartite (False) network
   args[2]:
       boolean indicating the format of the data file. Three-column
       or edge list (True) or matrix format (False)
    '''
    path = str(argv0)
   # filenames = sorted(glob.glob(path + "*.csv"))
    bipartite = bool(argv1)
    edges = bool(argv3)
    nulltype = str(nulltype)
    nullindex = str(nullindex)

    parameters = list()
    [parameters.append([path, bipartite, edges, repeat, adjmat,nulltype,nullindex]) ]
    return parameters



In [40]:
    for repeat in range(3):
        # find the subfile in folder(4 for each)
        Address = 'null_model_generate/2018-01-14'
        for root, _, files in os.walk(Address):
            for file in files:
                class_address = Address + "/" + file

                # load matrix and calculate
                with open(class_address, 'rb') as fp:
                    pkl_file = pickle.load(fp)
                for i in pkl_file.keys():
                    adjacency_matrix = nx.to_numpy_array(pkl_file[i], weight=None).tolist()

                    parameters = arguments_list_to_pool(Address, str_to_bool('True'), str_to_bool('True'),
                                                        repeat, adjacency_matrix,file.split('.pkl')[0] ,i)

In [41]:
parameters

[['null_model_generate/2018-01-14',
  True,
  True,
  2,
  [[0.0, 1.0], [1.0, 0.0]],
  'PP',
  '199']]