# Part 1: Random Graph generators

In [1]:
import numpy as np 
import pandas as pd
import networkx as nx
import random
import math 

The following function was used to generate the 800 synthetic graphs on which the effectiveness of the collaborativity formulas was tested. Each graph is created from the following parameters:

*   `prob_resp` Probability that the node being added to the network responded to the survey.
*   `prob_nuevo` Probability that a respondent's new connection points to a new node (rather than to an already existing one).
*   `respondents_range` Inclusive range from which the maximum number of respondents is drawn.
*   `connections_range` Inclusive range from which the number of outgoing connections of each respondent is drawn.



In [2]:
def random_graph( prob_resp=0.25, prob_nuevo=0.55, respondents_range=(20,30), connections_range=(9,18) ):
    """
    Generator of synthetic random graphs intended to simulate the method with which innovation-driven
    entrepreneurship networks were mapped by Global Ecosystems Dynamics Initiative at MIT.

    Nodes are labelled with decimal strings and processed in first-in-first-out order, starting
    from node "0", which is always treated as a respondent. A respondent draws a number of
    connections; each connection goes either to a brand-new node (which is queued for processing)
    or to a node with a smaller index. A non-respondent gets, with probability 0.25, between
    1 and 6 connections to nodes with a smaller index.

    Graphs with fewer than 120 or more than 400 nodes are discarded and generation is retried
    with the SAME parameters.

    Inputs:
    prob_resp          float   probability that a given node has responded to the survey
    prob_nuevo         float   probability that a respondent's connection goes to a new node
    respondents_range  tuple   inclusive range from which the maximum no. of respondents is drawn
    connections_range  tuple   inclusive range from which the no. of connections of a respondent is drawn

    Output:
    networkx.Graph (undirected). Respondent nodes carry a 'responses' attribute holding the
    number of connections drawn for them.
    """
    def _link_to_earlier(graph, label):
        # Connect `label` to a uniformly chosen node with a strictly smaller index.
        # Node "0" has no earlier node, so nothing is added for it (this avoids creating
        # a phantom node labelled "-1").
        index = int(label)
        if index > 0:
            graph.add_edge(label, str(random.randint(0, index-1)))

    # Retry in a loop (rather than recursively) so every attempt uses the caller's parameters.
    while True:
        G = nx.Graph()
        queue = ["0"]
        max_index = 0
        # Upper bound on how many respondents this graph may contain.
        max_respondents = random.randint(respondents_range[0], respondents_range[1])
        respondents = 0
        responses = dict()

        while queue:
            current = queue.pop(0)
            if (random.random()<prob_resp and respondents<max_respondents) or current=="0":
                # Respondent: draw its connections.
                respondents += 1
                n = random.randint(connections_range[0], connections_range[1])
                responses[current] = n

                # Split the n connections between new nodes and already existing ones.
                new_n = sum(1 for _ in range(n) if random.random()<prob_nuevo)
                old_n = n - new_n

                for offset in range(new_n):
                    new_node = str(offset+max_index+1)
                    queue.append(new_node)
                    G.add_node(new_node)
                    G.add_edge(current, new_node)
                for _ in range(old_n):
                    _link_to_earlier(G, current)
                max_index += new_n
            elif random.random()<0.25:
                # Non-respondent: occasionally mentioned as connected to a few earlier nodes.
                for _ in range(random.randint(1,6)):
                    _link_to_earlier(G, current)

        nx.set_node_attributes(G, name='responses', values=responses)

        if 120 <= len(G.nodes()) <= 400:
            return G

In [3]:
# Every loop in this cell is commented out. Each pair of loops wrote 200 synthetic graphs to
# GraphML files while sweeping a single argument of random_graph. The Spanish strings are
# bare string expressions kept as section labels; an English note follows each of them.
"""
Cambiar probabilidad de nuevas conexiones 
(ordenadas de menos a mas colaboracion)
"""    
# Sweep the probability of new connections, prob_nuevo (ordered from less to more collaboration).
#for i in range(100):
#    nx.write_graphml(random_graph(prob_nuevo=0.55+(100-i)/400),\
#        'Synthetic_Graphs_Colab_Methodology/Random_Graphs_Nuevas_Conexiones/Random_Graph_Nuevas_Conexiones'+str(i)+'.graphml')
#for i in range(100):
#    nx.write_graphml(random_graph(prob_nuevo=0.55-i/400),\
#        'Synthetic_Graphs_Colab_Methodology/Random_Graphs_Nuevas_Conexiones/Random_Graph_Nuevas_Conexiones'+str(100+i)+'.graphml')

"""
Cambiar probabilidad de que haya respondientes del cuestionaro
(no se si haya cambio en colaboracion posiblemente no)
"""
# Sweep the probability that a node is a survey respondent, prob_resp
# (author's note: unsure whether this changes collaboration; possibly not).
#for i in range(100):
#    nx.write_graphml(random_graph(prob_resp=0.40-(100-i)/400),\
#        'Synthetic_Graphs_Colab_Methodology/Random_Graphs_Respondientes/Random_Graph_Respondientes'+str(i)+'.graphml')
#for i in range(100):
#    nx.write_graphml(random_graph(prob_resp=0.40+i/400),\
#        'Synthetic_Graphs_Colab_Methodology/Random_Graphs_Respondientes/Random_Graph_Respondientes'+str(100+i)+'.graphml')

"""
Cambiar rango de numero de respondientes por grafica 
(de menos a mas colaboracion)
"""
# Sweep the number of respondents per graph, respondents_range, with prob_nuevo fixed at 0.3
# (from less to more collaboration).
#for i in range(100):
#    nx.write_graphml(random_graph(respondents_range=(int(35-(100-i)/4),int(35-(100-i)/4)),prob_nuevo=0.3),\
#        'Synthetic_Graphs_Colab_Methodology/Random_Graphs_Rango_Respondientes/Random_Graph_Rango_Respondientes'+str(i)+'.graphml')
#for i in range(100):
#    nx.write_graphml(random_graph(respondents_range=(int(35+i/4),int(35+i/4)),prob_nuevo=0.3),\
#        'Synthetic_Graphs_Colab_Methodology/Random_Graphs_Rango_Respondientes/Random_Graph_Rango_Respondientes'+str(100+i)+'.graphml')

"""
Cambiar rango de numero de respuestas por respondiente 
(de menos a mas colaboracion)
"""
# Sweep the number of responses per respondent, connections_range (from less to more collaboration).
#for i in range(100):
#    nx.write_graphml(random_graph(connections_range=(int(15-(100-i)/10),int(24-(100-i)/10))),\
#        'Synthetic_Graphs_Colab_Methodology/Random_Graphs_Numero_Respuestas/Random_Graph_Numero_Respuestas'+str(i)+'.graphml')
#for i in range(100):
#    nx.write_graphml(random_graph(connections_range=(int(15+i/10),int(24+i/10))),\
#        'Synthetic_Graphs_Colab_Methodology/Random_Graphs_Numero_Respuestas/Random_Graph_Numero_Respuestas'+str(100+i)+'.graphml')

'\nCambiar rango de numero de respuestas por respondiente \n(de menos a mas colaboracion)\n'

In [4]:
from numpy.random import choice

def are_adjacent(u,v,G):
    """Return True when `v` is a node of `G` and is listed among `G.neighbors(u)`; False otherwise."""
    return v in G.nodes() and v in G.neighbors(u)

def random_graph_2(n_evals, prob_out, prob_new):
    '''
    Generator of a random directed graph, given the number of respondents of the questionnaire,
    and the fact that each of them could have provided 25 responses maximum.

    Evaluators are nodes 0..n_evals-1. For each evaluator a number of responses is drawn from a
    normal distribution (mean 12.3, std 4), truncated to an integer and limited to 0..25.
    Each response becomes one edge leaving the evaluator:
      * with probability prob_out it points outside the evaluator list: to a brand-new node
        (always when no outside node exists yet, otherwise with probability prob_new) or to an
        existing outside node;
      * otherwise it points to another evaluator.
    Existing targets are drawn with probability proportional to (current degree counter + 1).
    If the drawn target is already adjacent, one re-draw is made; a duplicate edge may still
    result, in which case the graph is unchanged but the degree counters are incremented.

    Input:
     - n_evals  : number of evaluators responding questionnaire
     - prob_out : probability that a mentioned collaboration is outside of the network of evaluators
     - prob_new : probability that an outside collaboration is with a node not mentioned before

    Output:
     - networkx.DiGraph with integer node labels
    '''
    MAX_RESPONSES = 25                      # cap stated in the description above

    G = nx.DiGraph()
    inside = dict()                         # evaluator -> degree counter
    outside = dict()                        # non-evaluator -> degree counter

    for i in range(n_evals):                # add all evaluators to the graph
        G.add_node(i)
        inside[i] = 0
    next_label = n_evals                    # first unused integer label

    def _new_outside(src):
        # Create a fresh outside node and connect `src` to it.
        nonlocal next_label
        to = next_label
        next_label += 1
        G.add_node(to)
        G.add_edge(src, to)
        outside[to] = 1

    def _attach_existing(src, pool):
        # Connect `src` to an existing node of `pool`, chosen with probability proportional
        # to (degree counter + 1). Returns False when `pool` offers no node other than `src`.
        candidates = [vertex for vertex in pool if vertex != src]
        if not candidates:
            return False
        weights = [pool[vertex]+1 for vertex in candidates]
        total = sum(weights)
        probs = [x/total for x in weights]
        to = int(choice(candidates, size=1, p=probs)[0])
        if are_adjacent(src, to, G):        # single re-draw, as in the original procedure
            to = int(choice(candidates, size=1, p=probs)[0])
        G.add_edge(src, to)
        pool[to] += 1
        return True

    # evaluator |--> no. of collaborations, normally distributed and limited to 0..MAX_RESPONSES
    sample_num_evaluations = np.random.normal(loc=12.3, scale=4, size=n_evals)
    list_degrees_evaluators = [min(MAX_RESPONSES, max(0, int(x))) for x in sample_num_evaluations]

    for i in range(n_evals):
        for _ in range(list_degrees_evaluators[i]):
            if random.random()<prob_out:                       # edge goes outside evaluators' list
                if not outside or random.random()<prob_new:    # first outside node, or a new one
                    _new_outside(i)
                else:
                    _attach_existing(i, outside)
                inside[i] += 1
            elif _attach_existing(i, inside):                  # edge goes to another evaluator
                inside[i] += 1
            # else: `i` is the only evaluator, so there is no inside target to connect to
    return G


# Cities to simulate with the second generator, and the number of evaluators used for each.
list_cities = ['Aguascalientes', 'Buenos Aires', 'Ciudad de México', 'Guadalajara', 'Hidalgo',
                  'Madrid', 'Montevideo', 'Oaxaca', 'Sao Paulo', 'Santiago de Chile']
num_evaluators = {'Aguascalientes':19, 'Buenos Aires':31, 'Ciudad de México':36, 'Guadalajara':32, 'Hidalgo':19,
                  'Madrid':37, 'Montevideo':48, 'Oaxaca':36, 'Sao Paulo':28, 'Santiago de Chile':25}
# One random graph per city (prob_out=0.2, prob_new=0.5), written to a GraphML file.
# NOTE: the output directory must already exist; nothing here creates it.
for city in list_cities:
    G= random_graph_2(num_evaluators[city],0.2, 0.5)
    nx.write_graphml(G,'Not_used_anymore/Random_Graphs_Second_Type_Corrected/Random_Graph_'+city+'.graphml')


In [40]:
# Summary table with one column per city. The displayed output shows rows
# 0 = 'Muestra' (sample size), 1 = 'Nodes', 2 = 'Edges'.
muestra = pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - Muestra.csv')


# One CSV per city with the evaluators' answers.
ags_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - AGS.csv')
caba_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - CABA.csv')
cdmx_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - CDMX.csv')
gdl_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - GDL.csv')
hgo_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - HGO.csv')
mad_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - MAD.csv')
mtv_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - MVD.csv')
oax_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - OAX.csv')
sao_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - SAO.csv')
scl_info=pd.read_csv('Ecosystems_from_GED/Data_answers_evaluators/Evaluadores ecosistemas.xlsx - SCL.csv')

# In these four files the second column is renamed from 'Unnamed: 1' to 'Colabs'.
# NOTE(review): the other six files presumably already have a 'Colabs' column — confirm.
# NOTE: inplace=True mutates the frames, so re-running only this part of the cell is a no-op.
ags_info.rename(columns={'Unnamed: 1':'Colabs'}, inplace=True)
gdl_info.rename(columns={'Unnamed: 1':'Colabs'}, inplace=True)
hgo_info.rename(columns={'Unnamed: 1':'Colabs'}, inplace=True)
oax_info.rename(columns={'Unnamed: 1':'Colabs'}, inplace=True)


# NOTE: this rebinds list_cities (previously the full city names) to the short names that
# match the column headers of `muestra`.
list_cities = ['AGS', 'CABA', 'CDMX', 'GDL', 'Pachuca',
                  'Madrid', 'Montevideo', 'Oaxaca', 'Sao Paulo', 'Santiago']
# Short city name -> evaluator answers for that city.
cities_info = {'AGS': ags_info, 
              'CABA' : caba_info,
              'CDMX' : cdmx_info,
              'GDL' : gdl_info,
              'Pachuca' : hgo_info,
              'Madrid' : mad_info,
              'Montevideo' : mtv_info,
              'Oaxaca' : oax_info,
              'Sao Paulo' : sao_info,
              'Santiago' : scl_info}

muestra

Unnamed: 0.1,Unnamed: 0,Madrid,Madrid SG,Madrid Typeform,CDMX,Santiago,CABA,Sao Paulo,Montevideo,Oaxaca,GDL,Pachuca,AGS
0,Muestra,38,16,24,51,30,36,34,59,36,32,21,19
1,Nodes,239,120,156,299,195,228,216,198,149,187,125,96
2,Edges,453,166,205,547,385,500,364,767,326,474,254,233
3,Promedio colabs. por participante,12.13,10.13,8.63,12.33,13.04,13.48,10.38,13.4,6.39,10.81,9.52,8.21
4,% Muestra,15.90%,13.33%,15.38%,17.06%,15.38%,15.79%,15.74%,29.80%,24.16%,17.11%,16.80%,19.79%


In [41]:
# Load the graph of each city from its GraphML file, keyed by the short city name.
citygraphs = dict()
for city in list_cities:
    citygraphs[city] = nx.read_graphml('GraphMLs/'+city+' original graph.graphml')

#citygraphs

In [42]:
'''Third Random Graph Generator'''

def eliminate_small_components(G, min_size=20):
    '''
    Remove, in place, every node that belongs to a small connected component.

    Components are computed on the undirected version of G. All nodes of a component with
    fewer than `min_size` nodes are removed from G. If more than one component remains
    afterwards, '****' is printed as a warning; nothing else is done about it.

    Input:
     - G        : networkx graph (modified in place)
     - min_size : smallest component size that is kept (default 20)

    Output:
     - the same graph object G
    '''
    # Compute the components once instead of once per node.
    to_remove = [node
                 for component in nx.connected_components(nx.to_undirected(G))
                 if len(component) < min_size
                 for node in component]
    G.remove_nodes_from(to_remove)

    if nx.number_connected_components(nx.to_undirected(G))>1:
        print('****')
    return G



def random_graph_3(infos, city, prob_out, prob_new):
    '''
    With this function, we are simulating the responses we obtained from each of the surveyed
    ecosystems/cities. The information we have is, for each evaluator, how many collaborations
    they reported (column 'Colabs' of infos[city]).

    Generator of a random directed graph, given the number of respondents of the questionnaire,
    the number of evaluators of collaborations, and the number of collaborations reported by
    each of them.

    Procedure:
      1. Nodes 0..n_responded-1 are the organizations that answered; the first n_evals of them
         are evaluators ("inside"), the rest start in the "outside" pool.
      2. Every evaluator emits as many edges as collaborations it reported.
      3. Every organization that answered emits 2 more edges (the 2 key orgs in its
         development and establishment).
      4. Small connected components are removed with eliminate_small_components.
    Each edge goes outside the evaluator list with probability prob_out (to a brand-new node
    with probability prob_new, or always when the outside pool offers no target), otherwise
    to another evaluator. Existing targets are drawn with probability proportional to
    (degree counter + 1), with a single re-draw if the target is already adjacent.

    Reads the module-level dataframe `muestra`: row 0 of column `city` is the number of
    organizations that answered.

    Input:
     - infos : dictionary mapping from city to num_collab information
     - city : name of city to be simulated
     - prob_out : probability that a mentioned collaboration is outside of the network of evaluators
     - prob_new : probability that a mentioned collaboration is a newly mentioned org

    Output:
     - networkx.DiGraph with integer node labels
    '''
    G = nx.DiGraph()

    # degree counters of nodes inside and outside the evaluator list
    inside = dict()
    outside = dict()

    # number of organizations that answered, and number of evaluators
    n_responded = int(muestra[city][0])
    n_evals = len(list(infos[city]['Colabs']))

    for i in range(n_responded):            # add all organizations that answered
        G.add_node(i)
        if i < n_evals:
            inside[i] = 0
        else:
            outside[i] = 0
    next_label = n_responded                # first unused integer label (labels stay contiguous)

    def _count_edge(node):
        # Increment the degree counter of `node` in whichever pool holds it.
        if node in outside:
            outside[node] += 1
        elif node in inside:
            inside[node] += 1

    def _new_outside(src):
        # Create a fresh outside node and connect `src` to it.
        nonlocal next_label
        to = next_label
        next_label += 1
        G.add_node(to)
        G.add_edge(src, to)
        outside[to] = 1

    def _attach_existing(src, pool):
        # Connect `src` to an existing node of `pool`, chosen with probability proportional
        # to (degree counter + 1). Returns False when `pool` offers no node other than `src`.
        candidates = [vertex for vertex in pool if vertex != src]
        if not candidates:
            return False
        weights = [pool[vertex]+1 for vertex in candidates]
        total = sum(weights)
        probs = [x/total for x in weights]
        to = int(choice(candidates, size=1, p=probs)[0])
        if are_adjacent(src, to, G):        # single re-draw, as in the original procedure
            to = int(choice(candidates, size=1, p=probs)[0])
        G.add_edge(src, to)
        pool[to] += 1
        return True

    def _add_response(src):
        # Add one reported collaboration leaving `src`.
        if random.random() < prob_out:                          # target outside evaluators' list
            if (not outside
                    or random.random() < prob_new
                    or not _attach_existing(src, outside)):     # no usable outside target
                _new_outside(src)
            _count_edge(src)
        elif _attach_existing(src, inside):                     # target is another evaluator
            _count_edge(src)
        # else: there is no other evaluator to connect to, so the response is dropped

    # 1) collaborations reported by each evaluator
    for i in list(inside):
        deg_i = int(infos[city]['Colabs'][i])   # the degree of that node is in the info retrieved
        for _ in range(deg_i):
            _add_response(i)

    # 2) every organization that answered also named
    #    2 key orgs in its development and establishment
    for i in range(n_responded):
        for _ in range(2):
            _add_response(i)

    return eliminate_small_components(G)
                
            
# One third-type random graph per city (prob_out=0.8, prob_new=0.6), written to a GraphML file.
# NOTE: the output directory must already exist; nothing here creates it.
for city in list_cities:
    G = random_graph_3(cities_info,city,0.8, 0.6)
    nx.write_graphml(G,'Not_used_anymore/Random_Graphs_Third_Type_Corrected/Random_Graph_'+city+'.graphml')

# Part 2: Helper functions

This part defines the helper functions: four types of average (quadratic, arithmetic, geometric and harmonic) and the graph metrics that can be measured on an ecosystem network.

In [43]:
#-----------------------
# Averages
#-----------------------

def quadratic(L):
    """Quadratic mean (root mean square) of the values in L."""
    mean_of_squares = sum([value**2 for value in L])/len(L)
    return mean_of_squares**0.5
    
def arithmetic(L):
    """Arithmetic mean of the values in L."""
    total = sum(L)
    how_many = len(L)
    return total/how_many

def geometric(L):
    """Geometric mean of the values in L: the len(L)-th root of their product."""
    product = 1
    for factor in L:
        product = product * factor
    root = 1/len(L)
    return product**root

def harmonic(L):
    """Harmonic mean of the values in L: len(L) divided by the sum of their reciprocals."""
    reciprocals = [1/x for x in L]
    return len(L)/sum(reciprocals)


# Name of each average -> function computing it.
mean_map = dict(quadratic=quadratic,
                arithmetic=arithmetic,
                geometric=geometric,
                harmonic=harmonic)

In [44]:
#-----------------------
# Graph metrics!
#-----------------------

def degree(G,U):
    """Mean of G.degree over all nodes of G. U is unused; it keeps the (G, U) metric signature."""
    node_degrees = [G.degree(node) for node in G.nodes()]
    return np.mean(node_degrees)

def eccentricity(G,U):
    """Mean of nx.eccentricity over all nodes of U. G is unused."""
    per_node = []
    for node in U.nodes():
        per_node.append(nx.eccentricity(U, node))
    return np.mean(per_node)

def clustering(G,U):
    """
    Mean clustering coefficient of U, taken only over nodes of degree >= 2.

    G is unused. Returns 0.0 when no node of U has degree >= 2 (previously this case
    raised ZeroDivisionError).
    """
    # only the clustering coefficient of nodes with deg >= 2 is averaged
    coefficients = [nx.clustering(U,v) for v in U.nodes() if nx.degree(U,v) >= 2]
    if not coefficients:
        return 0.0
    return sum(coefficients)/len(coefficients)

def diameter(G,U):
    """Return nx.diameter(U). G is unused; it keeps the (G, U) metric signature."""
    return nx.diameter(U)

def radius(G,U):
    """Return nx.radius(U). G is unused; it keeps the (G, U) metric signature."""
    return nx.radius(U)

def avg_shortest_path_length(G,U):
    """Return nx.average_shortest_path_length(U). G is unused."""
    return nx.average_shortest_path_length(U)

def transitivity(G,U):
    """Return nx.transitivity(U). G is unused."""
    return nx.transitivity(U)

def global_efficiency(G,U):
    """Return nx.global_efficiency(U). G is unused."""
    return nx.global_efficiency(U)

def small_worldness(G,U):
    """
    Return the small-world coefficient computed by nx.algorithms.smallworld.sigma on U,
    with niter=1 and nrand=2. G is unused.
    """
    return nx.algorithms.smallworld.sigma(U,niter=1,nrand=2)

def rich_club_coeffs(G,U):
    """
    Rich-club coefficients of U, one per degree threshold t = 0, 1, 2, ...

    For each t, the "core" is the subgraph of U induced by the nodes whose degree in U is
    greater than t, and the coefficient is the core's edge density
    2*edges / (nodes*(nodes-1)). Thresholds stop as soon as the core has at most one node.

    G is unused. Returns a dict {t: coefficient}; it is empty when even the t=0 core has
    at most one node.
    """
    coefficients = dict()
    t = 0
    while True:
        nodes_large_degree = [x for x in U.nodes() if U.degree(x) > t]
        core = U.subgraph(nodes_large_degree)
        nodes_core = len(core.nodes())
        if nodes_core <= 1:
            return coefficients
        coefficients[t] = (2*len(core.edges()))/(nodes_core*(nodes_core-1))
        t += 1

def max_rich_club(G,U):
    """
    Largest rich-club coefficient of U over all degree thresholds (see rich_club_coeffs).

    Returns 0.0 when no coefficient is defined (previously this case raised KeyError).
    """
    return max(rich_club_coeffs(G,U).values(), default=0.0)

def core_ratio(G,U):
    """Fraction of the nodes of U that belong to its 2-core (nx.k_core with k=2). G is unused."""
    two_core = nx.k_core(U,k=2)
    return len(two_core.nodes())/len(U.nodes())
        
def central_point_dominance(G,U):
    """
    Sum over all nodes of (b_max - b_i)/(N-1), where b_i is the betweenness centrality of
    node i in U, b_max the largest such value and N the number of nodes. G is unused.
    """
    centrality = nx.betweenness_centrality(U)
    highest = max(centrality.values())
    others = len(centrality) - 1
    dominance = 0
    for value in centrality.values():
        dominance += ( highest - value )/others
    return dominance


def spectral_radius(G,U):
    """
    Largest absolute value among the eigenvalues of the normalized Laplacian matrix of U.

    G is unused.
    """
    laplacian = nx.normalized_laplacian_matrix(U)
    # .toarray() works on both SciPy sparse matrices and sparse arrays; the `.A` shorthand
    # used before is not available on sparse arrays in recent SciPy releases.
    eigenvalues = np.linalg.eigvals(laplacian.toarray())
    return max(abs(x) for x in eigenvalues)

def modularity(G,U):
    """
    Performance of the partition found by greedy modularity maximisation.

    The communities are computed on U with greedy_modularity_communities and then scored
    on G with the partition "performance" quality measure.
    NOTE(review): despite its name (and the 'Modularidad' key it is registered under), the
    value returned is the partition performance, not the modularity score — confirm that
    this is the intended metric.
    """
    communities = nx.algorithms.community.modularity_max.greedy_modularity_communities(U)
    quality = nx.algorithms.community.quality
    if hasattr(quality, 'performance'):
        return quality.performance(G, communities)
    # Newer NetworkX releases only provide partition_quality, which returns
    # the pair (coverage, performance).
    return quality.partition_quality(G, communities)[1]

def avg_collabs(G,U):
    """
    Mean of (out-degree - 2) over the nodes of G whose out-degree is at least 2.

    U is unused. Returns 0.0 when no node of G has out-degree >= 2 (previously this case
    raised ZeroDivisionError).
    NOTE(review): the subtracted 2 presumably discounts the 2 key-organization answers that
    every respondent gives — confirm.
    """
    total,count = 0,0
    for v in G.nodes():
        d = G.out_degree(v)
        if d>=2:
            total+=d-2
            count+=1
    if count == 0:
        return 0.0
    return total/count
        

        
# Column name of each metric -> function computing it. Every function takes (G, U).
# small_worldness is defined in this cell but is not registered here.
metric_function_map = {'Degree': degree,
            'Excentricidad': eccentricity,
            'Clustering': clustering,
            'Diámetro': diameter,
            'Radio':radius,
            'Camino más corto promedio':avg_shortest_path_length,
            'Transitividad':transitivity,
            'Eficiencia Global':global_efficiency,
            'Rich Club Coefficient':max_rich_club,
            'Core Ratio':core_ratio,
            'Central Point Dominance':central_point_dominance,
            'Spectral radius':spectral_radius,
            'Modularidad':modularity, 
            'Average collabs':avg_collabs} 


def measures(G,U):
    """Evaluate every function of metric_function_map on (G, U); return {metric name: value}."""
    return {metric: function(G,U) for metric, function in metric_function_map.items()}

def embed(D1, D2):
    """
    Append each value of D1 to the list stored under the same key in D2.

    D2 is modified in place and also returned; every key of D1 must already exist in D2.
    """
    for key in D1:
        D2[key].append(D1[key])
    return D2


# Part 3: Colaborativity functions

In [45]:
#-----------------------
# Colaborativities
#-----------------------
# Every formula takes `df`, a mapping from metric name to value (a dict of numbers or a
# dataframe whose columns are the metrics), and returns the collaborativity score.

def colaborativity_original(df):
    '''
    Original Collaborativity Formula during summer 2020 for project GED.
    Computes the colaborativity of a network G based on its measures, as shown below. 
    
    Input:
    - df : dataframe mapping each city to its graph metrics
    Returns:
    - Value describing the collaborativity of a graph, or network
    '''
    Avg_colabs =df['Average collabs']
    Clust = df['Clustering']
    Mod = df['Modularidad']
    return (1/2) * Avg_colabs * (Clust + (np.log10(Mod**2)*-1))


def colaborativity_formula_1(df):
    '''Efficiency + transitivity + 1 - mean(modularity, core ratio).'''
    Efi = df['Eficiencia Global']
    Tran = df['Transitividad']
    Mod = df['Modularidad']
    Core = df['Core Ratio']
    return Efi + Tran + 1 - (Mod + Core)/2 


def colaborativity_formula_2(df):
    '''Efficiency + transitivity + 1/eccentricity - modularity.'''
    Efi = df['Eficiencia Global']
    Tran = df['Transitividad']
    Exc = df['Excentricidad']
    Mod = df['Modularidad']
    return Efi + Tran + (1/Exc) - Mod


def colaborativity_formula_3(df):
    '''Cube root of efficiency * clustering * (1 + cos(pi * modularity)) / 2.'''
    efi = df['Eficiencia Global']   # communication
    clus = df['Clustering']         # robustness
    mod = df['Modularidad']         # readiness for the future
    return (efi * clus * (1+np.cos(math.pi*mod)) /2 )**(1/3)


def colaborativity_formula_4(df):
    '''Cube root of efficiency * transitivity * (1 + cos(pi * modularity)) / 2.'''
    efi = df['Eficiencia Global']    # communication
    tran = df['Transitividad']       # robustness
    mod = df['Modularidad']          # readiness for the future
    return (efi * tran * (1+np.cos(math.pi*mod)) /2 )**(1/3)


def colaborativity_formula_5(df):
    '''Efficiency + transitivity + 1 - core ratio.'''
    efi = df['Eficiencia Global']    # communication
    tran = df['Transitividad']       # robustness
    core = df['Core Ratio']          # readiness for the future
    return efi + tran + 1 - core 


def colaborativity_formula_6(df):
    '''Cube root of efficiency * transitivity * sin(pi / eccentricity).'''
    efi = df['Eficiencia Global']    # communication
    tran = df['Transitividad']       # robustness
    exc = df['Excentricidad']        # readiness for the future
    return (efi * tran * (np.sin(math.pi/exc)) )**(1/3)


# N e w    i d e a s    f o r   f o r m u l a s #
# Formulas 7-9 average a "quantity" term, log10(avg collabs + 1)/log10(26), with a
# "quality" term, the quadratic mean of three metrics (uses the `quadratic` helper).
# NOTE(review): 26 is presumably 25 + 1, 25 being the maximum number of responses — confirm.

def colaborativity_formula_7(df):
    '''Quantity term plus quadratic mean of efficiency, transitivity and sin(pi / eccentricity).'''
    avg_deg = df['Average collabs']  # 1 quantity
    efi = df['Eficiencia Global']    # 2 quality: communication
    tran = df['Transitividad']       #            robustness
    exc = df['Excentricidad']        #            readiness for the future
    return 0.5*(np.log10(avg_deg+1)/np.log10(26))  +  0.5*quadratic([efi , tran , (np.sin(math.pi/exc))])


def colaborativity_formula_8(df):
    '''Quantity term plus quadratic mean of efficiency, transitivity and (1 + cos(pi * modularity)) / 2.'''
    avg_deg = df['Average collabs']  # 1 quantity
    efi = df['Eficiencia Global']    # 2 quality: communication
    tran = df['Transitividad']       #            robustness
    mod = df['Modularidad']          #            readiness for the future
    return 0.5*(np.log10(avg_deg+1)/np.log10(26)) + 0.5*quadratic([efi , tran , 0.5*(1+np.cos(math.pi*mod)) ])


def colaborativity_formula_9(df):
    '''Quantity term plus quadratic mean of efficiency, transitivity and sqrt(rich club * core ratio).'''
    avg_deg = df['Average collabs']  # 1 quantity
    efi = df['Eficiencia Global']    # 2 quality: communication
    tran = df['Transitividad']       #            robustness
    rcc = df['Rich Club Coefficient']        # readiness for the future
    core = df['Core Ratio']                  # readiness for the future
    return 0.5*(np.log10(avg_deg+1)/np.log10(26)) + 0.5*quadratic([efi , tran , (rcc*core)**0.5])


# Formula index -> formula. Built AFTER the definitions so that the cell also runs on a
# fresh kernel (building it first raises NameError).
colab_formula_dict = {0: colaborativity_original, 
                     1: colaborativity_formula_1,
                     2: colaborativity_formula_2,
                     3: colaborativity_formula_3,
                     4: colaborativity_formula_4,
                     5: colaborativity_formula_5,
                     6: colaborativity_formula_6,
                     7: colaborativity_formula_7,
                     8: colaborativity_formula_8,
                     9: colaborativity_formula_9}


# Part 4: Monte Carlo Simulations

In [46]:
#----------------------------
# Monte Carlo Simulation
#----------------------------

# Metrics of the real city graphs; monte_carlo_2 selects rows by the 'Ciudad' column and
# reads one column per metric name.
tidydata = pd.read_csv('Referenced_CSVs/Tidy_DataFrame.csv')

def monte_carlo_2(infos, city, prob_out, prob_new, num_trials):
    '''
    Compare the real graph metrics of `city` with those of random graphs simulating it.

    Generates `num_trials` graphs with random_graph_3, computes every metric of
    metric_function_map and every formula of colab_formula_dict on each of them, and
    returns a dataframe with one row per trial ('G0', 'G1', ...), then a 'Mean' row with
    the average over the trials, then a '<city> real' row taken from `tidydata`.

    Input:
     - infos, city, prob_out, prob_new : passed unchanged to random_graph_3
     - num_trials : number of random graphs to generate

    Output:
     - pandas DataFrame with the columns 'Name', the metrics listed below and
       'Colab 0' ... 'Colab 9'
    '''
    formula_ids = sorted(colab_formula_dict)

    data = {key : [] for key in metric_function_map.keys()}
    data['Name'] = []
    for k in formula_ids:
        data['Colab '+str(k)] = []

    #Samples
    for trial in range(num_trials):
        G = random_graph_3(infos,city,prob_out, prob_new)
        U = nx.to_undirected(G)
        M = measures(G,U)

        data = embed(M,data)
        data['Name'].append('G'+str(trial))
        for k in formula_ids:
            data['Colab '+str(k)].append(colab_formula_dict[k](M))

    #Means (computed over the trial rows only, before the real data is appended)
    for key in data.keys():
        if key != 'Name':
            data[key].append(np.mean( data[key] ))
    data['Name'].append('Mean')

    #Real Data
    city_rows = tidydata.loc[tidydata['Ciudad'] == city]
    citymeasures = {metric: list(city_rows[metric])[0] for metric in metric_function_map.keys()}

    data = embed(citymeasures, data)
    data['Name'].append(city+' real')
    for k in formula_ids:
        data['Colab '+str(k)].append(colab_formula_dict[k](citymeasures))

    #Incorporating the dataframe! 
    #'avg strength', 'weight', 'Weighted Degree','Small Worldness',
    # NOTE(review): 'Average collabs' is computed above but is not among the output
    # columns — confirm that leaving it out is intended.
    L = ['Name','Degree', 'Excentricidad', 
        'Clustering', 'Diámetro', 'Radio', 'Camino más corto promedio', 
        'Transitividad', 'Eficiencia Global',
        'Rich Club Coefficient', 'Core Ratio', 'Central Point Dominance',
        'Spectral radius', 'Modularidad']
    L +=['Colab '+str(k) for k in formula_ids]
    df = pd.DataFrame(data, columns = L)

    return df



# Try the function on one city: 10 random graphs simulating Montevideo
# (prob_out=0.8, prob_new=0.6); the resulting dataframe is displayed below.
monte_carlo_2(cities_info, 'Montevideo', 0.8, 0.6, 10)




Unnamed: 0,Name,Degree,Excentricidad,Clustering,Diámetro,Radio,Camino más corto promedio,Transitividad,Eficiencia Global,Rich Club Coefficient,...,Colab 0,Colab 1,Colab 2,Colab 3,Colab 4,Colab 5,Colab 6,Colab 7,Colab 8,Colab 9
0,G0,3.594272,6.116945,0.088264,8.0,4.0,3.653219,0.036921,0.295981,0.666667,...,0.862835,0.646667,-0.4274,0.071963,0.053819,0.884215,0.175104,0.547042,0.467326,0.560959
1,G1,3.686275,5.352941,0.119812,7.0,4.0,3.636291,0.044912,0.297574,0.666667,...,1.066704,0.665576,-0.388246,0.084096,0.060636,0.906212,0.194879,0.562844,0.46791,0.559185
2,G2,3.530233,5.930233,0.085301,7.0,4.0,3.697165,0.044842,0.291993,1.0,...,0.796505,0.677895,-0.42637,0.065769,0.05308,0.950788,0.187733,0.548573,0.464942,0.578202
3,G3,3.625899,5.592326,0.129362,7.0,4.0,3.62907,0.046912,0.296673,0.266667,...,1.04138,0.654477,-0.407374,0.077478,0.055251,0.895144,0.194989,0.555473,0.465717,0.511165
4,G4,3.4942,5.280742,0.110951,6.0,4.0,3.665775,0.046499,0.293778,1.0,...,0.922839,0.672082,-0.403033,0.071345,0.053391,0.936565,0.197089,0.561436,0.464201,0.5808
5,G5,3.447489,5.454338,0.159351,7.0,4.0,3.691567,0.044691,0.292063,1.0,...,1.251634,0.677026,-0.399819,0.09015,0.059009,0.937211,0.192283,0.557594,0.464134,0.58014
6,G6,3.70516,6.316953,0.11809,8.0,5.0,3.673267,0.047554,0.295658,1.0,...,1.072399,0.667264,-0.417948,0.082219,0.060715,0.910779,0.188592,0.546836,0.470801,0.592821
7,G7,3.315789,5.64693,0.111356,7.0,4.0,3.830605,0.03589,0.282789,1.0,...,0.941741,0.655422,-0.438203,0.069628,0.047739,0.926135,0.175003,0.555019,0.464131,0.580488
8,G8,3.601918,4.980815,0.11265,6.0,4.0,3.629116,0.050564,0.296913,0.5,...,0.967676,0.671302,-0.384439,0.071955,0.055093,0.927813,0.206876,0.574731,0.470576,0.541828
9,G9,3.861538,4.961538,0.098206,6.0,4.0,3.557195,0.047342,0.303986,0.666667,...,0.956589,0.658599,-0.363349,0.080101,0.062807,0.882098,0.204208,0.573645,0.470073,0.565394


In [47]:

# Run 5 trials per city (prob_out=0.8, prob_new=0.6) and save each city's table as a CSV.
# The city name is printed as a progress indicator.
for city in cities_info.keys():
    print(city)
    df = monte_carlo_2(cities_info, city, 0.8, 0.6, 5)
    df.to_csv('Not_used_anymore/Data_Cities_wrt_Random/Trials_'+city+'.csv',index=False)


AGS
CABA
CDMX
GDL
Pachuca
Madrid
Montevideo
Oaxaca
Sao Paulo
Santiago


In [1]:

# Reload the trial table saved for Oaxaca and display it.
# NOTE: the recorded output of this cell is "NameError: name 'pd' is not defined". The cell
# was executed with execution count 1, i.e. on a fresh kernel before the import cell.
# Run the import cell first.
dataframe = pd.read_csv('Not_used_anymore/Data_Cities_wrt_Random/Trials_Oaxaca.csv')
dataframe


NameError: name 'pd' is not defined