In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import math
import os
import matplotlib.pyplot as plt
import scipy.stats
from datetime import datetime

In [2]:
# Read the three 2019 city exports into DataFrames (paths are relative to
# the notebook's working directory).
sf_2019 = pd.read_csv("CNC_San_Francisco_2019.csv")
london_2019 = pd.read_csv("CNC_London_2019.csv")
la_2019 = pd.read_csv("CNC_Los_Angeles_2019.csv")

In [317]:
# df_net['year'] = pd.to_datetime(df_net['created_at'], utc = True).dt.to_period('M')

In [3]:
def bipartite_net_make(df, min_weight = 2):
    """
    The function returns a bipartite network with two types of nodes: 'action'
    nodes (users who made identifications) and 'observation' nodes (users whose
    rows in `df` received those identifications).

    Each non-null entry of df['identifications'] is parsed into a table of
    identification records. Every record contributes one pair made of the
    record's own 'user_id' and the 'user_id' of the `df` row it came from.
    Pairs where both ids are equal (loops) are dropped, identical pairs are
    counted, and only pairs seen at least `min_weight` times become edges.

    Arguments:
    - df -- dataframe with the columns 'identifications' (string
      representation of a list of dicts, each with a 'user_id' key) and 'user_id'
    - min_weight -- minimal weight (number of repeated pairs) of an edge
      between nodes

    Returns:
    - networkx.Graph whose nodes are named 'P<user_id>' (bipartite='action')
      and 'C<user_id>' (bipartite='observation'); every edge carries its pair
      count in the 'weight' attribute. The graph is empty when `df` holds no
      identification records.
    """

    # Collect the per-row tables and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and growing a frame inside a
    # loop is quadratic anyway.
    frames = []
    for idents, owner in zip(df['identifications'], df['user_id']):
        if pd.isnull(idents):
            continue
        # SECURITY NOTE(review): eval() executes whatever expression the CSV
        # cell contains. Only run this on trusted exports; if the cells are
        # plain Python literals, ast.literal_eval is the safe replacement --
        # confirm against the data format before switching.
        df_temp = pd.DataFrame(eval(idents))
        if df_temp.empty:
            continue
        df_temp['target'] = owner
        frames.append(df_temp)

    bipartite_G = nx.Graph()
    if not frames:
        # No identifications at all: nothing to connect.
        return bipartite_G

    df_net = pd.concat(frames, ignore_index=True)
    edges = df_net[['user_id', 'target']]

    # removing loops (a user identifying their own observation)
    edges = edges[edges['user_id'] != edges['target']]

    # removing edges based on the chosen threshold: the weight of a pair is
    # the number of times it occurs
    weights = edges.groupby(by=['user_id', 'target']).size().reset_index(name='weight')
    weights = weights[weights['weight'] >= min_weight]

    for user_id, target, weight in weights.itertuples(index=False, name=None):
        # The 'P' / 'C' prefixes keep the two node sets disjoint even when the
        # same user id appears on both sides.
        pid = 'P{0}'.format(user_id)
        cid = 'C{0}'.format(target)
        bipartite_G.add_node(pid, bipartite='action')
        bipartite_G.add_node(cid, bipartite='observation')
        bipartite_G.add_edge(pid, cid, weight=int(weight))

    return bipartite_G

In [4]:
def projection_make(bip_net, projection = 'action'):
    """
    The function returns a projection of a bipartite network onto one of its
    two node types.

    Arguments:

    - bip_net -- bipartite network in which every node has a 'bipartite'
      attribute set to 'action' or 'observation'
    - projection (observation or action) -- in the case of 'action', the function returns a network where users are
    connected based on the common observations (they were working on); 'observation' returns a network of
    observations having a common list of collaborators

    Returns:
    - the graph built by networkx's projected_graph from the nodes whose
      'bipartite' attribute equals `projection`

    Raises:
    - ValueError -- if `projection` is neither 'action' nor 'observation'
      (previously such a call silently returned None)
    """

    valid_kinds = ('action', 'observation')
    if projection not in valid_kinds:
        raise ValueError(
            "projection must be one of {}, got {!r}".format(valid_kinds, projection)
        )

    # Keep only the nodes of the requested type; the projection links two of
    # them whenever they share a neighbour in the bipartite network.
    selected_nodes = [
        node for node in bip_net.nodes()
        if bip_net.nodes[node]['bipartite'] == projection
    ]
    return nx.bipartite.projection.projected_graph(bip_net, selected_nodes)

In [132]:
def descriptive_net_analysis(net):

    """
    The function prints basic descriptive characteristics of a net: number of
    nodes and edges, density, mean and standard deviation of betweenness
    centrality and of degree centrality, and the average clustering
    coefficient.

    Arguments:
    - net -- networkx graph

    Returns nothing; the statistics are only printed. For a network without
    nodes only the node/edge counts are printed, because the remaining
    statistics are undefined (average clustering would divide by zero).
    """

    nodes = net.number_of_nodes()
    edges = net.number_of_edges()
    print("number of nodes: {}, number of edges: {}".format(nodes, edges))

    if nodes == 0:
        print("the network is empty; no further statistics are defined")
        return

    density = nx.density(net)

    betweenness_values = list(nx.betweenness_centrality(net).values())
    avg_beetw = np.mean(betweenness_values)
    sd_beetw = np.std(betweenness_values)

    # degree_centrality is the normalised degree, not the raw degree, so its
    # mean is a fraction between 0 and 1; the label below says so explicitly
    # (it used to read "average degree", which was misleading).
    degree_centrality_values = list(nx.degree_centrality(net).values())
    avg_degree = np.mean(degree_centrality_values)
    sd_degree = np.std(degree_centrality_values)

    clustering_coef = nx.average_clustering(net)

    print("density of the network: {0:.2f}".format(density))
    print("average beetweenness of the network: {}, sd: {}".format(avg_beetw, sd_beetw))
    print("average degree centrality of the network: {}, sd: {}".format(avg_degree, sd_degree))
    print("clustering coefficient:", clustering_coef)

In [None]:
# Build one bipartite network per city dataframe; this is slow because every
# identification record of every row is parsed.
g_sf = bipartite_net_make(df=sf_2019, min_weight=2)
g_london = bipartite_net_make(df=london_2019, min_weight=2)
g_la = bipartite_net_make(df=la_2019, min_weight=2)

In [128]:
# Project the London bipartite network onto its 'action' nodes
london_proj = projection_make(bip_net=g_london, projection='action')

In [129]:
# Print the descriptive statistics of the London projection
descriptive_net_analysis(net=london_proj)

number of nodes: 206, number of edges: 7308
density of the network: 0.34610466493014447
average beetweenness of the network: 0.003176117712062328, sd: 0.010756374208345182
average degree of the network: 0.3461046649301445, sd: 0.2584748125781289
clustering coefficient: 0.9051918436132372


In [133]:
# Project the San Francisco bipartite network onto its 'action' nodes
sf_proj = projection_make(bip_net=g_sf, projection='action')

In [134]:
# Print the descriptive statistics of the San Francisco projection
descriptive_net_analysis(net=sf_proj)

number of nodes: 666, number of edges: 26153
density of the network: 0.12
average beetweenness of the network: 0.0013451489355103825, sd: 0.006584434393223758
average degree of the network: 0.11810156020682337, sd: 0.13695390689567652
clustering coefficient: 0.8030974330380027


In [60]:
# Project the Los Angeles bipartite network onto its 'action' nodes
la_proj = projection_make(bip_net=g_la, projection='action')

In [135]:
# Print the descriptive statistics of the Los Angeles projection
descriptive_net_analysis(net=la_proj)

number of nodes: 612, number of edges: 35059
density of the network: 0.19
average beetweenness of the network: 0.0013377026733886754, sd: 0.004284953119000909
average degree of the network: 0.18751537712739214, sd: 0.1801642093316199
clustering coefficient: 0.808378623504858
