### Message

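This function formats a warning as just its message text followed by a newline; the other arguments (category, file name, line number, source line) are ignored.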

In [2]:
def format_Warning(message, category, filename, lineno, line=''):
    """Render a warning as its bare message text terminated by a newline.

    Only ``message`` is used; ``category``, ``filename``, ``lineno`` and
    ``line`` are accepted but ignored.

    NOTE(review): the parameter list matches ``warnings.formatwarning``, so
    this is presumably installed as a replacement for it -- the assignment is
    not visible here, confirm against the caller.

    Args:
        message: The warning message or warning instance.
        category: Unused.
        filename: Unused.
        lineno: Unused.
        line: Unused.

    Returns:
        ``str(message)`` with a trailing newline appended.
    """
    return f"{message!s}\n"


### Read data from file

Reads a UTF-8 encoded JSON file and returns its parsed content.

In [3]:
def read_data(filename):
    """Load a UTF-8 encoded JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (the decoded JSON value).
    """
    with open(filename, encoding="utf8") as handle:
        return json.load(handle)

In [None]:
def get_vip(followers):
    """Map a follower count to a VIP level.

    Args:
        followers: Number of followers of a user.

    Returns:
        2 when ``followers`` is above 200000, 1 when it is above 20000,
        otherwise 0. Both thresholds are exclusive.
    """
    # Checked from the highest threshold down; the first one exceeded wins.
    for level, threshold in ((2, 200000), (1, 20000)):
        if followers > threshold:
            return level
    return 0

# 1. Network utilities functions

### 1.1 Network creation

####  >> `init_network(data)`

---------------------------------

- **data** = `([{tweet1},{tweet2}])` json dataset

**Result** = the newly created network

In [5]:
def init_network(data):
    """Create a new undirected graph and fill it from the tweet dataset.

    Args:
        data: The tweet dataset, handed unchanged to ``build_network``.

    Returns:
        The ``nx.Graph`` instance after ``build_network`` has populated it.
    """
    network = nx.Graph()
    # build_network fills the graph in place; its return value is not used.
    build_network(data, network)
    return network

In [6]:
def _as_user_list(value):
    """Return *value* as a list of users: a bare name is wrapped, ""/None becomes []."""
    if value is None or value == "":
        return []
    if isinstance(value, str):
        return [value]
    return value


def build_network(data, G, followers_path="../data/final/user_followers.json"):
    """Populate ``G`` in place with the interaction network of the tweets.

    For every tweet, edges are added (through ``_addEdge``) between the author
    and the users they retweeted, mentioned, replied to or quoted with weight
    1, and between those referenced users with weight 0.5. The author node is
    then annotated through ``_addAttribute``. Afterwards every node gets
    default attributes where missing, the users listed in the followers file
    are annotated, and nodes without any edge are removed.

    Args:
        data: Iterable of tweet dicts. The keys read are ``user``,
            ``retweets``, ``mentions``, ``reply_to``, ``quote_to``,
            ``user_classification`` and ``vip``.
        G: Graph to fill; it is modified in place.
        followers_path: JSON file, loaded with ``read_data``, holding entries
            with the keys ``user`` and ``followers``. Defaults to the path
            that was previously hard-coded.

    Returns:
        None.
    """
    users_without_tweet_reference = read_data(followers_path)

    for tweet in data:
        user = tweet['user']
        retweets = tweet['retweets']
        # Users that were retweeted are not counted again as mentions.
        mentions = [name for name in tweet['mentions'] if name not in retweets]
        # A single user name must be wrapped in a list; otherwise the edge
        # helper would iterate over the characters of the string.
        reply_to = _as_user_list(tweet['reply_to'])
        quote_to = _as_user_list(tweet['quote_to'])

        _addEdge(user, retweets, 1, G, "retweet")
        _addEdge(user, mentions, 1, G, "mention")
        _addEdge(user, reply_to, 1, G, "reply_to")
        _addEdge(user, quote_to, 1, G, "quote_to")
        _addEdge(retweets, mentions, 0.5, G, "mention")
        _addEdge(mentions, reply_to, 0.5, G, "reply_to")
        _addEdge(mentions, quote_to, 0.5, G, "quote_to")

        _addAttribute(user, tweet['user_classification'], tweet['vip'], G)

    # Index the followers entries by user name so that each node needs a
    # single lookup instead of a scan of the whole list. As with the scan,
    # the last entry wins when a user is listed more than once.
    followers_entry = {entry['user']: entry for entry in users_without_tweet_reference}

    for node in G.nodes():
        attributes = G.nodes[node]
        if 'classification' not in attributes:
            attributes['classification'] = 'Neutral'
            attributes['vip'] = 0
        entry = followers_entry.get(node)
        if entry is not None:
            # NOTE(review): this also overwrites a classification obtained from
            # the user's own tweets; presumably the file only lists users that
            # have no tweets in the dataset -- confirm.
            attributes['classification'] = 'Neutral'
            attributes['vip'] = get_vip(entry['followers'])

    isolated = [node for node, degree in G.degree() if degree == 0]
    G.remove_nodes_from(isolated)

### 1.2 Weighted network

####  >> `_addEdge(t1, t2, w, G, types)`

-------------------------------

This function takes as input two variables (t1, t2), a numeric weight (w), the graph (G) and the interaction type (types).
- **t1** = `(list)` or `(string)`
- **t2** = `(list)` or `(string)`
- **w** = `(int or float)`
- **G** = `(Networkx Graph)`
- **types** = `(string)`

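The nodes (t1 and t2) are linked to each other. The weight of an edge is the sum of `w` over every occurrence of an interaction between its two nodes. The value of `w` depends on the type of interaction: in `build_network` it is 1 for the links between the author of a tweet and the users they retweet, mention, reply to or quote, and 0.5 for the links among those referenced users.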
**Result** = the function add the weighted edges to the network

In [7]:
def _addEdge(t1, t2, w, G, types):
    if t1:
        if isinstance(t1, list):
            for x in t1:
                for y in t2:
                    if G.has_edge(x, y):
                        G[x][y]["weight"] += w
                    else:
                        if x != y:
                            G.add_edge(x, y, weight = w, interaction = types, weigth_classification=0)
        else:            
            for x in t2:
                if G.has_edge(t1, x):
                    G[t1][x]["weight"] += w
                else:
                    if x != t1:
                        G.add_edge(t1, x, weight = w, interaction = types, weigth_classification=0)

In [20]:
def _addAttribute(user, clss, vp, G):
    n_clss = ''
    if clss < -0.5:
        n_clss = 'Pros'
    elif clss >= -0.5 and clss <= 0.5:
        n_clss = 'Neutral'
    elif clss > 0.5:
        n_clss = 'Cons'
        
    G.add_node(user, classification = n_clss, vip = vp, cont_classification = clss)

### 1.3 Number of nodes and edges 

#### >> `network_node_edge(G, name)`

------------------------

- **G** = `(NetworkX Graph)`
- **name** = `(string)`

**Result** = prints the total number of nodes and edges of the network G

In [9]:
def network_node_edge(G, name):
    """Print the number of nodes and edges of ``G`` under a header naming it.

    Args:
        G: Graph exposing ``number_of_nodes()`` and ``number_of_edges()``.
        name: Network name shown in the header line.

    Returns:
        None; the information is written to standard output.
    """
    print(f"La rete {name} ha:\n")
    counts = (
        ("* Numero di nodi = ", G.number_of_nodes()),
        ("* Numero di Edge = ", G.number_of_edges()),
    )
    for label, count in counts:
        print(label, count)

### 1.4 Information about the network

#### >> `network_info(G, name, multigraph=True)`

------
 
This function takes as input:
- **G** = `(NetworkX Graph)`
- **name** = `(string)` network name
- **multigraph** = `(Boolean)` 

**Result** = the function prints the following information about the network:
- number of nodes;
- number of edges;
- average degree;
- transitivity;
- average clustering;
- number of different connected components;
- diameter (of the largest connected component);
- density;

In [10]:
def network_info(G, name, multigraph=True):
    """Print a summary of structural statistics of the network ``G``.

    The report contains the number of nodes and edges, the average degree,
    transitivity, average clustering, the number of connected components, the
    diameter of the largest connected component and the density. When ``G`` is
    connected the average shortest path length is printed as well.

    Args:
        G: Graph to describe.
        name: Network name shown in the report header.
        multigraph: When true (the default) transitivity and average
            clustering are computed. When false both are skipped and shown
            as ``-``.

    Returns:
        None; the report is written to standard output.

    Raises:
        ZeroDivisionError: If ``G`` has no nodes (the average degree divides
            by the number of nodes).
    """
    N = G.number_of_nodes()
    L = G.number_of_edges()
    # Equals 2*L/N for an undirected graph.
    g_av_degree = sum(dict(G.degree()).values()) / float(len(G))

    # Placeholders shown in the report when the two metrics are skipped, so
    # the report never refers to a value that was not computed.
    g_transitivity = "-"
    g_av_clustering = "-"
    if multigraph:
        g_transitivity = nx.transitivity(G)  # fraction of all possible triangles present in G
        g_av_clustering = nx.average_clustering(G)

    n_conn_comp = nx.number_connected_components(G)
    # The diameter is measured on the largest connected component only.
    largest_component = max(nx.connected_components(G), key=len)
    gc_diam = nx.diameter(G.subgraph(largest_component))
    density = nx.density(G)  # or (2*L)/(N*(N-1))

    print(f"Network information of {name}:\n",
          f"Nodes: \t{N}\n",
          f"Edges: \t{L}\n\n",
          f"Average degree: \t\t{g_av_degree}\n",
          f"Transitivity: \t\t{g_transitivity}\n",
          f"Average clustering: \t{g_av_clustering}\n\n",
          f"Connected components: \t{n_conn_comp}\n",
          f"Diameter: \t{gc_diam}\n\n",
          f"Density: \t{density}\n")

    if nx.is_connected(G):
        av_short_p = nx.average_shortest_path_length(G)
        print(f"Average length of shortest paths: {av_short_p}")

### Top score

#### >> `top_score_print(n, dict_centr, order)`

- **n**= `(int)` 
- **dict_centr** = `(dict)`
- **order** = `(boolean)`

In [11]:
def top_score_print(n, dict_centr, order):
    """Print the first ``n`` entries of ``dict_centr`` ranked by value.

    Each entry is printed on its own line as the key, a tab and the value.

    Args:
        n: Number of entries to print.
        dict_centr: Mapping from a key (e.g. a node) to its score.
        order: Passed as ``reverse`` to the sort; true ranks the highest
            scores first, false the lowest.

    Returns:
        None; the ranking is written to standard output.
    """
    ranking = sorted(dict_centr.items(), key=lambda pair: pair[1], reverse=order)
    for key, score in ranking[0:n]:
        print(key, '\t', score)

# 2. Plotter Graphs Utilities Functions


### 2.1 Plotter function

#### >> `plot_dist(G)`

---------

- **G** = `(NetworkX Graph)`

**Result** = prints 3 graphs:

-  **Scatter plot of the degree distribution**
-  **CDF**
-  **CCDF**

In [12]:
def plot_dist(G):
    """Show three side-by-side plots describing the degree distribution of ``G``.

    The panels are: the number of nodes per degree on log-log axes, and the
    CDF and CCDF drawn by a ``powerlaw.Fit`` object.

    NOTE(review): the fit is run on the per-degree node counts shifted by one
    (not on the raw degree sequence), and ``xmax`` is derived from the largest
    count -- confirm this is the intended input for the fit.

    Args:
        G: Graph whose node degrees are analysed.

    Returns:
        None; the figure is displayed with ``plt.show()``.
    """
    node_degrees = [d for _, d in G.degree()]
    xmin = min(node_degrees)
    # degree[k] is the number of nodes that have degree k.
    degree = np.bincount(node_degrees)
    fit = powerlaw.Fit(
        np.array(degree) + 1,
        fit_method='KS',
        xmin=xmin,
        xmax=max(degree) - xmin,
        discrete=True,
    )

    plt.figure(figsize=(15, 4))

    # Panel 1: degree distribution.
    plt.subplot(1, 3, 1)
    plt.title("Degree Distribution")
    plt.plot(range(len(degree)), degree, 'b.')
    plt.loglog()
    plt.xlabel('Degree')
    plt.ylabel('P(k) (#Nodes)')

    # Panel 2: cumulative distribution function.
    plt.subplot(1, 3, 2)
    fit.plot_cdf()
    plt.xlabel("Degree")
    plt.ylabel('CDF')

    # Panel 3: complementary cumulative distribution function.
    plt.subplot(1, 3, 3)
    fit.plot_ccdf()
    plt.ylabel('CCDF')
    plt.xlabel('Degree')

    plt.tight_layout()
    plt.show()

# 3. Link weights

### 3.1 Edge weight function

#### >> `edges_weight(G)`

---

- **G** = `(NetworkX Graph)`

**Result** = 3 outputs:
- **edges** = edges `(node1 - node2)`
- **weights** = `(list[int])` edge weight
- **edge_colors** = `(list[int])`

In [13]:
def edges_weight(G):
    """Return the edges of ``G`` that carry a ``weight`` and their weights.

    As a side effect, the weight of the edge between ``"perchetendenza"`` and
    ``"porquetendencia"`` is set to 0 in ``G`` when that edge exists.

    NOTE(review): the reset happens after the weights have been collected, so
    the values returned by this call still hold the previous weight of that
    edge -- confirm this ordering is intended.

    Args:
        G: Graph whose ``weight`` edge attribute is read.

    Returns:
        A tuple ``(edges, weights, edge_colors)``: ``edges`` is a tuple of
        edge keys, ``weights`` a list with the matching weights and
        ``edge_colors`` the same weights as a tuple. All three are empty when
        no edge has a ``weight`` attribute.
    """
    weight_by_edge = nx.get_edge_attributes(G, 'weight')
    if weight_by_edge:
        edges, weights = zip(*weight_by_edge.items())
    else:
        # zip(*...) yields nothing to unpack for an empty mapping.
        edges, weights = (), ()
    edge_colors = weights

    if G.has_edge("perchetendenza", "porquetendencia"):
        G["perchetendenza"]["porquetendencia"]['weight'] = 0
    return edges, list(weights), edge_colors

### 3.2 Prints edges and their weight

#### >> `info_edges_weight(G, edges)`

---

- **G** = `(NetworkX Graph)`
- **edges** = `list[(node1, node2)]`

**Result** = prints a list of edges with their weight

In [14]:
def info_edges_weight(G, edges):
    """Print every listed edge together with its weight.

    Edges whose weight is falsy (for example 0) are skipped.

    Args:
        G: Graph (or nested mapping) where ``G[u][v]['weight']`` is the
            weight of the edge between ``u`` and ``v``.
        edges: Iterable of ``(u, v)`` pairs to report.

    Returns:
        None; the edges are written to standard output.
    """
    for source, target in edges:
        weight = G[source][target]['weight']
        if not weight:
            continue
        print(source, ' ', target, ' ', weight)
    

# 4. Giant Component


#### >> `giant_component(G)`

---

- **G** = `(NetworkX Graph)`

**Result** = returns network G's giant component
- **G0** = `(NetworkX Graph)`

In [15]:
def giant_component(G):
    """Return the subgraph of ``G`` induced by its largest connected component.

    Args:
        G: Graph to analyse.

    Returns:
        ``G.subgraph(...)`` of the connected component with the most nodes;
        among components of equal size the first one found is used.
    """
    components = list(nx.connected_components(G))
    # Largest component first.
    components.sort(key=len, reverse=True)
    return G.subgraph(components[0])

# 5. Overlap


#### >> `node_overlap(G)`

---

- **G** = `(NetworkX Graph)`

**Result** = the graph G, with the neighborhood overlap stored in the `overlap` attribute of every edge

In [16]:
def node_overlap(G):
    """Store the neighborhood overlap of every edge in its ``overlap`` attribute.

    For an edge ``(u, v)`` the overlap is the number of neighbors shared by
    ``u`` and ``v`` divided by the number of nodes that are neighbors of at
    least one of them.

    Args:
        G: Graph to annotate in place.

    Returns:
        The same graph ``G``.
    """
    for u, v in G.edges():
        neighbours_u = set(G.neighbors(u))
        neighbours_v = set(G.neighbors(v))
        shared = neighbours_u.intersection(neighbours_v)
        combined = neighbours_u.union(neighbours_v)
        G[u][v]['overlap'] = len(shared) / len(combined)
    return G

# 6. Time splitting function

### 6.1 Network temporal division 

#### >> `get_snapshot(path, ranges)`

---

- **path** = `(string)` path of the json data file
- **ranges** = `([[date0, date1, ...], [date2, date3, ...], ...])` list of lists of dates (`YYYY-MM-DD` strings); each inner list defines one temporal frame of the network

**Result** = `([[{tweets_in_range_1}],[{}],...])` a list with one entry per range, each being the list of the tweets whose date falls in that range

In [17]:
def get_snapshot(path, ranges):
    """Split the tweets stored in a JSON file into one list per date range.

    A tweet belongs to a range when the part of its ``date`` field that
    precedes the ``"T"`` separator is contained in that range.

    Args:
        path: Path of the UTF-8 encoded JSON file holding the list of tweets.
        ranges: Iterable of date collections; each one is tested with ``in``
            against the date part of every tweet.

    Returns:
        A list with one entry per element of ``ranges``, in the same order;
        each entry is the list of tweets (in file order) that fall in it.
    """
    # Explicit encoding so that reading does not depend on the platform
    # default, consistently with read_data.
    with open(path, encoding="utf8") as json_file:
        data = json.load(json_file)

    # Extract the day of each tweet once instead of once per range.
    tweets_by_day = [(tweet['date'].split("T")[0], tweet) for tweet in data]

    range_snap = []
    for r in ranges:
        range_snap.append([tweet for day, tweet in tweets_by_day if day in r])
    return range_snap

In [18]:
def format_coms(coms):
    """Invert a community mapping into a member -> community index mapping.

    Args:
        coms: Mapping whose keys contain the community index as the second
            ``_``-separated field (e.g. ``"com_3"``) and whose values are
            iterables of members, or ``None`` for communities to skip.

    Returns:
        A dict mapping each member to the integer index of its community.
        When a member appears in several communities, the last one iterated
        wins.
    """
    obj = {}
    for key, members in coms.items():
        # Identity test: only a real None marks a community to skip.
        if members is not None:
            community = int(key.split("_")[1])
            for member in members:
                obj[member] = community
    return obj

In [None]:
# Date windows (YYYY-MM-DD strings) used to split the tweets into temporal
# frames; each window is labelled with a match and a progressive number.
ranges = [
    [f"2021-06-{day}" for day in range(10, 15)],                      # Italy - Turkey (1)
    [f"2021-06-{day}" for day in range(15, 20)],                      # Italy - Switzerland (2)
    [f"2021-06-{day}" for day in range(20, 25)],                      # Italy - Wales (3)
    [f"2021-06-{day}" for day in range(25, 30)],                      # Italy - Austria (4)
    ["2021-06-30"] + [f"2021-07-{day:02d}" for day in range(1, 5)],   # Italy - Belgium (5)
    [f"2021-07-{day:02d}" for day in range(5, 10)],                   # Italy - Spain (6)
    [f"2021-07-{day}" for day in range(10, 13)],                      # Italy - England (7)
]