# Code of Network Analytics Final Course Project

In [None]:
# Importing Libraries

import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from networkx.algorithms.community import girvan_newman, modularity
import itertools
import matplotlib.gridspec as gridspec
from graspologic.plot import heatmap
from sklearn.metrics.pairwise import cosine_similarity
import statsmodels.api as sm

# Reading the data

In [None]:
# Build the URL from adjacent string literals: a triple-quoted string wrapped
# across two lines would embed a newline in the middle of the address.
url = ('https://raw.githubusercontent.com/simoneSantoni/net-analysis'
       '-smm638/9114bedfde4195f2e5ed4fa5c868e3d41b26f939/'
       'finalCourseProject/trading_floor.xml')
# A timeout keeps the notebook from hanging on a stalled connection.
page = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page, which would
# only produce empty node and edge tables further down.
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")

In [None]:
# Collect every <node> element of the downloaded document
data_nodes = soup.find_all('node')

# One list per column: the running position of the node, then the integer
# values stored under the data keys d0 (ai), d1 (x_pos) and d2 (y_pos)
indx = list(range(len(data_nodes)))
ai = [int(node.find('data', attrs={"key": "d0"}).text) for node in data_nodes]
x_pos = [int(node.find('data', attrs={"key": "d1"}).text) for node in data_nodes]
y_pos = [int(node.find('data', attrs={"key": "d2"}).text) for node in data_nodes]

df_nodes = pd.DataFrame({'node': indx, 'ai': ai, 'x_pos': x_pos, 'y_pos': y_pos})

In [None]:
# Collect every <edge> element of the downloaded document
data_edges = soup.find_all('edge')

# Each edge becomes a [source, target] pair of integer node ids
edges = [[int(edge.get('source')), int(edge.get('target'))]
         for edge in data_edges]
df_edges = pd.DataFrame(edges, columns=['source', 'target'])

In [None]:
# Persist both tables (without the index column) so later cells can reload them
for table, csv_name in ((df_nodes, 'Nodes.csv'), (df_edges, 'Edges.csv')):
    table.to_csv(csv_name, index=False)

# Creating the Networkx Graph

In [None]:
# Reload the node and edge tables from the CSV files written above
df_nodes, df_edges = (pd.read_csv(csv_name)
                      for csv_name in ('Nodes.csv', 'Edges.csv'))

In [None]:
# Populating the graph with nodes and edges.
# from_pandas_edgelist returns a new graph, so no empty nx.Graph() has to be
# created beforehand.  Only nodes that appear in at least one edge end up in
# the graph.
graph = nx.from_pandas_edgelist(df_edges, source='source', target='target')

In [None]:
# Assigning additional attributes to the graph's nodes.
# Values are looked up in df_nodes (keyed by its 'node' column) so this cell
# also works when the notebook is resumed from Nodes.csv and the in-memory
# ai / x_pos / y_pos lists of the parsing cell do not exist.
node_table = df_nodes.set_index('node')
for node in graph.nodes:
    for attribute in ('ai', 'x_pos', 'y_pos'):
        # int() stores plain Python integers on the graph
        graph.nodes[node][attribute] = int(node_table.at[node, attribute])

# Analyse the Networkx Graph

In [None]:
# Network size: number of nodes and number of edges.
# nx.info() was removed in NetworkX 3.0, so the counts are printed explicitly.
print('Number of nodes:', graph.number_of_nodes())
print('Number of edges:', graph.number_of_edges())

# Network density = number of edges divided by the number of possible edges
print('The density of the Network', round(nx.density(graph),3))

# Shortest paths analysis
print('The Average Path Length of the Network:',
      round(nx.average_shortest_path_length(graph),3))
print('The Diameter of the Network:',
      nx.algorithms.distance_measures.diameter(graph))

# Checking for isolates (nodes without any edge)
isolates = list(nx.isolates(graph))
print('The number of isolates is', len(isolates))

# Checking if the network is bipartite (2-mode network)
print ("Is the Networks bipartite:", nx.is_bipartite(graph))

# Average degree of the nodes in the network.
# `nodes` and `degrees` are parallel lists in the graph's node order.
degrees = [d for n,d in graph.degree()]
nodes = [n for n,d in graph.degree()]
df_nodes_degree = pd.DataFrame({'Node': nodes, 'Degree': degrees})
Average_Degree = sum(df_nodes_degree['Degree'])/len(df_nodes_degree['Node'])
print("The Average Degree of the Network:", Average_Degree)

# Analyse the distribution of Ai Preferences in the Network

In [None]:
# Understanding the Ai preference in the network.
# The column is taken from df_nodes rather than the in-memory `ai` list so the
# cell still runs when the notebook is resumed from Nodes.csv.
df_ai = df_nodes[['ai']].rename(columns={'ai': 'Ai Preferences'})
df_ai.describe()

In [None]:
# Plotting the preferences distribution: plain histogram on the left,
# cumulative histogram on the right
fig0, (ax0, ax1) = plt.subplots(1, 2, figsize=(10, 5))

ax0.hist(df_ai, bins=11, histtype='bar', align='mid', rwidth=0.9)
ax0.set_xlabel("Ai preferences", fontsize=10)

ax1.hist(df_ai, cumulative=True, histtype='bar', align='mid', rwidth=0.9)
ax1.set_xlabel("Cumulative Ai preferences", fontsize=10)

# Styling shared by both panels
for panel in (ax0, ax1):
    panel.set_ylabel("Number of Traders", fontsize=10)
    panel.spines['right'].set_visible(False)
    panel.spines['top'].set_visible(False)

# Save figure
fig0.savefig("AI_Distribution.png")

plt.show()

# Computing and plotting the degree distribution

In [None]:
# Degrees of all nodes, largest first
k_g = sorted([d for n, d in graph.degree()], reverse=True)

# p_k[0]: distinct degree values in ascending order
# p_k[1]: number of nodes that have each of those degrees
p_k = np.unique(k_g, return_counts=True)

# cp_k[1]: for each distinct degree k_i, the number of nodes whose degree is
# >= k_i, obtained as a reverse running sum of the per-degree counts.
# (The first-occurrence index of k_i in the descending list only counts the
# nodes with a degree strictly greater than k_i, which would not match the
# Pr(k >= k_i) axis label.)
cp_k = (p_k[0], p_k[1][::-1].cumsum()[::-1])

# Plotting the degree distribution and the cumulative degree distribution
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(9, 4))

# point-to-point data
ax0.bar(p_k[0], p_k[1]/len(k_g), alpha=1)
ax0.set_title("Point-to-point probability")
ax0.set_ylabel("$Pr(k = k_{i})$")
ax0.set_xlabel("Degree $k$")
ax0.spines['right'].set_visible(False)
ax0.spines['top'].set_visible(False)

# cumulative probability
ax1.bar(cp_k[0], cp_k[1]/len(k_g), alpha=1)
ax1.set_title("Cumulative probability")
# raw string: "\g" is not a valid escape sequence in a normal string literal
ax1.set_ylabel(r"$Pr(k \geq k_{i})$")
ax1.set_xlabel("Degree $k$")
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)


# Save figure
fig.savefig("Degree_Distribution.png")

plt.show()

## Centrality measures

In [None]:
# degree centrality
dc = nx.degree_centrality(graph)
# eigenvector centrality
ec = nx.eigenvector_centrality(graph)
# closeness centrality
cc = nx.closeness_centrality(graph)
# betweenness centrality
bc = nx.betweenness_centrality(graph)
# One row per node, one column per centrality measure.  Closeness is included
# as well, since it is computed above alongside the other measures.
df_dc = pd.DataFrame({'Degree_c': dc, 'Eigenvector_c': ec,
                      'Closeness_c': cc, 'Betweenness_c': bc})

In [None]:
# Descriptive statistics of the centrality measures, followed by their
# pairwise correlation matrix
for centrality_table in (df_dc.describe(), df_dc.corr()):
    print(centrality_table)

# Scatter plot matrix of the centrality measures
sns.pairplot(df_dc)
plt.show()

# Plotting the Knowledge Network

In [None]:
# Plotting the knowledge exchange graph: node size follows the node's degree,
# node colour follows the node's 'ai' attribute
fig2 = plt.figure(figsize=(20, 15))

# Attribute-free graph with the same edges and nodes, used for drawing
G = nx.Graph()
G.add_edges_from(graph.edges())
G.add_nodes_from(graph.nodes())

deg_size = [200 * degree for _, degree in G.degree()]
ai_opinion = [graph.nodes[member]['ai'] for member in G.nodes]

pos = nx.layout.kamada_kawai_layout(G)

options = {}

nx.draw_networkx(G, pos=pos, node_color=ai_opinion, node_size=deg_size,
                 cmap='coolwarm', alpha=0.7)

# Save figure
fig2.savefig("Knowledge_Exchange_Network.png")

plt.show()

# Plotting the Knowledge Network based on each trader's position on the trading floor

In [None]:
# How do traders' opinions map onto the physical layout of the trading floor?
# Every node is drawn at its (x_pos, y_pos) coordinates and coloured by 'ai'.

fig, ax = plt.subplots(1, 1, figsize=(15, 9))

# Attribute-free graph with the same edges and nodes, used for drawing
G1 = nx.Graph()
G1.add_edges_from(graph.edges())
G1.add_nodes_from(graph.nodes())

# (node, x, y) triples, then the position dictionary networkx expects
points = [(member, graph.nodes[member]['x_pos'], graph.nodes[member]['y_pos'])
          for member in graph.nodes()]
pos1 = {member: (x, y) for member, x, y in points}

ai_opinion = [graph.nodes[member]['ai'] for member in G1.nodes]

nx.draw(G1, pos=pos1, ax=ax, node_color=ai_opinion, cmap=plt.cm.Blues,
        node_size=600, edge_color='grey')
nx.draw_networkx_labels(G1, pos=pos1, font_size=8)

plt.axis("on")

# Save figure
fig.savefig("Knowledge_Exchange_In trading_floor.png")

plt.show()

# Checking modularity in our Network

## Examine Core Periphery

In [None]:
# Heatmap of the network's adjacency matrix, with nodes sorted by graspologic
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
heatmap(graph, ax=ax, cmap='coolwarm', sort_nodes=True, cbar=False)
ax.set_title('Core-Periphery Heatmap')

# Save figure
fig.savefig("Heatmap_Core_Periphery.png")

plt.show()

## Examine Communities

In [None]:
# Score the first k Girvan-Newman partitions by their modularity

solutions = girvan_newman(graph)
# number of alternative partitioning solutions to consider
k = 20
# every partition examined, in the order the algorithm yields them
com = []
# modularity score keyed by the number of communities in the partition
modularity_scores = {}
for partition in itertools.islice(solutions, k):
    solution = [sorted(members) for members in partition]
    com.append(solution)
    modularity_scores[len(solution)] = modularity(graph, solution)

In [None]:
 # plot modularity data
# Stem plot: modularity score against the number of detected communities
fig, ax = plt.subplots(1, 1, figsize=(20, 10))
pos = list(modularity_scores)
values = [modularity_scores[size] for size in pos]
ax.stem(pos, values)
ax.set_xticks(pos)
ax.set_xlabel(r'Number of communities detected')
ax.set_ylabel(r'Modularity score')
plt.show()

In [None]:
# Explore the "optimal" k

solutions = nx.algorithms.community.girvan_newman(graph)
k = 12
# Walk through the first k partitions and keep only the last one.
# modularity_scores is deliberately left untouched here so the scores
# registered earlier remain available for plotting.
for community in itertools.islice(solutions, k):
    solution = [sorted(c) for c in community]
# let's check the solution
for i, community in enumerate(solution):
    # label all members of this community in the node table in one step
    df_nodes.loc[df_nodes['node'].isin(community), 'Community'] = i
    # size of the community and density of the subgraph it induces
    print('The size of the', str(i), 'comminity is',len(community), 'its density is',
          nx.density(nx.subgraph(graph, community)))

In [None]:
# Mean 'ai' value per community.  The built-in groupby mean is used instead of
# aggregate(np.mean), which pandas 2.x flags with a FutureWarning.
com_averages = df_nodes.groupby('Community')['ai'].mean()
com_averages 

In [None]:
# Bar chart of the average 'ai' value of each community
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
# One bar per community: the positions follow the number of groups actually
# present instead of a hard-coded count of 13.
ax.bar(np.arange(len(com_averages)), com_averages)
ax.set_xlabel("Communities", fontsize=10)
ax.set_ylabel("Average Ai preferences", fontsize=10)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
plt.show()

In [None]:
# Plotting the knowledge exchange graph coloured by community; node size is
# proportional to the node's 'ai' attribute
plt.figure(figsize=(20, 15))

# Attribute-free graph with the same edges and nodes, used for drawing
G = nx.Graph()
G.add_edges_from(graph.edges())
G.add_nodes_from(graph.nodes())

deg_size = [200 * graph.nodes[member]['ai'] for member in G.nodes]

# one colour per community index
colors = ['#0343df', '#029386', '#f97306', '#01ff07', '#13eac9',
          '#cea2fd', '#087804', '#610023', '#e2ca76', '#c65102',
          '#d5b60a', '#ff474c', '#fac205']

# for every node, the colour of each community that contains it
color_map = [colors[i]
             for node in G.nodes()
             for i, members in enumerate(solution)
             if node in members]

pos = nx.layout.kamada_kawai_layout(G)

options = {}

nx.draw_networkx(G, pos=pos, node_color=color_map, node_size=deg_size,
                 alpha=0.7)

plt.show()

In [None]:
# Plotting the communities on the trading-floor layout (positions from pos1)
fig = plt.figure(figsize=(20, 15))

# Attribute-free graph with the same edges and nodes, used for drawing
G2 = nx.Graph()
G2.add_edges_from(graph.edges())
G2.add_nodes_from(graph.nodes())

# one colour per community index
colors = ['#0343df', '#029386', '#f97306', '#01ff07', '#13eac9',
          '#cea2fd', '#087804', '#610023', '#e2ca76', '#c65102',
          '#d5b60a', '#ff474c', '#fac205']

# for every node, the colour of each community that contains it
color_map = [colors[i]
             for node in G2.nodes()
             for i, members in enumerate(solution)
             if node in members]

options = {}

nx.draw_networkx(G2, pos=pos1, node_color=color_map, node_size=900,
                 alpha=0.8)

# Save figure
fig.savefig("Communitites_In_trading_floor.png")

plt.show()

# Examine similarity between nodes

In [None]:
# Dyadic comparison of connected traders: for every edge "u-v", the absolute
# difference between the 'ai' values of its two end points
similarity = {
    f"{u}-{v}": np.abs(graph.nodes[u]['ai'] - graph.nodes[v]['ai'])
    for u, v in G.edges()
}

In [None]:
# Euclidean distance between the (x_pos, y_pos) coordinates of the two end
# points of every edge, keyed "u-v"
distances = {}
for u, v in G.edges():
    x_gap = int(graph.nodes[u]['x_pos']) - int(graph.nodes[v]['x_pos'])
    y_gap = int(graph.nodes[u]['y_pos']) - int(graph.nodes[v]['y_pos'])
    distances[f"{u}-{v}"] = np.sqrt(np.square(x_gap) + np.square(y_gap))

In [None]:
# One row per edge: 'Similarity' and 'Edges' from the similarity dictionary,
# 'Distance' from the distances dictionary (both were filled in edge order)
df1 = pd.DataFrame({'Distance': list(distances.values())})
df2 = pd.DataFrame({'Similarity': list(similarity.values()),
                    'Edges': list(similarity.keys())})
df = df2.join(df1)

In [None]:
# Ordinary least squares of 'Similarity' on 'Distance' plus an intercept
Y = df['Similarity']
X = sm.add_constant(df['Distance'])
lm = sm.OLS(Y, X).fit()
print(lm.summary())

# Diffusion and Cascading Behavior

To run the diffusion process to completion, we use a while loop that ends when the number of adopters stops growing (i.e. adopters_before = adopters_after). Each iteration increments a stage counter and plots the trading floor as it stands after that iteration.

In [None]:
# Work on a copy of the graph so the diffusion test leaves `graph` untouched
dg = graph.copy()

for member in dg.nodes():
    attributes = dg.nodes[member]
    # 'a': pay-off of adopting the new behavior (ai scaled by 1/10)
    attributes['a'] = graph.nodes[member]['ai']/10
    # 'b': pay-off of not adopting the new behavior
    attributes['b'] = 1 - attributes['a']
    # nobody has adopted at the start
    attributes["adopting"] = 0

In [None]:
# Creating a function to find diffussion outcome based on threshold

def diffusion_threshold(thres):
    """Simulate the diffusion seeded by every node whose 'a' pay-off is >= thres.

    Works on the module-level graph ``dg``: all 'adopting' flags are reset,
    the seed nodes are switched on, and the graph is swept repeatedly until a
    sweep produces no new adopter.  A non-adopting node adopts when
    ``p * a > (d - p) * b``, where ``p`` is its number of adopting neighbours
    and ``d`` its degree.  Flags are updated in place, so a node that adopts
    during a sweep already counts for the nodes visited after it in the same
    sweep.  The network is drawn (positions from ``pos1``) before the first
    sweep and after every sweep.

    Parameters
    ----------
    thres : number
        Minimum 'a' value a node needs to be an early adopter.

    Returns
    -------
    None
        The outcome is stored in ``dict_diffusion_thres`` under the key
        ``'Threshold ' + str(thres)`` with the entries 'Number_of_Stages'
        (sweeps executed, including the final sweep without new adopters),
        'Early_Adopters' and 'Final_Adopters' (each adopter counted once).
    """

    def _draw_adoption_state():
        # adopters are drawn in orange, everybody else in white
        colors = ['orange' if dg.nodes[n]['adopting'] == 1 else 'white'
                  for n in dg.nodes()]
        nx.draw(dg, pos=pos1, with_labels=True, node_color=colors)
        plt.show()

    # Start every run from a clean slate
    for node in dg.nodes:
        dg.nodes[node]["adopting"] = 0

    # Early adopters: nodes whose own pay-off already reaches the threshold
    early_adopters = [n for n in dg.nodes() if dg.nodes[n]['a'] >= thres]
    for adopter in early_adopters:
        dg.nodes[adopter]['adopting'] = 1

    # Every adopter so far, each listed exactly once
    final_adopters = list(early_adopters)
    time = 0

    _draw_adoption_state()

    # Without any early adopter nobody has an adopting neighbour, so there is
    # nothing to simulate.
    spreading = bool(early_adopters)
    while spreading:
        print('-----------------------------------------------------')

        # Adopters gained in this sweep only
        new_adopters = []
        for focal in dg.nodes():
            if dg.nodes[focal]['adopting'] == 1:
                continue
            # p: adopting neighbours, d: all neighbours
            p = sum(dg.nodes[nbr]['adopting'] for nbr in dg.neighbors(focal))
            d = dg.degree(focal)
            a_payoff = p * dg.nodes[focal]['a']
            b_payoff = (d - p) * dg.nodes[focal]['b']
            if a_payoff > b_payoff:
                dg.nodes[focal]['adopting'] = 1
                new_adopters.append(focal)

        time += 1
        final_adopters.extend(new_adopters)
        print('In stage', str(time), 'we have',
              str(len(final_adopters)),'adopters.')

        _draw_adoption_state()
        print(final_adopters)

        # Stop after the first sweep that changes nothing
        spreading = bool(new_adopters)

    print(thres)
    dict_diffusion_thres['Threshold '+str(thres)] = {'Number_of_Stages': time,
                                                     'Early_Adopters': len(early_adopters),
                                                     'Final_Adopters': len(final_adopters) }

In [None]:
# Run the threshold-seeded diffusion for each threshold, printing a separator
# line between consecutive runs; results accumulate in dict_diffusion_thres
dict_diffusion_thres = {}
for run_number, threshold_value in enumerate((1, 0.9, 0.8, 0.7, 0.6, 0.5)):
    if run_number > 0:
        print("=======================================")
    diffusion_threshold(threshold_value)

In [None]:
# Creating a function to find diffussion outcome based on threshold

def diffusion_nodes(adopt,name):
    """Simulate the diffusion seeded by an explicit collection of nodes.

    Works on the module-level graph ``dg``: all 'adopting' flags are reset,
    the nodes in ``adopt`` are switched on, and the graph is swept repeatedly
    until a sweep produces no new adopter.  A non-adopting node adopts when
    ``p * a > (d - p) * b``, where ``p`` is its number of adopting neighbours
    and ``d`` its degree.  Flags are updated in place, so a node that adopts
    during a sweep already counts for the nodes visited after it in the same
    sweep.  The network is drawn (positions from ``pos1``) before the first
    sweep and after every sweep.

    Parameters
    ----------
    adopt : iterable of nodes of ``dg``
        The early adopters.  The argument itself is not modified.
    name : object
        Label of the run; ``str(name)`` is the key of the stored result.

    Returns
    -------
    None
        The outcome is stored in ``community_diffusion[str(name)]`` with the
        entries 'Number_of_Stages' (sweeps executed, including the final
        sweep without new adopters), 'Early_Adopters' and 'Final_Adopters'
        (each newly won adopter counted once).
    """

    def _draw_adoption_state():
        # adopters are drawn in orange, everybody else in white
        colors = ['orange' if dg.nodes[n]['adopting'] == 1 else 'white'
                  for n in dg.nodes()]
        nx.draw(dg, pos=pos1, with_labels=True, node_color=colors)
        plt.show()

    # Start every run from a clean slate
    for node in dg.nodes:
        dg.nodes[node]["adopting"] = 0

    # Private copy of the seed set, so the caller's collection is never touched
    early_adopters = list(adopt)
    for adopter in early_adopters:
        dg.nodes[adopter]['adopting'] = 1

    # Early adopters followed by every node won over later, each once
    final_adopters = list(early_adopters)
    time = 0

    _draw_adoption_state()

    # Without any early adopter nobody has an adopting neighbour, so there is
    # nothing to simulate.
    spreading = bool(early_adopters)
    while spreading:
        print('-----------------------------------------------------')

        # Adopters gained in this sweep only
        new_adopters = []
        for focal in dg.nodes():
            if dg.nodes[focal]['adopting'] == 1:
                continue
            # p: adopting neighbours, d: all neighbours
            p = sum(dg.nodes[nbr]['adopting'] for nbr in dg.neighbors(focal))
            d = dg.degree(focal)
            a_payoff = p * dg.nodes[focal]['a']
            b_payoff = (d - p) * dg.nodes[focal]['b']
            if a_payoff > b_payoff:
                dg.nodes[focal]['adopting'] = 1
                new_adopters.append(focal)

        time += 1
        final_adopters.extend(new_adopters)

        _draw_adoption_state()
        print(final_adopters)

        # Stop after the first sweep that changes nothing
        spreading = bool(new_adopters)

    print(name)
    community_diffusion[str(name)] = {'Number_of_Stages': time,
                                      'Early_Adopters': len(early_adopters),
                                      'Final_Adopters': len(final_adopters) }

In [None]:
# Seed one diffusion run with the members of each detected community;
# results accumulate in community_diffusion
community_diffusion = {}
for community_index, members in enumerate(solution):
    diffusion_nodes(members, f'Community {community_index}')
    print("=======================================")

In [None]:
# Threshold-seeded runs first, community-seeded runs second, in one dictionary
diffusion = {**dict_diffusion_thres, **community_diffusion}

In [None]:
# One row per diffusion run, one column per recorded outcome
difussion_Results = pd.DataFrame(diffusion).transpose()

In [None]:
# Adopters gained after the seeding stage, in absolute terms and relative to
# the number of early adopters
gained = difussion_Results['Final_Adopters'] - difussion_Results['Early_Adopters']
difussion_Results.loc[:, 'New_Adopters'] = gained
difussion_Results.loc[:, 'Percentage_Change'] = gained / difussion_Results['Early_Adopters']

In [None]:
# Display the table of diffusion outcomes (one row per run)
difussion_Results

In [None]:
# Average number of stages over all diffusion runs
difussion_Results['Number_of_Stages'].mean()