In [None]:
# Optional dependency for random name generation (install once if missing):
# pip install names -qq

import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import names

# Optional dependencies for interactive visualization (install once if missing):
# pip install -U bokeh -qq
# pip install -q holoviews -qq

# Apply seaborn's default plot styling
sns.set()

# Import the holoviews libraries and link them to the bokeh backend
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')
from bokeh.plotting import show

# Shared defaults for all graph elements: a larger square canvas, some padding, no axes
defaults = {
    'width': 750,
    'height': 750,
    'padding': 0.1,
    'xaxis': None,
    'yaxis': None,
}
hv.opts.defaults(
    *(element(**defaults) for element in (opts.EdgePaths, opts.Graph, opts.Nodes))
)

# Introduction

Welcome to your second part of the introduction to network analysis. In this session you will learn:

1. What directed networks are, and when that matters.
2. How different measures have to be calculated in directed networks.
3. What multidimensional networks are, and how they matter.
4. How to compare network measures between graphs, and with random graphs



# Directed networks

* Up to now, we did not pay attention to the direction of edges, and assumed them to be symmetric (`A->B == B->A`). This makes sense in a lot of settings, for instance when we look at co-occurrence networks.
* However, in many cases, such as friendship networks, that might not be the case (the person you name as a close friend does not necessarily think the same about you).
* In such cases, we would like to take this directionality into account, and analyse **directed networks**.

Let's look at a brief example based on data from high school students, who had to name their close friends.


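A `!` in front of a command indicates that it is run by the unix/linux system shell behind the Python notebook (e.g. `!wget ...`). The cell below performs the same download and unzip steps in plain Python instead.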


In [None]:
import requests
import zipfile
import os

# Step 1: Download the zipped high school friendship data
url = 'https://networks.skewed.de/net/highschool/files/highschool.csv.zip'
response = requests.get(url, timeout=60)  # do not hang forever if the server is unresponsive
# Fail loudly on HTTP errors; otherwise an error page would be saved as a
# "zip" file and only blow up later during extraction.
response.raise_for_status()

# Write the content to a local file
with open('highschool.csv.zip', 'wb') as f:
    f.write(response.content)

print("Download complete.")

# Step 2: Create the 'data' folder if it doesn't exist
output_folder = 'data'
os.makedirs(output_folder, exist_ok=True)

# Step 3: Unzip the file to the 'data' folder
with zipfile.ZipFile('highschool.csv.zip', 'r') as zip_ref:
    zip_ref.extractall(output_folder)  # Extracts all contents to the 'data' folder

print(f"Unzip complete. Files extracted to '{output_folder}'")



In [3]:
# wget https://networks.skewed.de/net/highschool/files/highschool.csv.zip
# unzip highschool.csv.zip


Again, here it sometimes happens that friendship is not reciprocal, so we will create a directed friendship graph.

It takes some munging to get the data right and attach the names.


In [4]:
# Read the extracted edge list into a DataFrame
edges = pd.read_csv(filepath_or_buffer='./data/edges.csv')

In [None]:
# Peek at the first five rows of the raw edge list
edges.head(5)

In [None]:
# Test the random name generator. Need only males since the study is about a male highschool in the 50s.
# Import the function by module name: a later cell rebinds the variable `names`
# to a list, after which `names.get_full_name` would fail if this cell is re-run.
from names import get_full_name

get_full_name(gender='male').replace(' ','-')

In [7]:
# Give the edge list clean column names
edge_columns = ['source', 'target', 'weight']
edges.columns = edge_columns

In [8]:
# Every node id that appears as either endpoint of an edge

nodes = set(edges.source).union(edges.target)

In [9]:
# Generate one unique random name per node, with '-' as separator.
# - The count follows len(nodes) instead of a hard-coded 70, so every node gets a name.
# - Duplicates are rejected: two nodes sharing a name would be merged into one
#   node when the graph is built from the name columns.
# - The generator is imported by module name, because the assignment below rebinds
#   `names` (the module) to a list; this keeps the cell re-runnable.
from names import get_full_name

unique_names = {}  # dict used as an insertion-ordered set
while len(unique_names) < len(nodes):
    unique_names[get_full_name(gender='male').replace(' ', '-')] = None

names = list(unique_names)

In [10]:
# Lookup table: numeric node id -> fake name

mapper = {node_id: fake_name for node_id, fake_name in zip(nodes, names)}

In [11]:
# Translate both edge endpoints from numeric ids to fake names

for endpoint in ('source', 'target'):
    edges[f'{endpoint}_name'] = edges[endpoint].map(lambda node_id: mapper[node_id])

In [12]:
# Build the directed friendship graph from the named edge list

edgelist_options = {
    'source': 'source_name',
    'target': 'target_name',
    'edge_attr': 'weight',
    'create_using': nx.DiGraph,
}
g = nx.from_pandas_edgelist(edges, **edgelist_options)


* Let's briefly plot this network to get a sense of it.
* Notice that we have edges for two years, so we can do a facet plot for every year.



In [None]:
# Quick look at the graph: small unlabeled nodes
draw_options = {'with_labels': False, 'node_size': 10}
nx.draw(g, **draw_options)


## Centrality measures

Our network is now directed, meaning a node-pair now has two different roles:

* **Ego:** The node the edge originates from.
* **Alter:** The node the edge leads to.

Consequently, most network metrics have to take this directionality into account. For example, degree centrality is now differentiated between the **in-degree** centrality (how many edges lead to the node) and the **out-degree** centrality (how many edges originate from the node).



In [14]:
# Store the weighted in- and out-degree of every node as node attributes
for attribute_name, degree_view in (('in_degree', g.in_degree), ('out_degree', g.out_degree)):
    weighted_degree = dict(degree_view(weight='weight'))
    nx.set_node_attributes(g, weighted_degree, attribute_name)


## Community Structures

Now it is getting a bit more complicated. Most community detection algorithms implemented in `NetworkX` only work with undirected networks. So, now we could do 2 things:

1. Convert the network in an undirected one.
2. Use the "edge betweenness" algorithm, the only one implemented that can handle directed networks.



In [15]:
import community.community_louvain as community_louvain

In [16]:
# Option 1: work on an undirected view of the directed graph
g_und = g.to_undirected(as_view=True)

In [17]:
# Louvain community detection is stochastic; fix the random state so that the
# partition (and the colours in the plot built from it) is reproducible across runs.
partition = community_louvain.best_partition(g_und, random_state=42)

In [None]:
# Look colours and sizes up per node, in the graph's own node order, instead of
# relying on two dicts happening to share that order.
node_colors = [partition[node] for node in g.nodes()]
node_sizes = [degree * 10 for _, degree in g.degree()]
nx.draw_kamada_kawai(g, node_color=node_colors, node_size=node_sizes)

## Introduction to the case

* Emmanuel Lazega, The Collegial Phenomenon: The Social Mechanisms of Cooperation Among Peers in a Corporate Law Partnership, Oxford University Press (2001).

### Data
This data set comes from a network study of corporate law partnership that was carried out in a Northeastern US corporate law firm, referred to as SG&R, 1988-1991 in New England. It includes (among others) measurements of networks among the 71 attorneys (partners and associates) of this firm, i.e. their strong-coworker network, advice network, friendship network, and indirect control networks. Various members' attributes are also part of the dataset, including seniority, formal status, office in which they work, gender, lawschool attended, individual performance measurements (hours worked, fees brought in), attitudes concerning various management policy options, etc. This dataset was used to identify social processes such as bounded solidarity, lateral control, quality control, knowledge sharing, balancing powers, regulation, etc. among peers.

### Setting
* What do corporate lawyers do? Litigation and corporate work.
* Division of work and interdependencies.
* Three offices, no departments, built-in pressures to grow, intake and assignment rules.
* Partners and associates: hierarchy, up or out rule, billing targets.
* Partnership agreement (sharing benefits equally, 90% exclusion rule, governance structure, elusive committee system) and incompleteness of the contracts.
* Informal, unwritten rules (ex: no moonlighting, no investment in buildings, no nepotism, no borrowing to pay partners, etc.).
* Huge incentives to behave opportunistically ; thus the dataset is appropriate for the study of social processes that make cooperation among rival partners possible.
* Sociometric name generators used to elicit coworkers, advice, and 'friendship' ties at SG&R:"Here is the list of all the members of your Firm."

The networks were created according to the following questionnaire:

* Strong coworkers network: "Because most firms like yours are also organized very informally, it is difficult to get a clear idea of how the members really work together. Think back over the past year, consider all the lawyers in your Firm. Would you go through this list and check the names of those with whom you have worked with. By "worked with" I mean that you have spent time together on at least one case, that you have been assigned to the same case, that they read or used your work product or that you have read or used their work product; this includes professional work done within the Firm like Bar association work, administration, etc."
* Basic advice network: "Think back over the past year, consider all the lawyers in your Firm. To whom did you go for basic professional advice? For instance, you want to make sure that you are handling a case right, making a proper decision, and you want to consult someone whose professional opinions are in general of great value to you. By advice I do not mean simply technical advice."
* 'Friendship' network:
"Would you go through this list, and check the names of those you socialize with outside work. You know their family, they know yours, for instance. I do not mean all the people you are simply on a friendly level with, or people you happen to meet at Firm functions."

## Data preparation

###  Load the data

Let's load the data! The three networks refer to cowork, friendship, and advice. The first 36 respondents are the partners in the firm.

In [19]:
# Adjacency matrices of the three Lazega lawyer networks (one row/column per lawyer).
# The friendship matrix must come from ELfriend.dat; it previously pointed at
# ELwork.dat, which made the "friendship" network a copy of the cowork network.
mat_friendship = pd.read_table("https://sds-aau.github.io/SDS-master/M2/data/LazegaLawyers/ELfriend.dat", sep= ' ', header=None)
mat_advice = pd.read_table("https://sds-aau.github.io/SDS-master/M2/data/LazegaLawyers/ELadv.dat", sep= ' ', header=None)
mat_work = pd.read_table("https://sds-aau.github.io/SDS-master/M2/data/LazegaLawyers/ELwork.dat", sep= ' ', header=None)

In [None]:
# Inspect the first rows of the friendship adjacency matrix
mat_friendship.head()

In [21]:
def _adjacency_to_digraph(adjacency):
    """Build a directed graph from a square adjacency matrix, labelling nodes 1..n.

    The matrices are read without a header, so pandas labels rows and columns
    0..n-1. The attribute table attached to these graphs later is keyed by an
    `id` column; labelling nodes from 1 keeps both aligned. With 0-based nodes
    each lawyer would receive the attributes of the previous one, node 0 would
    get none, and the last lawyer's attributes would be silently dropped.
    NOTE(review): assumes `id` in ELattr.dat runs 1..n in matrix row order - confirm.
    """
    lawyer_ids = pd.RangeIndex(1, len(adjacency) + 1)
    labelled = adjacency.copy()  # leave the raw matrix untouched
    labelled.index = lawyer_ids
    labelled.columns = lawyer_ids
    return nx.from_pandas_adjacency(labelled, create_using=nx.DiGraph)


G_friendship = _adjacency_to_digraph(mat_friendship)
G_advice = _adjacency_to_digraph(mat_advice)
G_work = _adjacency_to_digraph(mat_work)

In [None]:
# Print the summary string of the cowork graph
print(G_work)

In [None]:
# Print the summary string of each of the three networks
for lawyer_graph in (G_friendship, G_advice, G_work):
    print(lawyer_graph)

In [24]:
# Whitespace-separated attribute table without a header row
attributes_url = "https://sds-aau.github.io/SDS-master/M2/data/LazegaLawyers/ELattr.dat"
attributes = pd.read_table(attributes_url, sep=r"\s+", header=None)

In [None]:
# Inspect the first rows of the raw attribute table
attributes.head()

In [26]:
# Round every value, then cast the whole table to integers
attributes_rounded = attributes.round()
attributes = attributes_rounded.astype(int)

In [27]:
# Name the attribute columns
attribute_columns = ["id", "seniority", "gender", "office", "tenure", "age", "practice", "school"]
attributes.columns = attribute_columns

In [28]:
# Use the lawyer id as the row index
attributes = attributes.set_index('id')

In [29]:
# Per-column lookup tables translating the numeric codes (starting at 1) into labels
cleanup_nums = {
    "seniority": dict(enumerate(["Partner", "Associate"], start=1)),
    "gender": dict(enumerate(["Male", "Female"], start=1)),
    "office": dict(enumerate(["Boston", "Hartford", "Providence"], start=1)),
    "practice": dict(enumerate(["Litigation", "Corporate"], start=1)),
    "school": dict(enumerate(["Harvard, Yale", "Ucon", "Others"], start=1)),
}

In [30]:
# Replace the numeric codes with their labels, column by column
attributes = attributes.replace(cleanup_nums)

In [None]:
# Inspect the first rows of the attribute table after recoding
attributes.head()

In [None]:
# Print a summary of the attribute table
attributes.info()

In [33]:
# Nested dict {row index: {column: value}}, the shape nx.set_node_attributes accepts
attributes_dict = attributes.to_dict(orient='index')

In [34]:
# Attach the same attribute dictionary to the nodes of all three networks
for lawyer_graph in (G_friendship, G_advice, G_work):
    nx.set_node_attributes(lawyer_graph, attributes_dict)

In [None]:
# Check the result: print the node -> 'seniority' mapping of the friendship graph
print(nx.get_node_attributes(G_friendship, 'seniority'))

## Calculate dimensional centralities

There might be better ways to do that (still experimenting), but for now lets first create centralities upfront for all networks. We for now only look at the in-degree.

In [36]:
# In-degree of every node, one dict per network
cent_degree_friendship, cent_degree_advice, cent_degree_work = (
    dict(lawyer_graph.in_degree) for lawyer_graph in (G_friendship, G_advice, G_work)
)

In [37]:
# Store each network's in-degree on its nodes under the attribute 'cent_degree'
graphs_with_centrality = (
    (G_friendship, cent_degree_friendship),
    (G_advice, cent_degree_advice),
    (G_work, cent_degree_work),
)
for lawyer_graph, in_degrees in graphs_with_centrality:
    nx.set_node_attributes(lawyer_graph, in_degrees, 'cent_degree')

In [38]:
# Compute one Kamada-Kawai layout on the cowork graph and keep it for the plots below
G_layout = nx.kamada_kawai_layout(G_work)

In [39]:
# Friendship network: directed edges, node size taken from the 'cent_degree' attribute
friendship_plot_options = {
    'tools': ['hover'],
    'directed': True,
    'edge_alpha': 0.25,
    'node_size': 'cent_degree',
    # optional colouring: 'node_color': 'seniority', 'cmap': 'Set1',
    'legend_position': 'right',
}
g_plot = hv.Graph.from_networkx(G_friendship, G_layout).opts(**friendship_plot_options)



In [None]:
# Render the holoviews graph to a bokeh figure and display it
show(hv.render(g_plot))

In [None]:
# Advice network: directed edges, node size taken from the 'cent_degree' attribute
advice_plot_options = {
    'tools': ['hover'],
    'directed': True,
    'edge_alpha': 0.25,
    'node_size': 'cent_degree',
    # optional colouring: 'node_color': 'cent_degree', 'cmap': 'Set1',
    'legend_position': 'right',
}
g_plot = hv.Graph.from_networkx(G_advice, G_layout).opts(**advice_plot_options)
show(hv.render(g_plot))

In [None]:
# Cowork network: directed edges, node size taken from the 'cent_degree' attribute
work_plot_options = {
    'tools': ['hover'],
    'directed': True,
    'edge_alpha': 0.25,
    'node_size': 'cent_degree',
    # optional colouring: 'node_color': 'seniority', 'cmap': 'Set1',
    'legend_position': 'right',
}
g_plot = hv.Graph.from_networkx(G_work, G_layout).opts(**work_plot_options)
show(hv.render(g_plot))

## Assortativity

We can also calculate another interesting measure, particularly in social networks: assortativity. In a nutshell, it measures whether two nodes that share certain characteristics have a higher or lower probability of being connected.

For details, check:

* Newman, M. E. J. (27 February 2003). "Mixing patterns in networks". Physical Review E. American Physical Society (APS). 67 (2): 026126

In [None]:
# Assortativity of the friendship graph with respect to the 'seniority' node attribute
nx.attribute_assortativity_coefficient(G_friendship, 'seniority')

In [None]:
# Assortativity of the friendship graph with respect to the 'school' node attribute
nx.attribute_assortativity_coefficient(G_friendship, 'school')

In [None]:
# Assortativity of the friendship graph with respect to the 'office' node attribute
nx.attribute_assortativity_coefficient(G_friendship, 'office')

## Reciprocity

Another interesting question usually is whether directed edges are reciprocated, meaning that an edge between `i,j` makes an edge between `j,i` more likely.

In [None]:
# Overall reciprocity of the friendship graph
nx.overall_reciprocity(G_friendship)

In [None]:
# Overall reciprocity of the advice graph
nx.overall_reciprocity(G_advice)

In [None]:
# Overall reciprocity of the cowork graph
nx.overall_reciprocity(G_work)