# Exercises 13 March 2024: Interlocks 

## Preparation

In [18]:
# import networkx (and other useful packages)
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

We use data from: 
Seierstad C. and Opsahl T. 2011. For the few not the many? The effects of affirmative action on presence, prominence, and social capital of female directors in Norway. Scandinavian Journal of Management, 27 (1), 44-54

In [19]:
## Import data with Pandas
# First set of nodes: Directors
Directors = pd.read_csv('Directors.txt', sep=' ', encoding='latin1') # specified encoding due to Norwegian names
Directors

Unnamed: 0,Director_ID,Director_Name,Director_Gender
0,1,Aage Jakobsen,1
1,2,Aage Johan Remøy,1
2,3,Aage Rasmus Bjelland Figenschou,1
3,4,Aagot Irene Skjeldal,2
4,5,Aase Gundersen,2
...,...,...,...
5762,5763,Yuhong Jin Hermansen,2
5763,5764,Yvonne Berntzen,2
5764,5765,Yvonne Skramstad,2
5765,5766,Zhong Bei,2


In [20]:
# Second set of nodes: Companies
Companies = pd.read_csv('Companies.txt', header=None, sep='\t') 
Companies

Unnamed: 0,0,1,2,3
0,1,879447992,24SEVENOFFICE ASA,0667 OSLO
1,2,990031479,A-COM NORGE ASA,0355 OSLO
2,3,890687792,ABERDEEN EIENDOMSFOND ASIA ASA,0230 OSLO
3,4,989761390,ABERDEEN EIENDOMSFOND NORDEN/BALTIKUM ASA,0255 OSLO
4,5,988671258,ABERDEEN EIENDOMSFOND NORGE II ASA,0255 OSLO
...,...,...,...,...
379,380,882757692,WILSON ASA,5003 BERGEN
380,381,985224323,WINTERSHALL NORGE ASA,4008 STAVANGER
381,382,987778490,YA HOLDING ASA,0369 OSLO
382,383,986228608,YARA INTERNATIONAL ASA,0257 OSLO


In [21]:
# rename columns
Companies.rename(columns = {0 :'Company', 1:'Org_nb', 2:'Company_name', 3:'Postcode_City'}, inplace=True)
Companies.head()

Unnamed: 0,Company,Org_nb,Company_name,Postcode_City
0,1,879447992,24SEVENOFFICE ASA,0667 OSLO
1,2,990031479,A-COM NORGE ASA,0355 OSLO
2,3,890687792,ABERDEEN EIENDOMSFOND ASIA ASA,0230 OSLO
3,4,989761390,ABERDEEN EIENDOMSFOND NORDEN/BALTIKUM ASA,0255 OSLO
4,5,988671258,ABERDEEN EIENDOMSFOND NORGE II ASA,0255 OSLO


In [22]:
## Now import edges (let's take August 2011)
interlocks8 = pd.read_csv('Directors_boards_2011-08-01.txt', sep=' ', header = None)
interlocks8

Unnamed: 0,0,1
0,1,2149
1,1,2910
2,1,3684
3,1,3754
4,2,766
...,...,...
1741,383,3410
1742,383,4031
1743,384,958
1744,384,4233


In [23]:
# rename columns
interlocks8.rename(columns = {0 :'Company', 1:'Director'}, inplace=True)
interlocks8.head()

Unnamed: 0,Company,Director
0,1,2149
1,1,2910
2,1,3684
3,1,3754
4,2,766


In [24]:
# It turns out that both Directors and Companies are coded with numbers that start at 1
# Rename IDs to avoid confusion
interlocks8['Director'] = 'D' + interlocks8['Director'].astype(str)
interlocks8['Company'] = 'C' + interlocks8['Company'].astype(str)
interlocks8.head()

Unnamed: 0,Company,Director
0,C1,D2149
1,C1,D2910
2,C1,D3684
3,C1,D3754
4,C2,D766


In [25]:
# Let's do the same in attribute datasets
Companies['Company'] = 'C' + Companies['Company'].astype(str)
Companies

Unnamed: 0,Company,Org_nb,Company_name,Postcode_City
0,C1,879447992,24SEVENOFFICE ASA,0667 OSLO
1,C2,990031479,A-COM NORGE ASA,0355 OSLO
2,C3,890687792,ABERDEEN EIENDOMSFOND ASIA ASA,0230 OSLO
3,C4,989761390,ABERDEEN EIENDOMSFOND NORDEN/BALTIKUM ASA,0255 OSLO
4,C5,988671258,ABERDEEN EIENDOMSFOND NORGE II ASA,0255 OSLO
...,...,...,...,...
379,C380,882757692,WILSON ASA,5003 BERGEN
380,C381,985224323,WINTERSHALL NORGE ASA,4008 STAVANGER
381,C382,987778490,YA HOLDING ASA,0369 OSLO
382,C383,986228608,YARA INTERNATIONAL ASA,0257 OSLO


In [26]:
# Let's do the same in attribute datasets
Directors['Director_ID'] = 'D' + Directors['Director_ID'].astype(str)
Directors

Unnamed: 0,Director_ID,Director_Name,Director_Gender
0,D1,Aage Jakobsen,1
1,D2,Aage Johan Remøy,1
2,D3,Aage Rasmus Bjelland Figenschou,1
3,D4,Aagot Irene Skjeldal,2
4,D5,Aase Gundersen,2
...,...,...,...
5762,D5763,Yuhong Jin Hermansen,2
5763,D5764,Yvonne Berntzen,2
5764,D5765,Yvonne Skramstad,2
5765,D5766,Zhong Bei,2


## Create graph

In [27]:
### Create a graph with the August interlocks
interlocks_G = nx.Graph()  # start by generating empty graph

# First add nodes
interlocks_G.add_nodes_from(interlocks8.iloc[:, 0], bipartite=0) # companies
interlocks_G.add_nodes_from(interlocks8.iloc[:, 1], bipartite=1) # persons

print(interlocks_G)

Graph with 1776 nodes and 0 edges


In [29]:
# now add edges (August interlocks)
interlocks_G.add_edges_from([(Company, Director) for (Company, Director)
              in interlocks8[['Company', 'Director']].to_numpy()])
print((interlocks_G))

Graph with 1776 nodes and 1746 edges


In [30]:
# Is the graph bipartite?
nx.is_bipartite(interlocks_G)

True

In [31]:
# check bipartite attribute
nx.get_node_attributes(interlocks_G, "bipartite")

{'C1': 0,
 'C2': 0,
 'C3': 0,
 'C4': 0,
 'C5': 0,
 'C6': 0,
 'C7': 0,
 'C8': 0,
 'C10': 0,
 'C11': 0,
 'C12': 0,
 'C13': 0,
 'C14': 0,
 'C15': 0,
 'C16': 0,
 'C17': 0,
 'C18': 0,
 'C19': 0,
 'C20': 0,
 'C21': 0,
 'C22': 0,
 'C23': 0,
 'C24': 0,
 'C25': 0,
 'C26': 0,
 'C27': 0,
 'C28': 0,
 'C29': 0,
 'C30': 0,
 'C31': 0,
 'C32': 0,
 'C33': 0,
 'C34': 0,
 'C35': 0,
 'C36': 0,
 'C37': 0,
 'C38': 0,
 'C39': 0,
 'C40': 0,
 'C41': 0,
 'C42': 0,
 'C43': 0,
 'C44': 0,
 'C45': 0,
 'C47': 0,
 'C48': 0,
 'C49': 0,
 'C51': 0,
 'C52': 0,
 'C53': 0,
 'C54': 0,
 'C55': 0,
 'C56': 0,
 'C57': 0,
 'C58': 0,
 'C59': 0,
 'C60': 0,
 'C61': 0,
 'C62': 0,
 'C63': 0,
 'C64': 0,
 'C65': 0,
 'C66': 0,
 'C67': 0,
 'C70': 0,
 'C71': 0,
 'C72': 0,
 'C73': 0,
 'C74': 0,
 'C75': 0,
 'C76': 0,
 'C77': 0,
 'C78': 0,
 'C79': 0,
 'C80': 0,
 'C81': 0,
 'C82': 0,
 'C83': 0,
 'C84': 0,
 'C85': 0,
 'C86': 0,
 'C87': 0,
 'C88': 0,
 'C89': 0,
 'C90': 0,
 'C91': 0,
 'C92': 0,
 'C93': 0,
 'C94': 0,
 'C95': 0,
 'C96': 0,
 'C97':

## Basic metrics

In [32]:
# is the graph connected?
nx.is_connected(interlocks_G)

False

In [34]:
# let's extract largest component
interlocks_gG = sorted(nx.connected_components(interlocks_G), key=len, reverse=True)
interlocks_gG0 = interlocks_G.subgraph(interlocks_gG[0])
print((interlocks_gG0))

Graph with 1066 nodes and 1148 edges


In [35]:
# For the following metrics, it is helpful to define two groups of nodes
company_nodes = {n for n, d in interlocks_G.nodes(data=True) if d["bipartite"] == 0} # these are the companies
director_nodes = set(interlocks_G) - company_nodes # directors

In [36]:
# Density (we now go back to the full graph)

# we can calculate it by hand: nb. edges / (company_nodes * director_nodes)
len(interlocks_G.edges())/(len(company_nodes)*len(director_nodes))

0.0034611610549999503

In [37]:
# there is also a pre-defined function for bipartite density
nx.bipartite.density(interlocks_G, company_nodes)
# it requires specifying name of graph and one of the two node groups

0.0034611610549999503

In [39]:
# Low level of density is partly due to presence of isolates
# Let's compute it now on the largest component only
nx.bipartite.density(interlocks_gG0, company_nodes)

0.004548245874685526

In [41]:
# Degree
print(nx.degree(interlocks_G, company_nodes)) # degree of companies
print(nx.degree(interlocks_G, director_nodes)) # degree of directors

[('C88', 3), ('C8', 3), ('C210', 4), ('C222', 5), ('C57', 10), ('C331', 12), ('C174', 7), ('C305', 4), ('C29', 3), ('C11', 7), ('C106', 6), ('C302', 4), ('C227', 11), ('C13', 5), ('C336', 3), ('C108', 6), ('C40', 3), ('C126', 5), ('C42', 5), ('C101', 5), ('C73', 7), ('C368', 7), ('C96', 5), ('C172', 6), ('C85', 5), ('C241', 5), ('C353', 6), ('C39', 7), ('C63', 4), ('C365', 4), ('C252', 3), ('C379', 6), ('C259', 7), ('C326', 3), ('C356', 5), ('C167', 5), ('C26', 3), ('C78', 2), ('C198', 5), ('C282', 4), ('C283', 3), ('C93', 5), ('C132', 6), ('C134', 7), ('C4', 5), ('C273', 5), ('C48', 5), ('C151', 5), ('C100', 5), ('C279', 8), ('C138', 4), ('C307', 5), ('C55', 4), ('C67', 5), ('C248', 4), ('C56', 5), ('C312', 3), ('C86', 5), ('C309', 4), ('C47', 5), ('C231', 5), ('C232', 3), ('C18', 3), ('C142', 5), ('C187', 5), ('C17', 5), ('C229', 5), ('C22', 6), ('C25', 5), ('C342', 3), ('C159', 3), ('C5', 5), ('C99', 3), ('C325', 7), ('C311', 3), ('C354', 6), ('C228', 4), ('C220', 10), ('C144', 5), 

In [42]:
# Clustering coefficient, CC (local)
nx.bipartite.clustering(interlocks_G, company_nodes, mode='dot') 
# same as regular CC but considers second-order neighbors

{'C88': 1.0,
 'C8': 0.1714285714285714,
 'C210': 0.0,
 'C222': 0.11871263656977941,
 'C57': 0.07269345238095237,
 'C331': 0.05601851851851852,
 'C174': 0.0,
 'C305': 0.23214285714285718,
 'C29': 0.0,
 'C11': 0.08841491841491841,
 'C106': 0.1,
 'C302': 0.14583333333333331,
 'C227': 0.0857142857142857,
 'C13': 0.14285714285714285,
 'C336': 0.2,
 'C108': 0.09074074074074073,
 'C40': 0.0,
 'C126': 0.17671957671957672,
 'C42': 0.14285714285714285,
 'C101': 0.12316239316239316,
 'C73': 0.10690235690235689,
 'C368': 0.08927738927738928,
 'C96': 0.10555555555555556,
 'C172': 0.125,
 'C85': 0.09090909090909091,
 'C241': 0.09601139601139601,
 'C353': 0.1,
 'C39': 0.1372377622377622,
 'C63': 0.12698412698412698,
 'C365': 0.09090909090909091,
 'C252': 0.20000000000000004,
 'C379': 0.09166666666666667,
 'C259': 0.0886243386243386,
 'C326': 0.2,
 'C356': 0.0625,
 'C167': 0.1,
 'C26': 0.0,
 'C78': 0.0,
 'C198': 0.1111111111111111,
 'C282': 0.75,
 'C283': 0.0,
 'C93': 0.09814814814814815,
 'C132': 0.1

In [43]:
# Average of local CC
nx.bipartite.average_clustering(interlocks_G, nodes=company_nodes, mode='dot')
# calculation as above

0.10920942816394318

## Projections

In [44]:
# first the graph of companies
company_graph = nx.bipartite.projection.projected_graph(interlocks_G, company_nodes) # does not take into account weights
print((company_graph))

Graph with 355 nodes and 421 edges


In [48]:
# Let's look at the edges
company_graph.edges(data=True)

EdgeDataView([('C88', 'C87', {}), ('C8', 'C176', {}), ('C8', 'C7', {}), ('C8', 'C49', {}), ('C8', 'C322', {}), ('C8', 'C36', {}), ('C222', 'C305', {}), ('C222', 'C303', {}), ('C222', 'C277', {}), ('C222', 'C113', {}), ('C222', 'C28', {}), ('C222', 'C304', {}), ('C222', 'C301', {}), ('C57', 'C166', {}), ('C57', 'C157', {}), ('C57', 'C126', {}), ('C57', 'C109', {}), ('C57', 'C325', {}), ('C57', 'C162', {}), ('C57', 'C232', {}), ('C57', 'C31', {}), ('C331', 'C23', {}), ('C331', 'C356', {}), ('C331', 'C264', {}), ('C305', 'C303', {}), ('C305', 'C277', {}), ('C305', 'C304', {}), ('C305', 'C301', {}), ('C305', 'C300', {}), ('C11', 'C328', {}), ('C11', 'C286', {}), ('C11', 'C85', {}), ('C11', 'C108', {}), ('C11', 'C10', {}), ('C106', 'C241', {}), ('C106', 'C177', {}), ('C302', 'C301', {}), ('C302', 'C300', {}), ('C227', 'C101', {}), ('C227', 'C181', {}), ('C227', 'C43', {}), ('C227', 'C144', {}), ('C13', 'C263', {}), ('C336', 'C338', {}), ('C336', 'C337', {}), ('C108', 'C271', {}), ('C108', '

In [45]:
# let's look at density
nx.density(company_graph)

0.0067000875308347255

In [46]:
# Degree
company_graph.degree()

DegreeView({'C88': 1, 'C8': 5, 'C210': 0, 'C222': 7, 'C57': 8, 'C331': 3, 'C174': 0, 'C305': 6, 'C29': 0, 'C11': 5, 'C106': 2, 'C302': 2, 'C227': 4, 'C13': 1, 'C336': 2, 'C108': 6, 'C40': 0, 'C126': 6, 'C42': 1, 'C101': 5, 'C73': 6, 'C368': 3, 'C96': 2, 'C172': 1, 'C85': 2, 'C241': 3, 'C353': 1, 'C39': 10, 'C63': 2, 'C365': 1, 'C252': 3, 'C379': 2, 'C259': 3, 'C326': 1, 'C356': 1, 'C167': 1, 'C26': 0, 'C78': 0, 'C198': 1, 'C282': 1, 'C283': 0, 'C93': 3, 'C132': 3, 'C134': 0, 'C4': 5, 'C273': 9, 'C48': 4, 'C151': 3, 'C100': 4, 'C279': 8, 'C138': 0, 'C307': 4, 'C55': 6, 'C67': 4, 'C248': 1, 'C56': 3, 'C312': 1, 'C86': 2, 'C309': 0, 'C47': 0, 'C231': 6, 'C232': 6, 'C18': 0, 'C142': 2, 'C187': 4, 'C17': 5, 'C229': 2, 'C22': 5, 'C25': 3, 'C342': 0, 'C159': 0, 'C5': 3, 'C99': 5, 'C325': 1, 'C311': 1, 'C354': 1, 'C228': 2, 'C220': 2, 'C144': 3, 'C89': 9, 'C267': 4, 'C103': 2, 'C107': 0, 'C316': 0, 'C118': 1, 'C75': 1, 'C341': 0, 'C27': 6, 'C181': 3, 'C31': 1, 'C214': 0, 'C36': 8, 'C185': 0, '

In [47]:
# to build a company graph that takes weights into account:
company_graph_2 = nx.bipartite.weighted_projected_graph(interlocks_G, company_nodes, ratio=False)
print((company_graph_2))

Graph with 355 nodes and 421 edges


In [49]:
# Let's look at the edges
company_graph_2.edges(data=True) # this time it takes into account weights

EdgeDataView([('C88', 'C87', {'weight': 3}), ('C8', 'C176', {'weight': 1}), ('C8', 'C7', {'weight': 2}), ('C8', 'C49', {'weight': 1}), ('C8', 'C322', {'weight': 1}), ('C8', 'C36', {'weight': 1}), ('C222', 'C305', {'weight': 1}), ('C222', 'C303', {'weight': 1}), ('C222', 'C277', {'weight': 1}), ('C222', 'C113', {'weight': 1}), ('C222', 'C28', {'weight': 1}), ('C222', 'C304', {'weight': 1}), ('C222', 'C301', {'weight': 1}), ('C57', 'C166', {'weight': 1}), ('C57', 'C157', {'weight': 1}), ('C57', 'C126', {'weight': 1}), ('C57', 'C232', {'weight': 1}), ('C57', 'C109', {'weight': 1}), ('C57', 'C162', {'weight': 1}), ('C57', 'C325', {'weight': 1}), ('C57', 'C31', {'weight': 1}), ('C331', 'C23', {'weight': 1}), ('C331', 'C356', {'weight': 1}), ('C331', 'C264', {'weight': 1}), ('C305', 'C303', {'weight': 2}), ('C305', 'C277', {'weight': 1}), ('C305', 'C304', {'weight': 3}), ('C305', 'C301', {'weight': 1}), ('C305', 'C300', {'weight': 1}), ('C11', 'C328', {'weight': 1}), ('C11', 'C286', {'weight

### Home exercise:

##### 1: Re-do the same analyses with the file used in class, but using the Directors projection.

##### 2: Re-do the same analyses done in class (Companies projection), but with data for a different month.
