# Job Hop Net Data Analysis
## A. Mazzetto
### December 2022

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Load the data from file
#
# Each non-blank line of the tab-separated file is unpacked into seven fields:
# source firm, source sector, destination firm, destination sector,
# number of hops, and two weights.

job_hop_ext_unique = []            # (source, destination) pairs
job_hop_ext_unique_wt = []         # hop count per edge, aligned with the lists above/below
job_hop_ext_unique_weighted = []   # (source, destination, attribute dict) triples
firm_sector = []                   # (firm, sector) pairs, one per edge endpoint

with open('../data/job-hop-net.dat', 'r') as file:
    # Iterate the file lazily instead of materialising every line with readlines()
    for line in file:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        (source_job, source_sector, destination_job, destination_sector,
         num_hops, weight_1, weight_2) = line.rstrip('\n').split('\t')
        job_hop = (source_job, destination_job)
        job_hop_ext_unique.append(job_hop)
        job_hop_ext_unique_weighted.append(
            job_hop + (dict(weight=float(weight_1), weight_tgt=float(weight_2)),)
        )
        # Store the hop count as a number so that ranking edges by it is numeric
        # rather than lexicographic (as strings, '9' would sort above '10').
        # Assumes the field is numeric, like the two weight fields - TODO confirm.
        job_hop_ext_unique_wt.append(float(num_hops))
        firm_sector.append((source_job, source_sector))
        firm_sector.append((destination_job, destination_sector))

In [None]:
# Construct firm sector dictionary
# dict() already collapses repeated firms. Building it straight from the list
# (instead of via a set) keeps the outcome deterministic: if a firm is listed
# with more than one sector, the last occurrence in the data wins rather than
# whichever pair the set happens to yield last.
firms_sector = dict(firm_sector)

In [None]:
# Create graphs
# Unweighted directed graph: one edge per (source firm, destination firm) pair
job_hops_graph = nx.DiGraph()
job_hops_graph.add_edges_from(job_hop_ext_unique)
nx.set_node_attributes(job_hops_graph, firms_sector, name='sector')

# Directed graph whose edges carry the 'weight' and 'weight_tgt' attributes
job_hops_graph_wt = nx.DiGraph()
job_hops_graph_wt.add_edges_from(job_hop_ext_unique_weighted)
nx.set_node_attributes(job_hops_graph_wt, firms_sector, name='sector')

In [None]:
# Inspect every edge of the weighted graph together with its attribute dict
job_hops_graph_wt.edges(data=True)

In [None]:
# Inspect every node of the unweighted graph together with its attribute dict
job_hops_graph.nodes(data=True)

In [None]:
# Overview plot of the whole network: small unlabelled nodes on a shell layout
fig = plt.figure(figsize=(3, 3))
pos = nx.shell_layout(job_hops_graph)
nx.draw(
    job_hops_graph,
    pos,
    node_color="blue",
    node_size=10,
    with_labels=False,
    font_color="w",
)

In [None]:
# Degree of every node in the unweighted graph
job_hops_graph.degree()

In [None]:
# Sanity check: the graph was created as a DiGraph, so this is expected to be True
job_hops_graph.is_directed()

In [None]:
# Number of nodes (distinct firms appearing as source or destination)
job_hops_graph.number_of_nodes()

In [None]:
# Number of directed edges (distinct source -> destination pairs)
job_hops_graph.number_of_edges()

In [None]:
# nx.connectivity.is_k_edge_connected(job_hops_graph, 1) # Only for non-directed graph

In [None]:
# Look for a directed cycle in the network and show its edges
# (networkx raises NetworkXNoCycle when the graph has none)
nx.find_cycle(job_hops_graph)

In [None]:
# nx.dag_longest_path_length(job_hops_graph)

## Turnover: in-degree and out-degree

### In-degree

In [None]:
# Raw in-degree (number of distinct firms hopping in) for companies A and B;
# still to be normalized by each company's number of employees
[job_hops_graph.in_degree[firm] for firm in ('companyA', 'companyB')]

In [None]:
%%script false --no-raise-error
# Cell disabled (not executed) so as not to reveal the number of employees, which could be used for reverse engineering

# Normalized by number of employees for companies A and B
[job_hops_graph.in_degree()[i]/firms_number[i] for i in ['companyA','companyB']]

In [None]:
# In-degree of companies A and B, summing the 'weight_tgt' attribute of incoming edges
[
    job_hops_graph_wt.in_degree(weight='weight_tgt')[firm]
    for firm in ('companyA', 'companyB')
]

### Out-degree

In [None]:
# Raw out-degree (number of distinct firms hopped to) for companies A and B;
# still to be normalized by each company's number of employees
[job_hops_graph.out_degree[firm] for firm in ('companyA', 'companyB')]

In [None]:
%%script false --no-raise-error
# Cell disabled (not executed) so as not to reveal the number of employees, which could be used for reverse engineering

# Normalized by number of employees for companies A and B
[job_hops_graph.out_degree()[i]/firms_number[i] for i in ['companyA','companyB']]

In [None]:
# Out-degree of companies A and B, summing the 'weight_tgt' attribute of outgoing edges
[
    job_hops_graph_wt.out_degree(weight='weight_tgt')[firm]
    for firm in ('companyA', 'companyB')
]

In [None]:
# How prominent
# Compute the in-degree centrality of every node once and read both companies
# from the same result instead of recomputing the whole dict per company.
in_centrality = nx.in_degree_centrality(job_hops_graph)
in_centrality['companyA'], in_centrality['companyB']

In [None]:
# How influential
# Compute the out-degree centrality of every node once and read both companies
# from the same result instead of recomputing the whole dict per company.
out_centrality = nx.out_degree_centrality(job_hops_graph)
out_centrality['companyA'], out_centrality['companyB']

## Where it is happening: sub-graph with companies with highest weights

In [None]:
# For the weighted graph, draw the graph with edges with the highest weight (to be updated when the weight is normalised)
# Rank edge indices by hop count, largest first. float() makes the comparison
# numeric even when the counts are held as strings; comparing strings would
# rank '9' above '10'.
idx_edge_ordered = sorted(
    range(len(job_hop_ext_unique_wt)),
    key=lambda k: float(job_hop_ext_unique_wt[k]),
    reverse=True,
)
# Keep only 20 edges with highest weight
job_hop_ext_unique_weighted_small = [job_hop_ext_unique_weighted[i] for i in idx_edge_ordered[:20]]
job_hops_graph_wt_small = nx.from_edgelist(job_hop_ext_unique_weighted_small, create_using=nx.DiGraph)

In [None]:
# Show the selected edges with their attribute dicts
job_hop_ext_unique_weighted_small

In [None]:
# Draw the reduced graph on a shell layout with firm names as node labels
fig = plt.figure(figsize=(10, 10))
pos = nx.shell_layout(job_hops_graph_wt_small)
nx.draw_networkx(
    job_hops_graph_wt_small,
    pos,
    node_color="blue",
    node_size=300,
    with_labels=True,
    font_color="red",
)
# Annotate each edge with its 'weight' attribute in scientific notation
edge_labels = {
    (src, dst): '{:.1E}'.format(attrs['weight'])
    for src, dst, attrs in job_hops_graph_wt_small.edges(data=True)
}
nx.draw_networkx_edge_labels(job_hops_graph_wt_small, pos, edge_labels=edge_labels);

### Innovation and age: degree and closeness centrality

In [None]:
# Age and hiring strategy of a company

In [None]:
# Degree centrality (network breadth)
# Compute the centrality of every node once and read both companies from the
# same result instead of recomputing the whole dict per company.
degree_centrality = nx.degree_centrality(job_hops_graph)
degree_centrality['companyA'], degree_centrality['companyB']

In [None]:
# Closeness centrality (network depth)
# Closeness needs shortest-path lengths for every node, so compute the full
# dict a single time and read both companies from it rather than running the
# whole computation twice.
closeness = nx.closeness_centrality(job_hops_graph)
closeness['companyA'], closeness['companyB']

In [None]:
# Company B is more innovative

### Aspiration: Google PageRank

In [None]:
# Pagerank on the unweighted graph, then the scores of companies A and B
# NOTE(review): in networkx `alpha` is the damping factor (library default 0.85);
# 0.15 is the value usually quoted for the complementary teleport probability.
# Confirm that alpha=0.15 is really intended here.
job_hop_pagerank = nx.pagerank(
    job_hops_graph,
    alpha=0.15,
    max_iter=100,
)
(job_hop_pagerank['companyA'], job_hop_pagerank['companyB'])

In [None]:
# Pagerank on the weighted graph (edge attribute 'weight'), then the scores of companies A and B
# NOTE(review): in networkx `alpha` is the damping factor (library default 0.85);
# 0.15 is the value usually quoted for the complementary teleport probability.
# Confirm that alpha=0.15 is really intended here.
job_hop_pagerank_wt = nx.pagerank(
    job_hops_graph_wt,
    alpha=0.15,
    max_iter=100,
    weight='weight',
)
(job_hop_pagerank_wt['companyA'], job_hop_pagerank_wt['companyB'])

In [None]:
# Side-by-side histograms of the pagerank scores: unweighted (left) and weighted (right),
# both clipped to the same axis ranges so they can be compared directly
fig = plt.figure(figsize=(10, 2))
for panel, scores in enumerate((job_hop_pagerank, job_hop_pagerank_wt), start=1):
    plt.subplot(1, 2, panel)
    plt.hist(scores.values())
    plt.xlim([0, 0.1])
    plt.ylim([0, 10])

### Fast track to a new job: shortest path

In [None]:
# Shortest paths, unweighted: hop counts from companyB to every node reachable from it
# (the trailing semicolon suppresses the cell output)
nx.single_source_shortest_path_length(job_hops_graph,source='companyB');

In [None]:
# Hop counts into companyB from every node that can reach it, as a dict
dict(nx.single_target_shortest_path_length(job_hops_graph,target='companyB'))

In [None]:
# Hop counts from companyA to every node reachable from it
# (the trailing semicolon suppresses the cell output)
nx.single_source_shortest_path_length(job_hops_graph,source='companyA');

In [None]:
# Hop counts into companyA from every node that can reach it, as a dict
dict(nx.single_target_shortest_path_length(job_hops_graph,target='companyA'))

In [None]:
# One fewest-hops path from companyA to bulgari, as a list of nodes
nx.shortest_path(job_hops_graph, 'companyA', 'bulgari')

In [None]:
# Generator over the simple paths from companyB to pirelli, with the search depth
# limited by cutoff=5; the paths are only produced when the generator is iterated
simple_path_gen = nx.all_simple_paths(job_hops_graph, 'companyB', 'pirelli', cutoff= 5)

In [None]:
# One fewest-hops path from companyB to bulgari, as a list of nodes
nx.shortest_path(job_hops_graph, 'companyB', 'bulgari')

In [None]:
# Shortest paths, weighted: Dijkstra path lengths from companyB on the weighted graph
# (the trailing semicolon suppresses the cell output)
nx.single_source_dijkstra_path_length(job_hops_graph_wt,source='companyB');

In [None]:
def single_target_dijkstra_path_length(G, target, weight='weight'):
    """Return the hop count of the weighted-shortest path from each node to ``target``.

    For every node of ``G`` the minimum-weight path to ``target`` is found with
    Dijkstra's algorithm, and the number of edges on that path is recorded.
    Note that the value is the number of hops along the weighted-optimal path,
    not the sum of the edge weights.

    Parameters
    ----------
    G : graph
        Graph to search; its nodes are taken from ``G.nodes()``.
    target : node
        Node every path must end at.
    weight : str, optional
        Name of the edge attribute used as the Dijkstra weight
        (default ``'weight'``).

    Returns
    -------
    dict
        ``{node: hop_count}`` for every node that can reach ``target``
        (``target`` itself maps to 0), ordered by increasing hop count.
        Nodes with no path to ``target`` are omitted.
    """
    pl_dict = {}
    for n in G.nodes():
        try:
            path = nx.dijkstra_path(G, source=n, target=target, weight=weight)
        except nx.NetworkXNoPath:
            # n cannot reach target: leave it out of the result
            continue
        # A path of k nodes has k - 1 edges
        pl_dict[n] = len(path) - 1
    return dict(sorted(pl_dict.items(), key=lambda item: item[1]))

In [None]:
# Hop counts of the weighted-shortest paths from every node into companyB
single_target_dijkstra_path_length(job_hops_graph_wt, target= 'companyB')

In [None]:
# Dijkstra path lengths from companyA on the weighted graph
# (the trailing semicolon suppresses the cell output)
nx.single_source_dijkstra_path_length(job_hops_graph_wt,source='companyA');

In [None]:
# Hop counts of the weighted-shortest paths from every node into companyA
single_target_dijkstra_path_length(job_hops_graph_wt, target= 'companyA')

In [None]:
# Node sequence of the Dijkstra (minimum total weight) path from companyA to bulgari
nx.dijkstra_path(job_hops_graph_wt, 'companyA', 'bulgari')

In [None]:
# Node sequence of the Dijkstra (minimum total weight) path from companyB to bulgari
nx.dijkstra_path(job_hops_graph_wt, 'companyB', 'bulgari')

### Community detection

In [None]:
# Betweenness centrality of every edge of the unweighted graph
# (the trailing semicolon suppresses the cell output)
nx.edge_betweenness_centrality(job_hops_graph);

In [None]:
from networkx.algorithms.community import girvan_newman

In [None]:
# Step through the Girvan-Newman partitions until the largest community
# has at most 10 members
communities_generator = girvan_newman(job_hops_graph)
while True:
    communities = next(communities_generator)
    if communities and max(map(len, communities)) <= 10:
        break
# Keep only communities with more than two members.
# NOTE(review): this drops two-member communities as well as singletons - confirm the threshold.
communities = tuple(filter(lambda members: len(members) > 2, communities))

In [None]:
# Show the retained communities
communities

In [None]:
# Pair every firm of each community with its sector
[
    [(firm, firms_sector[firm]) for firm in members]
    for members in communities
]

In [None]:
# Flatten the communities into a single set of node names.
# It is passed as `nodes=` to the degree assortativity computations.
# set().union(*...) builds the set directly, without a throw-away list of
# None values and without rebinding `_`.
community_nodes_set = set().union(*communities)

In [None]:
# Assortativity
# Out-degree vs out-degree assortativity, restricted to the community nodes
nx.degree_assortativity_coefficient(
    job_hops_graph, x='out', y='out', nodes=community_nodes_set
)

In [None]:
# In-degree vs in-degree assortativity, restricted to the community nodes
nx.degree_assortativity_coefficient(
    job_hops_graph, x='in', y='in', nodes=community_nodes_set
)

In [None]:
# Weighted out-degree assortativity (edge attribute 'weight'), restricted to the community nodes
nx.degree_assortativity_coefficient(
    job_hops_graph_wt, x='out', y='out', weight='weight', nodes=community_nodes_set
)

In [None]:
# Weighted in-degree assortativity (edge attribute 'weight'), restricted to the community nodes
nx.degree_assortativity_coefficient(
    job_hops_graph_wt, x='in', y='in', weight='weight', nodes=community_nodes_set
)

In [None]:
# Look at how similar the companies are:
# assortativity of the network with respect to the 'sector' node attribute
nx.attribute_assortativity_coefficient(job_hops_graph, attribute='sector')

### End of file