### Notebook 2 - technology graph description (solution 1)

In [1]:
from networkx.readwrite import json_graph
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import numpy as np
import warnings
import json
import os

# Silence every warning for the rest of the notebook (this also hides library
# deprecation notices and numpy runtime warnings).
warnings.filterwarnings('ignore')

# First and last year of the yearly graphs; both ends are processed.
year_start = 1963
year_end = 1999

# Folder holding the yearly graph JSON files. The original machine-specific
# location stays the default, but it can be overridden without editing the
# notebook by setting the TECHNOLOGY_GRAPH_FOLDER environment variable.
# os.path.join(..., '') guarantees a trailing separator, which is required
# because the file name is appended by plain string concatenation later on.
graph_folder = os.path.join(
    os.environ.get(
        'TECHNOLOGY_GRAPH_FOLDER',
        '/Users/koshelev/Documents/lmu/thesis/1-graph_construction/data/preprocessed/technology_graphs_1/',
    ),
    '',
)

# Name of the edge attribute used as weight in the weighted statistics.
weight = 'Number Citations'

# Seed forwarded to the algebraic-connectivity computation.
seed = 100

In [2]:
def read_json_file(filename: str) -> nx.Graph:
    """Load a graph that was serialised as node-link JSON.

    :param filename: path of the JSON file to read.
    :return: the graph rebuilt by ``json_graph.node_link_graph`` from the
        parsed JSON document.
    """
    # JSON is UTF-8 by specification; pin the encoding instead of relying on
    # the platform's locale default.
    with open(filename, encoding='utf-8') as f:
        js_graph = json.load(f)
    return json_graph.node_link_graph(js_graph)

In [3]:
def graph_algebraic_connectivity(graph: nx.DiGraph,
                                 is_normalized: bool = False,
                                 weight_attribute: str = None,
                                 seed: int = seed) -> float:
    """Return the algebraic connectivity of ``graph`` as computed by networkx.

    :param graph: graph handed to ``nx.linalg.algebraic_connectivity``.
        NOTE(review): the annotation says DiGraph, but the caller in this
        notebook passes ``graph.to_undirected()`` - networkx presumably
        requires an undirected graph here; confirm against the library docs.
    :param is_normalized: forwarded as ``normalized``.
    :param weight_attribute: forwarded as ``weight``; ``None`` means every
        edge counts equally.
    :param seed: forwarded as ``seed``. The default is the value the
        module-level ``seed`` had when this function was defined.
    :return: the value returned by ``nx.linalg.algebraic_connectivity``.
    """
    return nx.linalg.algebraic_connectivity(
        graph,
        weight=weight_attribute,
        normalized=is_normalized,
        seed=seed,
    )


def graph_wiener_index(graph: nx.DiGraph, weight_attribute: str = weight) -> float:
    """Return the Wiener index of ``graph`` as computed by ``nx.wiener_index``.

    :param graph: graph handed to ``nx.wiener_index``.
    :param weight_attribute: forwarded as ``weight``. The default is the value
        the module-level ``weight`` had when this function was defined.
    :return: the value returned by ``nx.wiener_index``.
    """
    return nx.wiener_index(graph, weight=weight_attribute)

In [4]:
def graph_descr(graph: nx.DiGraph, weight: str) -> dict:
    """Collect summary statistics of one graph.

    :param graph: graph to describe.
    :param weight: name of the edge attribute used for the weighted degree.
    :return: dict with the keys ``num_nodes``, ``num_edges``,
        ``average_degree_w``, ``average_degree_nw``, ``average_lcc_nw``,
        ``average_spl_nw``, ``algebraic_connect_nw`` and
        ``average_deg_centr``. Keys ending in ``_nw`` are computed without
        edge weights.
    """
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()

    # Mean degree, once with edge weights and once counting plain edges.
    weighted_degrees = [deg for _, deg in graph.degree(weight=weight)]
    plain_degrees = [deg for _, deg in graph.degree(weight=None)]
    mean_degree_weighted = np.nanmean(weighted_degrees)
    mean_degree_plain = np.nanmean(plain_degrees)

    mean_clustering = nx.average_clustering(graph, weight=None)

    # networkx raises NetworkXError when the average path length is undefined
    # for this graph; report NaN in that case instead of aborting.
    try:
        mean_path_length = nx.average_shortest_path_length(graph, weight=None)
    except nx.NetworkXError:
        mean_path_length = np.nan

    # The algebraic connectivity is computed on an undirected copy.
    connectivity = graph_algebraic_connectivity(graph=graph.to_undirected(), weight_attribute=None)

    mean_degree_centrality = np.mean(list(nx.degree_centrality(graph).values()))

    return {
        'num_nodes': node_count,
        'num_edges': edge_count,
        'average_degree_w': mean_degree_weighted,
        'average_degree_nw': mean_degree_plain,
        'average_lcc_nw': mean_clustering,
        'average_spl_nw': mean_path_length,
        'algebraic_connect_nw': connectivity,
        'average_deg_centr': mean_degree_centrality,
    }

In [5]:
# One list per reported statistic; every list gets one entry per year.
years = []
nodes, edges = [], []
degrees_w, degrees_nw = [], []
alcc_nw, spl_nw = [], []
algebrcon_nw, deg_centr = [], []

# Maps each key returned by graph_descr to the list that collects it.
metric_series = {
    'num_nodes': nodes,
    'num_edges': edges,
    'average_degree_w': degrees_w,
    'average_degree_nw': degrees_nw,
    'average_lcc_nw': alcc_nw,
    'average_spl_nw': spl_nw,
    'algebraic_connect_nw': algebrcon_nw,
    'average_deg_centr': deg_centr,
}

for year in tqdm(range(year_start, year_end + 1)):
    g = read_json_file(graph_folder + f'technology_graph_{int(year)}.json')
    print(year)
    years.append(year)
    output_dict = graph_descr(g, weight=weight)
    for metric_key, series in metric_series.items():
        series.append(output_dict[metric_key])

# Assemble the per-year table: one row per year, one column per statistic.
descr_df = pd.DataFrame({
    'nodes': nodes,
    'edges': edges,
    'degrees_w': degrees_w,
    'degrees_nw': degrees_nw,
    'alcc_nw': alcc_nw,
    'spl_nw': spl_nw,
    'algebrcon_nw': algebrcon_nw,
    'deg_centr': deg_centr,
})
descr_df.index = years

  0%|          | 0/37 [00:00<?, ?it/s]

1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999


In [6]:
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (a no-op when it is already there).
os.makedirs('graph_descriptions', exist_ok=True)
# Persist the per-year statistics; the year index is written as the first column.
descr_df.to_csv('graph_descriptions/technology1.csv', index=True)

In [7]:
# Preview the first five years of the statistics table.
descr_df.head(n=5)

Unnamed: 0,nodes,edges,degrees_w,degrees_nw,alcc_nw,spl_nw,algebrcon_nw,deg_centr
1963,99,655,2673.818182,13.232323,0.540952,1.108328,0.880071,0.135024
1964,97,646,2937.773196,13.319588,0.554266,1.03125,0.88187,0.138746
1965,108,777,3723.851852,14.388889,0.57951,1.02648,0.8825,0.134476
1966,106,795,4385.735849,15.0,0.59194,1.130638,0.880569,0.142857
1967,106,853,4468.226415,16.09434,0.63169,1.192093,0.908201,0.153279
