# Load networks

In [None]:
%reload_ext autoreload
%autoreload 2
import os,sys
sys.path.insert(1, os.path.join(sys.path[0], '..', 'module'))
import wiki
import numpy as np
import pandas as pd
import networkx as nx

In [None]:
# Root directory of the pickled topic graphs; the loading cells read from its
# 'dated' subdirectory.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
path_networks = '/Users/harangju/Developer/data/wiki/graphs/'

In [None]:
# Topics to analyze. Each name doubles as the file stem of its pickled graph
# ('<topic>.pickle') when the networks are loaded.
topics = ['anatomy', 'biochemistry', 'cognitive science', 'evolutionary biology',
          'genetics', 'immunology', 'molecular biology', 'chemistry', 'biophysics',
          'energy', 'optics', 'earth science', 'geology', 'meteorology',
          'philosophy of language', 'philosophy of law', 'philosophy of mind',
          'philosophy of science', 'economics', 'accounting', 'education',
          'linguistics', 'law', 'psychology', 'sociology', 'electronics',
          'software engineering', 'robotics',
          'calculus', 'geometry', 'abstract algebra',
          'Boolean algebra', 'commutative algebra', 'group theory', 'linear algebra',
          'number theory', 'dynamical systems and differential equations']

In [None]:
# Load the dated graph of every topic into a dict keyed by topic name.
networks = {}
for topic in topics:
    print(topic, end=' ')  # progress indicator, all topics on one line
    networks[topic] = wiki.Net()
    # os.path.join does not depend on path_networks ending with a separator
    # and matches how the 'physics' graph is loaded.
    networks[topic].load_graph(os.path.join(path_networks, 'dated', topic + '.pickle'))

In [None]:
# Add 'physics' to the topic list and load its graph.
# Guarded so that re-running this cell does not append a duplicate entry.
if 'physics' not in topics:
    topics.append('physics')
networks['physics'] = wiki.Net()
networks['physics'].load_graph(os.path.join(path_networks, 'dated', 'physics.pickle'))

# Plot

In [None]:
from ipywidgets import interact, widgets, Layout
import plotly
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from IPython.display import display
plotly.offline.init_notebook_mode(connected=True)

In [None]:
# Root directory under which the plotting cells write their output.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
path_fig = '/Users/harangju/Box Sync/Research/my papers/wikipedia/results/'
# Presumably toggles writing figures to disk - confirm that the plotting
# cells actually honor it.
save_fig = False

# Growing networks

In [None]:
# Collect one row per (topic, node) with the node's year, community and core
# labels. Rows are gathered in a plain list and converted to a DataFrame once,
# instead of creating a single-row DataFrame per node and re-concatenating
# the accumulated frame for every topic.
comm_columns = ['topic','node','year','comm','core_be','core_rb','count']
comm_rows = []
for topic, network in networks.items():
    print(topic, end=' ')
    for node, attrs in network.graph.nodes(data=True):
        comm_rows.append([topic, node, attrs['year'], attrs['community'],
                          attrs['core_be'], attrs['core_rb'], 1])
comm_t = pd.DataFrame(comm_rows, columns=comm_columns)
# Attach the final size of every (topic, comm) and (topic, core_be) group,
# then order the rows chronologically within each topic.
comm_t = comm_t.merge(comm_t.groupby(['topic','comm'])['count'].sum(),
                      on=['topic','comm'],
                      suffixes=('','_topic_comm'))\
               .merge(comm_t.groupby(['topic','core_be'])['count'].sum(),
                      on=['topic','core_be'],
                      suffixes=('','_topic_core_be'))\
               .sort_values(by=['topic','year'])\
               .reset_index(drop=True)
# Running group sizes over time ...
comm_t['comm_count'] = comm_t.groupby(['topic','comm'])['count'].cumsum()
comm_t['core_be_count'] = comm_t.groupby(['topic','core_be'])['count'].cumsum()
# ... and the same sizes as a fraction of the final group size.
comm_t['comm_frac'] = comm_t['comm_count']/comm_t['count_topic_comm']
comm_t['core_be_frac'] = comm_t['core_be_count']/comm_t['count_topic_core_be']
# Drop the helper columns that were only needed for the computation above.
comm_t = comm_t.drop(['count','count_topic_comm','count_topic_core_be'], axis=1)

In [None]:
comm_t

In [None]:
# Show the first three nodes that belong to one community of the anatomy network.
community = 1
network = networks['anatomy']
[
    node
    for node, attrs in network.graph.nodes(data=True)
    if attrs['community']==community
][:3]

In [None]:
# Distinct community labels of the currently selected network.
communities = {
    network.graph.nodes[node]['community']
    for node in network.graph.nodes
}

In [None]:
import operator

In [None]:
# For every topic, take the highest-degree node inside each community
# (degree measured within the community's induced subgraph) as its hub.
hubs = {}
for topic in topics:
    network = networks[topic]
    node_data = network.graph.nodes
    communities = {node_data[node]['community'] for node in node_data}
    hubs[topic] = {}
    for community in communities:
        members = [
            node for node in node_data
            if node_data[node]['community']==community
        ]
        subgraph = network.graph.subgraph(members)
        # Each entry of subgraph.degree is a (node, degree) pair.
        hub = max(subgraph.degree, key=operator.itemgetter(1))[0]
        hubs[topic][community] = hub

# Modularity

In [None]:
import os

path_plot = '5 modules'

# makedirs(exist_ok=True) creates missing parent directories as well and is a
# no-op when the directory already exists.
os.makedirs(os.path.join(path_fig, path_plot), exist_ok=True)

## Count

In [None]:
import os

# Output directory for the per-topic community-size figures; created together
# with any missing parent directory, no-op if it already exists.
os.makedirs(os.path.join(path_fig, path_plot, 'count'), exist_ok=True)

In [None]:
# Plot the cumulative number of nodes of each community over time.
max_comms = 16  # cap on the number of communities drawn per topic
for topic in ['physics']: #networks.keys():
    fig = go.Figure()
    data = comm_t[comm_t.topic==topic]
    # Iterate over the community labels actually present (at most max_comms)
    # instead of assuming labels 0..15 exist; a missing label would raise
    # KeyError in hubs[topic].
    for i in sorted(pd.unique(data.comm))[:max_comms]:
        members = data[data.comm==i]
        fig.add_trace(
            go.Scatter(
                x=members['year'],
                y=members['comm_count'],
                mode='lines', name=hubs[topic][i],
            )
        )
    fig.update_layout(template='plotly_white',
                      title_text=topic,
                      xaxis={'range': [0,2100],
                             'title': 'year'},
                      # The axis is linear, so its range is given in data
                      # units; a log10 range only applies to a 'log' axis.
                      yaxis={'title': '# nodes',
                             'range': [1,np.max(data.comm_count)],
                             'type': 'linear'})
    fig.show()
    if save_fig:  # honor the notebook-level switch for writing figures
        fig.write_image(f"{path_fig}/{path_plot}/count/{topic}.pdf")

## Growth

In [None]:
import os

# Output directory for the per-topic growth figures; created together with
# any missing parent directory, no-op if it already exists.
os.makedirs(os.path.join(path_fig, path_plot, 'growth'), exist_ok=True)

In [None]:
# Draw one row of markers per community, showing the years of its nodes.
max_comms = 16  # cap on the number of communities drawn per topic
for topic in ['physics']: #networks.keys():
    fig = go.Figure()
    data = comm_t[comm_t.topic==topic]
    # Use the community labels actually present (at most max_comms) rather
    # than assuming labels 0..15 exist in both `data` and `hubs[topic]`.
    comm_labels = sorted(pd.unique(data.comm))[:max_comms]
    for i in comm_labels:
        members = data[data.comm==i]
        fig.add_trace(
            go.Scatter(
                x=members['year'],
                y=i*np.ones(len(members.index)),
                mode='markers', name=hubs[topic][i],
            )
        )
    fig.update_layout(template='plotly_white',
                      title_text=topic,
                      showlegend=False,
                      xaxis={'range': [0,2100],
                             'title': 'year'},
                      # Upper bound taken from the plotted labels, not from a
                      # loop variable left over after the loop.
                      yaxis={'title': 'communities (by hubs)',
                             'range': [-1,comm_labels[-1]+1]})
    # Label every row with the hub of its community.
    fig.update_yaxes(
        tickvals=list(comm_labels),
        ticktext=[hubs[topic][label] for label in comm_labels]
    )
    fig.show()
    if save_fig:  # honor the notebook-level switch for writing figures
        fig.write_image(f"{path_fig}/{path_plot}/growth/{topic}.pdf")

In [None]:
# NOTE(review): in the visible notebook `years` is first assigned by a later
# cell, so this inspection raises NameError on a fresh kernel (Restart & Run All).
years

In [None]:
# Plot, for every topic, how many nodes each community gains per year.
max_comms = 10  # cap on the number of communities drawn per topic
for topic in networks.keys():
    fig = go.Figure()
    data = comm_t[comm_t.topic==topic]
    # Only communities that exist for this topic: a fixed range(10) hits an
    # empty selection (min() of nothing) or a missing key in hubs[topic].
    for i in sorted(pd.unique(data.comm))[:max_comms]:
        years = data[data.comm==i].year.astype(int)
        first_year, last_year = years.min(), years.max()
        # One bin per calendar year, first to last inclusive. x and y are
        # aligned, so the count of year Y is drawn at x == Y (x previously
        # started one year before the first bin).
        x = np.arange(first_year, last_year + 1)
        y = np.bincount((years - first_year).to_numpy(), minlength=len(x))
        fig.add_trace(
            go.Scatter(x=x, y=y, mode='lines', name=hubs[topic][i])
        )
    fig.update_layout(template='plotly_white',
                      title_text=topic,
                      xaxis={'range': [0,2100],
                             'title': 'year'},
                      yaxis={'title': 'growth'})
    fig.show()
    # NOTE(review): same file name as the marker plot written to the growth
    # directory, so one figure overwrites the other for shared topics.
    if save_fig:  # honor the notebook-level switch for writing figures
        fig.write_image(f"{path_fig}/{path_plot}/growth/{topic}.pdf")

# Change in community

Alternative conceptions of scientific revolutions: see the Stanford Encyclopedia of Philosophy entry on [Scientific Revolutions](https://plato.stanford.edu/entries/scientific-revolutions/#SomAltConSciRev).

> a prototype for revolutionary reorientation in the sciences. Just because it did not involve the introduction of additional objects or concepts, the transition from Newtonian to Einsteinian mechanics illustrates with particular clarity the scientific revolution as a displacement of the conceptual network through which scientists view the world. (Kuhn, 1970, 102)

Perhaps we can frame paradigm shifts as changes in how knowledge is organized into communities.

In [None]:
import os

path_plot = '5 modules'

# makedirs(exist_ok=True) creates missing parent directories as well and is a
# no-op when the directory already exists.
os.makedirs(os.path.join(path_fig, path_plot), exist_ok=True)

In [None]:
import os

# Output directory for the community figures; created together with any
# missing parent directory, no-op if it already exists.
os.makedirs(os.path.join(path_fig, path_plot, 'community'), exist_ok=True)

In [None]:
import plotly.express as px

# Inspect the first three entries of px.colors.qualitative.Plotly, the palette
# used for the marker colors of the multilayer community plot.
px.colors.qualitative.Plotly[:3]

In [None]:
from networkx.algorithms.community import greedy_modularity_communities

## Multilayer

In [None]:
def compute_multinet(g):
    """Unfold a dated graph into a multilayer directed graph, one layer per year.

    For every distinct value ``y`` of the ``'year'`` node attribute, a layer is
    added that contains all nodes of ``g`` with ``year <= y`` and the edges of
    ``g`` among them. Layer nodes are named ``f"{node}_{y}"`` and carry the
    original node's ``'year'``. Consecutive layers are coupled by an edge with
    ``weight=0.0001`` from each node's copy in the previous layer to its copy
    in the current layer.

    Parameters
    ----------
    g : networkx graph
        Every node must have a ``'year'`` attribute (``KeyError`` otherwise).

    Returns
    -------
    networkx.DiGraph
        The multilayer graph.
    """
    multinet = nx.DiGraph()
    # Distinct years only: with one entry per node, a year shared by several
    # nodes would rebuild the same layer repeatedly and couple that layer to
    # itself, i.e. add self-loops.
    years = sorted(set(nx.get_node_attributes(g, 'year').values()))
    prev_year, prev_nodes = None, []
    for year in years:
        nodes = [node for node in g.nodes if g.nodes[node]['year']<=year]
        subgraph = nx.subgraph(g, nodes)
        multinet.add_nodes_from(
            (f"{n}_{year}", {'year': g.nodes[n]['year']}) for n in subgraph.nodes
        )
        multinet.add_edges_from(
            (f"{s}_{year}", f"{t}_{year}") for s,t in subgraph.edges
        )
        if prev_year is not None:
            # Inter-layer coupling: previous copy -> current copy of a node.
            multinet.add_edges_from(
                ((f"{n}_{prev_year}", f"{n}_{year}") for n in prev_nodes),
                weight=0.0001
            )
        prev_year, prev_nodes = year, nodes
    return multinet

In [None]:
# Restrict the cells below that loop over `topics` to a single topic.
# NOTE(review): this rebinds `topics`, so earlier cells that iterate over it
# behave differently if they are re-run after this cell.
topics = ['cognitive science']

In [None]:
# Build the multilayer network of every selected topic.
# Each topic's own graph is passed explicitly rather than a notebook-global
# `graph`, which is only assigned by later cells and is not topic-specific.
multinets = {}
for topic in topics:
    multinets[topic] = compute_multinet(networks[topic].graph)

In [None]:
# Detect communities in the undirected version of every multilayer network.
multicomms = dict()
for topic in topics:
    undirected = nx.Graph(multinets[topic])
    multicomms[topic] = greedy_modularity_communities(undirected)

In [None]:
# Scatter every (layer, node) pair of the multilayer network, colored by the
# community it was assigned to.
for topic in topics:
    fig = go.Figure()
    graph = networks[topic].graph
    years = sorted(nx.get_node_attributes(graph, 'year').values())
    nodes = [n for y in years for n in graph.nodes if graph.nodes[n]['year']==y]
    # Index of the first occurrence of each year / node, looked up in O(1)
    # instead of calling list.index() for every plotted point.
    year_pos = {}
    for pos, layer_year in enumerate(years):
        year_pos.setdefault(layer_year, pos)
    node_pos = {}
    for pos, n in enumerate(nodes):
        node_pos.setdefault(n, pos)
    for i, c in enumerate(multicomms[topic]):
        x = []
        y = []
        for node in c:
            # Layer nodes are named f"{name}_{year}"; split at the LAST
            # underscore because the name itself may contain underscores.
            name, year = node.rsplit('_', 1)
            x.append(year_pos[int(year)])
            y.append(node_pos[name])
        fig.add_trace(
            go.Scatter(
                x=x, y=y,
                mode='markers',
                # The palette has 10 colors; wrap around for more communities.
                marker={'color': px.colors.qualitative.Plotly[i%10]},
                name=i
            )
        )
    fig.update_yaxes(ticktext=[], tickvals=[])
    fig.update_layout(
        template='plotly_white',
        title_text=topic,
        xaxis={'title': 'time'},
        yaxis={'title': 'nodes'}
    )
    fig.show()
# fig.write_image(f"{path_fig}/{path_plot}/community/{topic}.pdf")

## By layer

In [None]:
def compute_comm_by_layer(graph):
    """Detect communities separately in each cumulative yearly layer of a graph.

    Parameters
    ----------
    graph : networkx graph
        Every node must have a ``'year'`` attribute (``KeyError`` otherwise).

    Returns
    -------
    comms : list
        ``comms[i]`` is the list of communities found among the nodes with
        ``year <= years[i]``. Layers with fewer than three nodes yield a
        single community holding all of the layer's nodes; otherwise the
        result of ``greedy_modularity_communities`` on the undirected layer.
    years : list
        Sorted ``'year'`` values, one entry per node (repeated years are kept).
    """
    comms = []
    years = sorted(nx.get_node_attributes(graph, 'year').values())
    # A year shared by several nodes describes the same layer each time, so
    # community detection runs once per distinct year and is then reused.
    comms_of_year = {}
    for year in years:
        if year not in comms_of_year:
            nodes = [n for n in graph.nodes if graph.nodes[n]['year']<=year]
            if len(nodes)<3:
                comms_of_year[year] = [nodes]
            else:
                subgraph = nx.subgraph(graph, nodes)
                comms_of_year[year] = greedy_modularity_communities(nx.Graph(subgraph))
        comms.append(comms_of_year[year])
    return comms, years

In [None]:
# Scatter the per-layer communities: x is the layer index, y the node
# position, with one trace per (layer, community).
for topic in ['cognitive science']: #topics:
    fig = go.Figure()
    graph = networks[topic].graph
    comms, years = compute_comm_by_layer(graph)
    nodes = [n for y in years for n in graph.nodes if graph.nodes[n]['year']==y]
    # Index of the first occurrence of each node, looked up in O(1) instead
    # of calling list.index() for every plotted point.
    node_pos = {}
    for pos, n in enumerate(nodes):
        node_pos.setdefault(n, pos)
    for i, layer_comms in enumerate(comms):
        for j, c in enumerate(layer_comms):
            fig.add_trace(
                go.Scatter(
                    x=i*np.ones(len(c)), y=[node_pos[n] for n in c],
                    mode='markers', name=j
                )
            )
    fig.update_yaxes(ticktext=[], tickvals=[])
    fig.update_layout(
        template='plotly_white',
        title_text=topic,
        xaxis={'title': 'time'},
        yaxis={'title': 'nodes'}
    )
    fig.show()
# fig.write_image(f"{path_fig}/{path_plot}/community/{topic}.pdf")

## Leiden

- [GenLouvain](http://netwiki.amath.unc.edu/GenLouvain/GenLouvain)
- [leidenalg](https://leidenalg.readthedocs.io/en/latest/intro.html)

In [None]:
import scipy as sp
import leidenalg as la
import igraph as ig
import pickle

In [None]:
def networkx_to_igraph(nx_graph, vertex_id=None):
    """Convert a networkx graph into an igraph ``Graph``.

    Vertices are created in the iteration order of ``nx_graph.nodes``. Each
    vertex gets a ``'name'`` attribute (the networkx node key) and a ``'year'``
    attribute copied from the node data; each edge gets the networkx
    ``'weight'`` edge attribute.

    Parameters
    ----------
    nx_graph : networkx graph
        Every node needs a ``'year'`` attribute and every edge a ``'weight'``
        attribute (``KeyError`` otherwise).
    vertex_id : sequence, optional
        If given, stored as the ``'id'`` vertex attribute; entry ``k`` belongs
        to the ``k``-th node in the iteration order of ``nx_graph.nodes``.

    Returns
    -------
    igraph.Graph
        Created with the defaults of ``ig.Graph()``.
        NOTE(review): the directedness of ``nx_graph`` is not passed on
        explicitly - confirm this is intended.
    """
    nodes = list(nx_graph.nodes)
    # Node -> vertex index, so each edge endpoint is resolved in O(1) rather
    # than by a list.index() scan.
    position = {node: idx for idx, node in enumerate(nodes)}
    edge_list = list(nx_graph.edges(data=True))
    ig_graph = ig.Graph()
    ig_graph.add_vertices(len(nodes))
    ig_graph.vs['name'] = nodes
    ig_graph.vs['year'] = [nx_graph.nodes[n]['year'] for n in nodes]
    ig_graph.add_edges([(position[s], position[t]) for s, t, _ in edge_list])
    ig_graph.es['weight'] = [attrs['weight'] for _, _, attrs in edge_list]
    # `is not None` instead of a truthiness test, so array-like ids work too.
    if vertex_id is not None:
        ig_graph.vs['id'] = vertex_id
    return ig_graph

In [None]:
# Static Leiden partition of the earth science network, drawn on a circle.
g = networkx_to_igraph(networks['earth science'].graph)
partition = la.find_partition(g, la.ModularityVertexPartition)
layout = g.layout('circle')
plot_options = dict(
    layout=layout,
    bbox=(500, 500),
    margin=50,
    vertex_size=5,
    vertex_label_size=10,
    edge_width=0.1,
    edge_curved=True,
)
ig.plot(partition, **plot_options)

In [None]:
# Temporal Leiden partition of the earth science network: one slice per entry
# of `years`, each holding all nodes that exist up to that year.
graph = networks['earth science'].graph
nodes = list(graph.nodes)
# Node -> index in `nodes`; used as the vertex id shared across slices.
node_index = {n: idx for idx, n in enumerate(nodes)}
years = sorted(nx.get_node_attributes(graph, 'year').values())
nodes_by_year = [
    [n for n in nodes if graph.nodes[n]['year']<=year]
    for year in years
]
slices = []
for layer_nodes in nodes_by_year:
    # Rebuild the slice with its nodes inserted in `layer_nodes` order: a
    # networkx subgraph view does not guarantee that its nodes iterate in the
    # order of the list it was built from, and the ids below are positional.
    layer = graph.__class__()
    layer.add_nodes_from((n, graph.nodes[n]) for n in layer_nodes)
    layer.add_edges_from(nx.subgraph(graph, layer_nodes).edges(data=True))
    slices.append(
        networkx_to_igraph(layer, [node_index[n] for n in layer_nodes])
    )
membership, improvement = la.find_partition_temporal(
    slices,
    la.ModularityVertexPartition,
    interslice_weight=1,
)
membership[:3], improvement

In [None]:
# Number of nodes in the evolutionary biology network.
networks['evolutionary biology'].graph.number_of_nodes()

In [None]:
# Value passed as `interslice_weight` to la.find_partition_temporal; it also
# names the output subdirectory of the resulting figures and pickle.
Cjrs = 0.1

In [None]:
import os

# Output directory for this interslice weight; created together with any
# missing parent directory, no-op if it already exists.
os.makedirs(os.path.join(path_fig, path_plot, 'community', f"{Cjrs}"), exist_ok=True)

In [None]:
# Temporal Leiden partition of every selected topic, plotted as
# (layer index, node) markers with one trace per community.
memberships = {}
improvements = {}
for topic in topics:
    fig = go.Figure()
    graph = networks[topic].graph
    nodes = list(graph.nodes)
    # Node -> index in `nodes`; used as the vertex id shared across slices.
    node_index = {n: idx for idx, n in enumerate(nodes)}
    # Vertical plot position of each node: nodes ordered by year.
    sorted_nodes = sorted(
        nodes,
        key=lambda node: graph.nodes[node]['year']
    )
    node_row = {n: row for row, n in enumerate(sorted_nodes)}
    years = sorted(nx.get_node_attributes(graph, 'year').values())
    nodes_by_year = [
        [n for n in nodes if graph.nodes[n]['year']<=year]
        for year in years
    ]
    slices = []
    for layer_nodes in nodes_by_year:
        # Rebuild the slice with its nodes inserted in `layer_nodes` order: a
        # networkx subgraph view does not guarantee that its nodes iterate in
        # the order of the list it was built from, and both the ids below and
        # the membership lookup further down are positional.
        layer = graph.__class__()
        layer.add_nodes_from((n, graph.nodes[n]) for n in layer_nodes)
        layer.add_edges_from(nx.subgraph(graph, layer_nodes).edges(data=True))
        slices.append(
            networkx_to_igraph(layer, [node_index[n] for n in layer_nodes])
        )
    memberships[topic], improvements[topic] = la.find_partition_temporal(
        slices,
        la.ModularityVertexPartition,
        interslice_weight=Cjrs,
    )
    # Checkpoint all results so far after every topic; the context manager
    # closes the file handle.
    pickle_path = os.path.join(
        path_fig, path_plot, 'community', f"{Cjrs}", 'memberships.pickle'
    )
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump((memberships, improvements), pickle_file)
    # Group the (layer index, node row) points by community in one pass
    # instead of rescanning every point once per community.
    points = {}
    for i, layer_nodes in enumerate(nodes_by_year):
        for j, n in enumerate(layer_nodes):
            points.setdefault(memberships[topic][i][j], []).append((i, node_row[n]))
    for part in sorted(points):
        xy = points[part]
        fig.add_trace(
            go.Scatter(
                x=[p[0] for p in xy],
                y=[p[1] for p in xy],
                mode='markers',
                marker={'size': 3},
                name=part
            )
        )
    fig.update_yaxes(ticktext=[], tickvals=[])
    # Roughly ten x ticks; the step is at least 1 so that fewer than ten
    # layers do not produce a zero slice step.
    tick_step = max(1, len(years)//10)
    fig.update_xaxes(
        tickvals=list(range(len(years)))[0::tick_step],
        ticktext=years[0::tick_step]
    )
    fig.update_layout(
        template='plotly_white',
        title_text=topic,
        xaxis={'title': 'years'},
        yaxis={'title': 'nodes'}
    )
    fig.show()
    if save_fig:  # honor the notebook-level switch for writing figures
        fig.write_image(
            os.path.join(path_fig, path_plot, 'community', f"{Cjrs}", f"{topic}.pdf")
        )

A paradigm shift can be measured by how much the addition of a node changes the existing community structure.

### Plot raw

### Measure shifts

### Plot shifts