# Load networks

In [None]:
%reload_ext autoreload
%autoreload 2
import os,sys
sys.path.insert(1, os.path.join(sys.path[0], '..', 'module'))
import wiki
import numpy as np
import pandas as pd
import networkx as nx
import scipy as sp

In [None]:
import plotly.express as px
import plotly.graph_objects as go

In [None]:
topics = ['anatomy', 'biochemistry', 'cognitive science', 'evolutionary biology',
          'genetics', 'immunology', 'molecular biology', 'chemistry', 'biophysics',
          'energy', 'optics', 'earth science', 'geology', 'meteorology',
          'philosophy of language', 'philosophy of law', 'philosophy of mind',
          'philosophy of science', 'economics', 'accounting', 'education',
          'linguistics', 'law', 'psychology', 'sociology', 'electronics',
          'software engineering', 'robotics',
          'calculus', 'geometry', 'abstract algebra',
          'Boolean algebra', 'commutative algebra', 'group theory', 'linear algebra',
          'number theory', 'dynamical systems and differential equations']

In [None]:
path_saved = os.path.join('/','Users','harangju','Developer',
                          'data','wiki','graphs','dated')

In [None]:
path_fig = os.path.join('/','Users','harangju','Box Sync',
                        'Research','my papers','wikipedia','results')
save_fig = False

In [None]:
path_plot = '4 fields'

In [None]:
# topics = ['earth science']

In [None]:
import wiki

networks = {}
for topic in topics:
    print(topic, end=' ')
    networks[topic] = wiki.Net(
        path_graph=os.path.join(path_saved, topic + '.pickle'),
        path_barcodes=os.path.join(path_saved, topic + '.barcode')
    )

In [None]:
path_null = '/Users/harangju/Developer/data/wiki/graphs/null-target/'
num_nulls = 10
null_targets = {}
for topic in topics:
    print(topic, end=' ')
    null_targets[topic] = [None for i in range(num_nulls)]
    for i in range(num_nulls):
        null_targets[topic][i] = wiki.Net(path_graph=path_null + topic + '-null-' + str(i) + '.pickle',
                                          path_barcodes=path_null + topic + '-null-' + str(i) + '.barcode')

In [None]:
path_null = '/Users/harangju/Developer/data/wiki/graphs/null-year/'
num_nulls = 10
null_years = {}
for topic in topics:
    print(topic, end=' ')
    null_years[topic] = [None for i in range(num_nulls)]
    for i in range(num_nulls):
        null_years[topic][i] = wiki.Net(path_graph=path_null + topic + '-null-' + str(i) + '.pickle',
                                        path_barcodes=path_null + topic + '-null-' + str(i) + '.barcode')

# Network statistics

In [None]:
import pickle
import pandas as pd
path_analysis = '/Users/harangju/Developer/data/wiki/analysis/'
# Load the precomputed statistics tables. Context managers close the file
# handles as soon as each table has been read.
# NOTE: unpickling runs arbitrary code; only load files produced by this project.
with open(path_analysis+'stats.pickle', 'rb') as f:
    df = pickle.load(f)
with open(path_analysis+'stats_expand.pickle', 'rb') as f:
    df_expand = pickle.load(f)
# Store the label columns as plain Python objects
# (presumably they are pickled as categoricals — TODO confirm).
df.topic = df.topic.astype('object')
df.measure = df.measure.astype('object')
df_expand.topic = df_expand.topic.astype('object')
df_expand.measure = df_expand.measure.astype('object')

In [None]:
df_mean = df_expand\
    .groupby(['topic', 'measure'], as_index=False)\
    .mean()\
    .pivot(index='topic', columns='measure', values='value')\
    .reset_index()

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_mean['coreness-null-target'],
                         y=df_mean['coreness'],
                         mode='markers+text',
                         name='coreness',
                         text=df_mean['topic'],
                         textposition='top left'))
fig.add_trace(go.Scatter(x=[0,1], y=[0,1],
                         mode='lines',
                         line=dict(dash='dash'),
                         name='1:1'))
fig.update_layout(template='plotly_white',
                  title='coreness',
                  width=800, height=800,
                  xaxis=dict(title='null',
                             range=[.3,.8]),
                  yaxis=dict(title='real',
                             range=[.5,1],
                             scaleanchor='x',
                             scaleratio=1))
fig.show()

In [None]:
path_plot = '4 fields'
fig.write_image(os.path.join(path_fig, path_plot, 'coreness.pdf'))

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_mean['modularity-null-target'],
                         y=df_mean['modularity'],
                         mode='markers+text',
                         name='modularity',
                         text=df_mean['topic'],
                         textposition='bottom right'))
fig.add_trace(go.Scatter(x=[0,1], y=[0,1],
                         mode='lines',
                         line=dict(dash='dash'),
                         name='1:1'))
fig.update_layout(template='plotly_white',
                  title='modularity',
                  width=900, height=900,
                  xaxis=dict(title='null',
                             range=[.1,.7]),
                  yaxis=dict(title='real',
                             range=[.1,.7],
                             scaleanchor='x',
                             scaleratio=1))
fig.show()

In [None]:
path_plot = '4 fields'
fig.write_image(os.path.join(path_fig, path_plot, 'modularity.pdf'))

## Organizing along network statistics

In [None]:
def distance_to_line(a,b,c,x,y):
    """Return the perpendicular distance from the point (x, y) to the line a*x + b*y + c = 0."""
    # |a*x + b*y + c| is the unnormalized distance; dividing by the length of
    # the normal vector (a, b) converts it to a Euclidean distance.
    offset = abs(a*x + b*y + c)
    normal_length = np.sqrt(a**2 + b**2)
    return offset / normal_length

def closest_point_on_line_to_point(m,b,x,y): # y=mx+b, (x,y)
    """Return the point on the line y = m*x + b that is closest to (x, y).

    Parameters
    ----------
    m, b: slope and intercept of the line
    x, y: coordinates of the point to project

    Returns
    -------
    (_x, _y): coordinates of the orthogonal projection of (x, y) onto the line
    """
    # Foot of the perpendicular: intersect y' = m*x' + b with the normal through
    # (x, y), whose direction is (-m, 1). Solving gives the expression below,
    # which is also well defined for a horizontal line (m = 0).
    _x = (x + m*(y - b)) / (1 + m**2)
    _y = m*_x + b
    return _x, _y

def distance_along_closest_point(m,b,x,y):
    """Return the distance from the origin to the point that
    closest_point_on_line_to_point yields for (x, y) and the line y = m*x + b."""
    foot = closest_point_on_line_to_point(m,b,x,y)
    return np.sqrt(foot[0]**2 + foot[1]**2)

In [None]:
df_mean = df_mean.set_index('topic')

In [None]:
coreness_line = sp.stats.linregress(df_mean['coreness-null-target'], df_mean['coreness'])
coreness_line

In [None]:
network_stats = pd.DataFrame(
    {
        'topic': topics,
        'modularity (adjusted)': 
            [distance_to_line(-1,1,0,
                              df_mean.loc[t]['modularity-null-target'],
                              df_mean.loc[t]['modularity'])
             for t in topics],
        'coreness (adjusted)': 
            [distance_along_closest_point(coreness_line[0], coreness_line[1],
                                          df_mean.loc[t]['coreness-null-target'],
                                          df_mean.loc[t]['coreness'])
                     for t in topics]
    }
)

In [None]:
sp.stats.linregress(network_stats['modularity (adjusted)'], network_stats['coreness (adjusted)'])

In [None]:
# network_stats.sort_values('modularity (adjusted)', axis=0, ascending=False)
# network_stats.sort_values('coreness', axis=0, ascending=False)
fig = go.Figure()
fig.add_trace(go.Scatter(x=network_stats['modularity (adjusted)'],
                         y=network_stats['coreness (adjusted)'],
                         text=network_stats['topic'],
                         mode='markers'))
fig.update_layout(template='plotly_white',
                  xaxis={'title': 'modularity (adjusted)'},
                  yaxis={'title': 'coreness (adjusted)'})
fig.show()

In [None]:
network_stats.sort_values('modularity (adjusted)', ascending=False)['topic'].values,\
network_stats.sort_values('coreness (adjusted)', ascending=False)['topic'].values

In [None]:
pd.DataFrame({'ordered by adjusted modularity':
                  network_stats.sort_values('modularity (adjusted)', ascending=False)['topic'].values,
              'ordered by adjusted coreness':
                  network_stats.sort_values('coreness (adjusted)', ascending=False)['topic'].values})

# Cavity statistics

More dense connections
In harder sciences?

In [None]:
import os
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
# path_fig = '/Users/harangju/Box Sync/Research/my papers/wikipedia/results/'

In [None]:
barcodes = pd.concat([network.barcodes.assign(topic=topic)\
                                      .assign(type='real')\
                                      .assign(null=0)
                      for topic, network in networks.items()] +
                     [network.barcodes.assign(topic=topic)\
                                      .assign(type='targets')\
                                      .assign(null=i)
                      for topic, nulls in null_targets.items()
                          for i, network in enumerate(nulls)] +
                     [network.barcodes.assign(topic=topic)\
                                      .assign(type='years')\
                                      .assign(null=i)
                      for topic, nulls in null_years.items()
                          for i, network in enumerate(nulls)],
                     ignore_index=True, sort=False)
barcodes = barcodes[barcodes.lifetime!=0]

In [None]:
pd.options.display.max_rows = 5

In [None]:
null_count = barcodes\
    .groupby(['type','topic','dim'], as_index=False)['null'].max()
null_count.null = null_count.null + 1
null_count

In [None]:
dim_count = barcodes\
    .assign(count=1)\
    .groupby(['type','topic','dim'], as_index=False)['count'].sum()\
    .sort_values('type', axis=0, ascending=True)
dim_count

In [None]:
dims = pd\
    .merge(dim_count, null_count, how='left', left_on=['type','topic','dim'], right_on=['type','topic','dim'])\
    .sort_values(by=['type','topic','dim'])\
    .reset_index(drop=True)\
    .rename(columns={'count': 'dim_count', 'null': 'null_count'})
dims['dim_count_norm'] = dims['dim_count'] / dims.null_count
dims

In [None]:
fig = px.box(dims[dims.type=='real'], x='dim', y='dim_count_norm')
for topic in pd.unique(dims['topic']):
    data = dims[(dims['type']=='real') & (dims['topic']==topic)].sort_values(by='dim')
    fig.add_trace(go.Scatter(
        x=data['dim'], y=data['dim_count_norm'], name=topic,
        mode='markers+lines', visible='legendonly'
    ))
fig.update_layout(template='plotly_white',
                  title_text='Dimensionality',
                  yaxis={'range': [0,10000]})
fig.update_traces(marker={'size': 3}, line={'width': 1})
fig.show()
# fig.write_image(os.path.join(path_fig, path_plot, 'dimensionality_real.pdf'))

In [None]:
pd.options.display.max_rows = 10
dim_stats = pd.DataFrame(dims\
    .groupby(['type','topic'])['dim_count_norm'].idxmax())\
    .reset_index()\
    .rename(columns={'dim_count_norm': 'dim_mode_idx'})
dim_stats['mode'] = dims.iloc[dim_stats['dim_mode_idx']]['dim'].values
dim_stats = dim_stats.drop('dim_mode_idx', axis=1)
dim_stats

In [None]:
# Count-weighted mean cavity dimension, computed separately for every
# (type, topic) pair so that real networks and null models are not pooled.
avg = dims\
    .assign(dimXcount=dims['dim'] * dims['dim_count_norm'])\
    .groupby(['type','topic'], as_index=False)[['dim_count_norm','dimXcount']].sum()\
    .rename(columns={'dim_count_norm': 'dim_count_norm_sum'})
avg['mean'] = avg['dimXcount'] / avg['dim_count_norm_sum']
# Attach the means by key rather than by row position; dropping any existing
# 'mean' column first keeps the cell safe to re-run.
dim_stats = dim_stats\
    .drop(columns='mean', errors='ignore')\
    .merge(avg[['type','topic','mean']], on=['type','topic'], how='left')

In [None]:
pd.options.display.max_rows = 111
dim_stats[dim_stats['type']=='real']\
    .sort_values(['mean'])\
    .reset_index()\
    .drop(['type','index'], axis=1)

Can we say that the dimensionality of cavities reveals the **complexity of the information**?

# Big network

In [None]:
big_net = wiki.Net(path_graph=os.path.join(path_saved, 'big_network.pickle'))

In [None]:
A = nx.adjacency_matrix(big_net.graph)
A

In [None]:
big_net.graph['Commercial law']['Unfair competition']

In [None]:
big_net.graph['Unfair competition']

In [None]:
list(big_net.graph.nodes).index('Commercial law'), list(big_net.graph.nodes).index('Unfair competition')

In [None]:
A[0,1], A[1,0]

Notation: a nonzero `A[i,j]` means that node `i` points to node `j` in the graph, i.e. that page `i` is linked to from page `j` in Wikipedia.

# Births

In [None]:
pd.DataFrame(
    {
        'topic': topics,
        'year (mean)': [np.mean(list(nx.get_node_attributes(network.graph, 'year').values()))
                        for network in networks.values()],
        'year (std)': [np.std(list(nx.get_node_attributes(network.graph, 'year').values()))
                       for network in networks.values()]
    }
).sort_values(['year (mean)'], ignore_index=True)

# Controllability

Is there a spectrum of controllability in nodes & topics (as summarized over nodes) from "pure" to "applied" fields?

Make sure to check outdegree.

In [None]:
import scipy as sp
# import control

def gramians(A, M):
    '''
    Finite-horizon controllability & observability Gramians of a network.

    The matrix is divided by ``1 + r``, where ``r`` is the largest eigenvalue
    magnitude found, and the Gramians are accumulated as
    ``CG = sum_{m=0..M} A^m (A^T)^m`` and ``OG = sum_{m=0..M} (A^T)^m A^m``
    using the normalized matrix.

    Parameters
    ----------
    A: scipy sparse matrix or array (CSC or CSR)
        A[i,j] should have j->i
        An input stored in CSR format is transposed on entry; this yields CSC
        storage but also swaps rows and columns.
        NOTE(review): confirm that callers passing CSR rely on that swap.
    M: int
        highest power of the normalized matrix included in the sums

    Returns
    -------
    CG, OG: scipy sparse, CSC format
        controllability & observability Gramians
    '''
    # Check the storage format instead of the class so that sparse arrays
    # (csr_array) are treated exactly like sparse matrices (csr_matrix).
    if getattr(A, 'format', None) == 'csr':
        A = A.transpose()
    n = A.shape[0]
    if n < 8:
        # eigs (ARPACK, default k=6) needs k < n - 1; small inputs go dense.
        val = np.linalg.eigvals(A.toarray())
    else:
        val = sp.sparse.linalg.eigs(A.transpose(), return_eigenvectors=False)
    # eigs returns complex eigenvalues in no guaranteed order, so normalize by
    # the largest magnitude; this also keeps the result real-valued.
    Anorm = (A / (1 + np.max(np.abs(val)))).tocsc()
    # m = 0 term of both sums is the identity
    CG = sp.sparse.identity(n, dtype=np.float64, format='csc')
    OG = sp.sparse.identity(n, dtype=np.float64, format='csc')
    power = None
    for m in range(1,M+1):
        print('G ' + str(m))
        # Running power Anorm^m; (Anorm^T)^m is simply its transpose, so only
        # the current power has to be kept in memory.
        power = Anorm if power is None else (power @ Anorm).tocsc()
        powerT = power.transpose().tocsc()
        # '@' is the matrix product for sparse matrices and sparse arrays alike
        # ('*' is elementwise for sparse arrays).
        CG = CG + power @ powerT
        OG = OG + powerT @ power
    return CG, OG

## Calculate Gramians

In [None]:
nodes = list(big_net.graph.nodes)
grams = pd.DataFrame({'node': nodes})

In [None]:
M = 5
for m in range(1,M+1):
    print(f"m={m}")
    CG, OG = gramians(A, m)
    grams[f"CG_{m}"] = CG.diagonal()
    grams[f"OG_{m}"] = OG.diagonal()

In [None]:
del CG, OG

In [None]:
grams = grams.set_index('node')

In [None]:
grams

## Save

In [None]:
import pickle
path_save = os.path.join('/','Users','harangju','Developer','data','wiki','analysis')
# The context manager flushes and closes the file once the dump completes.
with open(os.path.join(path_save, 'grams.pickle'), 'wb') as f:
    pickle.dump(grams, f)

## Load

In [None]:
import pickle
path_save = os.path.join('/','Users','harangju','Developer','data','wiki','analysis')
# NOTE: unpickling runs arbitrary code; only load files produced by this project.
with open(os.path.join(path_save, 'grams.pickle'), 'rb') as f:
    grams = pickle.load(f)

In [None]:
pd.options.display.max_rows = 100
grams

## Controllability statistics

In [None]:
M=5

### Node statistics

In [None]:
num_rows = 20
pd.DataFrame(
    np.concatenate(
        [[grams.sort_values(f"OG_{m}", ascending=False).iloc[0:num_rows].index.values]
         for m in range(1,M+1)],
        axis=0
    ).transpose(),
    columns=[f"OG_{m}" for m in range(1,M+1)]
)

In [None]:
# pd.options.display.max_rows = 100
num_rows = 20
pd.DataFrame(
    np.concatenate(
        [[grams.sort_values(f"CG_{m}", ascending=False).iloc[0:num_rows].index.values]
         for m in range(1,M+1)],
        axis=0
    ).transpose(),
    columns=[f"CG_{m}" for m in range(1,M+1)]
)

### Node statistics averaged in topics

In [None]:
# Average every column of `grams` over the nodes of each topic network.
# Rows are collected in a list and turned into a DataFrame once, rather than
# growing the frame with pd.concat on every iteration.
gram_columns = list(grams.columns.values)
topic_means = []
for topic in topics:
    # one label-based selection per topic instead of one lookup per node and column
    members = grams.loc[list(networks[topic].graph.nodes)]
    topic_means.append([np.mean(members[key].values) for key in gram_columns])
grams_topics = pd.DataFrame(topic_means,
                            index=pd.Index(topics, name='topic'),
                            columns=gram_columns)

In [None]:
OG = pd.DataFrame(
    np.concatenate(
        [[grams_topics.sort_values(f"OG_{m}", ascending=False).index.values]
         for m in range(1,M+1)],
        axis=0
    ).transpose(),
    columns=[f"OG_{m}" for m in range(1,M+1)]
)
OG

In [None]:
path_fig = os.path.join('/','Users','harangju','Box Sync','Research','my papers','wikipedia','results')
path_plot = '4 fields'
OG.to_csv(os.path.join(path_fig, path_plot, 'OG.csv'))

In [None]:
CG = pd.DataFrame(
    np.concatenate(
        [[grams_topics.sort_values(f"CG_{m}", ascending=False).index.values]
         for m in range(1,M+1)],
        axis=0
    ).transpose(),
    columns=[f"CG_{m}" for m in range(1,M+1)]
)
CG

In [None]:
path_fig = os.path.join('/','Users','harangju','Box Sync','Research','my papers','wikipedia','results')
path_plot = '4 fields'
CG.to_csv(os.path.join(path_fig, path_plot, 'CG.csv'))

## Cavity vs controllability

In [None]:
z = pd.DataFrame({'topic': topics,
                  'weights_mean': [np.mean([big_net.graph[n][s]['weight']
                                            for n in networks[topic].graph.nodes
                                            for s in big_net.graph[n]])
                                   for topic in topics],
                  'weights_sum': [np.sum([big_net.graph[n][s]['weight']
                                            for n in networks[topic].graph.nodes
                                            for s in big_net.graph[n]])
                                   for topic in topics]
                 })
z

In [None]:
y = np.array([grams_topics.loc[t]['CG_5'] for t in topics]).astype(np.float64)#.reshape((-1,1))
x = dim_stats[dim_stats['type']=='real']\
    .sort_values(['mean'])\
    .reset_index()\
    .drop(['type','index'], axis=1)\
    .set_index('topic')
x = np.array([x.loc[t]['mean'] for t in topics])

In [None]:
x_y = sp.stats.linregress(x, y)
z_y_mean = sp.stats.linregress(z['weights_mean'].values, y)
z_y_sum = sp.stats.linregress(z['weights_sum'].values, y)

In [None]:
x_y, z_y_mean, z_y_sum

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y,
                         mode='markers', 
                         name='Fields',
                         hovertext=topics))
# Evaluate the fitted regression line on an even grid over the observed x range.
x_fit = np.linspace(min(x), max(x), num=100)
fig.add_trace(go.Scatter(x=x_fit,
                         y=x_fit * x_y[0] + x_y[1],
                         mode='lines',
                         # element 2 of a linregress result is the correlation
                         # coefficient r, so it is labelled as r (not R^2)
                         name=f"r={x_y[2]:.2f}, p={x_y[3]:.1e}"))
fig.update_layout(template='plotly_white',
                  yaxis={'title': 'mean impulse response'},
                  xaxis={'title': 'mean cavity dimension'})
fig.show()

In [None]:
path_fig = os.path.join('/','Users','harangju','Box Sync','Research',
                        'my papers','wikipedia','results')
path_plot = '4 fields'
fig.write_image(os.path.join(path_fig, path_plot, 'cavity_dim_vs_ctrb.pdf'))

This result suggests that the more complex the relationships defined between pieces of knowledge in a field, the more influential that field is. Here, controllability is averaged across the nodes in a field; I should also try this with the topic-summarized network. I expect the same result to hold, however: the higher the dimensionality of knowledge, the more impact it has on the rest of the knowledge network. See the fields with low dimensionality. Those fields are 'biophysics'...

## Node stats

In [None]:
# Per-node statistics of the big network, indexed by node:
#   outdegree          - number of successors
#   weighted_outdegree - list of the weights of the outgoing edges
#   degree             - number of successors plus number of predecessors
#   clustering         - nx.clustering value of the node
# Rows are gathered in a list and converted once; concatenating a DataFrame
# per node copies the whole table on every iteration.
node_clustering = nx.clustering(big_net.graph)
node_rows = []
for node in big_net.graph.nodes:
    successors = list(big_net.graph.successors(node))
    predecessors = list(big_net.graph.predecessors(node))
    node_rows.append([
        node,
        len(successors),
        [big_net.graph.edges[node,t]['weight'] for t in successors],
        len(successors) + len(predecessors),
        node_clustering[node]
    ])
node_stats = pd.DataFrame(
    node_rows,
    columns=[
        'node',
        'outdegree',
        'weighted_outdegree',
        'degree',
        'clustering'
    ]
)
node_stats = node_stats.set_index('node')
node_stats

### Controllability vs outdegree

In [None]:
# Regress the CG_m value of every node on its outdegree, for m = 1..5.
_ir_nodes = list(big_net.graph.nodes)
_ir_outdegrees = [node_stats.loc[n].outdegree for n in _ir_nodes]
outdeg_ir1, outdeg_ir2, outdeg_ir3, outdeg_ir4, outdeg_ir5 = [
    sp.stats.linregress(
        _ir_outdegrees,
        [float(grams.loc[n][column]) for n in _ir_nodes]
    )
    for column in ('CG_1', 'CG_2', 'CG_3', 'CG_4', 'CG_5')
]

In [None]:
outdeg_ir1, outdeg_ir2, outdeg_ir3, outdeg_ir4, outdeg_ir5

In [None]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=[node_stats.loc[n].outdegree for n in big_net.graph.nodes],
        y=[float(grams.loc[n].CG_5) for n in big_net.graph.nodes],
        mode='markers'
    )
)
fig.update_layout(
    width=400, height=400,
    template='plotly_white',
    title=f"{outdeg_ir5.slope:.2f}x+{outdeg_ir5.intercept:.2f}; "+\
          f"r={outdeg_ir5.rvalue:.2f}, p={outdeg_ir5.pvalue:.2e}",
    xaxis={'title': 'mean outdegree'},
    yaxis={'title': 'mean IR'}
)
fig.show()

### Controllability vs degree

In [None]:
deg_ir5 = sp.stats.linregress(
    [node_stats.loc[n].degree for n in big_net.graph.nodes],
    [float(grams.loc[n].CG_5) for n in big_net.graph.nodes]
)
deg_ir5

In [None]:
# Scatter of CG_5 against node degree, titled with the deg_ir5 regression fit.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        # degrees live in node_stats; no standalone `degree` table is defined
        x=[node_stats.loc[n].degree for n in big_net.graph.nodes],
        y=[float(grams.loc[n].CG_5) for n in big_net.graph.nodes],
        mode='markers'
    )
)
fig.update_layout(
    width=400, height=400,
    template='plotly_white',
    title=f"{deg_ir5.slope:.2f}x+{deg_ir5.intercept:.2f}; "+\
          f"r={deg_ir5.rvalue:.2f}, p={deg_ir5.pvalue:.2e}",
    xaxis={'title': 'degree'},
    yaxis={'title': 'IR'}
)
fig.show()

### Controllability vs clustering

In [None]:
clstr_ir5 = sp.stats.linregress(
    [node_stats.loc[n].clustering for n in big_net.graph.nodes],
    [float(grams.loc[n].CG_5) for n in big_net.graph.nodes]
)
clstr_ir5

In [None]:
# Scatter of CG_5 against node clustering, titled with the clstr_ir5 regression fit.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=[node_stats.loc[n].clustering for n in big_net.graph.nodes],
        y=[float(grams.loc[n].CG_5) for n in big_net.graph.nodes],
        mode='markers'
    )
)
fig.update_layout(
    width=400, height=400,
    template='plotly_white',
    title=f"{clstr_ir5.slope:.2f}x+{clstr_ir5.intercept:.2f}; "+\
          f"r={clstr_ir5.rvalue:.2f}, p={clstr_ir5.pvalue:.2e}",
    # the x values are clustering coefficients, so label the axis accordingly
    xaxis={'title': 'clustering'},
    yaxis={'title': 'IR'}
)
fig.show()

## Cavity vs outdegree

In [None]:
cav_dim = dim_stats[dim_stats['type']=='real']\
    .sort_values(['mean'])\
    .reset_index()\
    .drop(['type','index'], axis=1)\
    .set_index('topic')

In [None]:
cav_dim.loc['anatomy']['mean']

In [None]:
cav_dim_means = [cav_dim.loc[t]['mean'] for t in topics]

In [None]:
# Inspect one node's row; outdegree is a column of node_stats
# (no standalone `outdegree` table is defined in this notebook).
node_stats.loc['Water']

In [None]:
out = [
    np.mean([node_stats.loc[n].outdegree for n in networks[t].graph.nodes])
    for t in topics
]
outdeg_dim = sp.stats.linregress(cav_dim_means, out)
outdeg_dim

In [None]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=cav_dim_means,
        y=out,
        mode='markers', 
        name='Fields',
        hovertext=topics
    )
)
x = np.linspace(min(cav_dim_means), max(cav_dim_means), 1000)
fig.add_trace(
    go.Scatter(
        x=x, y=outdeg_dim.slope*x + outdeg_dim.intercept,
        mode='lines'
    )
)
fig.update_layout(
    width=400, height=400,
    template='plotly_white',
    title=f"{outdeg_dim.slope:.2f}x+{outdeg_dim.intercept:.2f}\n"+\
          f"r={outdeg_dim.rvalue:.2f}, p={outdeg_dim.pvalue:.3f}",
    xaxis={'title': 'mean cavity dimension'},
    yaxis={'title': 'mean outdegree'}
)
fig.show()

In [None]:
path_fig = os.path.join('/','Users','harangju','Box Sync','Research',
                        'my papers','wikipedia','results')
path_plot = '4 fields'
fig.write_image(os.path.join(path_fig, path_plot, 'cavity_dim_vs_out.pdf'))

In [None]:
deg = [
    np.mean([node_stats.loc[n].degree for n in networks[t].graph.nodes])
    for t in topics
]
deg_dim = sp.stats.linregress(cav_dim_means, deg)
deg_dim

In [None]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=cav_dim_means,
        y=deg,
        mode='markers', 
        name='Fields',
        hovertext=topics
    )
)
x = np.linspace(min(cav_dim_means), max(cav_dim_means), 1000)
fig.add_trace(
    go.Scatter(
        x=x, y=deg_dim.slope*x + deg_dim.intercept,
        mode='lines'
    )
)
fig.update_layout(
    width=400, height=400,
    template='plotly_white',
    title=f"{deg_dim.slope:.2f}x+{deg_dim.intercept:.2f}\n"+\
          f"r={deg_dim.rvalue:.2f}, p={deg_dim.pvalue:.3f}",
    xaxis={'title': 'mean cavity dimension'},
    yaxis={'title': 'mean degree'}
)
fig.show()

In [None]:
path_fig = os.path.join('/','Users','harangju','Box Sync','Research',
                        'my papers','wikipedia','results')
path_plot = '4 fields'
fig.write_image(os.path.join(path_fig, path_plot, 'cavity_dim_vs_deg.pdf'))

In [None]:
clu = [
    np.mean([node_stats.loc[n].clustering for n in networks[t].graph.nodes])
    for t in topics
]
clu_dim = sp.stats.linregress(cav_dim_means, clu)
clu_dim

In [None]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=cav_dim_means,
        y=clu,
        mode='markers', 
        name='Fields',
        hovertext=topics
    )
)
x = np.linspace(min(cav_dim_means), max(cav_dim_means), 1000)
fig.add_trace(
    go.Scatter(
        x=x, y=clu_dim.slope*x + clu_dim.intercept,
        mode='lines'
    )
)
fig.update_layout(
    width=400, height=400,
    template='plotly_white',
    title=f"{clu_dim.slope:.2f}x+{clu_dim.intercept:.2f}\n"+\
          f"r={clu_dim.rvalue:.2f}, p={clu_dim.pvalue:.3f}",
    xaxis={'title': 'mean cavity dimension'},
    yaxis={'title': 'mean clustering'}
)
fig.show()

In [None]:
path_fig = os.path.join('/','Users','harangju','Box Sync','Research',
                        'my papers','wikipedia','results')
path_plot = '4 fields'
fig.write_image(os.path.join(path_fig, path_plot, 'cavity_dim_vs_clustering.pdf'))

In [None]:
node = 'Water'
[big_net.graph.edges[node,t]['weight'] for t in big_net.graph.successors(node)][:3]

In [None]:
# For every node of the big network, list the weights of its outgoing edges.
# The table is built in one step; concatenating one single-row DataFrame per
# node copies the whole table on every iteration.
weighted_outdegree = pd.DataFrame(
    [
        [
            node,
            [big_net.graph.edges[node,t]['weight']
             for t in big_net.graph.successors(node)]
        ]
        for node in big_net.graph.nodes
    ],
    columns=['node', 'weighted outdegree']
)
weighted_outdegree = weighted_outdegree.set_index('node')

In [None]:
weighted_outdegree

In [None]:
t = topics[0]
[
    w 
    for n in networks[t].graph.nodes
    for w in weighted_outdegree.loc[n]['weighted outdegree']
][:3]

In [None]:
wout = [
    np.mean(
        [
            w
            for n in networks[t].graph.nodes
            for w in weighted_outdegree.loc[n]['weighted outdegree']
        ]
    )
    for t in topics
]
weighted_outdeg_dim = sp.stats.linregress(cav_dim_means, wout)

In [None]:
weighted_outdeg_dim

In [None]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=cav_dim_means,
        y=wout,
        mode='markers', 
        name='Fields',
        hovertext=topics
    )
)
x = np.linspace(min(cav_dim_means), max(cav_dim_means), 1000)
fig.add_trace(
    go.Scatter(
        x=x, y=weighted_outdeg_dim.slope*x + weighted_outdeg_dim.intercept,
        mode='lines'
    )
)
fig.update_layout(
    width=400, height=400,
    template='plotly_white',
    title=f"{weighted_outdeg_dim.slope:.2f}x+{weighted_outdeg_dim.intercept:.2f}\n"+\
          f"r={weighted_outdeg_dim.rvalue:.2f}, p={weighted_outdeg_dim.pvalue:.2f}",
    xaxis={'title': 'mean cavity dimension'},
    yaxis={'title': 'mean weighted outdegree'}
)
fig.show()

In [None]:
path_fig = os.path.join('/','Users','harangju','Box Sync','Research',
                        'my papers','wikipedia','results')
path_plot = '4 fields'
fig.write_image(os.path.join(path_fig, path_plot, 'cavity_dim_vs_wout.pdf'))

# Community summarization

In [None]:
wiki.Net.assign_communities(big_net.graph)

In [None]:
big_net.graph.graph

In [None]:
path_saved = os.path.join('/','Users','harangju','Developer',
                          'data','wiki','graphs','dated')

In [None]:
big_net.save_graph(os.path.join(path_saved,'big_network.pickle'))

In [None]:
# Collapse the big network into a community-level graph: the weight of edge
# (a, b) is the total weight of all edges going from a node in community a to
# a node in community b.
big_net_sum = wiki.Net()
community_of = nx.get_node_attributes(big_net.graph, 'community')
communities = set(community_of.values())
# One pass over the edges accumulates every inter-community total, instead of
# rescanning all nodes and testing all node pairs for each community pair.
link_weight = {}
for u, v, data in big_net.graph.edges(data=True):
    key = (community_of[u], community_of[v])
    link_weight[key] = link_weight.get(key, 0) + data['weight']
for community_a in communities:
    for community_b in communities:
        if community_a==community_b:
            continue
        # pairs without any connecting edge get weight 0
        weight_a_b = link_weight.get((community_a, community_b), 0)
        weight_b_a = link_weight.get((community_b, community_a), 0)
        print((community_a, community_b),
              (weight_a_b, weight_b_a))
        big_net_sum.graph.add_edge(community_a, community_b,
                                   weight=weight_a_b)
        big_net_sum.graph.add_edge(community_b, community_a,
                                   weight=weight_b_a)

In [None]:
big_net_sum.save_graph(os.path.join(path_saved,
                                    'big_network_summary.pickle'))
big_net_sum.save_graph(os.path.join(path_saved,
                                    'big_network_summary.gexf'))

# Topic summarization

In [None]:
'Commercial law' in networks['law'].graph,\
'Null' in networks['law'].graph

In [None]:
np.sum([1,2,3])

In [None]:
# Collapse the big network into a topic-level graph: edge (a, b) carries the
# sum and the mean of the weights of all big-network edges that go from a node
# of topic a to a node of topic b.
big_net_topics = wiki.Net()
# Topics each big-network node belongs to (a node may appear in several topics).
node_topics = {}
for topic in topics:
    for n in networks[topic].graph:
        if n in big_net.graph:
            node_topics.setdefault(n, []).append(topic)
# One pass over the edges collects the weights for every ordered topic pair,
# instead of testing every (node_a, node_b) combination for every topic pair.
pair_weights = {}
for u, v, data in big_net.graph.edges(data=True):
    for topic_u in node_topics.get(u, ()):
        for topic_v in node_topics.get(v, ()):
            if topic_u != topic_v:
                pair_weights.setdefault((topic_u, topic_v), []).append(data['weight'])
for i, topic_a in enumerate(topics):
    print(i, len(topics), topic_a)
    for topic_b in topics:
        if topic_a==topic_b:
            continue
        # an empty list gives weight_sum 0.0 and weight_mean nan
        weight_a_b = pair_weights.get((topic_a, topic_b), [])
        big_net_topics.graph.add_edge(topic_a, topic_b,
                                      weight_sum=np.sum(weight_a_b),
                                      weight_mean=np.mean(weight_a_b))

In [None]:
import math
# Replace undefined (NaN) mean weights by 0. The edge list is snapshotted
# first so the graph is not being iterated while it is updated.
for a, b, mean_weight in list(big_net_topics.graph.edges(data='weight_mean')):
    if not math.isnan(mean_weight):
        continue
    big_net_topics.graph.add_edge(a, b, weight_mean=0)

In [None]:
# Persist the topic-level graph in both formats.
for topics_file in ('big_network_topics.pickle',
                    'big_network_topics.gexf'):
    big_net_topics.save_graph(os.path.join(path_saved, topics_file))

## Load

In [None]:
# Restore the topic-level graph saved under path_saved.
path_topics_graph = os.path.join(path_saved, 'big_network_topics.pickle')
big_net_topics = wiki.Net()
big_net_topics.load_graph(path_topics_graph)

# Topology

## Topic network

In [None]:
# Transposed adjacency matrix of the topic graph (mean edge weights), scaled
# by 1 / (1 + spectral radius).
# Cast to float because the ARPACK-based eigs solver needs a floating-point
# matrix.
A_topics_mean = nx.adjacency_matrix(
    big_net_topics.graph, weight='weight_mean'
).transpose().astype(np.float64)
val, vec = sp.sparse.linalg.eigs(A_topics_mean)
# eigs returns complex eigenvalues; dividing by np.max(val) would make the
# whole matrix complex (the later .astype(float) calls then drop the imaginary
# part). The largest modulus is real and is the spectral radius.
spectral_radius = np.max(np.abs(val))
A_topics_mean = A_topics_mean / (1 + spectral_radius)

In [None]:
# Heatmap of the scaled topic matrix raised to the first power.
data = A_topics_mean**1
dense_data = data.toarray().astype(np.float64)
fig = px.imshow(dense_data, x=topics, y=topics)
fig.update_layout(
    title='Topic network (weights averaged across topics)',
    width=950,
    height=950,
    coloraxis={'colorscale': 'Greens'},
    xaxis={'side': 'top'},
)
fig.show()

In [None]:
# path_fig = os.path.join('/','Users','harangju','Box Sync','Research','my papers','wikipedia','results')
# path_plot = '4 fields'
# fig.write_image(os.path.join(path_fig, path_plot, 'communicability_avg_across_topics.pdf'))

## Topic^T

In [None]:
# Heatmap of the scaled topic matrix raised to the power T.
T = 10
data = A_topics_mean**T
dense_data = data.toarray().astype(np.float64)
fig = px.imshow(dense_data, x=topics, y=topics)
fig.update_layout(
    title=f"Topic network ^{T}",
    width=950,
    height=950,
    coloraxis={'colorscale': 'Greens'},
    xaxis={'side': 'top'},
)
fig.show()

In [None]:
# Write the current figure as a PDF into the results folder.
path_fig = os.path.join(
    '/', 'Users', 'harangju', 'Box Sync',
    'Research', 'my papers', 'wikipedia', 'results',
)
path_plot = '4 fields'
path_out = os.path.join(path_fig, path_plot, 'topic_network_power_10.pdf')
fig.write_image(path_out)

## Flow

In [None]:
# For every power t = 1 .. T-1 of the scaled topic matrix, compute per topic
# the row sum minus the column sum. The result has one row per topic and one
# column per power, sorted by the first power.
flow_rows = []
for t in range(1, T):
    # np.float was removed from NumPy; np.float64 is the same dtype.
    data = (A_topics_mean**t).toarray().astype(np.float64)
    flow_rows.append(np.sum(data, axis=1) - np.sum(data, axis=0))
# Build the frame once instead of growing it with pd.concat in the loop.
flow = pd.DataFrame(flow_rows, columns=topics)
flow = flow.transpose().sort_values(0)
flow

In [None]:
# One line per row of `flow`, plotted against the column labels.
fig = go.Figure()
for t, row in flow.iterrows():
    trace = go.Scatter(
        x=flow.columns,
        y=row.values,
        mode='lines',
        name=t,
    )
    fig.add_trace(trace)
fig.update_layout(template='plotly_white')
fig.show()

## Cavity vs topic controllability

In [None]:
# Mean cavity dimension of the real (non-null) networks, ordered like `topics`.
real_stats = dim_stats[dim_stats['type'] == 'real']
real_stats = real_stats.sort_values(['mean']).reset_index()
real_stats = real_stats.drop(['type', 'index'], axis=1).set_index('topic')
x = np.array([real_stats.loc[topic]['mean'] for topic in topics])

In [None]:
# Diagonal of the first matrix returned by gramians(A_topics_mean, 10), as
# real numbers.
CG, OG = gramians(A_topics_mean, 10)
# np.float was removed from NumPy; np.float64 is the same dtype.
y = CG.diagonal().astype(np.float64)

In [None]:
# Show every topic, sorted by controllability.
pd.options.display.max_rows = len(topics)
ctrb_column = 'controllability (t=10)'
ctrb_table = pd.DataFrame({'topic': topics, ctrb_column: y})
ctrb_table.sort_values(ctrb_column).reset_index(drop=True)

In [None]:
x_y = sp.stats.linregress(x, y)

In [None]:
x_y

In [None]:
# Scatter of topic controllability against mean cavity dimension, with the
# fitted regression line from x_y.
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y,
                         mode='markers',
                         name='Fields',
                         hovertext=topics))
fit_x = np.linspace(min(x), max(x), num=100)
fig.add_trace(go.Scatter(x=fit_x,
                         y=fit_x * x_y[0] + x_y[1],
                         mode='lines',
                         # x_y[2] is the correlation coefficient r; it has to
                         # be squared to match the "R^2" label.
                         name=f"R^2={x_y[2]**2:.2f}, p={x_y[3]:.2f}"))
fig.update_layout(template='plotly_white',
                  yaxis={'title': 'Topic controllability'},
                  xaxis={'title': 'Mean cavity dimension'})
fig.show()

In [None]:
# path_fig = os.path.join('/','Users','harangju','Box Sync','Research','my papers','wikipedia','results')
# path_plot = '4 fields'
# fig.write_image(os.path.join(path_fig, path_plot, 'cavity_dim_vs_topic_ctrb.pdf'))

# Communicability

In [None]:
# Matrix exponential of the scaled mean-weight topic matrix, as a dense
# real array.
expm_mean = sp.sparse.linalg.expm(A_topics_mean)
communicability_topics_mean = expm_mean.toarray().astype(np.float64)
communicability_topics_mean

In [None]:
import plotly.express as px

In [None]:
# Subtract the diagonal so that only between-topic values are coloured.
n_topics = communicability_topics_mean.shape[0]
diagonal_part = np.multiply(communicability_topics_mean, np.eye(n_topics))
heat = communicability_topics_mean - diagonal_part

fig = px.imshow(heat, x=topics, y=topics)
fig.update_layout(
    title='Communicability (weights averaged across topics)',
    width=950,
    height=950,
    coloraxis={'colorscale': 'Greens'},
    xaxis={'side': 'top'},
)
fig.show()

In [None]:
# path_fig = os.path.join('/','Users','harangju','Box Sync','Research','my papers','wikipedia','results')
# path_plot = '4 fields'
# fig.write_image(os.path.join(path_fig, path_plot, 'communicability_avg_across_topics.pdf'))

In [None]:
# Transposed adjacency matrix of the topic graph (summed edge weights), scaled
# by 1 / (1 + spectral radius), the same way as the mean-weight matrix.
# Cast to float because the ARPACK-based eigs solver needs a floating-point
# matrix.
A_topics_sum = nx.adjacency_matrix(
    big_net_topics.graph, weight='weight_sum'
).transpose().astype(np.float64)
val, vec = sp.sparse.linalg.eigs(A_topics_sum)
# val[0] is not guaranteed to be the largest eigenvalue, and the eigenvalues
# are complex; use the largest modulus (the spectral radius) instead.
spectral_radius = np.max(np.abs(val))
A_topics_sum = A_topics_sum / (1 + spectral_radius)

In [None]:
# Matrix exponential of the scaled summed-weight topic matrix, as a dense
# real array.
expm_sum = sp.sparse.linalg.expm(A_topics_sum)
communicability_topics_sum = expm_sum.toarray().astype(np.float64)
communicability_topics_sum

In [None]:
import plotly.express as px

# Subtract the diagonal so that only between-topic values are coloured.
n_topics_sum = communicability_topics_sum.shape[0]
off_diagonal_sum = communicability_topics_sum - \
    np.multiply(communicability_topics_sum, np.eye(n_topics_sum))
fig = px.imshow(off_diagonal_sum, x=topics, y=topics)
fig.update_layout(
    title='Communicability (weights summed across topics)',
    width=1000,
    height=1000,
    coloraxis={'colorscale': 'Teal'},
    xaxis={'side': 'top'},
)
fig.show()

In [None]:
def communicability_by_topics(A, networks):
    """Return the matrix exponential of ``A``.

    Parameters
    ----------
    A
        Square matrix accepted by ``scipy.sparse.linalg.expm``.
    networks
        Mapping of topic name to network. Currently unused: the per-topic
        breakdown was never implemented (the original loops over topic pairs
        had empty bodies). The parameter is kept so existing calls still work.

    Returns
    -------
    The result of ``scipy.sparse.linalg.expm(A)``.
    """
    return sp.sparse.linalg.expm(A)

In [None]:
# Scratch: eigen-decomposition of the transposed matrix A.
# NOTE(review): `A` is not defined anywhere in the visible part of the
# notebook - confirm which matrix this is meant to be.
eigval, eigvec = sp.sparse.linalg.eigs(A.transpose())
# G = 

In [None]:
eigvec*eigvec.transpose()

In [None]:
comm

In [None]:
A.shape[0]**2

# Awards

* [Nobel prize](https://en.wikipedia.org/wiki/List_of_Nobel_laureates)
* [Fields medal](https://en.wikipedia.org/wiki/Fields_Medal)
* [Turing award](https://en.wikipedia.org/wiki/Turing_Award)
* [National Medal of Science](https://en.wikipedia.org/wiki/List_of_National_Medal_of_Science_laureates)
    * Not used, because it is restricted to American recipients

In [None]:
# Open the Wikipedia dump together with its multistream index.
path_base = '/Users/harangju/Developer/data/wiki/dumps/'
name_xml = 'enwiki-20190801-pages-articles-multistream.xml.bz2'
name_index = 'enwiki-20190801-pages-articles-multistream-index.txt.bz2'
path_xml = f"{path_base}{name_xml}"
path_index = f"{path_base}{name_index}"
dump = wiki.Dump(path_xml, path_index)

## Nobel prizes

* [Physics](https://en.wikipedia.org/wiki/List_of_Nobel_laureates_in_Physics)
* [Chemistry](https://en.wikipedia.org/wiki/List_of_Nobel_laureates_in_Chemistry)
* [Physiology or Medicine](https://en.wikipedia.org/wiki/List_of_Nobel_laureates_in_Physiology_or_Medicine)

In [None]:
# Pick the first section whose opening 100 characters mention 'Laureates'.
dump.load_page('List of Nobel laureates in Physics');
page_sections = dump.page.get_sections()
matching_sections = [s for s in page_sections if 'Laureates' in s[:100]]
laureate_section = matching_sections[0]
laureate_section[:1000]

In [None]:
# Titles of all wikilinks in the laureate section, skipping links that
# contain 'px' (presumably image links with a pixel size - verify).
physics_links = []
for link in laureate_section.filter_wikilinks():
    if 'px' in link:
        continue
    physics_links.append(link.title)
physics_links[:10]

In [None]:
dump.load_page('Kip Thorne')
dump.page.filter_templates('Infobox')[0][:300]

In [None]:
dump.load_page('Radiation')[:300]

In [None]:
# Pick the first section whose opening 100 characters mention 'Laureates'.
dump.load_page('List of Nobel laureates in Chemistry');
page_sections = dump.page.get_sections()
matching_sections = [s for s in page_sections if 'Laureates' in s[:100]]
laureate_section = matching_sections[0]
laureate_section[:1000]

In [None]:
# Titles of all wikilinks in the laureate section, skipping links that
# contain 'px' (presumably image links with a pixel size - verify).
chemistry_links = []
for link in laureate_section.filter_wikilinks():
    if 'px' in link:
        continue
    chemistry_links.append(link.title)
chemistry_links[:10]

In [None]:
# Pick the first section whose opening 100 characters mention 'Laureates'.
dump.load_page('List of Nobel laureates in Physiology or Medicine');
page_sections = dump.page.get_sections()
matching_sections = [s for s in page_sections if 'Laureates' in s[:100]]
laureate_section = matching_sections[0]
laureate_section[:1000]

In [None]:
# Titles of all wikilinks in the laureate section, skipping links that
# contain 'px' (presumably image links with a pixel size - verify).
medicine_links = []
for link in laureate_section.filter_wikilinks():
    if 'px' in link:
        continue
    medicine_links.append(link.title)
medicine_links[:10]

In [None]:
nobel_links = physics_links + chemistry_links + medicine_links

## Fields medal

## Turing award

## Big net

In [None]:
len(big_net.graph.nodes), list(big_net.graph.nodes)[:5]

In [None]:
# Nodes of big_net whose title matches, case-insensitively, one of the titles
# linked from the Nobel laureate lists.
# The upper-cased titles are collected once into a set; rebuilding them for
# every node made this cell O(nodes x links).
# NOTE(review): assumes link.upper() returns a plain (hashable) str - confirm
# for the link-title type produced by the dump parser.
nobel_titles_upper = {link.upper() for link in nobel_links}
nobel_nodes = [
    node
    for node in big_net.graph.nodes
    if node.upper() in nobel_titles_upper
]

In [None]:
nobel_nodes[:5]

In [None]:
len(nobel_nodes), len(nobel_links)

In [None]:
non_nobel_nodes = list(set(big_net.graph.nodes) - set(nobel_nodes))

## Impulse response

In [None]:
grams

In [None]:
px.colors.qualitative.Plotly[:3]

In [None]:
# 'CG_5' values of the Nobel-linked nodes and of all remaining nodes.
nobel_grams = grams.loc[nobel_nodes]['CG_5'].values.astype(np.float32)
# Index with the non_nobel_nodes list defined earlier (the same set
# difference): pandas >= 2.0 rejects a set as a .loc indexer.
non_nobel_grams = grams.loc[non_nobel_nodes]['CG_5'].values.astype(np.float32)

In [None]:
# Horizontal box plots of the impulse response for both node groups.
fig = go.Figure()
box_color = px.colors.qualitative.Plotly[0]
box_groups = (
    (nobel_grams, 'Nobel prize-winning nodes'),
    (non_nobel_grams, 'non-Nobel prize-winning nodes'),
)
for group_values, group_name in box_groups:
    fig.add_trace(
        go.Box(x=group_values, marker_color=box_color, name=group_name)
    )
fig.update_layout(
    showlegend=False,
    template='plotly_white',
    height=300,
    xaxis={'title': 'impulse response'},
    yaxis={'title': ''},
)
fig.show()

In [None]:
np.var(nobel_grams), np.var(non_nobel_grams)

In [None]:
sp.stats.ttest_ind(nobel_grams, non_nobel_grams)

In [None]:
sp.stats.ttest_ind(nobel_grams, non_nobel_grams, equal_var=False)

In [None]:
sp.stats.mannwhitneyu(
    nobel_grams, non_nobel_grams,
    use_continuity=True, alternative='two-sided'
)

In [None]:
ks = sp.stats.ks_2samp(
    nobel_grams, non_nobel_grams, alternative='two-sided'
)
ks

In [None]:
# Empirical cumulative frequencies of the impulse response for both groups,
# evaluated on a common grid with step 0.01 over the pooled value range.
ir = np.concatenate((nobel_grams, non_nobel_grams))
ir_range = np.arange(np.min(ir), np.max(ir), 0.01)
# The loop variable gets its own name so that it no longer overwrites the
# pooled array `ir`.
cum_freq_nobel = np.array(
    [np.sum(nobel_grams < threshold) for threshold in ir_range]
) / nobel_grams.size
cum_freq_non_nobel = np.array(
    [np.sum(non_nobel_grams < threshold) for threshold in ir_range]
) / non_nobel_grams.size

In [None]:
# Cumulative-frequency curves for both groups, annotated with the KS result,
# then written to the results folder.
fig = go.Figure()
for curve, curve_name in ((cum_freq_nobel, 'Nobel'),
                          (cum_freq_non_nobel, 'non-Nobel')):
    fig.add_trace(go.Scatter(x=ir_range, y=curve, name=curve_name))
fig.update_layout(
    template='plotly_white',
    width=400, height=400,
    xaxis={'title': 'impulse response',
           'type': 'log'},
    yaxis={'title': 'cumulative frequency'},
    legend={'x': .5, 'y':.8}
)
ks_label = f"KS={ks.statistic:.2f}, p={ks.pvalue:.1e}"
for note_y, note_text in ((.5, 'IR higher here'), (.4, ks_label)):
    fig.add_annotation(x=.2, y=note_y, text=note_text, showarrow=False)
fig.show()
path_out = os.path.join(path_fig, path_plot, 'ir_cum_freq.pdf')
fig.write_image(path_out)

## Cavity

In [None]:
# Stack the barcodes of all topic networks into one table, tag each row with
# its topic, and keep only bars with a non-zero lifetime.
barcode_frames = []
for topic, network in networks.items():
    tagged = network.barcodes.assign(topic=topic, type='real', null=0)
    barcode_frames.append(tagged)
barcodes = pd.concat(barcode_frames, ignore_index=True, sort=False)
nonzero_lifetime = barcodes.lifetime != 0
barcodes = barcodes[nonzero_lifetime].reset_index(drop=True)

In [None]:
barcodes

In [None]:
keys = ['birth simplex', 'death simplex', 'birth nodes', 'death nodes', 'homology nodes']

In [None]:
barcodes.index

In [None]:
set(barcodes.index) - set(range(len(barcodes.index)))

### Compute

In [None]:
# Zero-filled participation table: one row per big_net node, one column per
# key. It is built in a single constructor call; adding rows one at a time
# with .loc copies the frame on every insertion.
cavity_participation = pd.DataFrame(
    0, index=list(big_net.graph.nodes), columns=keys
)
cavity_participation

In [None]:
# For every key, flatten that column of `barcodes` into one long list,
# keeping row order.
cavities = {
    key: [node for cell in barcodes[key] for node in cell]
    for key in keys
}

In [None]:
from collections import Counter

# Number of occurrences of every node per key. Counter tallies each list in
# one pass; calling list.count() once per distinct node rescans the whole
# list every time.
cavity_counts = {
    key: dict(Counter(cavities[key]))
    for key in keys
}

In [None]:
cavity_participation = pd.DataFrame(cavity_counts).fillna(0)
cavity_participation

In [None]:
# Append a zero row for every big_net node that has no participation entry.
# A single reindex replaces the row-by-row .loc insertion, which copies the
# frame for each new row.
missing_nodes = [
    node for node in big_net.graph.nodes
    if node not in cavity_participation.index
]
cavity_participation = cavity_participation.reindex(
    list(cavity_participation.index) + missing_nodes, fill_value=0
)
cavity_participation

In [None]:
len(big_net.graph.nodes)

### Save

In [None]:
# Folder that holds the pickled analysis results.
analysis_parts = ('/', 'Users', 'harangju', 'Developer', 'data', 'wiki', 'analysis')
path_analysis = os.path.join(*analysis_parts)

In [None]:
# Imported here because the notebook only imports pickle in the later "Load"
# cell, so running the cells in order would fail on this one.
import pickle

# The context manager closes (and flushes) the file once the dump is done.
with open(os.path.join(path_analysis, 'cavity_participation.pickle'), 'wb') as f:
    pickle.dump(cavity_participation, f)

### Load

In [None]:
# Folder that holds the pickled analysis results.
path_analysis = os.path.join(
    os.path.join('/', 'Users', 'harangju'),
    'Developer', 'data', 'wiki', 'analysis',
)

In [None]:
import pickle

# Load the participation table written by the "Save" cell. The context
# manager closes the file handle after reading.
# NOTE: pickle.load must only be used on files you created yourself.
with open(os.path.join(path_analysis, 'cavity_participation.pickle'), 'rb') as f:
    cavity_participation = pickle.load(f)

### Plot

In [None]:
path_plot = '4 fields'

In [None]:
# One box-plot figure per key, comparing Nobel and non-Nobel nodes; each
# figure is annotated with the two-sample KS result and saved as a PDF.
keys = [
    'birth simplex', 'death simplex', 'birth nodes', 'death nodes', 'homology nodes'
]
for key in keys:
    nobel = cavity_participation.loc[nobel_nodes][key].values
    non_nobel = cavity_participation.loc[non_nobel_nodes][key].values
    fig = go.Figure()
    for group_values, group_name in ((nobel, 'Nobel'), (non_nobel, 'non-Nobel')):
        fig.add_trace(go.Box(x=group_values, name=group_name))
    fig.update_layout(
        template='plotly_white',
        width=600, height=300,
        yaxis={'title': ''},
        xaxis={'title': f"participation in {key}"},
    )
    ks = sp.stats.ks_2samp(nobel, non_nobel, alternative='two-sided')
    ks_label = f"KS={ks.statistic:.2f}, p={ks.pvalue:.1e}"
    fig.add_annotation(x=0, y=-1, text=ks_label, showarrow=False)
    fig.show()
    path_out = os.path.join(path_fig, path_plot, f"participation_{key}.pdf")
    fig.write_image(path_out)

In [None]:
np.unique(xs)

In [None]:
np.log(10)

In [None]:
# One cumulative-frequency figure per key, comparing Nobel and non-Nobel
# nodes; each figure is annotated with the two-sample KS result and saved as
# a PDF.
for key in keys:
    fig = go.Figure()
    nobel = cavity_participation.loc[nobel_nodes][key].values
    non_nobel = cavity_participation.loc[non_nobel_nodes][key].values
    xs = np.concatenate([nobel, non_nobel])
    # 100 evenly spaced thresholds over [min, max). linspace replaces
    # np.arange with a computed float step, which fails when max == min
    # (step 0) and can be off by one sample through rounding.
    x_range = np.linspace(np.min(xs), np.max(xs), num=100, endpoint=False)
    # The loop variable is called `threshold` so that it does not overwrite
    # the notebook-level `x` array used in the regression cells.
    cum_freq_nobel = np.array(
        [np.sum(nobel < threshold) for threshold in x_range]
    ) / nobel.size
    cum_freq_non_nobel = np.array(
        [np.sum(non_nobel < threshold) for threshold in x_range]
    ) / non_nobel.size
    fig.add_trace(
        go.Scatter(
            x=x_range,
            y=cum_freq_nobel,
            name='Nobel'
        )
    )
    fig.add_trace(
        go.Scatter(
            x=x_range,
            y=cum_freq_non_nobel,
            name='non-Nobel'
        )
    )
    fig.update_layout(
        width=400, height=400,
        template='plotly_white',
        yaxis={'title': 'cumulative frequency'},
        xaxis={'title': key},
        legend={'x': .5, 'y':.8}
    )
    # Centre of the plotted range. The previous (last - first) / 2 is only
    # the centre when the range starts at 0.
    x_mid = (x_range[0] + x_range[-1]) / 2.
    fig.add_annotation(
        x=x_mid, y=.5, text='higher here', showarrow=False
    )
    ks = sp.stats.ks_2samp(
        nobel, non_nobel, alternative='two-sided'
    )
    fig.add_annotation(
        x=x_mid, y=.4, xref='x', yref='y',
        text=f"KS={ks.statistic:.2f}, p={ks.pvalue:.1e}", showarrow=False
    )
    fig.show()
    fig.write_image(os.path.join(path_fig, path_plot, f"participation_cdf_{key}.pdf"))