In [1]:
import numpy as np
import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import pandas as pd
from datetime import datetime
from tqdm.notebook import tqdm,trange
import warnings
import logging
import scipy.sparse
import altair as alt
import json
import os
# Turn off Altair's max-rows check so charts can embed the full data frames.
alt.data_transformers.disable_max_rows()
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/Altair.
warnings.filterwarnings("ignore") 
# Disable the matplotlib font-manager logger.
logging.getLogger('matplotlib.font_manager').disabled = True

In [2]:
def createZoomableBarChart(df, X, Y, selection, title = '',zoom_encodings = ['x'], top = 100, figsize = (500, 200)):
    '''
    Create an interactive, zoomable bar chart made of two stacked views.

    The upper ("zoomed") view shows the bars picked by ``selection`` and
    restricted to the range brushed in the lower ("base") view. The base view
    shows the same selected data in a thin strip and carries the interval
    brush. Bars are sorted by descending ``Y`` in both views.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; must contain columns ``X`` and ``Y`` plus whatever
        fields ``selection`` filters on.
    X : str
        Column used for the (ordinal) x axis.
    Y : str
        Column used for the (quantitative) y axis.
    selection : altair selection
        Selection registered on the zoomed view and used as a filter in both
        views.
    title : str
        Title of the zoomed view.
    zoom_encodings : list of str
        Encodings the interval brush acts on (default: x only).
    top : int or None
        Keep only rows whose rank by descending ``Y`` (computed after the
        ``selection`` filter) is at most ``top``. Tied values share a rank,
        so more than ``top`` bars may remain. A falsy value disables it.
    figsize : (width, height)
        Overall size in pixels; the zoomed view gets 90% of the height and
        the base view 10%.

    Returns
    -------
    A vertically concatenated Altair chart (zoomed view above base view).
    '''

    # Copy the encodings so the shared default list is never handed out.
    brush = alt.selection(type="interval", encodings=list(zoom_encodings))

    def _limit_to_top(chart):
        # Altair transform methods return a NEW chart, so the result has to be
        # kept; calling them without using the return value has no effect.
        if not top:
            return chart
        return chart.transform_window(
            rank='rank('+Y+')',
            sort=[alt.SortField(Y, order='descending')]
        ).transform_filter(
            # Ranks start at 1, so "<=" is what keeps exactly the top N.
            (alt.datum.rank <= top)
        )

    base = alt.Chart(df, title="Select a range in the base view below").mark_bar().encode(
        x=alt.X(X+':O', sort='-y'),
        y=Y+':Q'
    ).add_selection(
        brush
    ).transform_filter(
        selection
    )
    base = _limit_to_top(base).properties(
        width=figsize[0],
        height=figsize[1]*0.1
    )

    zoomed = alt.Chart(df, title=title).mark_bar().encode(
        x=alt.X(X+':O', sort='-y'),
        y=Y+':Q',
        tooltip=[X, Y]
    ).add_selection(
        selection
    ).transform_filter(
        selection
    )
    # Rank before applying the brush so the top-N cut does not depend on the
    # brushed range; the brush then only narrows what is displayed.
    zoomed = _limit_to_top(zoomed).transform_filter(
        brush
    ).properties(
        width=figsize[0],
        height=figsize[1]*0.9
    )

    return zoomed & base


In [3]:
def build_centrality_dfs(centrality_folder, cra_comm_idx_list, government_idx_list, num_period, by = 'month', people = None, top_n = 100):
    '''
    Collect the highest-centrality nodes per period for three node groups.

    For every period ``k`` in ``range(num_period)``, every centrality measure
    (betweenness, closeness, degree) and alpha = 1, the file
    ``<centrality_folder><by>_<k>_alpha_<alpha>_<measure>.npz`` is loaded as a
    sparse matrix and flattened to one value per node. A missing file is
    treated as all zeros. The nodes are split into three groups and the
    ``top_n`` largest values of each group are kept.

    Parameters
    ----------
    centrality_folder : str
        Path prefix of the ``.npz`` files; it is concatenated directly with
        the file name, so it must end with a path separator.
    cra_comm_idx_list, government_idx_list : sequence of int (or int-like str)
        Positional indices into ``people`` for the two named groups. Strings
        such as lines read from a text file are converted with ``int``.
    num_period : int
        Number of periods to read (0 .. num_period-1).
    by : str
        Period granularity used in the file names (e.g. 'month', 'stage').
    people : sequence of str, optional
        Node names, one per position. Defaults to the notebook-level
        ``unique_people`` for backward compatibility.
    top_n : int
        How many rows to keep per group/period/measure (default 100).

    Returns
    -------
    (df_cra_comm, df_gov, df_other) : tuple of pandas.DataFrame
        Columns: name, value, centrality_name, period, alpha. Rows keep their
        positional index from ``people``. Empty frames when ``num_period`` is 0.
    '''
    if people is None:
        # Fall back to the global the original implementation relied on.
        people = unique_people
    people = list(people)

    # .iloc only accepts integer positions, while the lists loaded from the
    # text files hold strings; normalise once up front.
    cra_comm_positions = [int(i) for i in cra_comm_idx_list]
    government_positions = [int(i) for i in government_idx_list]

    # "Other" = every position that is in neither named group.
    all_positions = np.arange(len(people))
    other_mask = ~np.isin(all_positions, cra_comm_positions) & ~np.isin(all_positions, government_positions)

    ALPHA = [1]
    centrality_names = ['betweenness', 'closeness', 'degree']

    # Collect the pieces and concatenate once: DataFrame.append was removed in
    # pandas 2.0 and re-copies the whole frame on every call.
    cra_comm_parts, gov_parts, other_parts = [], [], []
    for k in range(num_period):
        for centrality_name in centrality_names:
            for alpha in ALPHA:
                infile = centrality_folder+by+'_'+str(k)+'_alpha_'+str(alpha)+'_'+centrality_name+'.npz'
                if os.path.exists(infile):
                    sparse_matrix = scipy.sparse.load_npz(infile)
                    dense = sparse_matrix.toarray().reshape(-1)
                else:
                    # No file for this period/measure: every node scores 0.
                    dense = np.zeros(len(people))
                df_temp = pd.DataFrame({
                    'name': people,
                    'value': dense,
                    'centrality_name': centrality_name,
                    'period': k,
                    'alpha': alpha,
                })
                cra_comm_parts.append(df_temp.iloc[cra_comm_positions].nlargest(top_n, ['value']))
                gov_parts.append(df_temp.iloc[government_positions].nlargest(top_n, ['value']))
                other_parts.append(df_temp.iloc[other_mask].nlargest(top_n, ['value']))

    def _combine(parts):
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.concat(parts) if parts else pd.DataFrame()

    return _combine(cra_comm_parts), _combine(gov_parts), _combine(other_parts)

# Load all the required lists

In [4]:
# Node names, one per line of the text file.
with open('unique_people.txt', mode='r', encoding='utf-8') as people_file:
    people_text = people_file.read()
unique_people = people_text.splitlines()

In [5]:
# These lists are later used as positional (.iloc) indices, which must be
# integers, so convert each line instead of keeping the raw strings.
# Assumes every non-blank line holds a single integer - TODO confirm.
with open('cra_idx_list.txt', encoding = 'utf-8') as f:
    cra_idx_list = [int(line) for line in f.read().splitlines() if line.strip()]
with open('commissioner_idx_list.txt', encoding = 'utf-8') as f:
    commissioner_idx_list = [int(line) for line in f.read().splitlines() if line.strip()]
cra_comm_idx_list = cra_idx_list + commissioner_idx_list

In [6]:
# This list is later used as positional (.iloc) indices, which must be
# integers, so convert each line instead of keeping the raw strings.
# Assumes every non-blank line holds a single integer - TODO confirm.
with open('government_idx_list.txt', encoding = 'utf-8') as f:
    government_idx_list = [int(line) for line in f.read().splitlines() if line.strip()]

In [7]:
# Stage-level run: build the three top-centrality frames from the per-stage files.
centrality_folder = './centrality_weighted_05072022/'
num_period, by = 4, 'stage'
df_cra_comm, df_gov, df_other = build_centrality_dfs(
    centrality_folder,
    cra_comm_idx_list,
    government_idx_list,
    num_period,
    by=by,
)

In [8]:
# One selection object shared by all three charts:
# a dropdown chooses the centrality measure, a slider chooses the period.
centrality_options = df_cra_comm["centrality_name"].unique().tolist()
dropdown = alt.binding_select(options=centrality_options)

last_period = max(df_cra_comm["period"])
slider1 = alt.binding_range(min=0, max=last_period, step=1)

selection_bindings = {'centrality_name': dropdown, 'period': slider1}
selection = alt.selection_single(
    name="Select?",
    fields=list(selection_bindings),
    bind=selection_bindings,
    init={"centrality_name": "betweenness", "period": 0},
)

In [9]:
# One zoomable chart per node group; all three share the same selection.
cra_comm_chart, gov_chart, others_chart = [
    createZoomableBarChart(group_df, X='name', Y='value', selection=selection, title=group_title)
    for group_df, group_title in (
        (df_cra_comm, 'Top 100 CRA&Comm nodes with highest centrality'),
        (df_gov, 'Top 100 Gov nodes with highest centrality'),
        (df_other, 'Top 100 others nodes with highest centrality '),
    )
]

In [10]:
# Write each stage-level chart to its own standalone HTML file.
for chart_to_save, html_path in (
    (cra_comm_chart, 'cra_comm_centrality_by_stage.html'),
    (gov_chart, 'gov_centrality_by_stage.html'),
    (others_chart, 'others_centrality_by_stage.html'),
):
    chart_to_save.save(html_path)

In [11]:
# Month-level run: rebuild the three top-centrality frames from the per-month files.
# Note that this overwrites the stage-level frames created earlier.
centrality_folder = './centrality_weighted_05072022/'
num_period, by = 110, 'month'
df_cra_comm, df_gov, df_other = build_centrality_dfs(
    centrality_folder,
    cra_comm_idx_list,
    government_idx_list,
    num_period,
    by=by,
)

In [12]:
# One selection object shared by all three charts:
# a dropdown chooses the centrality measure, a slider chooses the period.
centrality_options = df_cra_comm["centrality_name"].unique().tolist()
dropdown = alt.binding_select(options=centrality_options)

last_period = max(df_cra_comm["period"])
slider1 = alt.binding_range(min=0, max=last_period, step=1)

selection_bindings = {'centrality_name': dropdown, 'period': slider1}
selection = alt.selection_single(
    name="Select?",
    fields=list(selection_bindings),
    bind=selection_bindings,
    init={"centrality_name": "betweenness", "period": 0},
)

In [13]:
# One zoomable chart per node group; all three share the same selection.
cra_comm_chart, gov_chart, others_chart = [
    createZoomableBarChart(group_df, X='name', Y='value', selection=selection, title=group_title)
    for group_df, group_title in (
        (df_cra_comm, 'Top 100 CRA&Comm nodes with highest centrality'),
        (df_gov, 'Top 100 Gov nodes with highest centrality'),
        (df_other, 'Top 100 others nodes with highest centrality '),
    )
]

In [14]:
# Write each month-level chart to its own standalone HTML file.
for chart_to_save, html_path in (
    (cra_comm_chart, 'cra_comm_centrality_by_month.html'),
    (gov_chart, 'gov_centrality_by_month.html'),
    (others_chart, 'others_centrality_by_month.html'),
):
    chart_to_save.save(html_path)