In [1]:
import ast
import pickle

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from ipywidgets import HTML, HBox, Image, Layout, VBox, interactive
from matplotlib import cm
from plotly import colors


In [61]:
# Read the author metadata used by the plots into a DataFrame
# (the whole file is assumed to fit in RAM).
meta_df = pd.read_csv(
    filepath_or_buffer="../tests/data/scopus/col_au_name_lookup",
)


In [None]:

# Load the fitted array Z (its last column is used as the block label of each
# node) and the adjacency data A (its last element is used as the network).
# NOTE(security): pickle.load can execute arbitrary code - only load trusted files.
with open("../results/scopus_Z.pkl", "rb") as f:
    Z = pickle.load(f)
with open("../tests/data/scopus/col_A.pkl", "rb") as f:
    A = pickle.load(f)

G = nx.from_numpy_array(A[-1])
# Fix the seed so the layout (and every later plot that reuses `pos`) is
# reproducible across kernel restarts.
LAYOUT_SEED = 0
pos = nx.spring_layout(G, seed=LAYOUT_SEED)

fig, ax = plt.subplots(dpi=300)
node_size = 5
nx.draw(G, pos, edge_color="k", node_size=node_size, width=0.2, ax=ax)
# `matplotlib.cm.get_cmap` was removed in Matplotlib 3.9; pyplot's accessor
# returns the same colormap and exists in old and new versions alike.
cmap = plt.get_cmap("tab20b")
# For each block, draw its nodes in a block-specific colour on the same axes.
block_ids = Z[:, -1]
labels = {}
for block in np.unique(block_ids):
    color = [cmap(block)]
    tmp_nodes = np.nonzero(block_ids == block)[0]
    labels.update({node: block for node in tmp_nodes})
    nx.draw_networkx_nodes(
        G,
        pos,
        nodelist=tmp_nodes,
        node_size=node_size,
        node_color=color,
        ax=ax,
    )
show_labels = True
if show_labels:
    # Label every node with its block id.
    nx.draw_networkx_labels(
        G, pos, labels, font_size=3, font_color="whitesmoke", ax=ax
    )
fig.savefig("../results/scopus_colAv1.png", bbox_inches="tight")
plt.show()


## Main interactive clustering

In [62]:
# Give the node-index column an explicit name. Reassigning instead of using
# inplace=True keeps the previously loaded frame untouched and the cell re-runnable.
meta_df = meta_df.rename(columns={"index": "node_idx"})

In [63]:
# add block info to df
# Only move node_idx into the index if it is not there yet, so that running
# this cell a second time does not fail with a KeyError.
if meta_df.index.name != "node_idx":
    meta_df = meta_df.set_index("node_idx")
meta_df = meta_df.sort_index()
# Z's last column is assigned positionally, i.e. row i of the sorted frame
# receives the block of row i of Z.
meta_df["block"] = Z[:, -1]


In [64]:
# One row per node of the layout, with its x/y coordinates.
net_df = pd.DataFrame(pos).T.rename(columns={0: "x_pos", 1: "y_pos"})
# Drop any coordinates left from a previous run first, so the join does not
# fail on overlapping column names when the cell is executed again.
meta_df = meta_df.drop(columns=net_df.columns, errors="ignore").join(net_df)


In [None]:
# Interactive network figure: one light-gray line trace for the edges and one
# marker trace for the authors, coloured by block.
fig = go.FigureWidget()
add_edges = True
if add_edges:
    # Draw all edges with a single line trace by walking an Eulerian circuit of
    # the eulerized graph (much cheaper than adding one trace per edge).
    circuit = list(nx.eulerian_circuit(nx.eulerize(G)))
    if circuit:
        # Start node of the first edge followed by the target of EVERY edge,
        # including the first one; leaving that one out would make the line
        # jump from the first node straight to the third.
        path_nodes = [circuit[0][0]] + [target for _, target in circuit]
        fig.add_trace(
            go.Scatter(
                x=[pos[node][0] for node in path_nodes],
                y=[pos[node][1] for node in path_nodes],
                mode="lines",
                line=dict(color="lightgray", width=0.3),
                showlegend=False,
            )
        )
# The author markers are added last so that they are fig.data[-1].
fig.add_trace(
    go.Scatter(
        x=net_df.x_pos,
        y=net_df.y_pos,
        mode="markers",
        text=meta_df["au_name"],
        name="Authors",
        marker=dict(color=meta_df["block"]),
    )
)
fig.update_xaxes(showgrid=False, zeroline=False, showticklabels=False)
fig.update_yaxes(showgrid=False, zeroline=False, showticklabels=False)
# update_layout returns the figure, so as the last expression of the cell it
# also displays the widget.
fig.update_layout(plot_bgcolor="#ffffff")


In [5]:
# The author markers are the last trace added to the figure.
scatter = fig.data[-1]


def set_opacity(opacity, size):
    """Apply `opacity` and `size` to the markers of the author scatter trace."""
    scatter.marker.opacity = opacity
    scatter.marker.size = size


# Generate a control panel for set_opacity; each (min, max, step) tuple becomes
# one slider.
opacity_slider = interactive(set_opacity, opacity=(0.0, 1.0, 0.01), size=(1, 10, 0.25))
# then adjust the length of the two slider widgets
for slider in opacity_slider.children[:2]:
    slider.layout.width = "400px"



In [6]:
# The *_props columns come out of the CSV as strings; turn each one back into a
# list of (name, value) pairs with values rounded to 3 decimals.
# Each column is checked on its own, so an already-parsed column is left alone
# and the cell can be re-run safely.
for attrb in ["subj", "ctry"]:
    col = f"{attrb}_props"
    if len(meta_df) and isinstance(meta_df[col].iloc[0], str):
        parsed = meta_df[col].apply(ast.literal_eval)
        # np.round replaces np.round_, which was removed in NumPy 2.0.
        meta_df[col] = parsed.apply(
            lambda pairs: [(name, np.round(val, 3)) for name, val in pairs]
        )


In [80]:
# HTML widget that shows the metadata of the hovered author
details = HTML()


def _read_bytes(path):
    """Return the raw bytes of the file at `path`."""
    with open(path, "rb") as f:
        return f.read()


# Pre-load the per-block summary images, keyed by block id.
blocks = np.unique(Z[:, -1])
# Block -1 is skipped for the subject-area images
# (presumably no such image is generated for it - TODO confirm).
subj_data = {
    block: _read_bytes(f"../results/scopus_colAv1_subjareas_block{block}.png")
    for block in blocks
    if block != -1
}
ctry_data = {
    block: _read_bytes(f"../results/scopus_colAv1_ctrys_block{block}.png")
    for block in blocks
}

# Image widgets, initialised with the images of the last (largest) block id.
# An empty image is used when a block has no subject-area image.
initial_block = blocks[-1]
subj_widget = Image(
    value=subj_data.get(initial_block, b""),
    layout=Layout(height="252px", width="400px"),
)
ctry_widget = Image(
    value=ctry_data[initial_block],
    layout=Layout(height="252px", width="400px"),
)


def hover_fn(trace, points, state):
    """Hover callback: show the hovered author's details and block images.

    `points.point_inds` holds the positional indices of the hovered markers;
    only the first one is used. `trace` and `state` are unused.
    """
    if not points.point_inds:
        # nothing hovered on this trace
        return
    ind = points.point_inds[0]

    # Update details HTML widget
    df_loc = meta_df.iloc[ind]
    cols = ["au_name", "subj_props", "ctry_props"]
    details.value = df_loc[cols].to_frame().to_html()

    # Update image widgets. `ind` is a position, so look the block up
    # positionally as well (a plain [] lookup would go by index label).
    block = meta_df["block"].iloc[ind]
    subj_widget.value = subj_data.get(block, b"")
    ctry_widget.value = ctry_data[block]


# register callback function for update on hover events
scatter.on_hover(hover_fn)

# simple dashboard: figure, sliders, the two images side by side, then details
VBox([fig, opacity_slider, HBox([subj_widget, ctry_widget]), details])



VBox(children=(FigureWidget({
    'data': [{'line': {'color': 'lightgray', 'width': 0.3},
              'mode'…

# TODO:  
- Gender
- Top k% cited
- Same as above, but for FWCI and h-index  
- Redo display of metadata to show bars of individual and block side by side in different colours, and nicer vis (seaborn)

## Cluster by degree

In [89]:
# allow explicit clustering by degree
degs = dict(G.degree)
degs_np = np.array([d if not np.isnan(d) else 0.0 for d in degs.values()])
for q in range(1, 16):
    # Quantile edges for q groups (duplicate edges are dropped).
    _, bins = pd.qcut(degs_np, q, retbins=True, duplicates="drop")
    # pd.cut intervals are open on the left, so prepend one edge below the
    # minimum to keep the smallest degrees. Deriving it from bins[0] (rather
    # than hard-coding 0) keeps the edges unique when the minimum degree is 0.
    edges = np.concatenate([[bins[0] - 1], bins])
    meta_df[f"deg_clust_{q}"] = pd.cut(
        degs_np, edges, labels=list(range(len(bins)))
    )



In [92]:
# Slider figure: one marker trace per q (number of quantiles used for the
# degree clustering); the slider selects which trace is visible.
Q_VALUES = range(1, 16)
DEFAULT_STEP = 10  # index into Q_VALUES of the trace shown initially (q = 11)

# Create figure
fig = go.Figure()

# Add traces, one for each slider step
for q in Q_VALUES:
    fig.add_trace(
        go.Scatter(
            visible=False,
            name="deg_clusts for q = " + str(q),
            x=meta_df["x_pos"],
            y=meta_df["y_pos"],
            text=meta_df["au_name"] + "<br>Degree: " + degs_np.astype(str),
            marker=dict(
                color=meta_df[f"deg_clust_{q}"],
                colorscale=colors.diverging.Portland,
            ),
            mode="markers",
        )
    )

# Show the default trace
fig.data[DEFAULT_STEP].visible = True

# Create and add slider
steps = []
for i, q in enumerate(Q_VALUES):
    visible = [False] * len(fig.data)
    visible[i] = True  # only the i'th trace is shown for this step
    steps.append(
        dict(
            method="update",
            label=str(q),  # without a label the readout would be "Q: step-<i>"
            args=[
                {"visible": visible},
                # q quantile groups plus the extra lowest bin
                {"title": f"Deg clusts for {q + 1} groups"},  # layout attribute
            ],
        )
    )

sliders = [dict(
    active=DEFAULT_STEP,
    currentvalue={"prefix": "Q: "},
    pad={"t": 50},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders,
    # start with the title that matches the initially visible trace
    title=steps[DEFAULT_STEP]["args"][1]["title"],
)

fig.show()

In [43]:
# Interactive network figure with the authors coloured by degree cluster.
net_df = pd.DataFrame(pos).T.rename(columns={0: "x_pos", 1: "y_pos"})
# No plain "deg_clust" column is created anywhere in this notebook, only
# "deg_clust_<q>". NOTE(review): q = 9 mirrors the career-age figure - confirm
# the intended number of quantiles.
DEG_CLUST_COL = "deg_clust_9"
fig = go.FigureWidget()
add_edges = True
if add_edges:
    # Draw all edges with a single line trace by walking an Eulerian circuit of
    # the eulerized graph (much cheaper than adding one trace per edge).
    circuit = list(nx.eulerian_circuit(nx.eulerize(G)))
    if circuit:
        # Start node of the first edge followed by the target of EVERY edge,
        # including the first one; leaving that one out would make the line
        # jump from the first node straight to the third.
        path_nodes = [circuit[0][0]] + [target for _, target in circuit]
        fig.add_trace(
            go.Scatter(
                x=[pos[node][0] for node in path_nodes],
                y=[pos[node][1] for node in path_nodes],
                mode="lines",
                line=dict(color="lightgray", width=0.3),
                showlegend=False,
            )
        )
fig.add_trace(
    go.Scatter(
        x=net_df.x_pos,
        y=net_df.y_pos,
        mode="markers",
        text=meta_df["au_name"],
        name="Authors",
        marker=dict(color=meta_df[DEG_CLUST_COL]),
    )
)
fig.update_xaxes(showgrid=False, zeroline=False, showticklabels=False)
fig.update_yaxes(showgrid=False, zeroline=False, showticklabels=False)
# update_layout returns the figure, so as the last expression of the cell it
# also displays the widget.
fig.update_layout(plot_bgcolor="#ffffff")


FigureWidget({
    'data': [{'line': {'color': 'lightgray', 'width': 0.3},
              'mode': 'lines',
    …

In [19]:
# Read the pickled node list of the author network and the full dataset.
# NOTE(security): pickle.load can execute arbitrary code - only load trusted files.
with open("../tests/data/scopus/col_au_net_nodelist.pkl", "rb") as nodelist_file:
    nodelist = pickle.load(nodelist_file)

with open("../tests/data/scopus/col_dataset.pkl", "rb") as dataset_file:
    full_data = pickle.load(dataset_file)


In [56]:
# Name / career-age lookup per author id, built from the `auid` columns and
# from the matching `*_other` columns of full_data.

# first name and last career age seen for every auid
lookup_own = full_data.groupby(by=["auid"], as_index=False).agg(
    {"au_name": "first", "career_age": "last"}
)

# the same for every auid_other, renamed to the column names used above
other_to_own_names = {
    "auid_other": "auid",
    "au_name_other": "au_name",
    "career_age_other": "career_age",
}
lookup_other = (
    full_data.groupby(by=["auid_other"], as_index=False)
    .agg({"au_name_other": "first", "career_age_other": "last"})
    .rename(columns=other_to_own_names)
)

# stack both tables and drop rows that are exact duplicates
col_name_age_lookup = pd.concat([lookup_own, lookup_other], axis=0).drop_duplicates()


In [68]:
# Position of every author id within nodelist, indexed by author id
# (the position ends up in a column called "index").
node_positions = pd.Series(nodelist, name="auid").reset_index().set_index("auid")

# Lookup rows selected in nodelist order.
lookup_in_node_order = col_name_age_lookup.set_index("auid").loc[pd.Index(nodelist)]

# Attach each author's node position and name that column node_idx.
fin_lookup = lookup_in_node_order.join(node_positions).rename(
    columns={"index": "node_idx"}
)


In [72]:
# Career age per node: keep the first lookup row of every node_idx.
career_age_by_node = (
    fin_lookup
    .drop_duplicates(subset="node_idx")
    .set_index("node_idx")
    .career_age
)
# Drop a career_age column left from a previous run first, so the join does not
# fail on an overlapping column when the cell is executed again.
meta_df = meta_df.drop(columns="career_age", errors="ignore").join(career_age_by_node)



## Cluster by career age

In [84]:
# Slider figure: one marker trace per q (number of quantiles used for the
# career-age clustering); the slider selects which trace is visible.
Q_VALUES = range(1, 16)
DEFAULT_STEP = 10  # index into Q_VALUES of the trace shown initially (q = 11)

# allow explicit clustering by career age
for q in Q_VALUES:
    # Quantile edges for q groups (duplicate edges are dropped).
    _, bins = pd.qcut(meta_df.career_age, q, retbins=True, duplicates="drop")
    # pd.cut intervals are open on the left, so prepend one edge below the
    # minimum to keep the smallest career ages.
    edges = np.concatenate([[bins[0] - 1], bins])
    meta_df[f"age_clust_{q}"] = pd.cut(
        meta_df.career_age, edges, labels=list(range(len(bins)))
    )

# Create figure
fig = go.Figure()

# Add traces, one for each slider step
for q in Q_VALUES:
    fig.add_trace(
        go.Scatter(
            visible=False,
            name="age_clusts for q = " + str(q),
            x=meta_df["x_pos"],
            y=meta_df["y_pos"],
            text=meta_df["au_name"] + ",<br>CA: " + meta_df["career_age"].astype(str),
            marker=dict(
                color=meta_df[f"age_clust_{q}"],
                colorscale=colors.diverging.Portland,
            ),
            mode="markers",
        )
    )

# Show the default trace
fig.data[DEFAULT_STEP].visible = True

# Create and add slider
steps = []
for i, q in enumerate(Q_VALUES):
    visible = [False] * len(fig.data)
    visible[i] = True  # only the i'th trace is shown for this step
    steps.append(
        dict(
            method="update",
            label=str(q),  # without a label the readout would be "Q: step-<i>"
            args=[
                {"visible": visible},
                # q quantile groups plus the extra lowest bin
                {"title": f"Age clusts for {q + 1} groups"},  # layout attribute
            ],
        )
    )

sliders = [dict(
    active=DEFAULT_STEP,
    currentvalue={"prefix": "Q: "},
    pad={"t": 50},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders,
    # start with the title that matches the initially visible trace
    title=steps[DEFAULT_STEP]["args"][1]["title"],
)

fig.show()

In [73]:
# Quantile edges of career age for 9 groups (duplicate edges are dropped).
bins = pd.qcut(meta_df.career_age, 9, retbins=True, duplicates="drop")[1]
# Extra lowest edge at -1 so the smallest career ages get a bin as well
# (pd.cut intervals are open on the left).
age_edges = np.concatenate([[-1], bins])
age_labels = list(range(len(bins)))
age_clusts = pd.cut(meta_df.career_age, age_edges, labels=age_labels)



In [76]:
# Store the 9-quantile career-age cluster of every author as a column.
meta_df = meta_df.assign(age_clust=age_clusts)



In [77]:
# Interactive network figure with the authors coloured by career-age cluster.
net_df = pd.DataFrame(pos).T.rename(columns={0: "x_pos", 1: "y_pos"})
fig = go.FigureWidget()
add_edges = True
if add_edges:
    # Draw all edges with a single line trace by walking an Eulerian circuit of
    # the eulerized graph (much cheaper than adding one trace per edge).
    circuit = list(nx.eulerian_circuit(nx.eulerize(G)))
    if circuit:
        # Start node of the first edge followed by the target of EVERY edge,
        # including the first one; leaving that one out would make the line
        # jump from the first node straight to the third.
        path_nodes = [circuit[0][0]] + [target for _, target in circuit]
        fig.add_trace(
            go.Scatter(
                x=[pos[node][0] for node in path_nodes],
                y=[pos[node][1] for node in path_nodes],
                mode="lines",
                line=dict(color="lightgray", width=0.3),
                showlegend=False,
            )
        )
fig.add_trace(
    go.Scatter(
        x=net_df.x_pos,
        y=net_df.y_pos,
        mode="markers",
        text=meta_df["au_name"],
        name="Authors",
        marker=dict(color=meta_df["age_clust"]),
    )
)
fig.update_xaxes(showgrid=False, zeroline=False, showticklabels=False)
fig.update_yaxes(showgrid=False, zeroline=False, showticklabels=False)
# update_layout returns the figure, so as the last expression of the cell it
# also displays the widget.
fig.update_layout(plot_bgcolor="#ffffff")


FigureWidget({
    'data': [{'line': {'color': 'lightgray', 'width': 0.3},
              'mode': 'lines',
    …