# Unipartite Network Comparisons

This notebook explores how the S&Co data can also be represented as a unipartite network. It assesses how much information is lost when moving from the bipartite to the unipartite representation, and how much the exceptional metadata matters downstream.

#### Load Libraries and Initial Data

In [1]:

import sys
import warnings

import altair as alt
import pandas as pd

pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

sys.path.append("..")
from network_analysis.load_datasets import get_updated_shxco_data
from network_analysis.generate_network_metrics import *
from network_analysis.create_networks import *
from network_analysis.read_write_networks import *
from network_analysis.visualize_networks import *


#### Baseline datasets

In [2]:
# Load the four baseline frames; get_subscription=False is passed to the loader.
(
    members_df,
    books_df,
    borrow_events,
    events_df,
) = get_updated_shxco_data(get_subscription=False)

In [3]:
# Borrow events with no recorded start date. This must run before
# borrow_events is narrowed to fully dated events in the next cell.
unknown_borrows = borrow_events.loc[borrow_events['start_datetime'].isna()]

In [4]:
# Keep only borrow events that have both a start and an end date.
# NOTE: this rebinds borrow_events, so anything needing the undated rows
# (e.g. unknown_borrows) has to be computed before this cell runs.
borrow_events = borrow_events.loc[
    borrow_events['start_datetime'].notna() & borrow_events['end_datetime'].notna()
]


In [5]:
# Borrows that started before 1942-01-01, copied so the result is independent
# of borrow_events.
all_borrows = borrow_events.loc[borrow_events['start_datetime'] < '1942-01-01'].copy()

# Subset of those borrows with no value in exceptional_types.
unexceptional_borrows = all_borrows.loc[all_borrows['exceptional_types'].isna()]

# Alternative: to use every event that has an item, regardless of event type,
# swap in these two lines instead:
# all_borrows = events_df[events_df.item_uri.isna() == False].copy()
# unexceptional_borrows = all_borrows[all_borrows.exceptional_types.isna()]


#### Load Bipartite Data



In [6]:
# Attribute mappings handed to check_reload_build_bipartite_graphs.
member_attrs = dict(uri='member_id')
book_attrs = dict(uri='item_uri')
edge_attrs = dict(weight='counts')

# One row per (member_id, item_uri) pair; 'counts' is the number of borrow
# events for that pair.
all_borrows_grouped = (
    all_borrows
    .groupby(['member_id', 'item_uri'])
    .size()
    .reset_index(name='counts')
)
unexceptional_borrows_grouped = (
    unexceptional_borrows
    .groupby(['member_id', 'item_uri'])
    .size()
    .reset_index(name='counts')
)

# Flags and metric lists forwarded unchanged to the graph builder.
should_process = True
write_to_file = True
sk_metrics = ['katz', 'louvain']
link_metrics = ['HITS', 'CoHITS', 'BiRank', 'BGRM']

# NOTE(review): the paths below are absolute and machine-specific; confirm
# they exist (or repoint them) before running on another machine.
(
    all_borrows_bipartite_graph,
    all_borrows_bipartite_nodelist,
    all_borrows_bipartite_edgelist,
    all_borrows_members,
    all_borrows_books,
) = check_reload_build_bipartite_graphs(
    all_borrows_grouped,
    member_attrs,
    book_attrs,
    edge_attrs,
    should_process,
    write_to_file,
    '/Volumes/SecondDrive/unknown_borrowers/network_analysis/data/borrow_events_bipartite',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
)

(
    unexceptional_borrows_bipartite_graph,
    unexceptional_borrows_bipartite_nodelist,
    unexceptional_borrows_bipartite_edgelist,
    unexceptional_borrows_members,
    unexceptional_borrows_books,
) = check_reload_build_bipartite_graphs(
    unexceptional_borrows_grouped,
    member_attrs,
    book_attrs,
    edge_attrs,
    should_process,
    write_to_file,
    '/Volumes/SecondDrive/unknown_borrowers/network_analysis/data/unexceptional_borrow_events_bipartite',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
)


reloading saved graph: /Volumes/SecondDrive/unknown_borrowers/network_analysis/data/borrow_events_bipartite
reloading saved graph: /Volumes/SecondDrive/unknown_borrowers/network_analysis/data/unexceptional_borrow_events_bipartite


### Unipartite Comparisons

#### Comparing Across the Entire Period of the S&Co Library

In [7]:
# Settings for the projected (unipartite) graphs. The attribute mappings are
# restated so this cell does not depend on the bipartite cell's values; note
# that link_metrics is rebound to a different list here.
member_attrs = dict(uri='member_id')
book_attrs = dict(uri='item_uri')
edge_attrs = dict(weight='counts')
node_attrs = dict()
should_process = True
write_to_file = True
sk_metrics = ['katz', 'louvain']
link_metrics = ['pagerank', 'hubs', 'auth']
is_projected = True

# NOTE(review): the paths below are absolute and machine-specific; confirm
# they exist (or repoint them) before running on another machine.
(
    projected_members_graph,
    projected_members_nodelist,
    projected_members_edgelist,
    projected_members,
    projected_books_graph,
    projected_books_nodelist,
    projected_books_edgelist,
    projected_books,
) = check_reload_build_unipartite_graphs(
    all_borrows_grouped,
    all_borrows,
    member_attrs,
    book_attrs,
    edge_attrs,
    node_attrs,
    should_process,
    write_to_file,
    '/Volumes/SecondDrive/unknown_borrowers/network_analysis/data/borrow_events_unipartite_projected',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
    is_projected,
)

(
    unexceptional_projected_members_graph,
    unexceptional_projected_members_nodelist,
    unexceptional_projected_members_edgelist,
    unexceptional_projected_members,
    unexceptional_projected_books_graph,
    unexceptional_projected_books_nodelist,
    unexceptional_projected_books_edgelist,
    unexceptional_projected_books,
) = check_reload_build_unipartite_graphs(
    unexceptional_borrows_grouped,
    unexceptional_borrows,
    member_attrs,
    book_attrs,
    edge_attrs,
    node_attrs,
    should_process,
    write_to_file,
    '/Volumes/SecondDrive/unknown_borrowers/network_analysis/data/unexceptional_borrow_events_unipartite_projected',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
    is_projected,
)


reloading saved graph: /Volumes/SecondDrive/unknown_borrowers/network_analysis/data/borrow_events_unipartite_projected
reloading saved graph: /Volumes/SecondDrive/unknown_borrowers/network_analysis/data/unexceptional_borrow_events_unipartite_projected


#### Correlations Between Unipartite and Bipartite Graph Metrics

In [8]:
# --- Bipartite member tables -------------------------------------------------
# Column names are bucketed by the substring they contain; only global_cols is
# used here, to drop those columns before charting correlations.
local_cols = [name for name in all_borrows_members.columns if 'local' in name]
global_cols = [name for name in all_borrows_members.columns if 'global' in name]

members_bipartite = all_borrows_members[
    [name for name in all_borrows_members.columns if name not in global_cols]
]
unexceptional_members_bipartite = unexceptional_borrows_members[
    [name for name in unexceptional_borrows_members.columns if name not in global_cols]
]

# Charts are drawn on rows with component == 0 only (per the chart titles,
# this excludes the disconnected nodes).
members_chart_bipartite = generate_corr_chart(
    members_bipartite.loc[members_bipartite['component'] == 0],
    'bipartite metric correlations for members data without disconnected nodes',
)
unexceptional_members_chart_bipartite = generate_corr_chart(
    unexceptional_members_bipartite.loc[unexceptional_members_bipartite['component'] == 0],
    'bipartite metric correlations for unexceptional members data without disconnected nodes',
)

# --- Unipartite (projected) member tables ------------------------------------
# The column buckets are recomputed from the projected table.
local_cols = [name for name in projected_members.columns if 'local' in name]
global_cols = [name for name in projected_members.columns if 'global' in name]

members_unipartite = projected_members[
    [name for name in projected_members.columns if name not in global_cols]
]
unexceptional_members_unipartite = unexceptional_projected_members[
    [name for name in unexceptional_projected_members.columns if name not in global_cols]
]

members_chart_unipartite = generate_corr_chart(
    members_unipartite[members_unipartite['component'] == 0],
    'unipartite metric correlations for members data without disconnected nodes',
)
unexceptional_members_chart_unipartite = generate_corr_chart(
    unexceptional_members_unipartite[unexceptional_members_unipartite['component'] == 0],
    'unipartite metric correlations for unexceptional members data without disconnected nodes',
)

# Grid layout: bipartite charts on the top row, unipartite on the bottom row;
# all borrows on the left, unexceptional borrows on the right.
alt.vconcat(
    alt.hconcat(members_chart_bipartite, unexceptional_members_chart_bipartite),
    alt.hconcat(members_chart_unipartite, unexceptional_members_chart_unipartite),
)


In [9]:
# Labels passed to get_melted_corr / compare_corr_chart for each member table.
df_type = 'members_bipartite'
df_type2 = 'unexceptional_members_bipartite'
df_type_subset = 'members_unipartite'
df_type_subset2 = 'unexceptional_members_unipartite'

# Melted correlation tables, one per member table
# (presumably long-form correlation matrices; see get_melted_corr).
melted_all_borrows_members = get_melted_corr(members_bipartite, df_type)
melted_unex_borrows_members = get_melted_corr(unexceptional_members_bipartite, df_type2)
melted_all_borrows_members_subset = get_melted_corr(members_unipartite, df_type_subset)
melted_unex_borrows_members_subset = get_melted_corr(
    unexceptional_members_unipartite, df_type_subset2)

# all vs. unexceptional, within the bipartite graph
chart = compare_corr_chart(
    melted_all_borrows_members, melted_unex_borrows_members,
    df_type, df_type2)

# all vs. unexceptional, within the unipartite graph
chart1 = compare_corr_chart(
    melted_all_borrows_members_subset, melted_unex_borrows_members_subset,
    df_type_subset, df_type_subset2)

# bipartite vs. unipartite, all borrows
chart2 = compare_corr_chart(
    melted_all_borrows_members, melted_all_borrows_members_subset,
    df_type, df_type_subset)

# bipartite vs. unipartite, unexceptional borrows
chart3 = compare_corr_chart(
    melted_unex_borrows_members, melted_unex_borrows_members_subset,
    df_type2, df_type_subset2)

alt.hconcat(chart, chart1, chart2, chart3)

In [10]:
# --- Bipartite book tables ---------------------------------------------------
# Column names are bucketed by the substring they contain; only global_cols is
# used here, to drop those columns before charting correlations.
local_cols = [name for name in all_borrows_books.columns if 'local' in name]
global_cols = [name for name in all_borrows_books.columns if 'global' in name]

books_bipartite = all_borrows_books[
    [name for name in all_borrows_books.columns if name not in global_cols]
]
unexceptional_books_bipartite = unexceptional_borrows_books[
    [name for name in unexceptional_borrows_books.columns if name not in global_cols]
]

# Charts are drawn on rows with component == 0 only (per the chart titles,
# this excludes the disconnected nodes).
books_chart_bipartite = generate_corr_chart(
    books_bipartite.loc[books_bipartite['component'] == 0],
    'bipartite metric correlations for books data without disconnected nodes',
)
unexceptional_books_chart_bipartite = generate_corr_chart(
    unexceptional_books_bipartite.loc[unexceptional_books_bipartite['component'] == 0],
    'bipartite metric correlations for unexceptional books data without disconnected nodes',
)

# --- Unipartite (projected) book tables --------------------------------------
# The column buckets are recomputed from the projected table.
local_cols = [name for name in projected_books.columns if 'local' in name]
global_cols = [name for name in projected_books.columns if 'global' in name]

books_unipartite = projected_books[
    [name for name in projected_books.columns if name not in global_cols]
]
unexceptional_books_unipartite = unexceptional_projected_books[
    [name for name in unexceptional_projected_books.columns if name not in global_cols]
]

books_chart_unipartite = generate_corr_chart(
    books_unipartite[books_unipartite['component'] == 0],
    'unipartite metric correlations for books data without disconnected nodes',
)
unexceptional_books_chart_unipartite = generate_corr_chart(
    unexceptional_books_unipartite[unexceptional_books_unipartite['component'] == 0],
    'unipartite metric correlations for unexceptional books data without disconnected nodes',
)

# Grid layout: bipartite charts on the top row, unipartite on the bottom row;
# all borrows on the left, unexceptional borrows on the right.
alt.vconcat(
    alt.hconcat(books_chart_bipartite, unexceptional_books_chart_bipartite),
    alt.hconcat(books_chart_unipartite, unexceptional_books_chart_unipartite),
)


In [11]:
# Labels passed to get_melted_corr / compare_corr_chart for each book table.
df_type = 'books_bipartite'
df_type2 = 'unexceptional_books_bipartite'
df_type_subset = 'books_unipartite'
df_type_subset2 = 'unexceptional_books_unipartite'

# Melted correlation tables, one per book table
# (presumably long-form correlation matrices; see get_melted_corr).
melted_all_borrows_books = get_melted_corr(books_bipartite, df_type)
melted_unex_borrows_books = get_melted_corr(unexceptional_books_bipartite, df_type2)
melted_all_borrows_books_subset = get_melted_corr(books_unipartite, df_type_subset)
melted_unex_borrows_books_subset = get_melted_corr(
    unexceptional_books_unipartite, df_type_subset2)

# all vs. unexceptional, within the bipartite graph
chart = compare_corr_chart(
    melted_all_borrows_books, melted_unex_borrows_books,
    df_type, df_type2)

# all vs. unexceptional, within the unipartite graph
chart1 = compare_corr_chart(
    melted_all_borrows_books_subset, melted_unex_borrows_books_subset,
    df_type_subset, df_type_subset2)

# bipartite vs. unipartite, all borrows
chart2 = compare_corr_chart(
    melted_all_borrows_books, melted_all_borrows_books_subset,
    df_type, df_type_subset)

# bipartite vs. unipartite, unexceptional borrows
chart3 = compare_corr_chart(
    melted_unex_borrows_books, melted_unex_borrows_books_subset,
    df_type2, df_type_subset2)

alt.hconcat(chart, chart1, chart2, chart3)

In [12]:
# Members and books outside component 0 of the bipartite graph, stacked into
# one table; 'component' is renamed so it can sit next to the unipartite one.
disconnected_items_bipartite = pd.concat([
    nodes.loc[nodes['component'] != 0, ['uri', 'component', 'group', 'borrow_count']]
    for nodes in (all_borrows_members, all_borrows_books)
]).rename(columns={'component': 'component_bipartite'})


In [13]:
# Same extraction for the projected (unipartite) graphs.
disconnected_items_unipartite = pd.concat([
    nodes.loc[nodes['component'] != 0, ['uri', 'component', 'group', 'borrow_count']]
    for nodes in (projected_members, projected_books)
]).rename(columns={'component': 'component_unipartite'})

# Outer-join the two tables so each node's bipartite and unipartite component
# ids appear side by side, ordered by bipartite component and then group.
disconnected_items_bipartite.merge(
    disconnected_items_unipartite,
    on=['uri', 'group', 'borrow_count'],
    how='outer',
).sort_values(by=['component_bipartite', 'group'], ascending=True)


Unnamed: 0,uri,component_bipartite,group,borrow_count,component_unipartite
10,rossetti-pre-raphaelite-diaries,1,books,1.0,3
15,dupre-italien-dangleterre-poe,1,books,1.0,3
20,recollections-dante-gabriel,1,books,1.0,3
2,du-bos,1,members,4.0,3
21,wordsworth-prelude,2,books,1.0,1
0,bruno-jean,2,members,1.0,1
11,seton-lives-hunted-containing,3,books,1.0,2
1,dent,3,members,1.0,2
14,waley-translations-chinese,4,books,1.0,4
3,fitzherbert,4,members,2.0,4


In [14]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np


def scale_col(df, cols):
    """Return a copy of ``df`` with each column in ``cols`` min-max scaled.

    Every listed column is fitted and transformed on its own with a fresh
    ``MinMaxScaler`` (default settings, i.e. scikit-learn's 0-1 feature
    range). Columns that are not listed are carried over unchanged.

    The input frame is left untouched: scaling is applied to a copy, so
    re-running a cell that calls this function does not rescale data that
    has already been scaled, and frames displayed earlier stay valid.

    Args:
        df: pandas DataFrame containing the columns to rescale.
        cols: iterable of column labels present in ``df``. An empty iterable
            yields an unchanged copy of ``df``.

    Returns:
        A new DataFrame with the same columns as ``df`` in which the columns
        named in ``cols`` hold the scaled values.

    Raises:
        KeyError: if a label in ``cols`` is not a column of ``df``.
    """
    scaled = df.copy()
    for col in cols:
        # The scaler works on 2-D input, so present the column as (n_rows, 1)
        # and flatten the result back to 1-D before storing it.
        column_2d = scaled[col].values.reshape(-1, 1)
        scaled[col] = MinMaxScaler().fit_transform(column_2d).ravel()
    return scaled


In [15]:
# Per-node ('local') metric columns on each member table. 'local_louvain' is
# left out of the comparison (presumably because community ids are labels
# rather than magnitudes; confirm).
bipartite_cols = [col for col in all_borrows_members.columns if 'local' in col]
bipartite_cols.remove('local_louvain')
unipartite_cols = [col for col in projected_members.columns if 'local' in col]
unipartite_cols.remove('local_louvain')

# Metrics available on both graphs. An ordered comprehension is used instead
# of set().intersection() so the column (and melted row) order is the same on
# every run rather than depending on string hashing.
original_cols = [col for col in bipartite_cols if col in unipartite_cols]

# Bipartite side: component-0 members, 'local_' prefix swapped for
# 'bipartite_', then reshaped to one row per (uri, metric).
bipartite_members = all_borrows_members.loc[
    all_borrows_members.component == 0, original_cols + ['uri']].copy()
bipartite_members.columns = bipartite_members.columns.str.replace(
    'local_', 'bipartite_', regex=False)
bipartite_cols = [col for col in bipartite_members.columns if 'bipartite' in col]
# bipartite_members = scale_col(bipartite_members, bipartite_cols)
bipartite_members = pd.melt(
    bipartite_members, id_vars='uri', value_vars=bipartite_cols,
    var_name='bipartite_metric', value_name='bipartite_value')

# Unipartite side: same treatment on the projected member table.
unipartite_members = projected_members.loc[
    projected_members.component == 0, original_cols + ['uri']].copy()
unipartite_members.columns = unipartite_members.columns.str.replace(
    'local_', 'unipartite_', regex=False)
unipartite_cols = [col for col in unipartite_members.columns if 'unipartite' in col]
# unipartite_members = scale_col(unipartite_members, unipartite_cols)
unipartite_members = pd.melt(
    unipartite_members, id_vars='uri', value_vars=unipartite_cols,
    var_name='unipartite_metric', value_name='unipartite_value')

# Joining on 'uri' alone pairs every bipartite metric with every unipartite
# metric for a node; the matching pairs are picked out just below.
comparison_df = pd.merge(bipartite_members, unipartite_members, on=['uri'], how='outer')
cols = comparison_df.columns.tolist()
cols.remove('uri')

# Keep rows where both sides refer to the same metric. Splitting with n=1
# compares everything after the 'bipartite_' / 'unipartite_' prefix, so metric
# names that themselves contain underscores are matched on their full name
# instead of only their first token. .copy() makes the 'metric' assignment
# below operate on an independent frame rather than a slice.
subset_df = comparison_df[
    comparison_df.bipartite_metric.str.split('_', n=1).str[1]
    == comparison_df.unipartite_metric.str.split('_', n=1).str[1]
].copy()
subset_df['metric'] = subset_df.bipartite_metric.str.split('_', n=1).str[1]

# Legend-bound selection on 'uri'; it only has an effect if the opacity
# condition commented out in the encoding below is re-enabled.
# NOTE(review): selection_multi is marked deprecated in newer Altair releases;
# confirm against the installed version.
selection = alt.selection_multi(fields=['uri'], bind='legend')

# One scatter panel per metric (unipartite value on x, bipartite on y), with
# independent axes because the metrics are on different scales.
chart = alt.Chart(subset_df).mark_circle(size=100).encode(
    x='unipartite_value',
    y='bipartite_value',
    color='metric',
    # color=alt.Color('uri', scale=alt.Scale(scheme='plasma'), legend=alt.Legend(
        # columns=4, symbolLimit=len(subset_df.uri.unique().tolist()))),
    tooltip=['uri', 'bipartite_value', 'unipartite_value'],
    column='metric',
    # opacity=alt.condition(selection, alt.value(1), alt.value(0.1))
).properties(width=200).resolve_scale(y='independent', x='independent')

# Disabled alternative view that consumes `cols` (kept for reference):
# # cols.remove('unipartite_betweenness')
# # ranked_exploded, chart = compare_node_variability(comparison_df, cols)
# # chart


In [16]:
# Display the members bipartite-vs-unipartite scatter chart built in the previous cell.
chart

In [17]:
# Per-node ('local') metric columns on each book table. 'local_louvain' is
# left out of the comparison (presumably because community ids are labels
# rather than magnitudes; confirm).
bipartite_cols = [col for col in all_borrows_books.columns if 'local' in col]
bipartite_cols.remove('local_louvain')
unipartite_cols = [col for col in projected_books.columns if 'local' in col]
unipartite_cols.remove('local_louvain')

# Metrics available on both graphs. An ordered comprehension is used instead
# of set().intersection() so the column (and melted row) order is the same on
# every run rather than depending on string hashing.
original_cols = [col for col in bipartite_cols if col in unipartite_cols]

# Bipartite side: component-0 books, 'local_' prefix swapped for
# 'bipartite_', then reshaped to one row per (uri, metric).
bipartite_books = all_borrows_books.loc[
    all_borrows_books.component == 0, original_cols + ['uri']].copy()
bipartite_books.columns = bipartite_books.columns.str.replace(
    'local_', 'bipartite_', regex=False)
bipartite_cols = [col for col in bipartite_books.columns if 'bipartite' in col]
# bipartite_books = scale_col(bipartite_books, bipartite_cols)
bipartite_books = pd.melt(
    bipartite_books, id_vars='uri', value_vars=bipartite_cols,
    var_name='bipartite_metric', value_name='bipartite_value')

# Unipartite side: same treatment on the projected book table.
unipartite_books = projected_books.loc[
    projected_books.component == 0, original_cols + ['uri']].copy()
unipartite_books.columns = unipartite_books.columns.str.replace(
    'local_', 'unipartite_', regex=False)
unipartite_cols = [col for col in unipartite_books.columns if 'unipartite' in col]
# unipartite_books = scale_col(unipartite_books, unipartite_cols)
unipartite_books = pd.melt(
    unipartite_books, id_vars='uri', value_vars=unipartite_cols,
    var_name='unipartite_metric', value_name='unipartite_value')

# Joining on 'uri' alone pairs every bipartite metric with every unipartite
# metric for a node; the matching pairs are picked out just below.
comparison_df = pd.merge(bipartite_books, unipartite_books, on=['uri'], how='outer')
cols = comparison_df.columns.tolist()
cols.remove('uri')

# Keep rows where both sides refer to the same metric. Splitting with n=1
# compares everything after the 'bipartite_' / 'unipartite_' prefix, so metric
# names that themselves contain underscores are matched on their full name
# instead of only their first token. .copy() makes the 'metric' assignment
# below operate on an independent frame rather than a slice.
subset_df = comparison_df[
    comparison_df.bipartite_metric.str.split('_', n=1).str[1]
    == comparison_df.unipartite_metric.str.split('_', n=1).str[1]
].copy()
subset_df['metric'] = subset_df.bipartite_metric.str.split('_', n=1).str[1]

# Legend-bound selection on 'uri'; it only has an effect if the opacity
# condition commented out in the encoding below is re-enabled.
# NOTE(review): selection_multi is marked deprecated in newer Altair releases;
# confirm against the installed version.
selection = alt.selection_multi(fields=['uri'], bind='legend')

# One scatter panel per metric (unipartite value on x, bipartite on y), with
# independent axes because the metrics are on different scales.
chart = alt.Chart(subset_df).mark_circle(size=100).encode(
    x='unipartite_value',
    y='bipartite_value',
    color='metric',
    # color=alt.Color('uri', scale=alt.Scale(scheme='plasma'), legend=alt.Legend(
        # columns=4, symbolLimit=len(subset_df.uri.unique().tolist()))),
    tooltip=['uri', 'bipartite_value', 'unipartite_value'],
    column='metric',
    # opacity=alt.condition(selection, alt.value(1), alt.value(0.1))
).properties(width=200).resolve_scale(y='independent', x='independent')

# Disabled alternative view that consumes `cols` (kept for reference):
# # cols.remove('unipartite_betweenness')
# # ranked_exploded, chart = compare_node_variability(comparison_df, cols)
# # chart


In [18]:
# Turn off Altair's maximum-row check before rendering (presumably the books
# comparison frame exceeds the default limit; confirm).
alt.data_transformers.disable_max_rows()
# Display the books bipartite-vs-unipartite scatter chart built in the previous cell.
chart
