# Unipartite Network Comparisons

This notebook explores how we can also represent S&Co data as a unipartite network, and assesses how much information is lost when moving from the bipartite to the unipartite representation, as well as how much our exceptional metadata matters downstream.

#### Load Libraries and Initial Data

In [1]:

import pandas as pd
# Turn off pandas' chained-assignment warning for the whole notebook.
pd.options.mode.chained_assignment = None
# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

import sys
# Add the parent directory to the import path; presumably this is where the
# local `network_analysis` package lives — confirm against the repo layout.
sys.path.append("..")
from network_analysis.load_datasets import get_updated_shxco_data
# NOTE(review): `alt` (used below for chart layout) is never imported explicitly
# in this notebook; presumably it arrives through one of these star imports — confirm.
from network_analysis.generate_network_metrics import *
from network_analysis.create_networks import *
from network_analysis.read_write_networks import *
from network_analysis.visualize_networks import * 


#### Baseline datasets

In [2]:
# Load the four base tables from the project loader (called with get_subscription=False).
(
    members_df,
    books_df,
    borrow_events,
    events_df,
) = get_updated_shxco_data(get_subscription=False)

In [3]:
# Borrow events whose start date is missing, set aside before they are filtered out.
unknown_borrows = borrow_events.loc[borrow_events["start_datetime"].isna()]

In [4]:
# Keep only borrow events for which both the start and the end date are present.
borrow_events = borrow_events.loc[
    borrow_events["start_datetime"].notna()
    & borrow_events["end_datetime"].notna()
]


In [5]:
# Borrows that started before 1942-01-01 (copied so later edits do not touch borrow_events).
all_borrows = borrow_events.loc[
    borrow_events["start_datetime"] < '1942-01-01'
].copy()

# The "unexceptional" subset: rows with no value in `exceptional_types`.
unexceptional_borrows = all_borrows.loc[all_borrows["exceptional_types"].isna()]

## If you want all events regardless of types, use this:
# all_borrows = events_df[events_df.item_uri.isna() == False].copy()

# unexceptional_borrows = all_borrows[all_borrows.exceptional_types.isna()]


#### Load Bipartite Data



In [6]:
# Column mappings handed to the bipartite graph builder.
member_attrs = {'uri': 'member_id'}
book_attrs = {'uri': 'item_uri'}
edge_attrs = {'weight': 'counts'}

# One row per (member, book) pair; `counts` is the number of borrow rows for that pair.
all_borrows_grouped = (
    all_borrows
    .groupby(['member_id', 'item_uri'])
    .size()
    .reset_index(name='counts')
)
unexceptional_borrows_grouped = (
    unexceptional_borrows
    .groupby(['member_id', 'item_uri'])
    .size()
    .reset_index(name='counts')
)

# Flags and metric names forwarded unchanged to the builder.
should_process = True
write_to_file = True
sk_metrics = ['katz', 'louvain']
link_metrics = ['HITS', 'CoHITS', 'BiRank', 'BGRM']

# Bipartite network over all borrows.
(
    all_borrows_bipartite_graph,
    all_borrows_bipartite_nodelist,
    all_borrows_bipartite_edgelist,
    all_borrows_members,
    all_borrows_books,
) = check_reload_build_bipartite_graphs(
    all_borrows_grouped,
    member_attrs,
    book_attrs,
    edge_attrs,
    should_process,
    write_to_file,
    './data/borrow_events_bipartite',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
)

# Bipartite network over the unexceptional borrows only.
(
    unexceptional_borrows_bipartite_graph,
    unexceptional_borrows_bipartite_nodelist,
    unexceptional_borrows_bipartite_edgelist,
    unexceptional_borrows_members,
    unexceptional_borrows_books,
) = check_reload_build_bipartite_graphs(
    unexceptional_borrows_grouped,
    member_attrs,
    book_attrs,
    edge_attrs,
    should_process,
    write_to_file,
    './data/unexceptional_borrow_events_bipartite',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
)


reloading saved graph: ./data/borrow_events_bipartite
reloading saved graph: ./data/unexceptional_borrow_events_bipartite


### Unipartite Comparisons

#### Comparing Across Entire Time of S&Co Library

In [10]:
# Column mappings and (empty) extra node attributes for the unipartite builder.
member_attrs = {'uri': 'member_id'}
book_attrs = {'uri': 'item_uri'}
edge_attrs = {'weight': 'counts'}
node_attrs = {}

# Flags and metric names forwarded unchanged to the builder.
should_process = True
write_to_file = True
sk_metrics = ['katz', 'louvain']
link_metrics = ['pagerank', 'hubs', 'auth']
is_projected = True

# Projected member and book networks over all borrows.
(
    projected_members_graph,
    projected_members_nodelist,
    projected_members_edgelist,
    projected_members,
    projected_books_graph,
    projected_books_nodelist,
    projected_books_edgelist,
    projected_books,
) = check_reload_build_unipartite_graphs(
    all_borrows_grouped,
    all_borrows,
    member_attrs,
    book_attrs,
    edge_attrs,
    node_attrs,
    should_process,
    write_to_file,
    './data/borrow_events_unipartite_projected',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
    is_projected,
)

# Projected member and book networks over the unexceptional borrows only.
(
    unexceptional_projected_members_graph,
    unexceptional_projected_members_nodelist,
    unexceptional_projected_members_edgelist,
    unexceptional_projected_members,
    unexceptional_projected_books_graph,
    unexceptional_projected_books_nodelist,
    unexceptional_projected_books_edgelist,
    unexceptional_projected_books,
) = check_reload_build_unipartite_graphs(
    unexceptional_borrows_grouped,
    unexceptional_borrows,
    member_attrs,
    book_attrs,
    edge_attrs,
    node_attrs,
    should_process,
    write_to_file,
    './data/unexceptional_borrow_events_unipartite_projected',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
    is_projected,
)


reloading saved graph: ./data/borrow_events_unipartite_projected
reloading saved graph: ./data/unexceptional_borrow_events_unipartite_projected


#### Correlations Between Unipartite and Bipartite Graph Metrics

In [20]:
# --- Bipartite member metrics ---
# Column names containing 'local' / 'global', taken from the all-borrows member frame.
local_cols = [name for name in all_borrows_members.columns if 'local' in name]
global_cols = [name for name in all_borrows_members.columns if 'global' in name]

# Drop the 'global' columns from both member frames.
members_bipartite = all_borrows_members.loc[
    :, ~all_borrows_members.columns.isin(global_cols)
]
unexceptional_members_bipartite = unexceptional_borrows_members.loc[
    :, ~unexceptional_borrows_members.columns.isin(global_cols)
]

# Correlation charts restricted to rows with component == 0.
members_chart_bipartite = generate_corr_chart(
    members_bipartite.loc[members_bipartite["component"] == 0],
    'bipartite metric correlations for members data without disconnected nodes',
)
unexceptional_members_chart_bipartite = generate_corr_chart(
    unexceptional_members_bipartite.loc[unexceptional_members_bipartite["component"] == 0],
    'bipartite metric correlations for unexceptional members data without disconnected nodes',
)

# --- Unipartite (projected) member metrics ---
local_cols = [name for name in projected_members.columns if 'local' in name]
global_cols = [name for name in projected_members.columns if 'global' in name]

members_unipartite = projected_members.loc[
    :, ~projected_members.columns.isin(global_cols)
]
unexceptional_members_unipartite = unexceptional_projected_members.loc[
    :, ~unexceptional_projected_members.columns.isin(global_cols)
]

members_chart_unipartite = generate_corr_chart(
    members_unipartite.loc[members_unipartite["component"] == 0],
    'unipartite metric correlations for members data without disconnected nodes',
)
unexceptional_members_chart_unipartite = generate_corr_chart(
    unexceptional_members_unipartite.loc[unexceptional_members_unipartite["component"] == 0],
    'unipartite metric correlations for unexceptional members data without disconnected nodes',
)

# Top row: bipartite charts; bottom row: unipartite charts.
alt.vconcat(
    alt.hconcat(members_chart_bipartite, unexceptional_members_chart_bipartite),
    alt.hconcat(members_chart_unipartite, unexceptional_members_chart_unipartite),
)


In [19]:
# Labels identifying each member frame in the comparison charts.
df_type = 'members_bipartite'
df_type2 = 'unexceptional_members_bipartite'
df_type_subset = 'members_unipartite'
df_type_subset2 = 'unexceptional_members_unipartite'

# Melted correlations for the bipartite member frames.
melted_all_borrows_members = get_melted_corr(members_bipartite, df_type)
melted_unex_borrows_members = get_melted_corr(unexceptional_members_bipartite, df_type2)

# Melted correlations for the unipartite member frames.
melted_all_borrows_members_subset = get_melted_corr(members_unipartite, df_type_subset)
melted_unex_borrows_members_subset = get_melted_corr(
    unexceptional_members_unipartite, df_type_subset2
)

# all vs. unexceptional, within the bipartite network
chart = compare_corr_chart(
    melted_all_borrows_members, melted_unex_borrows_members, df_type, df_type2
)
# all vs. unexceptional, within the unipartite network
chart1 = compare_corr_chart(
    melted_all_borrows_members_subset,
    melted_unex_borrows_members_subset,
    df_type_subset,
    df_type_subset2,
)
# bipartite vs. unipartite, all borrows
chart2 = compare_corr_chart(
    melted_all_borrows_members, melted_all_borrows_members_subset, df_type, df_type_subset
)
# bipartite vs. unipartite, unexceptional borrows
chart3 = compare_corr_chart(
    melted_unex_borrows_members,
    melted_unex_borrows_members_subset,
    df_type2,
    df_type_subset2,
)

alt.hconcat(chart, chart1, chart2, chart3)

In [21]:
# --- Bipartite book metrics ---
# Column names containing 'local' / 'global', taken from the all-borrows book frame.
local_cols = [name for name in all_borrows_books.columns if 'local' in name]
global_cols = [name for name in all_borrows_books.columns if 'global' in name]

# Drop the 'global' columns from both book frames.
books_bipartite = all_borrows_books.loc[
    :, ~all_borrows_books.columns.isin(global_cols)
]
unexceptional_books_bipartite = unexceptional_borrows_books.loc[
    :, ~unexceptional_borrows_books.columns.isin(global_cols)
]

# Correlation charts restricted to rows with component == 0.
books_chart_bipartite = generate_corr_chart(
    books_bipartite.loc[books_bipartite["component"] == 0],
    'bipartite metric correlations for books data without disconnected nodes',
)
unexceptional_books_chart_bipartite = generate_corr_chart(
    unexceptional_books_bipartite.loc[unexceptional_books_bipartite["component"] == 0],
    'bipartite metric correlations for unexceptional books data without disconnected nodes',
)

# --- Unipartite (projected) book metrics ---
local_cols = [name for name in projected_books.columns if 'local' in name]
global_cols = [name for name in projected_books.columns if 'global' in name]

books_unipartite = projected_books.loc[
    :, ~projected_books.columns.isin(global_cols)
]
unexceptional_books_unipartite = unexceptional_projected_books.loc[
    :, ~unexceptional_projected_books.columns.isin(global_cols)
]

books_chart_unipartite = generate_corr_chart(
    books_unipartite.loc[books_unipartite["component"] == 0],
    'unipartite metric correlations for books data without disconnected nodes',
)
unexceptional_books_chart_unipartite = generate_corr_chart(
    unexceptional_books_unipartite.loc[unexceptional_books_unipartite["component"] == 0],
    'unipartite metric correlations for unexceptional books data without disconnected nodes',
)

# Top row: bipartite charts; bottom row: unipartite charts.
alt.vconcat(
    alt.hconcat(books_chart_bipartite, unexceptional_books_chart_bipartite),
    alt.hconcat(books_chart_unipartite, unexceptional_books_chart_unipartite),
)


In [22]:
# Labels identifying each book frame in the comparison charts.
df_type = 'books_bipartite'
df_type2 = 'unexceptional_books_bipartite'
df_type_subset = 'books_unipartite'
df_type_subset2 = 'unexceptional_books_unipartite'

# Melted correlations for the bipartite book frames.
melted_all_borrows_books = get_melted_corr(books_bipartite, df_type)
melted_unex_borrows_books = get_melted_corr(unexceptional_books_bipartite, df_type2)

# Melted correlations for the unipartite book frames.
melted_all_borrows_books_subset = get_melted_corr(books_unipartite, df_type_subset)
melted_unex_borrows_books_subset = get_melted_corr(
    unexceptional_books_unipartite, df_type_subset2
)

# all vs. unexceptional, within the bipartite network
chart = compare_corr_chart(
    melted_all_borrows_books, melted_unex_borrows_books, df_type, df_type2
)
# all vs. unexceptional, within the unipartite network
chart1 = compare_corr_chart(
    melted_all_borrows_books_subset,
    melted_unex_borrows_books_subset,
    df_type_subset,
    df_type_subset2,
)
# bipartite vs. unipartite, all borrows
chart2 = compare_corr_chart(
    melted_all_borrows_books, melted_all_borrows_books_subset, df_type, df_type_subset
)
# bipartite vs. unipartite, unexceptional borrows
chart3 = compare_corr_chart(
    melted_unex_borrows_books,
    melted_unex_borrows_books_subset,
    df_type2,
    df_type_subset2,
)

alt.hconcat(chart, chart1, chart2, chart3)

In [14]:

# Correlation charts over the full projected frames.
projected_members_chart = generate_corr_chart(
    projected_members,
    'member correlations for all events data',
)
unexceptional_projected_members_chart = generate_corr_chart(
    unexceptional_projected_members,
    'member correlations for unexceptional data',
)
projected_books_chart = generate_corr_chart(
    projected_books,
    'book correlations for all events data',
)
unexceptional_projected_books_chart = generate_corr_chart(
    unexceptional_projected_books,
    'book correlations for unexceptional data',
)

# The same charts, restricted to rows with component == 0.
projected_members_chart_subset = generate_corr_chart(
    projected_members[projected_members["component"] == 0],
    'member correlations for all events data',
)
unexceptional_projected_members_chart_subset = generate_corr_chart(
    unexceptional_projected_members[unexceptional_projected_members["component"] == 0],
    'member correlations for unexceptional data',
)
projected_books_chart_subset = generate_corr_chart(
    projected_books[projected_books["component"] == 0],
    'book correlations for all events data',
)
unexceptional_projected_books_chart_subset = generate_corr_chart(
    unexceptional_projected_books[unexceptional_projected_books["component"] == 0],
    'book correlations for unexceptional data',
)

charts = [
    projected_members_chart,
    unexceptional_projected_members_chart,
    projected_books_chart,
    unexceptional_projected_books_chart,
]
subset_charts = [
    projected_members_chart_subset,
    unexceptional_projected_members_chart_subset,
    projected_books_chart_subset,
    unexceptional_projected_books_chart_subset,
]

# Top row: full frames; bottom row: component-0 subsets.
alt.vconcat(alt.hconcat(*charts), alt.hconcat(*subset_charts))


In [None]:
# ---- Members: projected network, full frame vs. component-0 subset ----
df_type = 'all_events_members'
df_type2 = 'unexceptional_members'
melted_projected_members = get_melted_corr(
    projected_members, df_type)
melted_projected_unex_members = get_melted_corr(
    unexceptional_projected_members, df_type2)

df_type_subset = 'all_events_members_subset'
df_type_subset2 = 'unexceptional_members_subset'
melted_projected_members_subset = get_melted_corr(
    projected_members.loc[projected_members.component == 0], df_type_subset)
melted_projected_unex_members_subset = get_melted_corr(
    unexceptional_projected_members.loc[unexceptional_projected_members.component == 0], df_type_subset2)

# all vs. unexceptional (full frames)
chart = compare_corr_chart(melted_projected_members,
                           melted_projected_unex_members, df_type, df_type2)
# all vs. unexceptional (component-0 subsets)
chart1 = compare_corr_chart(melted_projected_members_subset,
                            melted_projected_unex_members_subset, df_type_subset, df_type_subset2)
# subset vs. full, all borrows.
# This must be named `chart2`: the final layout below displays `chart2`, and a
# differently named variable here would make the layout pick up a stale
# `chart2` from an earlier cell (or fail on a fresh kernel).
chart2 = compare_corr_chart(melted_projected_members_subset, melted_projected_members, df_type_subset, df_type)
# subset vs. full, unexceptional borrows
chart3 = compare_corr_chart(melted_projected_unex_members_subset, melted_projected_unex_members, df_type_subset2, df_type2)

# ---- Books: same four comparisons ----
# The label variables are reassigned here, so the member charts above must be
# built before this point.
df_type = 'all_events_books'
df_type2 = 'unexceptional_books'
melted_projected_books = get_melted_corr(
    projected_books, df_type)
melted_projected_unex_books = get_melted_corr(
    unexceptional_projected_books, df_type2)

df_type_subset = 'all_events_books_subset'
df_type_subset2 = 'unexceptional_books_subset'
melted_projected_books_subset = get_melted_corr(
    projected_books.loc[projected_books.component == 0], df_type_subset)
melted_projected_unex_books_subset = get_melted_corr(
    unexceptional_projected_books.loc[unexceptional_projected_books.component == 0], df_type_subset2)

chart4 = compare_corr_chart(melted_projected_books,
                            melted_projected_unex_books, df_type, df_type2)
chart5 = compare_corr_chart(melted_projected_books_subset,
                            melted_projected_unex_books_subset, df_type_subset, df_type_subset2)
chart6 = compare_corr_chart(melted_projected_books_subset, melted_projected_books, df_type_subset, df_type)
chart7 = compare_corr_chart(melted_projected_unex_books_subset, melted_projected_unex_books, df_type_subset2, df_type2)

# Top row: member comparisons; bottom row: book comparisons.
alt.vconcat(alt.hconcat(chart, chart1, chart2, chart3), alt.hconcat(chart4, chart5, chart6, chart7))


In [None]:
## Attempt at building an unprojected unipartite graph but leads to memory errors since it takes so long to complete 😭
# is_projected = False
# unprojected_members_graph, unprojected_members_nodelist, unprojected_members_edgelist, unprojected_books_graph, unprojected_books_nodelist, unprojected_books_edgelist, unprojected_members, unprojected_books = check_reload_build_unipartite_graphs(
#     unexceptional_events_grouped, unexceptional_events, member_attrs, book_attrs, edge_attrs, node_attrs, should_process, write_to_file, './data/all_events_unipartite_unprojected', sk_metrics, link_metrics, members_df, books_df, is_projected)


### Temporal Network Comparisons

#### Bipartite Comparisons

In [None]:
# Split the borrows into two periods around 1930-01-01.
# `all_borrows` is the pre-1942 borrow frame built near the top of the notebook;
# no frame named `all_events` is defined anywhere in this notebook.
# NOTE(review): both filters are inclusive at '1930-01-01', so an event whose
# end_datetime equals that boundary lands in both frames — confirm this is intended.
events_1920s = all_borrows[(all_borrows.end_datetime <= '1930-01-01')]
events_1930s = all_borrows[(all_borrows.end_datetime >= '1930-01-01') & (all_borrows.start_datetime <= '1943-01-01')]


In [None]:
# Column mappings handed to the bipartite graph builder.
member_attrs = {'uri': 'member_id'}
book_attrs = {'uri': 'item_uri'}
edge_attrs = {'weight': 'counts'}

# One row per (member, book) pair per period; `counts` is the number of rows for that pair.
events_1920s_grouped = (
    events_1920s
    .groupby(['member_id', 'item_uri'])
    .size()
    .reset_index(name='counts')
)
events_1930s_grouped = (
    events_1930s
    .groupby(['member_id', 'item_uri'])
    .size()
    .reset_index(name='counts')
)

# Flags and metric names forwarded unchanged to the builder.
should_process = True
write_to_file = True
sk_metrics = ['katz', 'louvain']
link_metrics = ['HITS', 'CoHITS', 'BiRank', 'BGRM']

# Bipartite network for the 1920s period.
(
    bipartite_graph_1920s,
    bipartite_nodelist_1920s,
    bipartite_edgelist_1920s,
    bipartite_members_1920s,
    bipartite_books_1920s,
) = check_reload_build_bipartite_graphs(
    events_1920s_grouped,
    member_attrs,
    book_attrs,
    edge_attrs,
    should_process,
    write_to_file,
    './data/events_1920s_bipartite',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
)

# Bipartite network for the 1930s period.
(
    bipartite_graph_1930s,
    bipartite_nodelist_1930s,
    bipartite_edgelist_1930s,
    bipartite_members_1930s,
    bipartite_books_1930s,
) = check_reload_build_bipartite_graphs(
    events_1930s_grouped,
    member_attrs,
    book_attrs,
    edge_attrs,
    should_process,
    write_to_file,
    './data/events_1930s_bipartite',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
)


In [None]:
# Settings forwarded unchanged to the unipartite builder.
node_attrs = {}
should_process = True
write_to_file = True
sk_metrics = ['katz', 'louvain']
link_metrics = ['pagerank', 'hubs', 'auth']
is_projected = True

# Projected member and book networks for the 1920s period.
(
    members_graph_1920s,
    members_nodelist_1920s,
    members_edgelist_1920s,
    joined_members_1920s,
    books_graph_1920s,
    books_nodelist_1920s,
    books_edgelist_1920s,
    joined_books_1920s,
) = check_reload_build_unipartite_graphs(
    events_1920s_grouped,
    events_1920s,
    member_attrs,
    book_attrs,
    edge_attrs,
    node_attrs,
    should_process,
    write_to_file,
    './data/events_1920s_unipartite_projected',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
    is_projected,
)

# Projected member and book networks for the 1930s period.
(
    members_graph_1930s,
    members_nodelist_1930s,
    members_edgelist_1930s,
    joined_members_1930s,
    books_graph_1930s,
    books_nodelist_1930s,
    books_edgelist_1930s,
    joined_books_1930s,
) = check_reload_build_unipartite_graphs(
    events_1930s_grouped,
    events_1930s,
    member_attrs,
    book_attrs,
    edge_attrs,
    node_attrs,
    should_process,
    write_to_file,
    './data/events_1930s_unipartite_projected',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
    is_projected,
)


In [None]:
# Bipartite member metric correlations per period.
# generate_corr_chart is called as (frame, title), matching every executed cell
# earlier in this notebook.
# NOTE(review): this cell previously also passed the period's events frame and a
# trailing `True`; confirm against the current generate_corr_chart signature
# whether an optional flag needs to be restored.
bipartite_members_1920s_chart = generate_corr_chart(
    bipartite_members_1920s, 'bipartite member correlations for 1920s events data')
bipartite_members_1930s_chart = generate_corr_chart(
    bipartite_members_1930s, 'bipartite member correlations for 1930s events data')

# Same charts restricted to rows with component == 0.
bipartite_members_1920s_chart_subset = generate_corr_chart(
    bipartite_members_1920s.loc[bipartite_members_1920s.component == 0], 'bipartite member correlations for 1920s events data subset')
bipartite_members_1930s_chart_subset = generate_corr_chart(
    bipartite_members_1930s.loc[bipartite_members_1930s.component == 0], 'bipartite member correlations for 1930s events data subset')

# Top row: full frames; bottom row: component-0 subsets.
alt.vconcat(alt.hconcat(bipartite_members_1920s_chart, bipartite_members_1930s_chart),
            alt.hconcat(bipartite_members_1920s_chart_subset, bipartite_members_1930s_chart_subset))


In [None]:

# Unipartite (projected) member metric correlations per period.
# generate_corr_chart is called as (frame, title), matching every executed cell
# earlier in this notebook.
# NOTE(review): this cell previously also passed the period's events frame and a
# trailing `True`; confirm against the current generate_corr_chart signature
# whether an optional flag needs to be restored.

# Charts restricted to rows with component == 0.
joined_members_1920s_chart_subset = generate_corr_chart(
    joined_members_1920s.loc[joined_members_1920s.component == 0], 'unipartite member correlations for 1920s events subset')
joined_members_1930s_chart_subset = generate_corr_chart(
    joined_members_1930s.loc[joined_members_1930s.component == 0], 'unipartite member correlations for 1930s events subset')

# Charts over the full frames.
joined_members_1920s_chart = generate_corr_chart(
    joined_members_1920s, 'unipartite member correlations for 1920s events')
joined_members_1930s_chart = generate_corr_chart(
    joined_members_1930s, 'unipartite member correlations for 1930s events')

# Top row: full frames; bottom row: component-0 subsets.
alt.vconcat(alt.hconcat(joined_members_1920s_chart, joined_members_1930s_chart),
            alt.hconcat(joined_members_1920s_chart_subset, joined_members_1930s_chart_subset))
