In [1]:

import pandas as pd
pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', None)
import networkx as nx
from networkx.algorithms import bipartite
# import community
from networkx.readwrite import json_graph
# import nx_altair as nxa
from networkx.algorithms.community import greedy_modularity_communities
from pyvis import network as net
# from node2vec import Node2Vec
import altair as alt
import matplotlib.pyplot as plt
import numpy as np
from igraph import *

import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append("..")
from network_analysis.load_datasets import get_updated_shxco_data
from network_analysis.generate_network_metrics import *
from network_analysis.create_networks import *
from network_analysis.read_write_networks import *
from network_analysis.visualize_networks import * 


In [2]:
# Load the four project dataframes via the project loader; it is called with
# get_subscription=False (NOTE(review): confirm in load_datasets what that
# flag skips).
shxco_frames = get_updated_shxco_data(get_subscription=False)
members_df, books_df, borrow_events, events_df = shxco_frames


In [110]:
# Read the reconciled subject matches and derive the join keys.
# `uri` is the second-to-last "/"-separated segment of `sco_id`; `title` keeps
# only the text before the first "/". Both use the vectorised `.str` accessor,
# so a missing `sco_id` yields NaN instead of raising inside a Python lambda.
genre_df = (
    pd.read_csv('../reconcile/shxco_loc_matches_final.csv')
    .assign(
        uri=lambda matches: matches["sco_id"].str.split("/").str[-2],
        title=lambda matches: matches["title"].str.split("/").str[0],
    )
    .rename(columns={"id": "loc_id"})
)

# Rename the book columns so they line up with the `sco_*` columns used as
# merge keys below. `books_df` is deliberately rebound: later cells pass it on
# with these column names.
books_df = books_df.rename(
    columns={"author": "sco_author", "title": "sco_title"})

# Left join: every book is kept; books without a match get NaN in the
# columns contributed by genre_df.
updated_books_df = books_df.merge(
    genre_df,
    on=["uri", "sco_title", "sco_author"],
    how="left",
)


In [111]:
# updated_books_df.subject.fillna("no subject", inplace=True)
# updated_books_df['exploded_subject'] = updated_books_df.subject.apply(
#     lambda x: x.split(";"))
# exploded_books_df = updated_books_df.explode('exploded_subject')
# exploded_grouped = exploded_books_df.groupby(
#     ['exploded_subject']).size().reset_index(name='subject_counts')
# exploded_grouped[exploded_grouped.subject_counts > 1].sort_values(
#     by='subject_counts', ascending=False)


In [112]:
# Attribute mappings forwarded to the graph builder.
member_attrs = dict(uri='member_id')
book_attrs = dict(uri='item_uri')
edge_attrs = dict(weight='counts')
node_attrs = dict()

# Flags and metric lists forwarded to check_reload_build_unipartite_graphs.
should_process, write_to_file, is_projected = True, True, True
sk_metrics = ['katz', 'louvain']
link_metrics = ['pagerank', 'hubs', 'auth']

# One row per (member_id, item_uri) pair; `counts` is the number of rows in
# borrow_events for that pair.
all_events = borrow_events.copy()
all_events_grouped = (
    all_events
    .groupby(['member_id', 'item_uri'])
    .size()
    .reset_index(name='counts')
)

# The helper returns eight objects: graph, nodelist, edgelist and a fourth
# object for the members projection, then the same four for books.
(
    projected_members_graph,
    projected_members_nodelist,
    projected_members_edgelist,
    projected_members,
    projected_books_graph,
    projected_books_nodelist,
    projected_books_edgelist,
    projected_books,
) = check_reload_build_unipartite_graphs(
    all_events_grouped,
    all_events,
    member_attrs,
    book_attrs,
    edge_attrs,
    node_attrs,
    should_process,
    write_to_file,
    './data/all_events_unipartite_projected',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
    is_projected,
)


reloading saved graph: ./data/all_events_unipartite_projected


In [113]:
# Attach a `subject` attribute to every book node whose uri has a known
# subject in updated_books_df.
#
# The uri -> subject lookup is built once, instead of boolean-filtering the
# whole dataframe for each node. Keeping the first non-null row per uri gives
# the same value the per-node filter would have picked with `.values[0]`.
subject_by_uri = (
    updated_books_df
    .dropna(subset=['uri', 'subject'])
    .drop_duplicates(subset='uri', keep='first')
    .set_index('uri')['subject']
    .to_dict()
)

for node_id, node_data in projected_books_graph.nodes(data=True):
    book_uri = node_data['uri']
    # Nodes without a known subject are left unlabeled on purpose: the
    # node-classification step treats them as the ones to predict.
    if book_uri in subject_by_uri:
        node_data['subject'] = subject_by_uri[book_uri]


In [114]:
from networkx.algorithms import node_classification

# Propagate the known `subject` labels across the projected books graph.
# The result is indexed by node position when it is written back onto the
# nodes in the following cell.
predicted = node_classification.harmonic_function(
    projected_books_graph,
    label_name='subject',
)


In [115]:
# Store each prediction on its node. `predicted` is indexed by position, so
# the i-th node yielded by the graph receives predicted[i].
for position, (node_id, node_data) in enumerate(projected_books_graph.nodes(data=True)):
    node_data['predicted_subject'] = predicted[position]

In [117]:
# Export the networkx books graph (whose nodes now carry `subject` and
# `predicted_subject`) through the project helper into a node table and an
# edge table.
# NOTE(review): the meaning of the second positional argument (False) is
# defined in generate_dataframes; confirm it there.
nodes_df, edges_df = generate_dataframes(projected_books_graph, False, is_networkx=True)


In [118]:
# Books that also appear as graph nodes: inner join on uri and subject.
merged_books = updated_books_df.merge(
    nodes_df,
    on=['uri', 'subject'],
    how='inner',
)

In [119]:
# Distinct uris among graph nodes vs. in the books table. dropna=False counts
# a missing uri as one distinct value, the same as len(Series.unique()).
nodes_df["uri"].nunique(dropna=False), updated_books_df["uri"].nunique(dropna=False)

(5630, 6018)

In [120]:
# Outer-join books to graph nodes and keep the rows with no `bipartite`
# value, i.e. (presumably) books that have no node in the projected graph.
books_no_borrows = (
    updated_books_df
    .merge(nodes_df, on=['uri', 'subject'], how='outer')
    .loc[lambda joined: joined["bipartite"].isna()]
)

# Pull in whatever events exist for those books.
no_borrow_events = books_no_borrows.merge(
    events_df,
    left_on=['uri', 'exceptional_types', 'year'],
    right_on=['item_uri', 'exceptional_types', 'year'],
    how='left',
)

# Tally the matched events by type.
no_borrow_events["event_type"].value_counts()


Purchase       40
Generic        24
Gift            6
Crossed out     5
Name: event_type, dtype: int64

In [121]:
# Convert the networkx books graph into an igraph Graph so that igraph's
# community-detection methods can be run on it in the next cell.
final_books_graph = Graph.from_networkx(projected_books_graph)


In [122]:
import random

# community_multilevel (Louvain) is randomised and can return a different
# partition on every run. python-igraph draws its random numbers from Python's
# `random` module by default, so seeding it here makes the community columns
# reproducible under Restart & Run All.
COMMUNITY_SEED = 42
random.seed(COMMUNITY_SEED)

# Weighted community detection on the projected books graph.
multilevel = final_books_graph.community_multilevel(weights='weight')
fastgreedy = final_books_graph.community_fastgreedy(weights='weight')

# fastgreedy returns a dendrogram; as_clustering() cuts it into a flat
# clustering so it has a per-vertex membership list like multilevel does.
ml_membership = multilevel.membership
fg_membership = fastgreedy.as_clustering().membership

# Membership lists are ordered by vertex index, so they can be assigned to
# the whole vertex sequence at once instead of looping vertex by vertex.
final_books_graph.vs['multilevel_community'] = ml_membership
final_books_graph.vs['fastgreedy_community'] = fg_membership


In [123]:
# Re-export node and edge tables, this time from the igraph graph, so the
# community attributes set on its vertices come along as columns.
# NOTE(review): the meaning of the second positional argument (False) is
# defined in generate_dataframes; confirm it there.
nodelist, edgelist = generate_dataframes(final_books_graph, False, is_networkx=False)

In [124]:
# Nodes whose predicted subject is not equal to their recorded subject.
# NaN never compares equal, so nodes with no recorded subject are kept too.
subject_mismatch = nodelist["predicted_subject"].ne(nodelist["subject"])
predicted_books = nodelist[subject_mismatch]


In [130]:
# Show the books whose predicted subject is 'detective and mystery stories'.
predicted_books.loc[
    predicted_books["predicted_subject"].eq('detective and mystery stories')
]


Unnamed: 0,node_id,group,bipartite,uri,global_degree,local_degree,global_eigenvector,local_eigenvector,global_closeness,local_closeness,global_betweenness,local_betweenness,global_clustering,local_clustering,node_title,global_graph_radius,global_diameter,local_graph_radius,local_diameter,component,local_katz,local_louvain,local_pagerank,local_hubs,local_auth,pagerank,hubs,auth,global_pagerank,global_hubs,global_auth,global_katz,global_louvain,label,predicted_subject,subject,multilevel_community,fastgreedy_community
169,169,books,1.0,dreiser-free-stories,277.0,277.0,0.090133,0.090133,0.502739,0.502739,1503.500807,1503.500807,0.473631,0.473631,dreiser-free-stories,0.0,6.0,3.0,6.0,0.0,3.482617e+10,0.0,0.000108,0.090133,0.090133,0.000107,0.090133,0.090133,0.000107,0.090133,0.090133,3.482617e+10,0.0,n169,detective and mystery stories,,0,0
279,279,books,1.0,baedeker-paris-environs-routes,7.0,7.0,0.002389,0.002389,0.400515,0.400515,0.000000,0.000000,1.000000,1.000000,baedeker-paris-environs-routes,0.0,6.0,3.0,6.0,0.0,9.192448e+08,2.0,0.000033,0.002389,0.002389,0.000033,0.002389,0.002389,0.000033,0.002389,0.002389,9.192448e+08,2.0,n279,detective and mystery stories,,2,0
283,283,books,1.0,freeman-joseph-brethren,7.0,7.0,0.002389,0.002389,0.400515,0.400515,0.000000,0.000000,1.000000,1.000000,freeman-joseph-brethren,0.0,6.0,3.0,6.0,0.0,9.192448e+08,2.0,0.000033,0.002389,0.002389,0.000033,0.002389,0.002389,0.000033,0.002389,0.002389,9.192448e+08,2.0,n283,detective and mystery stories,,2,0
408,408,books,1.0,beresford-early-history-jacob,1647.0,525.0,0.738345,0.181759,0.581248,0.514569,5839.714296,2057.275253,0.631984,0.710832,beresford-early-history-jacob,0.0,6.0,3.0,6.0,0.0,6.983084e+10,3.0,0.000155,0.181759,0.181759,0.000155,0.181759,0.181759,0.000155,0.181759,0.181759,2.813498e+11,2.0,n408,detective and mystery stories,,4,3
420,420,books,1.0,frankau-dance-little-gentleman,1984.0,571.0,0.894517,0.185356,0.601547,0.517614,3697.677409,2256.259916,0.609157,0.483080,frankau-dance-little-gentleman,0.0,6.0,3.0,6.0,0.0,7.111968e+10,1.0,0.000172,0.185356,0.185356,0.000172,0.185356,0.185356,0.000172,0.185356,0.185356,3.421053e+11,1.0,n420,detective and mystery stories,,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5461,5461,books,1.0,graham-crimson-hairs-erotic,117.0,216.0,0.025002,0.091192,0.483587,0.474930,0.000000,0.000000,1.000000,1.000000,graham-crimson-hairs-erotic,0.0,6.0,3.0,6.0,0.0,3.505722e+10,4.0,0.000073,0.091192,0.091192,0.000073,0.091192,0.091192,0.000073,0.091192,0.091192,9.657394e+09,0.0,n5461,detective and mystery stories,,3,0
5462,5462,books,1.0,great-ellery-queen,117.0,216.0,0.025002,0.091192,0.483587,0.474930,0.000000,0.000000,1.000000,1.000000,great-ellery-queen,0.0,6.0,3.0,6.0,0.0,3.505722e+10,4.0,0.000073,0.091192,0.091192,0.000073,0.091192,0.091192,0.000073,0.091192,0.091192,9.657394e+09,0.0,n5462,detective and mystery stories,,3,0
5464,5464,books,1.0,manhood-apples-night,117.0,216.0,0.025002,0.091192,0.483587,0.474930,0.000000,0.000000,1.000000,1.000000,manhood-apples-night,0.0,6.0,3.0,6.0,0.0,3.505722e+10,4.0,0.000073,0.091192,0.091192,0.000073,0.091192,0.091192,0.000073,0.091192,0.091192,9.657394e+09,0.0,n5464,detective and mystery stories,,3,0
5465,5465,books,1.0,morley-romany-stain,117.0,216.0,0.025002,0.091192,0.483587,0.474930,0.000000,0.000000,1.000000,1.000000,morley-romany-stain,0.0,6.0,3.0,6.0,0.0,3.505722e+10,4.0,0.000073,0.091192,0.091192,0.000073,0.091192,0.091192,0.000073,0.091192,0.091192,9.657394e+09,0.0,n5465,detective and mystery stories,,3,0
