In [1]:
# Standard library
import collections
import itertools
import os
import sys
import warnings

# Third-party
import altair as alt
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from IPython.display import display, Markdown, HTML
from networkx.algorithms import bipartite
from networkx.algorithms.community import greedy_modularity_communities
from networkx.readwrite import json_graph
from pyvis import network as net
from tqdm.notebook import trange, tqdm
# import community
# import nx_altair as nxa
# from node2vec import Node2Vec

# Notebook-wide configuration
pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', None)
tqdm.pandas()
warnings.filterwarnings("ignore")

# Modules imported after adding the parent directory to sys.path
sys.path.append("..")
from bigraph.predict import pa_predict, jc_predict, cn_predict,aa_predict, katz_predict
from bigraph.evaluation import evaluation
from network_analysis.birankpy import BipartiteNetwork
from network_analysis.load_datasets import get_updated_shxco_data
from network_analysis.generate_network_metrics import *
from network_analysis.create_networks import *
from network_analysis.read_write_networks import * 
from network_analysis.link_prediction import * 
# Load the project data; the loader's four return values are unpacked into
# members_df, books_df, borrow_events and events_df.
# NOTE(review): the effect of get_subscription=False is defined in
# network_analysis.load_datasets and is not visible here — confirm there.
members_df, books_df, borrow_events, events_df = get_updated_shxco_data(get_subscription=False)


In [2]:
from dateutil.relativedelta import relativedelta

# Six-month calendar offset; passed further down as `relative_date` when
# generating the per-member predictions.
sixmonths = relativedelta(months=+6)
sixmonths


relativedelta(months=+6)

In [3]:
# Read the partial-borrowers table and copy its row index into an explicit
# `index_col` column, then preview the first row.
partial_df = pd.read_csv('../dataset_generator/data/partial_borrowers.csv')
partial_df = partial_df.assign(index_col=partial_df.index)
partial_df.head(1)



Unnamed: 0,member_id,subscription_start,subscription_end,known_borrows,index_col
0,martin-maud,1923-10-17,1923-11-17,36,0


In [16]:
# Restrict the analysis to a hand-picked list of partial borrowers.
partial_members = ['raphael-france', 'hemingway',
                   'colens', 'kittredge-eleanor-hayden']
# .copy() yields an independent frame, so the column assignments below do not
# write into a slice of the frame loaded earlier (chained-assignment warnings
# are disabled at the top of the notebook and would not flag it).
partial_df = partial_df[partial_df.member_id.isin(partial_members)].copy()
# parse subscription dates so we can use them to identify circulating books;
# values that cannot be parsed become NaT (errors='coerce')
partial_df['subscription_starttime'] = pd.to_datetime(
    partial_df['subscription_start'], errors='coerce')
partial_df['subscription_endtime'] = pd.to_datetime(
    partial_df['subscription_end'], errors='coerce')

# all_events = events_df[events_df.item_uri.isna() == False].copy()

# Keep only borrow events that have both a start and an end datetime.
borrow_events = borrow_events[
    borrow_events.start_datetime.notna() & borrow_events.end_datetime.notna()]
# Borrow events that started before 1942-01-01, as an independent copy.
all_borrows = borrow_events[borrow_events.start_datetime < '1942-01-01'].copy()


### Bipartite Link Predictions for All Event Types

In [5]:
# Attribute-to-column mappings handed to the graph builder below.
member_attrs = dict(uri='member_id')
book_attrs = dict(uri='item_uri')
edge_attrs = dict(weight='counts')

# One row per (member_id, item_uri) pair; `counts` is the number of rows in
# all_borrows for that pair.
all_borrows_grouped = (
    all_borrows
    .groupby(['member_id', 'item_uri'])
    .size()
    .reset_index(name='counts')
)

should_process = True
write_to_file = True
sk_metrics = ['katz', 'louvain']
link_metrics = ['HITS', 'CoHITS', 'BiRank', 'BGRM']

# The helper returns five objects: the graph, its node list and edge list,
# and the member and book tables.
(
    all_borrows_bipartite_graph,
    all_borrows_bipartite_nodelist,
    all_borrows_bipartite_edgelist,
    all_borrows_members,
    all_borrows_books,
) = check_reload_build_bipartite_graphs(
    all_borrows_grouped,
    member_attrs,
    book_attrs,
    edge_attrs,
    should_process,
    write_to_file,
    '/Volumes/SecondDrive/unknown_borrowers/network_analysis/data/borrow_events_bipartite',
    sk_metrics,
    link_metrics,
    members_df,
    books_df,
)


reloading saved graph: /Volumes/SecondDrive/unknown_borrowers/network_analysis/data/borrow_events_bipartite


In [6]:
# Remove disconnected components: collect the uri of every node whose
# `component` value is not 0 and delete those nodes from the graph in place.
remove = all_borrows_bipartite_nodelist.loc[
    all_borrows_bipartite_nodelist['component'] != 0, 'uri'
].tolist()
all_borrows_bipartite_graph.remove_nodes_from(remove)

In [15]:
## Run to get full bipartite link predictions from entire graph
# The graph is the one modified in place by the "Remove disconnected components"
# cell; the result is passed below as `predictions_df` to get_full_predictions.
all_preds = get_bipartite_link_predictions(all_borrows_bipartite_graph)


Running jaccard link prediction
Jaccard prediction starting...
Jaccard Executed in 33.88926887512207 seconds 

Running preferential attachment link prediction
Preferential_attachment prediction starting...
Preferential attachment Executed in 22.00365900993347 seconds 

Running common neighbors link prediction
Common neighbor prediction starting...
Common neighbours Executed in 22.020697116851807 seconds 

Running adamic adar link prediction
Adamic_adar prediction starting...
Adamic-adar Executed in 103.37933993339539 seconds 



In [8]:
# Names of the four link-prediction scores used below; the abbreviations
# presumably follow the methods listed in the run log above.
metrics = [
    'jc_prediction',  # Jaccard
    'pa_prediction',  # preferential attachment
    'cn_prediction',  # common neighbors
    'aa_prediction',  # Adamic-Adar
]
# all_preds[metrics].corr()

In [9]:
# Map each book uri to its title; if a uri occurs more than once, the last
# row's title wins.
title_lookup = dict(zip(books_df.uri, books_df.title))

In [17]:
# CSV that receives the per-member predictions for this section.
output_path = '/Volumes/SecondDrive/unknown_borrowers/network_analysis/data/partial_members_bipartite_network_all_events_link_predictions.csv'
# Delete any file left by a previous run before regenerating it.
# `os` must be imported explicitly in the notebook's import cell; it should not
# be relied on to arrive through one of the wildcard project imports.
if os.path.exists(output_path):
    os.remove(output_path)
# get_full_predictions is called once per row of partial_df (axis=1) with a
# progress bar; it is handed `output_path`.
# NOTE(review): presumably it writes each member's results to that file — confirm
# in network_analysis.link_prediction.
partial_df.progress_apply(
    get_full_predictions,
    axis=1,
    number_of_results=10,
    limit_to_circulation=True,
    predictions_df=all_preds,
    events_df=borrow_events,
    relative_date=sixmonths,
    predict_group='books',
    metrics=metrics,
    output_path=output_path,
)


  0%|          | 0/28 [00:00<?, ?it/s]

10    None
11    None
12    None
13    None
14    None
15    None
16    None
36    None
37    None
38    None
39    None
54    None
55    None
56    None
57    None
58    None
59    None
60    None
61    None
62    None
63    None
64    None
65    None
66    None
81    None
82    None
83    None
84    None
dtype: object

In [18]:
# Load the predictions CSV at `output_path` and display it.
# NOTE: `output_path` is reassigned further down for the circulation-specific
# file, so this cell reads whichever value was assigned most recently.
processed_predictions = pd.read_csv(output_path)
processed_predictions

Unnamed: 0,popular_all_books,popular_all_counts,popular_current_books,popular_current_counts,jc_prediction_all,jc_prediction_subset,jc_prediction_all_scores,jc_prediction_subset_scores,pa_prediction_all,pa_prediction_subset,pa_prediction_all_scores,pa_prediction_subset_scores,cn_prediction_all,cn_prediction_subset,cn_prediction_all_scores,cn_prediction_subset_scores,aa_prediction_all,aa_prediction_subset,aa_prediction_all_scores,aa_prediction_subset_scores,member_id,subscription_starttime,subscription_endtime,known_borrows
0,joyce-dubliners,19,thayer-dial,6,richardson-pointed-roofs,richardson-pointed-roofs,0.118143,0.118143,joyce-dubliners,joyce-dubliners,13684,13684,richardson-pointed-roofs,richardson-pointed-roofs,28.0,28.0,richardson-pointed-roofs,richardson-pointed-roofs,6.409024,6.409024,kittredge-eleanor-hayden,1924-01-17,1924-04-17,583
1,joyce-portrait-artist-young,13,garnett-lady-fox,5,joyce-portrait-artist-young,joyce-portrait-artist-young,0.111111,0.111111,joyce-portrait-artist-young,joyce-portrait-artist-young,13062,13062,joyce-portrait-artist-young,joyce-portrait-artist-young,27.0,27.0,joyce-portrait-artist-young,joyce-portrait-artist-young,6.081347,6.081347,kittredge-eleanor-hayden,1924-01-17,1924-04-17,583
2,thayer-dial,13,bennett-riceyman-steps,4,joyce-dubliners,joyce-dubliners,0.105691,0.105691,richardson-pointed-roofs,richardson-pointed-roofs,11507,11507,joyce-dubliners,joyce-dubliners,26.0,26.0,joyce-dubliners,joyce-dubliners,5.912031,5.912031,kittredge-eleanor-hayden,1924-01-17,1924-04-17,583
3,squire-london-mercury,12,macaulay-told-idiot,4,mansfield-garden-party-stories,mansfield-garden-party-stories,0.097046,0.097046,mansfield-garden-party-stories,mansfield-garden-party-stories,9952,9952,mansfield-garden-party-stories,mansfield-garden-party-stories,23.0,23.0,mansfield-garden-party-stories,mansfield-garden-party-stories,5.283793,5.283793,kittredge-eleanor-hayden,1924-01-17,1924-04-17,583
4,mansfield-garden-party-stories,12,mansfield-doves-nest-stories,4,douglas-south-wind,douglas-south-wind,0.090129,0.090129,joyce-exiles,joyce-exiles,9019,9019,lawrence-women-love,douglas-south-wind,21.0,21.0,lawrence-women-love,lawrence-women-love,4.874123,4.874123,kittredge-eleanor-hayden,1924-01-17,1924-04-17,583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,dunsany-five-plays,4,conrad-typhoon,3,dreiser-sister-carrie,dreiser-sister-carrie,0.052219,0.052219,bronte-wuthering-heights,dreiser-sister-carrie,18921,18921,dreiser-sister-carrie,dreiser-sister-carrie,20.0,20.0,bronte-wuthering-heights,lewis-main-street-story,5.192636,5.192636,raphael-france,1920-10-30,1921-04-30,1008
276,conrad-lord-jim,4,dunsany-five-plays,3,lewis-main-street-story,lewis-main-street-story,0.049608,0.049608,dreiser-sister-carrie,lewis-main-street-story,18020,18020,lewis-main-street-story,lewis-main-street-story,19.0,19.0,lewis-main-street-story,richardson-honeycomb-pilgrimage-3,5.155936,5.155936,raphael-france,1920-10-30,1921-04-30,1008
277,stephens-mary-mary,3,shaw-plays-pleasant-unpleasant,3,stein-three-lives,stein-three-lives,0.046997,0.046997,lewis-main-street-story,stein-three-lives,17119,17119,stein-three-lives,stein-three-lives,18.0,18.0,richardson-honeycomb-pilgrimage-3,stein-three-lives,4.471390,4.471390,raphael-france,1920-10-30,1921-04-30,1008
278,butler-way-flesh,3,stein-three-lives,3,bronte-wuthering-heights,shaw-plays-pleasant-unpleasant,0.036649,0.036649,stein-three-lives,conrad-lord-jim,14416,14416,bronte-wuthering-heights,conrad-lord-jim,14.0,14.0,stein-three-lives,clouston-lunatic-large-novel,3.899642,3.899642,raphael-france,1920-10-30,1921-04-30,1008


### Circulation-Specific Bipartite Link Predictions

In [13]:
# Only `output_path` is set in this cell: the generation step is commented out,
# so the following cell reads a CSV that must already exist at this path.
# NOTE(review): this reuses the name `output_path` from the all-events section;
# cells that read it depend on which assignment ran last.
# start_library = borrow_events.sort_values(
#     by=['start_datetime'])[0:1].start_datetime.values[0]
output_path = '/Volumes/SecondDrive/unknown_borrowers/network_analysis/data/partial_members_bipartite_circulation_events_predictions.csv'
# if os.path.exists(output_path):
#     os.remove(output_path)
# partial_df[0:1].apply(get_specific_predictions, axis=1, number_of_results=10, limit_to_circulation=True, events_df=events_df, borrow_events=borrow_events, members_df=members_df, books_df=books_df, relative_date=start_library, predict_group='books', output_path=output_path)


In [14]:
# Load the CSV that `output_path` currently points to and display it.
predictions = pd.read_csv(output_path)
predictions

Unnamed: 0,member_id,predicted_values,score,metric
0,kittredge-eleanor-hayden,joyce-exiles,0.210526,jc_prediction
1,kittredge-eleanor-hayden,joyce-dubliners,0.185185,jc_prediction
2,kittredge-eleanor-hayden,butler-erewhon,0.166667,jc_prediction
3,kittredge-eleanor-hayden,butler-way-flesh,0.166667,jc_prediction
4,kittredge-eleanor-hayden,joyce-portrait-artist-young,0.142857,jc_prediction
...,...,...,...,...
1015,raphael-france,dunsany-five-plays,0.791186,aa_prediction
1016,raphael-france,squire-london-mercury,0.300102,aa_prediction
1017,raphael-france,conrad-lord-jim,0.233075,aa_prediction
1018,raphael-france,stein-three-lives,,aa_prediction
