In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from ipywidgets import interact, Select
from IPython.display import display


In [2]:
# Load the raw reading log.
# A forward slash is used on purpose: in a normal string literal "\d" is an invalid escape
# sequence (Python warns about it) and a backslash-separated path only resolves on Windows.
df = pd.read_csv("datasets/dataset.csv")

In [3]:
# Order the log per reader (user_id) and, within a reader, by date_created, so that
# consecutive rows describe the order in which that reader opened the chapters.
df = df.sort_values(["user_id", "date_created"])

In [4]:
# A chapter name is only unique within its book, and the network is built from
# (source, target) node labels, so the node label combines both:
# "<stripped chapter> <stripped book>".
df["source"] = df["chapter"].str.strip().str.cat(df["book"].str.strip(), sep=" ")

In [5]:
# Vectorised alternative to a row-by-row loop: shifting the column up by one row puts the
# label of the following row next to the current one, i.e. "what was read next".
df["target"] = df["source"].shift(periods=-1)

In [6]:
# Same pattern again: adding a shifted helper column and filtering on it afterwards is a
# useful way to handle hierarchical structure (rows grouped per user) in a flat table.
# user_check holds the user_id of the following row.
df["user_check"] = df["user_id"].shift(periods=-1)

In [7]:
# keep_drop carries the user_id when the following row belongs to the same user and NaN
# otherwise, so a later dropna() on this column removes transitions that would cross users.
# Pandas-native alternatives to np.where / apply with a user-defined function would be
# Series.where(condition) for the masking, or groupby("user_id").shift(-1) for the shift itself.
df["keep_drop"] = np.where(df["user_id"] == df["user_check"], df["user_id"], np.nan)

In [8]:
# Keep only rows that describe a complete transition within one user:
#  - keep_drop is NaN when the following row belongs to a different user (or does not exist)
#  - chapter / source is NaN when the row itself lacks a chapter or a book
#  - target is NaN when the *following* row lacks a chapter or a book; "target" was shifted in
#    before this filter, so without this check such rows would add a NaN node to the graph
df = df.dropna(subset=["keep_drop", "chapter", "source", "target"])

In [9]:
# Build a directed graph with one edge per (source -> target) row of the edge list
G = nx.from_pandas_edgelist(
    df,
    source="source",
    target="target",
    create_using=nx.DiGraph,
)

In [10]:
# Compute three centrality scores per node and store each of them as a node attribute of G
out = nx.out_degree_centrality(G)
nx.set_node_attributes(G, values=out, name="out-degree")

bb = nx.betweenness_centrality(G)
nx.set_node_attributes(G, values=bb, name="betweenness")

eigen = nx.eigenvector_centrality(G)
nx.set_node_attributes(G, values=eigen, name="eigen")


In [11]:
# Materialise the node view as a list of (node, attribute-dict) pairs
data_list = [*G.nodes(data=True)]

In [12]:
# Turn the (node, attributes) pairs into a column-oriented dictionary:
# one list per column, all in the same node order
data = {
    "chapter": [node for node, _ in data_list],
    "out-degree": [attrs["out-degree"] for _, attrs in data_list],
    "eigen": [attrs["eigen"] for _, attrs in data_list],
    "betweenness": [attrs["betweenness"] for _, attrs in data_list],
}


In [13]:
# Node table: one row per node with its label ("chapter") and the three centrality scores
df2 = pd.DataFrame(data, columns=["chapter", "out-degree", "eigen", "betweenness"])

In [14]:
# Alphabetically sorted list of all distinct source labels (used as the widget options)
chapter = df["source"].unique().tolist()
chapter.sort()


In [39]:
def view(chapter=""):
    """Build the recommendation table for the selected chapter.

    Looks up the direct successors of ``chapter`` in the module-level graph ``G``,
    ranks them by the ``eigen`` column of ``df2`` (highest first) and translates each
    successor label back into its ``chapter`` / ``book`` pair via ``df``.

    Parameters
    ----------
    chapter : str
        Node label as stored in ``df["source"]`` (stripped chapter, one space,
        stripped book).

    Returns
    -------
    pandas.DataFrame
        Columns ``chapter`` and ``book``, one row per recommended chapter, indexed
        0..n-1. The table is empty when ``chapter`` is not a node of ``G`` or has no
        successor other than itself. Successors that have no ``source`` row in ``df``
        cannot be mapped back to a chapter/book pair and are left out.
    """
    no_recommendation = pd.DataFrame(columns=["chapter", "book"])

    # Unknown label (including the default ""): return an empty table instead of a KeyError
    if chapter not in G:
        return no_recommendation

    # A self-loop would recommend the chapter that is currently selected, so skip it here.
    successors = [node for node in G[chapter] if node != chapter]
    if not successors:
        return no_recommendation

    # Attach the centrality scores to the successors and rank by eigenvector centrality.
    ranked = (
        pd.DataFrame(index=pd.Index(successors))
        .join(df2.set_index("chapter"))
        .sort_values(by="eigen", ascending=False)
    )

    # Label lookup: node label -> original chapter / book values. Only the two needed
    # columns are joined, de-duplicated up front so the join stays small.
    labels = df[["source", "chapter", "book"]].drop_duplicates().set_index("source")

    # The left join keeps the ranking order of the successors.
    recommendations = (
        pd.DataFrame(index=ranked.index)
        .join(labels)
        .dropna(subset=["book", "chapter"])
        .drop_duplicates()
        .reset_index(drop=True)
    )
    return recommendations[["chapter", "book"]]

## Chapter recommendation

Please select a chapter. The table then lists the chapters (and the books they belong to) that are recommended to read next, sorted by eigenvector centrality in descending order.

In [40]:
# Selection list with every known chapter label
w = Select(options=chapter)
# interact() renders the widget itself and re-runs view() on every selection change.
# Its return value is the wrapped function, so passing it to display() would only print
# the function repr below the table; it is therefore not displayed.
v = interact(view, chapter=w)


interactive(children=(Select(description='chapter', options=(' madden', ' preukschat', ' serrano', '1-so-what-…

<function __main__.view(chapter='')>