In [1]:
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np
import glob
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
import matplotlib.pyplot as plt

from bokeh.io import output_notebook, show, save
from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine, EdgesAndLinkedNodes, NodesAndLinkedEdges, LabelSet
from bokeh.plotting import figure
from bokeh.plotting import from_networkx
from bokeh.palettes import Blues8, Reds8, Purples8, Oranges8, Viridis8, Spectral8, Turbo256, Viridis256
from bokeh.transform import linear_cmap
from networkx.algorithms import community

from functools import reduce

In [2]:
# Specify source folder
# NOTE(review): absolute, machine-specific path; adjust it when running elsewhere.
src_data_folder = r"C:\Users\TAYCO\Documents\Github\sparta-butuan-incubation-workshop\Network_Analysis\01_Data"

# Sort the matches: glob returns files in arbitrary, OS-dependent order, and any
# order-sensitive step downstream (e.g. keeping the first of duplicate rows)
# would otherwise give different results from run to run.
bm_extracts = sorted(glob.glob(join(src_data_folder, "*.csv")))

# Fail early with a clear message instead of pandas' generic
# "No objects to concatenate" error when the folder holds no CSV files.
if not bm_extracts:
    raise FileNotFoundError(f"No CSV files found in {src_data_folder!r}")

# Import all CSVs into a single dataframe (columns are unioned and sorted)
df_from_each_file = (pd.read_csv(f) for f in bm_extracts)
extract = pd.concat(df_from_each_file, ignore_index=True, sort=True)

In [3]:
#--- Drop duplicate rows (same URL), keeping the first occurrence
extract = extract.drop_duplicates(subset="URL", keep="first")

#--- Drop rows with missing shared link
# .copy() detaches the filtered frame so the column assignments below operate
# on an independent dataframe and do not raise SettingWithCopyWarning.
extract = extract[extract["Link"].notna()].copy()

#--- Create new fields for Name and Shared Link
# bfill(axis=1) pulls the first non-null value of each row into the first
# column, so .iloc[:, 0] is "first available of the listed columns".
extract["Name"] = extract[["Page Name", "Group Name", "Subreddit"]].bfill(axis=1).iloc[:, 0]
extract["Shared Link"] = extract[["Final Link", "Link"]].bfill(axis=1).iloc[:, 0]

#--- Numeric weight: strip thousands separators, then convert to float
extract["Weight"] = (
    extract["Total Interactions"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype("float")
)


In [20]:
from pyvis.network import Network

# Interactive network: each row of `extract` links a Name (blue node) to the
# Shared Link it posted (red node), with the row's Weight on the edge.
url_net = Network(height='750px', width='100%', bgcolor='#222222', font_color='white')

# set the physics layout of the network
url_net.force_atlas_2based()

# Rows whose Name (or Shared Link) is still missing cannot become nodes: a NaN
# id is a float, and pyvis is expected to accept only str/int node ids
# (NOTE(review): confirm against the installed pyvis version).
edges = extract.dropna(subset=["Name", "Shared Link"])

sources = edges['Name']
targets = edges['Shared Link']
weights = edges["Weight"]

edge_data = zip(sources, targets, weights)

for src, dst, w in edge_data:
    url_net.add_node(src, src, title=src, color='blue')
    url_net.add_node(dst, dst, title=dst, size=w, color='red')
    url_net.add_edge(src, dst, value=w, color='white')

neighbor_map = url_net.get_adj_list()

# add neighbor data to node hover data
for node in url_net.nodes:
    adjacent = neighbor_map[node['id']]
    # Sort for a reproducible hover text (the adjacency values are unordered)
    # and stringify so a non-string id cannot break str.join.
    neighbor_labels = sorted(str(n) for n in adjacent)
    node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_labels)
    node['value'] = len(adjacent)

url_net.show_buttons(filter_=['physics'])
# NOTE(review): absolute, machine-specific output path; adjust when running elsewhere.
url_net.show('C:/Users/TAYCO/Documents/Github/sparta-butuan-incubation-workshop/Network_Analysis/03_Outputs/url_sharing.html')