In [1]:
from pathlib import Path
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from data.paths.parquet_paths import FRIENDS_VISITORS

In [2]:
# Read the parquet file referenced by FRIENDS_VISITORS into a DataFrame.
df = pd.read_parquet(path=str(FRIENDS_VISITORS))

In [3]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1105 entries, 0 to 1104
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   business_id        1105 non-null   object
 1   business_name      1105 non-null   object
 2   user_id            1105 non-null   object
 3   user_name          1105 non-null   object
 4   friends_attendees  1105 non-null   object
dtypes: object(5)
memory usage: 43.3+ KB


In [4]:
# Peek at four randomly chosen rows (unseeded, so the rows differ per run).
df.sample(n=4)

Unnamed: 0,business_id,business_name,user_id,user_name,friends_attendees
471,k1c_bC3DK6mKg797vH1T8w,Beckett's Table,VYqRk78kesHn86jPngPUXQ,Sara,"[{'user_id': 'Rs-HbyVraRCP2X4ey1t-IQ', 'user_n..."
74,buTa1akbH0pO_P10v99_eA,Honey Pig,AyjqBovADgbskmLrIBOMlQ,Steff,"[{'user_id': 'GawyLibaMGt3PCsv-hI5sA', 'user_n..."
922,bKSIcXTdr5qg83SFq_gOTA,David & Goliath,XvVkluYp5Sx3aqpo1NFM4Q,Vivian,"[{'user_id': 'iZNs250Zzjga4wSOU1AYcA', 'user_n..."
106,oi6mahpAmEuikfihFVDmCw,Biscuits,d_TBs6J3twMy9GChqUEXkg,Jennifer,"[{'user_id': 'PNx1x8x3wjTIjrjhaNiVAQ', 'user_n..."


In [5]:
# Show every row recorded for a single business id.
df.query(
    expr="business_id == '1HD5iUUfVJDbfEBIn9yVhw'",
)

Unnamed: 0,business_id,business_name,user_id,user_name,friends_attendees
243,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,sYtSLVYOyWavDzBU1ot5vw,Geoff,"[{'user_id': 'M9rRM6Eo5YbKLKMG5QiIPA', 'user_n..."
266,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,Mv2qM96jDdKXJNDvzANU-g,Robin,"[{'user_id': 'M9rRM6Eo5YbKLKMG5QiIPA', 'user_n..."
267,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,iDlkZO2iILS8Jwfdy7DP9A,Judy,"[{'user_id': 'M9rRM6Eo5YbKLKMG5QiIPA', 'user_n..."
406,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,9uE0smG2bwgkI95RPj0lPQ,Su-shien,"[{'user_id': 'sYtSLVYOyWavDzBU1ot5vw', 'user_n..."
409,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,DSDL_v5dvLOIJ-o5sRhz4w,Kathy,"[{'user_id': 'M9rRM6Eo5YbKLKMG5QiIPA', 'user_n..."
837,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,M9rRM6Eo5YbKLKMG5QiIPA,Aileen,"[{'user_id': 'sYtSLVYOyWavDzBU1ot5vw', 'user_n..."
983,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,2nE0zU6y_F7gkwHi3yL6cQ,Erin,"[{'user_id': 'iDlkZO2iILS8Jwfdy7DP9A', 'user_n..."


# Networking

In [6]:
from pyvis.network import Network
from community.community_louvain import best_partition, partition_at_level
import networkx as nx

business_name to user_id

In [17]:
# Keep only the two columns that form the endpoints of each edge.
business_user_network_df = df.loc[:, ["business_name", "user_id"]]

In [18]:
# Undirected graph with one edge per row: business_name <-> user_id.
# Nodes are keyed by business_name, so businesses that share a name end up
# as a single node.
business_user_network_graph = nx.from_pandas_edgelist(
    df=business_user_network_df,
    source='business_name',
    target='user_id',
    create_using=nx.Graph(),
)

In [19]:
# Map every node to its degree (number of incident edges).
business_node_degree = {
    node: degree for node, degree in business_user_network_graph.degree
}

# Attach the degree to each node under the 'size' attribute.
nx.set_node_attributes(
    business_user_network_graph,
    values=business_node_degree,
    name='size',
)

In [20]:
# Louvain community detection is randomised: without a fixed seed the
# node -> community mapping (and therefore the 'group' attribute) can change
# on every run. Pin random_state so Restart & Run All is reproducible.
business_communities = best_partition(business_user_network_graph, random_state=42)

# Store each node's community id under the 'group' attribute.
nx.set_node_attributes(business_user_network_graph, business_communities, 'group')

In [21]:
# Dark-themed pyvis canvas for the business <-> user graph.
business_net = Network(
    height="1000px",
    width="1800px",
    bgcolor='#222222',
    font_color='white',
)

# Copy nodes, edges and their attributes over from the networkx graph.
business_net.from_nx(business_user_network_graph)

# Enable the interactive configuration panel in the generated page.
business_net.show_buttons(filter_=True)

# Write the visualisation to an HTML file.
business_net.show("graph-business-users.html")

user_id to friend_id

In [29]:
# Flatten the nested friends_attendees column into (user_id, friend_id) pairs:
# one pair per friend listed on each row of df.
#
# The pairs are collected in a plain list and the DataFrame is built once.
# Appending with `.loc[len(frame)] = ...` inside the loop re-allocates the
# frame on every insert (quadratic time) and starts from float64 empty columns.
friend_pairs = [
    (source_user_id, friend['user_id'])
    for source_user_id, attendees in zip(df['user_id'], df['friends_attendees'])
    for friend in attendees
]

user_friends_network_df = pd.DataFrame(friend_pairs, columns=["user_id", "friend_id"])

In [31]:
# Collapse duplicate (user_id, friend_id) pairs into a single row whose
# "value" is the number of times the pair occurred.
#
# This cell rebinds user_friends_network_df, so on a re-run the frame already
# holds aggregated counts. Only seed value = 1 on the raw edge list; resetting
# it unconditionally would overwrite every count with 1 when the cell is run
# a second time.
if "value" not in user_friends_network_df.columns:
    user_friends_network_df = user_friends_network_df.assign(value=1)

user_friends_network_df = user_friends_network_df.groupby(
    ["user_id", "friend_id"], sort=False, as_index=False
).sum()

In [32]:
# Undirected graph with one edge per (user_id, friend_id) row; the row's
# "value" column is carried over as the edge attribute 'value'.
user_friends_network_graph = nx.from_pandas_edgelist(
    df=user_friends_network_df,
    source='user_id',
    target='friend_id',
    edge_attr='value',
    create_using=nx.Graph(),
)

In [33]:
# Map every node to its degree (number of incident edges).
friends_node_degree = {
    node: degree for node, degree in user_friends_network_graph.degree
}

# Attach the degree to each node under the 'size' attribute.
nx.set_node_attributes(
    user_friends_network_graph,
    values=friends_node_degree,
    name='size',
)

In [34]:
# Louvain community detection is randomised: without a fixed seed the
# node -> community mapping (and therefore the 'group' attribute) can change
# on every run. Pin random_state so Restart & Run All is reproducible.
# NOTE(review): edges carry their count under 'value', while best_partition
# is called with its default weight key — confirm whether the counts are
# meant to influence the partition (weight='value').
friends_communities = best_partition(user_friends_network_graph, random_state=42)

# Store each node's community id under the 'group' attribute.
nx.set_node_attributes(user_friends_network_graph, friends_communities, 'group')

In [35]:
# Dark-themed pyvis canvas for the user <-> friend graph.
friends_net = Network(
    height="1013px",
    width="1800px",
    bgcolor='#222222',
    font_color='white',
)

# Copy nodes, edges and their attributes over from the networkx graph.
friends_net.from_nx(user_friends_network_graph)

# Enable the interactive configuration panel in the generated page.
friends_net.show_buttons(filter_=True)

# Write the visualisation to an HTML file.
friends_net.show("graph-user-friends.html")