In [2]:
from pathlib import Path
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from data.paths.parquet_paths import FRIENDS_VISITORS

In [3]:
# Read the parquet file located at FRIENDS_VISITORS into the working frame.
friends_visitors_path = str(FRIENDS_VISITORS)
df = pd.read_parquet(friends_visitors_path)

In [4]:
# Summarize the schema: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1105 entries, 0 to 1104
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   business_id        1105 non-null   object
 1   business_name      1105 non-null   object
 2   user_id            1105 non-null   object
 3   user_name          1105 non-null   object
 4   friends_attendees  1105 non-null   object
dtypes: object(5)
memory usage: 43.3+ KB


In [5]:
# Preview four randomly chosen rows (unseeded, so the selection changes on every run).
df.sample(4)

Unnamed: 0,business_id,business_name,user_id,user_name,friends_attendees
949,rcaPajgKOJC2vo_l3xa42A,Bouchon,DtajKETYfOtSsKJCD33SOw,Belle,"[{'user_id': '0nOIH8qIMnmZdLCTemnRsw', 'user_n..."
216,WCFz9KVJTtIXSVvD5NMYoQ,Chart House,YwaKGmRNnSa3R3N4Hf9jLw,Paolo,"[{'user_id': 'lsSiIjAKVl-QRxKjRErBeg', 'user_n..."
142,SZEFE5hL7aN5nM-A44iPwQ,Prime,53bZ_EsXH71L7iFs5MP9_w,Cynthia,"[{'user_id': '8Z9ugoA4MGwiJVh-CB8bhQ', 'user_n..."
526,2JUDgnmoLDxD97gqmand9Q,Chipotle Mexican Grill,rA97zeqOrUYuEM69n0Xn4w,AMber,"[{'user_id': 'qibGLHABNReGeJr2w4_8yQ', 'user_n..."


In [6]:
# Show every row recorded for a single business_id to inspect the users listed for it.
df.query("business_id == '1HD5iUUfVJDbfEBIn9yVhw'")

Unnamed: 0,business_id,business_name,user_id,user_name,friends_attendees
243,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,sYtSLVYOyWavDzBU1ot5vw,Geoff,"[{'user_id': 'M9rRM6Eo5YbKLKMG5QiIPA', 'user_n..."
266,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,Mv2qM96jDdKXJNDvzANU-g,Robin,"[{'user_id': 'M9rRM6Eo5YbKLKMG5QiIPA', 'user_n..."
267,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,iDlkZO2iILS8Jwfdy7DP9A,Judy,"[{'user_id': 'M9rRM6Eo5YbKLKMG5QiIPA', 'user_n..."
406,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,9uE0smG2bwgkI95RPj0lPQ,Su-shien,"[{'user_id': 'sYtSLVYOyWavDzBU1ot5vw', 'user_n..."
409,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,DSDL_v5dvLOIJ-o5sRhz4w,Kathy,"[{'user_id': 'M9rRM6Eo5YbKLKMG5QiIPA', 'user_n..."
837,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,M9rRM6Eo5YbKLKMG5QiIPA,Aileen,"[{'user_id': 'sYtSLVYOyWavDzBU1ot5vw', 'user_n..."
983,1HD5iUUfVJDbfEBIn9yVhw,Boba Tea House,2nE0zU6y_F7gkwHi3yL6cQ,Erin,"[{'user_id': 'iDlkZO2iILS8Jwfdy7DP9A', 'user_n..."


# Networking

business_name to user_id

In [7]:
# Edge list for the business-user graph: one (business_name, user_id) pair per row.
business_user_network_df = df.loc[:, ['business_name', 'user_id']]

In [8]:
# Undirected graph with an edge between each business name and each user id
# that appear together in a row of the edge list.
business_user_network_graph = nx.from_pandas_edgelist(
    business_user_network_df,
    source='business_name',
    target='user_id',
    create_using=nx.Graph(),
)

In [9]:
from pyvis.network import Network
from community.community_louvain import best_partition, partition_at_level

# Degree of every node, i.e. the number of edges incident to it.
business_node_degree = dict(business_user_network_graph.degree)

# Setting up node size attribute.
# pyvis takes a node's drawn size from the 'size' attribute when a networkx graph is
# imported with from_nx(); any other attribute name is ignored for sizing. The
# original 'business_size' attribute is kept so existing readers of it still work.
nx.set_node_attributes(business_user_network_graph, business_node_degree, 'business_size')
nx.set_node_attributes(business_user_network_graph, business_node_degree, 'size')

In [10]:
# Louvain community detection. The algorithm is randomized, so random_state pins the
# result and a "Restart & Run All" reproduces the same communities.
business_communities = best_partition(business_user_network_graph, random_state=42)

# Store the community id on every node. 'group' is the attribute pyvis/vis.js uses to
# colour nodes; the original 'business_group' attribute is kept alongside it.
nx.set_node_attributes(business_user_network_graph, business_communities, 'business_group')
nx.set_node_attributes(business_user_network_graph, business_communities, 'group')

In [11]:
# Interactive pyvis canvas for the business-user graph.
# notebook=True matches the user-friends network cell further down and lets show()
# render the generated page inline in the notebook instead of relying on a
# non-notebook template/browser launch.
business_net = Network(
    notebook=True,
    width="1800px",
    height="1000px",
    bgcolor='#222222',
    font_color='white',
)

# Import the networkx graph, expose the configuration buttons and write/show the page.
business_net.from_nx(business_user_network_graph)
business_net.show_buttons(filter_=True)
business_net.show("graph-business-users.html")

user_name to friend_name

In [None]:
# Empty two-column edge list that the friends network builder fills in.
user_friends_network_df = pd.DataFrame(
    {column: [] for column in ("user_name", "friend_name")}
)

def create_friends_attendees_network(src: str, tr: str, from_df, to_df, friend_key: str = 'user_name') -> pd.DataFrame:
    """Expand a column of friend records into a flat (source, friend) edge list.

    For every row of ``from_df`` the value in column ``src`` is paired with the
    ``friend_key`` entry of each record found in column ``tr``.

    Args:
        src: Name of the column in ``from_df`` holding the source node value.
        tr: Name of the column in ``from_df`` holding an iterable of mapping-like
            friend records.
        from_df: Frame to read from.
        to_df: Two-column frame whose column names (and existing rows, if any)
            are carried over to the result. It is not modified.
        friend_key: Key looked up in every friend record; defaults to
            ``'user_name'``, the key the original implementation hard-coded.

    Returns:
        A new frame with the columns of ``to_df``: the existing rows of ``to_df``
        followed by one row per (source, friend) pair, indexed 0..n-1.
    """
    # Collect all pairs first and build the frame once; appending through
    # ``to_df.loc[len(to_df)]`` re-allocates the frame on every single row.
    edge_rows = [
        (source, friend[friend_key])
        for source, friends in zip(from_df[src], from_df[tr])
        # Missing cells surface as None or float NaN and carry no friends.
        if friends is not None and not isinstance(friends, float)
        for friend in friends
    ]
    new_edges = pd.DataFrame(edge_rows, columns=list(to_df.columns))

    # Returning the fresh frame directly avoids concatenating with an empty,
    # differently-typed placeholder frame.
    if to_df.empty:
        return new_edges
    return pd.concat([to_df, new_edges], ignore_index=True)
    
# Fill the edge list: each user's name paired with the name of every entry in
# that row's friends_attendees collection.
user_friends_network_df = create_friends_attendees_network(
    src='user_name',
    tr='friends_attendees',
    from_df=df,
    to_df=user_friends_network_df,
)

In [None]:
# Collapse repeated (user_name, friend_name) pairs into a single weighted edge;
# "value" counts how many rows of the raw edge list contained the pair.
# The guard keeps this cell idempotent: unconditionally resetting "value" to 1 on an
# already aggregated frame would wipe the counts when the cell is run a second time.
if "value" not in user_friends_network_df.columns:
    # assign() returns a new frame, so the raw edge list object is not mutated.
    user_friends_network_df = user_friends_network_df.assign(value=1)

user_friends_network_df = (
    user_friends_network_df
    .groupby(["user_name", "friend_name"], sort=False, as_index=False)["value"]
    .sum()
)

In [None]:
# Peek at the first five aggregated (user_name, friend_name, value) rows.
user_friends_network_df.head(5)

In [None]:
# Undirected graph between users and their friends; the per-pair count is
# carried on each edge as the 'value' attribute.
user_friends_network_graph = nx.from_pandas_edgelist(
    user_friends_network_df,
    source='user_name',
    target='friend_name',
    edge_attr='value',
    create_using=nx.Graph(),
)

In [None]:
# Degree of every node, i.e. the number of edges incident to it.
friends_node_degree = dict(user_friends_network_graph.degree)

# Setting up node size attribute.
# pyvis takes a node's drawn size from the 'size' attribute when a networkx graph is
# imported with from_nx(); any other attribute name is ignored for sizing. The
# original 'friends_size' attribute is kept so existing readers of it still work.
nx.set_node_attributes(user_friends_network_graph, friends_node_degree, 'friends_size')
nx.set_node_attributes(user_friends_network_graph, friends_node_degree, 'size')

In [None]:
# Louvain community detection. The algorithm is randomized, so random_state pins the
# result and a "Restart & Run All" reproduces the same communities.
# NOTE(review): edges carry their count under 'value', while best_partition reads
# 'weight' by default, so the counts are presumably not used here - confirm intent.
friends_communities = best_partition(user_friends_network_graph, random_state=42)

# Store the community id on every node. 'group' is the attribute pyvis/vis.js uses to
# colour nodes; the original 'friends_group' attribute is kept alongside it.
nx.set_node_attributes(user_friends_network_graph, friends_communities, 'friends_group')
nx.set_node_attributes(user_friends_network_graph, friends_communities, 'group')

In [None]:
# Interactive pyvis canvas for the user-friend graph, configured for notebook use.
friends_net = Network(
    notebook=True,
    width="1800px",
    height="1013px",
    bgcolor='#222222',
    font_color='white',
)

# Import the networkx graph, expose the configuration buttons and write/show the page.
friends_net.from_nx(user_friends_network_graph)
friends_net.show_buttons(filter_=True)
friends_net.show("graph-user-friends.html")