In [46]:
# 📦 IMPORTS
import pandas as pd
import networkx as nx
from networkx.algorithms import bipartite
from ipysigma import Sigma
import kagglehub
from kagglehub import KaggleDatasetAdapter
import ipywidgets as widgets
from IPython.display import display


In [47]:
# 📥 LOAD DATA
# Fetch the Washington D.C. Airbnb reviews CSV from Kaggle as a pandas DataFrame.
# `kagglehub.load_dataset` is deprecated and emits a DeprecationWarning when
# called; `kagglehub.dataset_load` is its replacement and takes the same
# (adapter, handle, path) arguments.
df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "hinanng/washington-d-c-airbnb-reviews",
    "reviews (3).csv",
)
# Treat both ID columns as strings from here on (they are later used as graph
# node keys, not as numbers).
df["reviewer_id"] = df["reviewer_id"].astype(str)
df["listing_id"] = df["listing_id"].astype(str)
# Parse the review date so it can be compared chronologically.
df["date"] = pd.to_datetime(df["date"])
# Keep only reviews dated 2022-01-01 or later.
df = df[df["date"] >= "2022-01-01"]

  df = kagglehub.load_dataset(


In [77]:
# 🖛 SLIDERS FOR FILTERING
# Three integer sliders; their current values are read by the "APPLY FILTERS"
# cell, so that cell must be re-run after a slider is moved.
min_reviews_slider = widgets.IntSlider(
    description="Min Reviews",
    min=1,
    max=10,
    value=3,
)
max_reviewers_slider = widgets.IntSlider(
    description="Max Reviewers",
    min=10,
    max=10000,
    value=100,
)
max_listings_slider = widgets.IntSlider(
    description="Max Listings",
    min=10,
    max=1000,
    value=50,
)

# Render all three controls, in this order, under the cell.
filter_sliders = (min_reviews_slider, max_reviewers_slider, max_listings_slider)
display(*filter_sliders)


IntSlider(value=3, description='Min Reviews', max=10, min=1)

IntSlider(value=100, description='Max Reviewers', max=10000, min=10)

IntSlider(value=50, description='Max Listings', max=1000, min=10)

In [85]:
# 🧹 APPLY FILTERS
# Snapshot the sliders' current positions into plain variables. The values are
# read once, here; moving a slider afterwards has no effect until this cell is
# executed again.
min_reviews, max_reviewers, max_listings = (
    min_reviews_slider.value,
    max_reviewers_slider.value,
    max_listings_slider.value,
)

In [86]:
# Filter guests independently
# guest_counts: number of DISTINCT listings each reviewer has reviewed
# (repeat reviews of the same listing count once).
guest_counts = df.groupby("reviewer_id")["listing_id"].nunique()
# Drop reviewers below the `min_reviews` threshold ...
qualifying_guests = guest_counts.loc[guest_counts.ge(min_reviews)]
# ... then keep the ids of the `max_reviewers` reviewers with the highest counts.
top_guests = qualifying_guests.sort_values(ascending=False).iloc[:max_reviewers].index


In [87]:
# Filter listings independently
# listing_counts: number of DISTINCT reviewers each listing has received
# reviews from.
listing_counts = df.groupby("listing_id")["reviewer_id"].nunique()
# The same `min_reviews` threshold used for guests is applied to listings ...
qualifying_listings = listing_counts.loc[listing_counts.ge(min_reviews)]
# ... then keep the ids of the `max_listings` listings with the highest counts.
top_listings = qualifying_listings.sort_values(ascending=False).iloc[:max_listings].index


In [88]:
# Keep only the review rows whose reviewer is in `top_guests` AND whose listing
# is in `top_listings` (both conditions must hold for a row to survive).
# NOTE(review): this is an intersection, not a union - switch `&` to `|` if rows
# matching either set were intended.
reviewer_is_top = df["reviewer_id"].isin(top_guests)
listing_is_top = df["listing_id"].isin(top_listings)
df_filtered = df[reviewer_is_top & listing_is_top]

In [89]:
# 🧱 BUILD BIPARTITE GRAPH
# Undirected graph with one node per reviewer (bipartite=0) and one per listing
# (bipartite=1); each filtered review row contributes a reviewer-listing edge.
# Because this is a plain nx.Graph, repeated reviews of the same listing by the
# same reviewer collapse into a single edge.
# NOTE(review): reviewer and listing ids share one string key space here. If a
# reviewer id ever equals a listing id the two would merge into a single node
# tagged bipartite=1 - confirm the id ranges cannot overlap.
B = nx.Graph()
for side, id_column in enumerate(("reviewer_id", "listing_id")):
    B.add_nodes_from(df_filtered[id_column], bipartite=side)
review_pairs = df_filtered[["reviewer_id", "listing_id"]].itertuples(index=False, name=None)
B.add_edges_from(review_pairs)

In [90]:
# 🎨 NODE ATTRIBUTES
# Split the nodes by the `bipartite` tag assigned when the graph was built.
guest_nodes, listing_nodes = [], []
for node, side in B.nodes(data="bipartite"):
    if side == 0:
        guest_nodes.append(node)
    elif side == 1:
        listing_nodes.append(node)

# One row per side: (nodes, x position, colour, title builder).
# Guests are placed in a column at x=-1 and listings in a column at x=1; within
# a column, y is simply the node's position in its list.
side_styles = (
    (guest_nodes, -1, "green", lambda degree: f"Guest | Reviews: {degree}"),
    (listing_nodes, 1, "blue", lambda degree: f"Listing | Reviews received: {degree}"),
)

for nodes, x_position, color, make_title in side_styles:
    for row, node in enumerate(nodes):
        degree = B.degree[node]
        # Node size is twice the node's degree (its number of distinct neighbours).
        B.nodes[node].update(
            x=x_position,
            y=row,
            color=color,
            size=degree * 2,
            title=make_title(degree),
        )


In [91]:
# 📈 INTERACTIVE NETWORK
# Render the graph with ipysigma, taking node colour and size from the "color"
# and "size" node attributes set in the previous cell. The Sigma(...) call is
# kept as the cell's last expression so the widget is displayed.
sigma_options = {
    "node_color": "color",
    "node_size": "size",
    "hide_edges_on_move": True,
}
Sigma(B, **sigma_options)


Sigma(nx.Graph with 1,887 nodes and 3,202 edges)