# Kickstarter Network Visualizations

This notebook builds three complementary network views of Kickstarter project behavior using `Kickstarter Projects_639378_Aug 2025 Updated.csv`.

## What this notebook covers
1. **Category similarity network** (success rate + funding profile similarity).
2. **Country-category bipartite network** (specialization by country).
3. **Creator movement network** (category pairs creators most often attempt together).


In [None]:
from itertools import combinations

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns

# Apply one shared look to every figure produced by this notebook.
sns.set_theme(style="whitegrid", context="talk")
plt.rcParams.update(
    {
        "figure.facecolor": "white",
        "axes.facecolor": "white",
    }
)

# Input file and the `state` value that is counted as a success.
DATA_PATH = "Kickstarter Projects_639378_Aug 2025 Updated.csv"
SUCCESS_LABEL = "successful"

# Readability thresholds for the network views.
MIN_COUNTRY_CATEGORY_PROJECTS = 100  # minimum projects for a country-category edge
MIN_CREATOR_PAIR_COUNT = 100  # minimum shared creators for a category-pair edge
TOP_CREATOR_EDGES = 30  # cap on creator edges drawn in the final figure


## 1) Data Loading and Feature Engineering

We standardize category labels, derive a blurb-length feature, and compute category-level metrics reused across all three visualizations.


In [None]:
# Read the raw export, title-case the parent category labels, and record the
# character length of each blurb (a missing blurb counts as length 0).
df = pd.read_csv(DATA_PATH)
df = df.assign(
    category_parent_name=df["category_parent_name"].str.title(),
    Blurb_Length=df["blurb"].fillna("").str.len(),
)

# One row per parent category: volume, success percentage, mean pledged USD,
# and mean blurb length.
per_category = df.groupby("category_parent_name").agg(
    total_projects=("id", "count"),
    success_rate=("state", lambda states: states.eq(SUCCESS_LABEL).mean() * 100),
    avg_funding=("usd_pledged", "mean"),
    avg_blurb_length=("Blurb_Length", "mean"),
)
category_metrics = (
    per_category.rename_axis("category")
    .reset_index()
    .sort_values("total_projects", ascending=False)
)

print(f"Loaded {len(df):,} projects across {len(category_metrics)} parent categories.")
print(category_metrics.head(10))


## 2) Graph 1: Category Similarity Network

Categories are connected only when their funding and success profiles are sufficiently similar (`similarity > 0.5`).


In [None]:
def calculate_similarity(
    row1: pd.Series, row2: pd.Series, threshold: float = 0.5
) -> float:
    """Score how alike two categories are on success rate and average funding.

    Each argument must support lookup of ``"success_rate"`` (a percentage) and
    ``"avg_funding"``. The score is ``1 - mean(success_diff, funding_diff)``,
    where ``success_diff`` is the absolute gap in success rate divided by 100
    and ``funding_diff`` is the absolute gap in average funding divided by the
    larger of the two averages (floored at 1 to avoid dividing by zero).

    Args:
        row1: Metrics for the first category.
        row2: Metrics for the second category.
        threshold: Scores at or below this value are reported as 0, meaning
            "not similar enough to connect". Defaults to 0.5.

    Returns:
        The similarity score when it is strictly greater than ``threshold``,
        otherwise ``0.0``.
    """
    success_diff = abs(row1["success_rate"] - row2["success_rate"]) / 100
    max_funding = max(row1["avg_funding"], row2["avg_funding"], 1)
    funding_diff = abs(row1["avg_funding"] - row2["avg_funding"]) / max_funding
    similarity = 1 - (success_diff + funding_diff) / 2
    # A NaN score fails this comparison and therefore also maps to 0.0.
    return similarity if similarity > threshold else 0.0

# Build weighted edges between similar categories.
# Rows are materialized once so the inner loop does not rebuild every row
# Series on each outer pass.
metric_rows = list(category_metrics.iterrows())
edges = []
for i, cat1 in metric_rows:
    for j, cat2 in metric_rows:
        # Keep one orientation of each pair and skip self-pairs
        # (assumes the index labels of category_metrics are unique).
        if i >= j:
            continue
        sim = calculate_similarity(cat1, cat2)
        if sim > 0:
            edges.append(
                {
                    "source": cat1["category"],
                    "target": cat2["category"],
                    "weight": sim,
                }
            )

# Explicit columns keep the frame sortable even when no pair clears the
# similarity threshold; a column-less frame would raise KeyError on "weight".
edges_df = pd.DataFrame(edges, columns=["source", "target", "weight"]).sort_values(
    "weight", ascending=False
)

# Every category becomes a node, including ones that end up with no edges.
G = nx.Graph()
for _, row in category_metrics.iterrows():
    G.add_node(
        row["category"],
        size=row["total_projects"],
        success_rate=row["success_rate"],
        avg_funding=row["avg_funding"],
    )

# Column order is (source, target, weight), which is the triple layout
# add_weighted_edges_from expects.
G.add_weighted_edges_from(edges_df.itertuples(index=False, name=None))

print(f"Graph 1: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
print(edges_df.head(10))


In [None]:
# Draw the similarity graph on an explicit figure/axes pair using a seeded
# force-directed layout so the picture is reproducible.
fig, ax = plt.subplots(figsize=(16, 12))
pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

# Visual encodings, listed in the graph's own node/edge iteration order.
node_sizes = [attrs["size"] / 50 for _, attrs in G.nodes(data=True)]
node_colors = [attrs["success_rate"] for _, attrs in G.nodes(data=True)]
edge_widths = [weight * 5 for _, _, weight in G.edges(data="weight")]

nx.draw_networkx_nodes(
    G,
    pos,
    ax=ax,
    node_size=node_sizes,
    node_color=node_colors,
    cmap="RdYlGn",
    vmin=0,
    vmax=100,
    alpha=0.9,
    edgecolors="black",
    linewidths=1.5,
)
nx.draw_networkx_edges(G, pos, ax=ax, width=edge_widths, alpha=0.4, edge_color="gray")
nx.draw_networkx_labels(G, pos, ax=ax, font_size=10, font_weight="bold")

# Standalone mappable so the colorbar spans the full 0-100 % range used for
# the node colors.
sm = plt.cm.ScalarMappable(cmap="RdYlGn", norm=plt.Normalize(vmin=0, vmax=100))
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax, shrink=0.8)
cbar.set_label("Success Rate (%)", fontsize=12)

ax.set_title(
    "Category Similarity Network\n"
    "Node size = total projects | Node color = success rate | Edge thickness = similarity",
    fontsize=14,
    fontweight="bold",
    pad=20,
)
ax.axis("off")
fig.tight_layout()
fig.savefig("category_network.png", dpi=300, bbox_inches="tight", facecolor="white")
plt.show()


## 3) Graph 2: Country-to-Category Specialization

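This bipartite view links the 10 highest-volume countries to the 7 highest-volume parent categories, keeping only country-category pairs with at least `MIN_COUNTRY_CATEGORY_PROJECTS` (100) projects. Edge thickness reflects total funding, and edge color reflects success rate.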


In [None]:
# Keep only the ten busiest countries and the seven busiest parent categories
# so the bipartite picture stays legible.
top_countries = df["country"].value_counts().index[:10].tolist()
top_categories = category_metrics.nlargest(7, "total_projects")["category"].tolist()

in_scope = df["country"].isin(top_countries) & df["category_parent_name"].isin(top_categories)
df_filtered = df.loc[in_scope]

# Per (country, category) pair: project count, pledged USD total, and success
# percentage; pairs below the minimum project count are discarded.
country_category = (
    df_filtered.groupby(["country", "category_parent_name"])
    .agg(
        num_projects=("id", "count"),
        total_funding=("usd_pledged", "sum"),
        success_rate=("state", lambda states: states.eq(SUCCESS_LABEL).mean() * 100),
    )
    .reset_index()
    .rename(columns={"category_parent_name": "category"})
    .loc[lambda stats: stats["num_projects"] >= MIN_COUNTRY_CATEGORY_PROJECTS]
    .copy()
)

# Two node sets (countries on one side, categories on the other), joined by
# one edge per surviving pair.
B = nx.Graph()
B.add_nodes_from(country_category["country"].unique(), bipartite=0, node_type="country")
B.add_nodes_from(country_category["category"].unique(), bipartite=1, node_type="category")

for pair in country_category.itertuples(index=False):
    B.add_edge(
        pair.country,
        pair.category,
        weight=pair.total_funding,
        num_projects=pair.num_projects,
        success_rate=pair.success_rate,
    )

countries = sorted(name for name, kind in B.nodes(data="node_type") if kind == "country")
categories = sorted(name for name, kind in B.nodes(data="node_type") if kind == "category")

print(
    f"Graph 2: {B.number_of_nodes()} nodes, {B.number_of_edges()} edges "
    f"({len(countries)} countries, {len(categories)} categories)"
)
print(country_category.head(10))


In [None]:
fig, ax = plt.subplots(figsize=(18, 12))

# Two vertical columns: countries at x=0 and categories at x=5, each spread
# evenly over y in [0, 10] (a single node sits at y=0).
country_spacing = 10 / (len(countries) - 1) if len(countries) > 1 else 0
category_spacing = 10 / (len(categories) - 1) if len(categories) > 1 else 0
pos = {name: (0, rank * country_spacing) for rank, name in enumerate(countries)}
pos.update({name: (5, rank * category_spacing) for rank, name in enumerate(categories)})

# Project volume per node is the sum of num_projects over its incident edges,
# i.e. its degree weighted by that edge attribute.
country_projects = [B.degree(name, weight="num_projects") for name in countries]
category_projects = [B.degree(name, weight="num_projects") for name in categories]

country_sizes = [np.sqrt(volume) * 15 for volume in country_projects]
category_sizes = [np.sqrt(volume) * 20 for volume in category_projects]

# Both node sets share every styling option except the node list, size, and colour.
shared_node_style = dict(alpha=0.9, edgecolors="black", linewidths=2)
nx.draw_networkx_nodes(
    B,
    pos,
    ax=ax,
    nodelist=countries,
    node_size=country_sizes,
    node_color="#4A90E2",
    **shared_node_style,
)
nx.draw_networkx_nodes(
    B,
    pos,
    ax=ax,
    nodelist=categories,
    node_size=category_sizes,
    node_color="#FF8C42",
    **shared_node_style,
)

if B.number_of_edges() > 0:
    edge_attrs = [attrs for _, _, attrs in B.edges(data=True)]
    max_funding = max(attrs["weight"] for attrs in edge_attrs)
    # Width scales with funding relative to the largest edge, with a 0.5 floor.
    edge_widths = [(attrs["weight"] / max_funding) * 6 + 0.5 for attrs in edge_attrs]
    edge_colors = [attrs["success_rate"] for attrs in edge_attrs]

    nx.draw_networkx_edges(
        B,
        pos,
        ax=ax,
        width=edge_widths,
        alpha=0.6,
        edge_color=edge_colors,
        edge_cmap=plt.cm.viridis,
        edge_vmin=0,
        edge_vmax=100,
    )

# Labels sit just outside each column: countries to the left, categories to the right.
country_labels = dict(zip(countries, countries))
category_labels = dict(zip(categories, categories))
shared_label_style = dict(font_size=11, font_weight="bold")

nx.draw_networkx_labels(
    B,
    {name: (pos[name][0] - 0.5, pos[name][1]) for name in countries},
    ax=ax,
    labels=country_labels,
    horizontalalignment="right",
    **shared_label_style,
)
nx.draw_networkx_labels(
    B,
    {name: (pos[name][0] + 0.5, pos[name][1]) for name in categories},
    ax=ax,
    labels=category_labels,
    horizontalalignment="left",
    **shared_label_style,
)

# Colorbar for the edge colours (success rate on a fixed 0-100 % scale).
sm = plt.cm.ScalarMappable(cmap=plt.cm.viridis, norm=plt.Normalize(vmin=0, vmax=100))
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax, fraction=0.02, pad=0.02)
cbar.set_label("Success Rate (%)", fontsize=13, weight="bold")

# Column headers.
ax.text(-1, 11, "COUNTRIES", fontsize=14, weight="bold", color="#4A90E2")
ax.text(5, 11, "CATEGORIES", fontsize=14, weight="bold", color="#FF8C42")

ax.set_title(
    "Country-to-Category Specialization Network\n"
    "Node size = project volume | Edge thickness = total funding | Edge color = success rate",
    fontsize=15,
    fontweight="bold",
    pad=25,
)
ax.set_xlim(-2, 7)
ax.set_ylim(-1, 12)
ax.axis("off")
fig.tight_layout()
fig.savefig("country_category_bipartite_clean.png", dpi=300, bbox_inches="tight", facecolor="white")
plt.show()


## 4) Graph 3: Creator Movement Across Categories

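Each edge counts the creators who launched projects in both categories. For readability, the graph keeps only pairs shared by at least `MIN_CREATOR_PAIR_COUNT` (100) creators, and the plot draws at most the `TOP_CREATOR_EDGES` (30) strongest of them.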


In [None]:
# Distinct parent categories attempted by each creator, as a sorted list.
# Rows with no parent category are dropped first: a NaN mixed with strings
# cannot be sorted (TypeError), and a NaN category would otherwise become a
# graph node that has no row in category_metrics.
creator_categories = (
    df.dropna(subset=["category_parent_name"])
    .groupby("creator_id")["category_parent_name"]
    .apply(lambda cats: sorted(set(cats)))
    .reset_index()
)

# Only creators active in two or more categories can contribute an edge.
multi_category_creators = creator_categories[
    creator_categories["category_parent_name"].apply(len) >= 2
]

# Count, for every unordered category pair, how many creators tried both.
edge_counts = {}
for creator_cats in multi_category_creators["category_parent_name"]:
    # creator_cats is already sorted, so combinations() yields each pair in
    # one canonical (alphabetical) orientation.
    for pair in combinations(creator_cats, 2):
        edge_counts[pair] = edge_counts.get(pair, 0) + 1

# Explicit columns plus an integer cast keep the table well-typed when there
# are no pairs at all (an empty frame would otherwise have an object-dtype
# "weight" column).
creator_edges = (
    pd.DataFrame(
        [
            {"source": pair[0], "target": pair[1], "weight": count}
            for pair, count in edge_counts.items()
        ],
        columns=["source", "target", "weight"],
    )
    .astype({"weight": "int64"})
    .sort_values("weight", ascending=False)
)

# Every category is a node; only sufficiently common pairs become edges.
G_creators = nx.Graph()
for category in category_metrics["category"].unique():
    G_creators.add_node(category)

strong_creator_edges = creator_edges[
    creator_edges["weight"] >= MIN_CREATOR_PAIR_COUNT
]
for _, edge in strong_creator_edges.iterrows():
    G_creators.add_edge(edge["source"], edge["target"], weight=edge["weight"])

print(
    f"Graph 3: {G_creators.number_of_nodes()} nodes, {G_creators.number_of_edges()} edges "
    f"(from {len(multi_category_creators):,} multi-category creators)"
)
print(creator_edges.head(10))


In [None]:
plt.figure(figsize=(18, 16))
pos = nx.circular_layout(G_creators)

# Index the metrics by category once so each node is a direct lookup rather
# than a boolean scan of the whole table.
metrics_by_category = category_metrics.set_index("category")
node_sizes = [
    metrics_by_category.at[node, "total_projects"] / 30 for node in G_creators.nodes()
]
node_colors = [
    metrics_by_category.at[node, "success_rate"] for node in G_creators.nodes()
]

# Strongest links first. A stable sort plus head() is used instead of
# nlargest() because nlargest() rejects object-dtype columns, which is what
# the "weight" column of an edge table built from zero rows can have.
top_edges = strong_creator_edges.sort_values(
    "weight", ascending=False, kind="stable"
).head(TOP_CREATOR_EDGES)
edges_to_draw = list(zip(top_edges["source"], top_edges["target"]))
edge_weights = top_edges["weight"].tolist()

nx.draw_networkx_nodes(
    G_creators,
    pos,
    node_size=node_sizes,
    node_color=node_colors,
    cmap=plt.cm.viridis,
    vmin=0,
    vmax=100,
    alpha=0.95,
    edgecolors="black",
    linewidths=3,
)

if edges_to_draw:
    max_weight = max(edge_weights)
    # Width scales with creator count relative to the strongest edge, floor of 1.
    edge_widths = [(weight / max_weight) * 12 + 1 for weight in edge_weights]
    # NOTE(review): edge colour encodes creator count on its own min-max scale
    # but reuses the viridis map of the success-rate colorbar below; consider a
    # distinct colormap or a second legend.
    edge_colors = edge_weights
    nx.draw_networkx_edges(
        G_creators,
        pos,
        edgelist=edges_to_draw,
        width=edge_widths,
        alpha=0.6,
        edge_color=edge_colors,
        edge_cmap=plt.cm.viridis,
        edge_vmin=min(edge_colors),
        edge_vmax=max(edge_colors),
    )

# Push labels radially outward so they do not sit on top of the nodes.
label_pos = {node: (x * 1.22, y * 1.22) for node, (x, y) in pos.items()}
for node, (x, y) in label_pos.items():
    plt.text(
        x,
        y,
        node,
        fontsize=13,
        fontweight="bold",
        ha="center",
        va="center",
        bbox=dict(
            boxstyle="round,pad=0.4",
            facecolor="white",
            edgecolor="black",
            linewidth=2,
            alpha=0.95,
        ),
    )

# Colorbar for the node colours (success rate on a fixed 0-100 % scale).
sm = plt.cm.ScalarMappable(cmap=plt.cm.viridis, norm=plt.Normalize(vmin=0, vmax=100))
sm.set_array([])
cbar = plt.colorbar(sm, ax=plt.gca(), fraction=0.02, pad=0.02)
cbar.set_label("Success Rate (%)", fontsize=14, weight="bold")

# Report the number of edges actually drawn; it can be lower than
# TOP_CREATOR_EDGES when few pairs pass the minimum creator count.
plt.title(
    "Top Creator Movement Patterns Between Categories\n"
    "Node size = projects | Node color = success rate | Edge thickness = creators who tried both\n"
    f"Showing top {len(edges_to_draw)} strongest connections",
    fontsize=16,
    fontweight="bold",
    pad=30,
)
plt.axis("off")
plt.xlim(-1.6, 1.6)
plt.ylim(-1.6, 1.6)
plt.tight_layout()
plt.savefig("creator_movement_network.png", dpi=300, bbox_inches="tight", facecolor="white")
plt.show()
