# In [8]:
import pandas as pd
import json
from itertools import combinations
from collections import defaultdict

def clean_and_split(investors_str):
    """Split a '+'-separated investor string into a list of trimmed names.

    Args:
        investors_str: Raw cell value such as ``"Fund A + Fund B"``. Missing
            values (anything ``pd.isnull`` reports as null, e.g. ``None`` or
            NaN) are accepted.

    Returns:
        The names in their original order with surrounding whitespace
        removed. Missing input yields an empty list. Blank fragments (from an
        empty string, a trailing '+', or a doubled '+') are dropped so they
        never become an empty-named investor.
    """
    if pd.isnull(investors_str):
        return []
    stripped = (fragment.strip() for fragment in investors_str.split('+'))
    return [name for name in stripped if name]

# Load the data
df = pd.read_csv("~/raises.csv")

# co_investments: {(investor_a, investor_b): number of rows both appear in}.
# Each pair is stored with its names in sorted order, so an undirected pair
# always maps to exactly one key.
co_investments = defaultdict(int)
# total_investments: symmetric adjacency counts,
# {VC1: {VC2: deals, VC3: deals}, VC2: {...}}
total_investments = defaultdict(lambda: defaultdict(int))

# Process the dataframe: every row contributes one co-investment per
# unordered pair of distinct investors named in it.
for _, row in df.iterrows():
    investors = clean_and_split(row['Lead Investor']) + clean_and_split(row['Other Investors'])
    # set() removes names repeated within the row. sorted() is required
    # because set iteration order is not consistent from one row to the next:
    # without it the same pair could be counted under both (A, B) and (B, A),
    # splitting its total and emitting duplicate links.
    for investor_a, investor_b in combinations(sorted(set(investors)), 2):
        co_investments[(investor_a, investor_b)] += 1
        total_investments[investor_a][investor_b] += 1
        total_investments[investor_b][investor_a] += 1

# Preparing nodes and links for JSON.
# One node per investor that has at least one co-investor (the keys of
# total_investments). Dict keys are already unique, so no set() is needed;
# sorting gives the node list a stable order instead of one that depends on
# string hash randomisation.
nodes = [{"id": investor, "group": 1} for investor in sorted(total_investments)]
# One link per investor pair, weighted by the number of shared deals.
links = [{"source": a, "target": b, "value": deals} for (a, b), deals in co_investments.items()]

# Bundle nodes and links into a single graph object and write it to
# vc_investments_graph.json in the working directory, indented by 4 spaces.
graph_data = dict(nodes=nodes, links=links)
with open("vc_investments_graph.json", "w") as out_file:
    out_file.write(json.dumps(graph_data, indent=4))