In [None]:
import csv
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from tqdm import tqdm

def load_data(data_dir="/content/drive/MyDrive"):
    """Read the title, crew and principals TSV files into DataFrames.

    Args:
        data_dir: Directory holding ``title.basics.tsv``, ``title.crew.tsv``
            and ``title.principals.tsv``. Defaults to the Google Drive mount
            path this function previously had hard-coded, so existing
            zero-argument callers are unaffected.

    Returns:
        A tuple ``(title_basics, title_crew, title_principals)``. Each frame
        contains only the columns selected below; cells equal to ``\\N`` are
        loaded as NaN.
    """
    data_dir = Path(data_dir)

    def _read_tsv(filename, columns):
        # Shared reader so all three files are parsed with identical options.
        return pd.read_csv(
            data_dir / filename,
            sep="\t",
            na_values="\\N",
            low_memory=False,
            # Treat quote characters as literal data. With the default
            # quoting, an unbalanced '"' in ANY field of a line (even one
            # not listed in usecols) makes the parser read across the
            # following tabs and newlines, merging or losing rows.
            # NOTE(review): assumes these dumps never quote-escape fields,
            # which matches tab-separated files using "\N" as the null marker.
            quoting=csv.QUOTE_NONE,
            usecols=columns,
        )

    title_basics = _read_tsv(
        "title.basics.tsv",
        ["tconst", "titleType", "startYear", "isAdult"],
    )
    title_crew = _read_tsv(
        "title.crew.tsv",
        ["tconst", "directors", "writers"],
    )
    title_principals = _read_tsv(
        "title.principals.tsv",
        ["tconst", "nconst"],
    )

    return title_basics, title_crew, title_principals

# Module-level graph shared by build_bipartite_graph; it is created once at
# import time and mutated in place by every call to that function.
B = nx.Graph()


def build_bipartite_graph(movies, crew_edges, principal_edges):
    """Populate the module-level graph ``B`` with title and person nodes.

    Every value in ``movies["tconst"]`` is added as a node tagged
    ``bipartite=0``. Each row of ``crew_edges`` is then passed to
    ``add_crew_edges`` and each row of ``principal_edges`` to
    ``add_principal_edges``, which attach the person nodes and edges.
    A tqdm progress bar is shown for each of the two passes.

    Note that the graph is the shared module-level ``B``, not a fresh
    object, so calling this function more than once accumulates nodes and
    edges into the same graph.

    Returns:
        The module-level graph ``B``.
    """
    B.add_nodes_from(movies["tconst"], bipartite=0)

    # (progress-bar label, frame to walk row by row, per-row edge builder)
    passes = (
        ("Processing Crew Data", crew_edges, add_crew_edges),
        ("Processing Principal Data", principal_edges, add_principal_edges),
    )
    for label, frame, attach in passes:
        # tqdm.pandas must be (re)registered before each progress_apply so
        # the bar carries the label for the current pass.
        tqdm.pandas(desc=label)
        frame.progress_apply(lambda record: attach(B, record), axis=1)

    return B

def add_crew_edges(B, row):
    """Link one title to each director and writer listed in ``row``.

    ``row["directors"]`` and ``row["writers"]`` are each either missing
    (NaN/None, in which case that column is skipped) or a comma-separated
    string of person ids. Every id is added to ``B`` as a node tagged
    ``bipartite=1`` and connected to ``row["tconst"]``.
    """
    for role in ("directors", "writers"):
        credited = row[role]
        if pd.isna(credited):
            continue
        for person in credited.split(","):
            B.add_node(person, bipartite=1)
            B.add_edge(row["tconst"], person)

def add_principal_edges(B, row):
    """Link the title in ``row`` to the person in ``row``.

    ``row["nconst"]`` is added to ``B`` as a node tagged ``bipartite=1`` and
    then connected by an edge to ``row["tconst"]``.
    """
    person = row["nconst"]
    B.add_node(person, bipartite=1)
    B.add_edge(row["tconst"], person)

def main():
    """Load the data, build the title/person graph, and print its size.

    Steps: read the three tables via ``load_data``; keep the rows whose
    ``titleType`` is ``"movie"``; inner-join the crew and principals tables
    against those rows; build the graph via ``build_bipartite_graph``; then
    print the total node and edge counts and the number of nodes on each
    side of the bipartition.
    """
    title_basics, title_crew, title_principals = load_data()

    # Restrict the title table to movies.
    is_movie = title_basics["titleType"] == "movie"
    movies = title_basics[is_movie]

    # The inner join drops every crew/principal row whose title is not a
    # movie; afterwards only the columns the graph builder reads are kept.
    crew_columns = ["tconst", "directors", "writers"]
    principal_columns = ["tconst", "nconst"]
    crew_edges = title_crew.merge(movies, on="tconst")[crew_columns]
    principal_edges = title_principals.merge(movies, on="tconst")[principal_columns]

    graph = build_bipartite_graph(movies, crew_edges, principal_edges)

    # Tally both sides of the bipartition in one pass over the node data.
    movies_count = 0
    people_count = 0
    for _, attrs in graph.nodes(data=True):
        side = attrs.get("bipartite")
        if side == 0:
            movies_count += 1
        elif side == 1:
            people_count += 1

    print(
        f"BiPartite Graph:>>>>>>>: {graph.number_of_nodes()} Total nodes, "
        f"{graph.number_of_edges()} Edges"
    )
    print(f"------> Movie Nodes: {movies_count}")
    print(f"------> People Nodes (Directors/Writers/Principals): {people_count}")
    

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
