In [7]:
import pandas as pd
import csv
from queue import Queue
import sys

sys.setrecursionlimit(10000)

In [4]:
df1 = pd.read_csv('degrees/large/movies.csv')
df2 = pd.read_csv('degrees/large/people.csv')
df3 = pd.read_csv('degrees/large/stars.csv')

df1.rename(columns = {'id':'movie_id'}, inplace = True)
df2.rename(columns = {'id':'person_id'}, inplace = True)

df = df3.merge(df1, on='movie_id', how='left').merge(df2, on='person_id', how='left')

df = df.drop(['birth', 'person_id', 'movie_id'], axis = 1)
df = df.sort_index(axis=1)

df.to_csv('large.csv', index=False)

## 1a) DFS

In [10]:
def load_data(filename):
    data = {}
    with open(filename, "r") as file:
        reader = csv.reader(file)
        for row in reader:
            actor = row[0]
            movie = row[1]
            if actor not in data:
                data[actor] = set()
            data[actor].add(movie)
            if movie not in data:
                data[movie] = set()
            data[movie].add(actor)
    return data


def dfs(data, start, end, path=None):
    if path is None:
        path = [start]
    if start == end:
        return path
    for actor in data[start]:
        if actor not in path:
            new_path = dfs(data, actor, end, path + [actor])
            if new_path is not None:
                return new_path
    return None

def dfs(data, start, end, path=None):
    if path is None:
        path = [start]
    if start == end:
        return path
    for actor in data[start]:
        if actor not in path:
            new = dfs(data, actor, end, path + [actor])
            if new is not None:
                return new
    return None
def bfs(data, start, end):
    q = Queue()
    visited = set()
    q.put((start, 0))

    while not q.empty():
        actor, degree = q.get()
        if actor == end:
            return degree
        visited.add(actor)
        for neighbor in data[actor]:
            if neighbor not in visited:
                q.put((neighbor, degree + 1))
    return None

data = load_data("large.csv")
path = dfs(data, "Kevin Bacon", "Tom Hanks")

if path is None:
    print("No path found")
else:
    print(" -> ".join(path))
    print(f"Degree of Separation: {len(path)-1}")

RecursionError: maximum recursion depth exceeded in comparison

## 1b) BFS


In [9]:
def load_data(filename):
    data = {}
    with open(filename, "r") as file:
        reader = csv.reader(file)
        next(reader)
        for row in reader:
            if row[0] not in data:
                data[row[0]] = set()
            data[row[0]].add(row[1])
            if row[1] not in data:
                data[row[1]] = set()
            data[row[1]].add(row[0])
    return data


def bfs(start, end, data):
    visited = set()
    queue = Queue()
    queue.put((start, 0))
    while not queue.empty():
        actor, degree = queue.get()
        if actor == end:
            return degree
        visited.add(actor)
        for neighbor in data[actor]:
            if neighbor not in visited:
                queue.put((neighbor, degree + 1))
    return None


def main():
    data = load_data("large.csv")
    start = "Tom Cruise"
    end = "Tom Hanks"
    degree = bfs(start, end, data)
    print()
    if degree is None:
        print("No connection found")
    else:
        print(f"{start} and {end}: Degree of Separation = {degree} ")


main()


Tom Cruise and Tom Hanks: Degree of Separation = 4 
