In [45]:
import pandas as pd

# Load the three source tables. The `id` columns of movies and people are
# renamed on load so they line up with the join keys used against stars.
df1 = pd.read_csv("degrees/small/movies.csv").rename(columns={"id": "movie_id"})
df2 = pd.read_csv("degrees/small/people.csv").rename(columns={"id": "person_id"})
df3 = pd.read_csv("degrees/small/stars.csv")

# Start from the credits table and left-join the movie and person details,
# then discard the join keys and the birth column and order the remaining
# columns alphabetically (the CSV readers below index columns by position).
df = (
    df3.merge(df1, on="movie_id", how="left")
    .merge(df2, on="person_id", how="left")
    .drop(columns=["birth", "person_id", "movie_id"])
    .sort_index(axis=1)
)

# Persist the flattened table without the pandas row index.
df.to_csv("small.csv", index=False)

In [46]:
import csv

def load_data(filename):
    """Load an actor/movie CSV into an undirected adjacency map.

    The first line of the file is treated as a header and skipped. For every
    remaining row, the first field (actor) and the second field (movie) are
    linked in both directions; any further fields are ignored.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        dict mapping each actor name and each movie title to the set of
        names it is linked to. Actors and movies share one key space, so an
        actor and a movie with the same text end up as a single node.
    """
    data = {}
    # newline="" is what the csv module requires for correct quoted-newline
    # handling; utf-8 is the encoding pandas' to_csv writes by default, so do
    # not fall back to the platform locale encoding.
    with open(filename, "r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        # Skip the header row so column names do not become graph nodes.
        # The default keeps an empty file from raising StopIteration.
        next(reader, None)
        for row in reader:
            # Blank lines come back as [] from csv.reader; ignore anything
            # that cannot supply both an actor and a movie.
            if len(row) < 2:
                continue
            actor, movie = row[0], row[1]
            data.setdefault(actor, set()).add(movie)
            data.setdefault(movie, set()).add(actor)
    return data

# Find a path between two nodes using depth-first search.
# NOTE: depth-first search returns the first path it discovers, which is
# generally NOT the shortest one; use a breadth-first search for that.

def dfs(data, start, end, path=None):
    """Return some path from ``start`` to ``end``, or None if none exists.

    The search is iterative, so long paths cannot exhaust Python's recursion
    limit, and every node is expanded at most once, so a failed search is
    linear in the size of the graph instead of enumerating every simple path.

    Args:
        data: Adjacency map; ``data[node]`` is an iterable of neighbours.
        start: Node to search from.
        end: Node to reach.
        path: Optional list of nodes already walked, ending at ``start``.
            Nodes in it are never revisited and it prefixes the result.
            Defaults to ``[start]``.

    Returns:
        A list of nodes beginning with ``path`` and ending at ``end``, or
        None when ``end`` cannot be reached or ``start`` is not in ``data``.
    """
    if path is None:
        path = [start]
    if start == end:
        return path
    if start not in data:
        return None

    visited = set(path)
    visited.add(start)
    trail = list(path)
    # One neighbour iterator per node on the current trail; exhausting an
    # iterator means that node is a dead end and we backtrack.
    stack = [iter(data[start])]
    while stack:
        for neighbor in stack[-1]:
            if neighbor in visited:
                continue
            visited.add(neighbor)
            trail.append(neighbor)
            if neighbor == end:
                return trail
            stack.append(iter(data.get(neighbor, ())))
            break
        else:
            stack.pop()
            trail.pop()
    return None

# Build the actor/movie graph from the merged small dataset
data = load_data("small.csv")

# Depth-first search for a path from Robin Wright to Bill Paxton.
# DFS reports the first path it finds, which is not guaranteed to be the shortest.
path = dfs(data, "Robin Wright", "Bill Paxton")

# Show the path and its number of hops, or say that there is none
if path is not None:
    print(" -> ".join(path), f"Degree of Separation->{len(path)-1}", sep="\n")
else:
    print("No path found")

Robin Wright -> Forrest Gump -> Gary Sinise -> Apollo 13 -> Bill Paxton
Degree of Separation->4


In [47]:
import csv
import sys

from queue import Queue

# Function to load data from the CSV file
def load_data(filename):
    """Read a two-column relationship CSV into an undirected adjacency map.

    The first line is treated as a header and skipped. For every other row
    the first and second fields are linked to each other in both directions;
    additional fields are ignored.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        dict mapping every value seen in either column to the set of values
        it shares a row with. Both columns share one key space.
    """
    data = {}
    # newline="" is required by the csv module for correct quoted-newline
    # handling; utf-8 matches what pandas' to_csv writes by default instead
    # of depending on the platform locale.
    with open(filename, "r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        # A default avoids StopIteration when the file is completely empty.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines; skip rows that cannot
            # provide both endpoints rather than failing with IndexError.
            if len(row) < 2:
                continue
            first, second = row[0], row[1]
            data.setdefault(first, set()).add(second)
            data.setdefault(second, set()).add(first)
    return data

# Function to find the length of the shortest path between two nodes using BFS
def bfs(start, end, data):
    """Return the number of edges on a shortest path from start to end.

    Nodes are marked as visited when they are first discovered, so each node
    is expanded at most once. Marking them only when dequeued lets the same
    node be queued and expanded repeatedly, which blows up on dense graphs.

    Args:
        start: Node to search from.
        end: Node to reach.
        data: Adjacency map; ``data[node]`` is an iterable of neighbours.

    Returns:
        The edge count of a shortest path (0 when ``start == end``), or None
        when ``end`` is unreachable or ``start`` is not in ``data``. On the
        actor/movie map produced by ``load_data`` in this file, two co-stars
        are two edges apart (actor -> movie -> actor).
    """
    if start == end:
        return 0
    if start not in data:
        return None

    visited = {start}
    frontier = [start]
    degree = 0
    # Expand the graph one distance level at a time.
    while frontier:
        degree += 1
        next_frontier = []
        for node in frontier:
            for neighbor in data.get(node, ()):
                if neighbor in visited:
                    continue
                if neighbor == end:
                    return degree
                visited.add(neighbor)
                next_frontier.append(neighbor)
        frontier = next_frontier
    return None

# Entry point for the breadth-first search demo on the small dataset
def main():
    """Load small.csv, run bfs between two fixed actors and print the result."""
    graph = load_data("small.csv")
    start, end = "Bill Paxton", "Robin Wright"
    degree = bfs(start, end, graph)

    # Blank line first, then exactly one result line.
    print()
    message = (
        "No connection found"
        if degree is None
        else f"{start} and {end}: Degree of Separation = {degree} "
    )
    print(message)


if __name__ == "__main__":
    main()


Bill Paxton and Robin Wright: Degree of Separation = 4 


In [48]:
import pandas as pd

# Load the three source tables. The `id` columns of movies and people are
# renamed on load so they line up with the join keys used against stars.
df1 = pd.read_csv("degrees/large/movies.csv").rename(columns={"id": "movie_id"})
df2 = pd.read_csv("degrees/large/people.csv").rename(columns={"id": "person_id"})
df3 = pd.read_csv("degrees/large/stars.csv")

# Start from the credits table and left-join the movie and person details,
# then discard the join keys and the birth column and order the remaining
# columns alphabetically (the CSV readers below index columns by position).
df = (
    df3.merge(df1, on="movie_id", how="left")
    .merge(df2, on="person_id", how="left")
    .drop(columns=["birth", "person_id", "movie_id"])
    .sort_index(axis=1)
)

# Persist the flattened table without the pandas row index.
df.to_csv("large.csv", index=False)

In [49]:
import csv


def load_data(filename):
    """Load an actor/movie CSV into an undirected adjacency map.

    The first line of the file is treated as a header and skipped. For every
    remaining row, the first field (actor) and the second field (movie) are
    linked in both directions; any further fields are ignored.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        dict mapping each actor name and each movie title to the set of
        names it is linked to. Actors and movies share one key space, so an
        actor and a movie with the same text end up as a single node.
    """
    data = {}
    # newline="" is what the csv module requires for correct quoted-newline
    # handling; utf-8 is the encoding pandas' to_csv writes by default, so do
    # not fall back to the platform locale encoding.
    with open(filename, "r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        # Skip the header row so column names do not become graph nodes.
        # The default keeps an empty file from raising StopIteration.
        next(reader, None)
        for row in reader:
            # Blank lines come back as [] from csv.reader; ignore anything
            # that cannot supply both an actor and a movie.
            if len(row) < 2:
                continue
            actor, movie = row[0], row[1]
            data.setdefault(actor, set()).add(movie)
            data.setdefault(movie, set()).add(actor)
    return data


def dfs(data, start, end, path=None):
    """Return some path from ``start`` to ``end``, or None if none exists.

    Depth-first search returns the first path it discovers, which is
    generally not the shortest one. The search is iterative, so long paths
    cannot exhaust Python's recursion limit, and every node is expanded at
    most once, so a failed search is linear in the size of the graph instead
    of enumerating every simple path.

    Args:
        data: Adjacency map; ``data[node]`` is an iterable of neighbours.
        start: Node to search from.
        end: Node to reach.
        path: Optional list of nodes already walked, ending at ``start``.
            Nodes in it are never revisited and it prefixes the result.
            Defaults to ``[start]``.

    Returns:
        A list of nodes beginning with ``path`` and ending at ``end``, or
        None when ``end`` cannot be reached or ``start`` is not in ``data``.
    """
    if path is None:
        path = [start]
    if start == end:
        return path
    if start not in data:
        return None

    visited = set(path)
    visited.add(start)
    trail = list(path)
    # One neighbour iterator per node on the current trail; exhausting an
    # iterator means that node is a dead end and we backtrack.
    stack = [iter(data[start])]
    while stack:
        for neighbor in stack[-1]:
            if neighbor in visited:
                continue
            visited.add(neighbor)
            trail.append(neighbor)
            if neighbor == end:
                return trail
            stack.append(iter(data.get(neighbor, ())))
            break
        else:
            stack.pop()
            trail.pop()
    return None


# Build the actor/movie graph from the merged large dataset
data = load_data("large.csv")

# Depth-first search from Kevin Bacon to Tom Hanks. DFS reports the first
# path it finds, so the hop count printed below is not a shortest distance.
path = dfs(data, "Kevin Bacon", "Tom Hanks")

if path is not None:
    print(" -> ".join(path), f"Degree of Separation->{len(path)-1}", sep="\n")
else:
    print("No path found")

Degree of Separation->29698


In [43]:
import csv

from queue import Queue


def load_data(filename):
    """Read a two-column relationship CSV into an undirected adjacency map.

    The first line is treated as a header and skipped. For every other row
    the first and second fields are linked to each other in both directions;
    additional fields are ignored.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        dict mapping every value seen in either column to the set of values
        it shares a row with. Both columns share one key space.
    """
    data = {}
    # newline="" is required by the csv module for correct quoted-newline
    # handling; utf-8 matches what pandas' to_csv writes by default instead
    # of depending on the platform locale.
    with open(filename, "r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        # A default avoids StopIteration when the file is completely empty.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines; skip rows that cannot
            # provide both endpoints rather than failing with IndexError.
            if len(row) < 2:
                continue
            first, second = row[0], row[1]
            data.setdefault(first, set()).add(second)
            data.setdefault(second, set()).add(first)
    return data


def bfs(start, end, data):
    """Return the number of edges on a shortest path from start to end.

    Nodes are marked as visited when they are first discovered, so each node
    is expanded at most once. Marking them only when dequeued lets the same
    node be queued and expanded repeatedly, which blows up on dense graphs.

    Args:
        start: Node to search from.
        end: Node to reach.
        data: Adjacency map; ``data[node]`` is an iterable of neighbours.

    Returns:
        The edge count of a shortest path (0 when ``start == end``), or None
        when ``end`` is unreachable or ``start`` is not in ``data``. On the
        actor/movie map produced by ``load_data`` in this file, two co-stars
        are two edges apart (actor -> movie -> actor).
    """
    if start == end:
        return 0
    if start not in data:
        return None

    visited = {start}
    frontier = [start]
    degree = 0
    # Expand the graph one distance level at a time.
    while frontier:
        degree += 1
        next_frontier = []
        for node in frontier:
            for neighbor in data.get(node, ()):
                if neighbor in visited:
                    continue
                if neighbor == end:
                    return degree
                visited.add(neighbor)
                next_frontier.append(neighbor)
        frontier = next_frontier
    return None


def main():
    """Load large.csv, run bfs between two fixed actors and print the result."""
    graph = load_data("large.csv")
    start, end = "Kevin Bacon", "Tom Hanks"
    degree = bfs(start, end, graph)

    # Blank line first, then exactly one result line.
    print()
    message = (
        "No connection found"
        if degree is None
        else f"{start} and {end}: Degree of Separation = {degree} "
    )
    print(message)


# Run the demo as soon as the cell executes.
main()


Kevin Bacon and Tom Hanks: Degree of Separation = 2 
