In [1]:
import csv
import sys

from util import Node, StackFrontier, QueueFrontier

# Maps names to a set of corresponding person_ids
names = {}

# Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
people = {}

# Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
movies = {}


In [2]:

def load_data(directory):
    """
    Load data from CSV files into memory.
    """
    # Load people
    with open(f"{directory}/people.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set()
            }
            if row["name"].lower() not in names:
                names[row["name"].lower()] = {row["id"]}
            else:
                names[row["name"].lower()].add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set()
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            try:
                people[row["person_id"]]["movies"].add(row["movie_id"])
                movies[row["movie_id"]]["stars"].add(row["person_id"])
            except KeyError:
                pass



In [9]:

def main():
    #if len(sys.argv) > 2:
    #    sys.exit("Usage: python degrees.py [directory]")
    #directory = sys.argv[1] if len(sys.argv) == 2 else "large"
    directory = "small"

    # Load data from files into memory
    print("Loading data...")
    load_data(directory)
    print("Data loaded.")

    source = person_id_for_name(input("Name: "))
    if source is None:
        sys.exit("Person not found.")
    target = person_id_for_name(input("Name: "))
    if target is None:
        sys.exit("Person not found.")

    path = shortest_path(source, target)

    if path is None:
        print("Not connected.")
    else:
        degrees = len(path)
        print(f"{degrees} degrees of separation.")
        path = [(None, source)] + path
        for i in range(degrees):
            person1 = people[path[i][1]]["name"]
            person2 = people[path[i + 1][1]]["name"]
            movie = movies[path[i + 1][0]]["title"]
            print(f"{i + 1}: {person1} and {person2} starred in {movie}")



In [5]:

def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.
    """
    person_ids = list(names.get(name.lower(), set()))
    if len(person_ids) == 0:
        return None
    elif len(person_ids) > 1:
        print(f"Which '{name}'?")
        for person_id in person_ids:
            person = people[person_id]
            name = person["name"]
            birth = person["birth"]
            print(f"ID: {person_id}, Name: {name}, Birth: {birth}")
        try:
            person_id = input("Intended Person ID: ")
            if person_id in person_ids:
                return person_id
        except ValueError:
            pass
        return None
    else:
        return person_ids[0]


In [6]:

def neighbors_for_person(person_id):
    """
    Returns (movie_id, person_id) pairs for people
    who starred with a given person.
    """
    movie_ids = people[person_id]["movies"]
    neighbors = set()
    for movie_id in movie_ids:
        for person_id in movies[movie_id]["stars"]:
            neighbors.add((movie_id, person_id))
    return neighbors


In [133]:

def check_target_in_cast(target, movie):
    print (f"Checking if {people[target]['name']} is part of the cast of {movies[movie]['title']} ({movie})")
    if target in movies[movie]["stars"]:
        return True
    return False

def shortest_path(source, target):
    """
    Returns the shortest list of (movie_id, person_id) pairs
    that connect the source to the target.

    If no possible path, returns None.
    """
    
    """
    # BEST PERFORMANCE FOR NON-AI ALGORITHM:
    print (f"{people[source]['name']} ({source})")
    print (f"{people[target]['name']} ({target})")
    
    # find pairs of movies an actor performed in:
    neighbors_of_source = neighbors_for_person(source)
    print (f"Neighbors of {source}: {neighbors_of_source}")

    cycle_count    = 1
    link_found     = False
    checked_movies = []
    checked_actors = []
    path_followed  = []
    actors_processed=[]
    last_actor     = 0
    
    for movie, actor in neighbors_of_source:   
        if actor in checked_actors: continue
        print (f"Processing actor {people[actor]['name']} ({actor})")
        checked_actors.append(actor)
        for a_movie in people[actor]["movies"]:
            if a_movie in checked_movies: continue
            checked_movies.append(a_movie)
            if actor not in actors_processed: 
                path_followed.append((a_movie, actor))
                actors_processed.append(actor)
            if check_target_in_cast(target, a_movie):
                path_followed.append((a_movie, target))
                link_found = True
                break
            cycle_count +=1
        if link_found: break

    if link_found:
        print (f"Link between {source} and {target} found on movie {a_movie} after {cycle_count} cycles")
    else:
        print (f"It took me {cycle_count} cycles to find that {source} and {target} are not connected")
    print (path_followed)
    return path_followed

    # TODO
    #raise NotImplementedError
    """


In [136]:

#if __name__ == "__main__":
#    main()

main()
    

Loading data...
Data loaded.


Name:  tom cruise
Name:  gary sinise


Tom Cruise (129)
Gary Sinise (641)
Neighbors of 129: {('104257', '129'), ('95953', '163'), ('95953', '596520'), ('104257', '193'), ('95953', '129'), ('95953', '420'), ('104257', '102'), ('104257', '197')}
Processing actor Tom Cruise (129)
Checking if Gary Sinise is part of the cast of A Few Good Men (104257)
Checking if Gary Sinise is part of the cast of Rain Man (95953)
Processing actor Dustin Hoffman (163)
Processing actor Gerald R. Molen (596520)
Processing actor Demi Moore (193)
Processing actor Valeria Golino (420)
Processing actor Kevin Bacon (102)
Checking if Gary Sinise is part of the cast of Apollo 13 (112384)
Link between 129 and 641 found on movie 112384 after 3 cycles
[('104257', '129'), ('112384', '102'), ('112384', '641')]
3 degrees of separation.
1: Tom Cruise and Tom Cruise starred in A Few Good Men
2: Tom Cruise and Kevin Bacon starred in Apollo 13
3: Kevin Bacon and Gary Sinise starred in Apollo 13
