In [1]:
import pandas as pd
from collections import defaultdict
import queue 
import time

In [2]:
class ActorNetwork:
    """ Undirected co-star graph: two actors are linked when they share a movie cast """

    def __init__(self):
        """ Creates an empty actor network object """
        self.costars = defaultdict(set) # actor(string) to acted with (set<string>)
        self.movies  = defaultdict(set) # actor(string) to movies starred in (set<string>)

    def getNumActors(self):
        """ Number of actors (nodes) currently stored in the network """
        return len(self.costars)

    def getNumConnections(self):
        """ Find the number of edges in a given actor network """
        # Each edge is recorded once per endpoint, so the summed degree counts it twice.
        # Integer division keeps the result an exact int.
        return sum(len(neighbours) for neighbours in self.costars.values()) // 2

    def loadDataSet(self, pthToFile):
        """ Load movies from the text file at pthToFile.

        Every line is split on single spaces: the first token is the movie and the
        remaining tokens are its actors.
        """
        with open(pthToFile, 'r') as file:
            for line in file:
                fullLn = line.strip().split(" ")
                movie, actors = fullLn[0], set(fullLn[1:])
                self.addMovie(movie, actors)

    def addMovie(self, movie, actors):
        """ Add a movie (string) and its actors (any iterable of strings) to the network.

        The caller's collection is left untouched. Every actor is linked to every
        other actor of the same cast and the movie is recorded for each of them.
        An empty cast is a no-op.
        """
        # Work on a private copy: the cast passed in must not be consumed or altered.
        cast = set(actors)
        for actor in cast:
            # An actor is never their own co-star.
            self.costars[actor].update(cast - {actor})
            self.movies[actor].add(movie)

In [3]:
!pwd
#!ls

/Users/friana/GraphAlgo


In [4]:
# NOTE(review): `!` runs the command in a throwaway subshell, so this `cd` does not
# move the kernel's working directory (the `!pwd` output is the same before and
# after this cell). Use the `%cd` magic if the change is meant to persist.
!cd ../..

In [5]:
!pwd

/Users/friana/GraphAlgo


In [6]:
# Load the names table; the file is tab-separated and the path is resolved
# relative to the kernel's current working directory.
df = pd.read_csv("data/name.basics.tsv", sep = "\t")

FileNotFoundError: [Errno 2] File b'name.basics.tsv' does not exist: b'name.basics.tsv'

In [None]:
df.columns

In [None]:
df[["primaryName","knownForTitles"]]

In [None]:
subDf = df[:100]

In [None]:
# movieDict <movieTitle> -> <set<actors>>
# Keys are the comma-separated entries of "knownForTitles"; values are the
# "primaryName"s of every row that lists that entry.
movieDict = defaultdict(set)
for actor, knownFor in zip(subDf["primaryName"], subDf["knownForTitles"]):
    # Empty cells are read by pandas as NaN (a float), which has no .split();
    # rows without a usable name or title list are skipped instead of crashing.
    if not isinstance(actor, str) or not isinstance(knownFor, str):
        continue
    for title in knownFor.split(","):
        # Skip empty entries and the "\N" missing-value placeholder (presumably the
        # IMDb TSV convention -- TODO confirm) so they do not become a fake movie
        # that links unrelated actors together.
        if title and title != "\\N":
            movieDict[title].add(actor)

In [None]:
#movieDict

## Goal: Take the DataFrame that maps actors to movies and create either a dictionary or a DataFrame that maps movie titles to actors

In [None]:
from collections import defaultdict
import queue
class ActorNetwork:
    """ Undirected co-star graph: two actors are linked when they share a movie cast """

    def __init__(self):
        """ Creates an empty actor network object """
        self.costars = defaultdict(set) # actor(string) to acted with (set<string>)
        self.movies  = defaultdict(set) # actor(string) to movies starred in (set<string>)

    def getNumActors(self):
        """ Number of actors (nodes) currently stored in the network """
        return len(self.costars)

    def getNumConnections(self):
        """ Find the number of edges in a given actor network """
        # Each edge is recorded once per endpoint, so the summed degree counts it twice.
        # Integer division keeps the result an exact int.
        return sum(len(neighbours) for neighbours in self.costars.values()) // 2

    def loadDataSet(self, pthToFile):
        """ Load movies from the text file at pthToFile.

        Every line is split on single spaces: the first token is the movie and the
        remaining tokens are its actors.
        """
        with open(pthToFile, 'r') as file:
            for line in file:
                fullLn = line.strip().split(" ")
                movie, actors = fullLn[0], set(fullLn[1:])
                self.addMovie(movie, actors)

    def addMovie(self, movie, actors):
        """ Add a movie (string) and its actors (any iterable of strings) to the network.

        The caller's collection is left untouched. Every actor is linked to every
        other actor of the same cast and the movie is recorded for each of them.
        An empty cast is a no-op.
        """
        # Work on a private copy: the cast passed in must not be consumed or altered.
        cast = set(actors)
        for actor in cast:
            # An actor is never their own co-star.
            self.costars[actor].update(cast - {actor})
            self.movies[actor].add(movie)

    def visualize_network(self):
        """ Visualize the actor network using NetworkX and Matplotlib.

        Relies on the notebook-level names `nx` (networkx) and `plt`
        (matplotlib.pyplot) being imported before this method is called.
        """
        G = nx.Graph()

        # Add nodes (actors)
        for actor in self.costars:
            G.add_node(actor)

        # Add edges (costar relationships)
        for actor, costars in self.costars.items():
            for costar in costars:
                G.add_edge(actor, costar)

        # Create a circular layout for visualization
        pos = nx.circular_layout(G)

        # Draw the graph
        plt.figure(figsize=(10, 8))
        nx.draw(G, pos, with_labels=True, node_color='lightblue',
                node_size=500, font_size=8, font_weight='bold',
                edge_color='gray')

        # Add title and display
        plt.title("Actor Network Visualization")
        plt.show()

    def BFS(self, actor):
        """ Breadth-first search over the co-star graph starting at `actor`.

        Returns a dict that maps every actor in the network to the actor it was
        first reached from. The start actor and every actor that cannot be reached
        map to None. If `actor` is not in the network, all entries are None and the
        network is left unmodified.
        """
        parent = {act: None for act in self.costars}
        if actor not in self.costars:
            # Indexing the defaultdict with an unknown name would silently create a node.
            return parent

        visited = {actor}
        Q = queue.SimpleQueue()
        Q.put(actor)
        while not Q.empty():
            u = Q.get()
            for v in self.costars[u]:
                if v not in visited:
                    # Mark the neighbour at discovery time so it is enqueued once and
                    # keeps the parent that lies on a shortest path.
                    visited.add(v)
                    parent[v] = u
                    Q.put(v)
        return parent

    def getShortestPath(self, actor1, actor2):
        """ Find a shortest co-star chain between actor1 and actor2.

        Returns (actors, movies): `actors` runs from actor2 back to actor1 and
        `movies[i]` is a movie shared by actors[i] and actors[i + 1]. When either
        actor is unknown, or the two are not connected, a message is printed and
        ([], []) is returned. On success the elapsed search time in seconds is printed.
        """
        startT = time.time()
        for name in (actor1, actor2):
            if name not in self.costars:
                print(name + " is not in the database")
                return [], []

        p = self.BFS(actor1)
        # The start actor has no parent either, so only treat a missing parent as
        # "unreachable" when the two actors differ.
        if actor1 != actor2 and p[actor2] is None:
            print(actor2 + " is not in the same network")
            return [], []

        curract = actor2
        movie = []
        actor = [actor2]
        while curract != actor1:
            s = p[curract]
            actor.append(s)
            # Adjacent actors share at least one movie; any one of them labels the hop.
            shared = self.movies[s].intersection(self.movies[curract])
            movie.append(next(iter(shared)))
            curract = s
        endT = time.time()
        print(endT-startT)
        return actor, movie
            
            
        


In [None]:
# Build a small network by hand (replace with loadDataSet for file input)
network = ActorNetwork()
sample_casts = [
    ("Movie1", {"Actor1", "Actor2"}),
    ("Movie2", {"Actor2", "Actor4"}),
    ("Movie3", {"Actor3", "Actor5"}),
]
for sample_movie, sample_cast in sample_casts:
    network.addMovie(sample_movie, sample_cast)

# Visualize the network
#network.visualize_network()
# Last expression of the cell: displays the BFS parent map rooted at "Actor1"
network.BFS("Actor1")



In [None]:
network.getShortestPath("Actor4","Actor1")

In [None]:
ac = ActorNetwork()

In [None]:
# Feed every title's cast into the network. A copy of each set is passed because
# addMovie may consume the set it is handed, which would leave movieDict's values
# empty after this cell runs.
for movie, actors in movieDict.items():
    ac.addMovie(movie, set(actors))

In [None]:
ac.getNumActors()

In [None]:
def fib(n):
    """Return the n-th Fibonacci number, counting from fib(1) == fib(2) == 1.

    Any n smaller than 3 yields 1.
    """
    older, newer = 1, 1
    index = 3
    # For n < 3 the loop body never runs and the seed value 1 is returned as-is.
    while index <= n:
        older, newer = newer, older + newer
        index += 1
    return newer
    
        
        


In [None]:
print(fib(17))

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
ac.visualize_network()

In [None]:
import queue 

def BFS(s, neighbors=None, size=100):
    """Breadth-first search from vertex `s` over integer-labelled vertices.

    Parameters:
        s: index of the start vertex (0 <= s < size).
        neighbors: callable returning the vertices adjacent to a given vertex.
            When omitted, the notebook-level callable `adjlist` is used
            (it must be defined before calling).
        size: number of vertices; vertex labels must lie in range(size).

    Returns:
        A list `parent` of length `size` where parent[v] is the vertex from which
        v was first reached; the start vertex and unreachable vertices hold None.
    """
    if neighbors is None:
        neighbors = adjlist

    parent = [None] * size
    visited = [False] * size
    visited[s] = True

    Q = queue.SimpleQueue()
    Q.put(s)
    # SimpleQueue has no len(); empty() is the supported emptiness check.
    while not Q.empty():
        u = Q.get()
        for v in neighbors(u):
            if not visited[v]:
                # Mark the neighbour when it is discovered so it is enqueued only once.
                visited[v] = True
                parent[v] = u
                Q.put(v)
    return parent
                

In [None]:
# FIFO queue demo: enqueue 1, 2, 3, show the contents, remove one item, show again.
Q=queue.Queue()
Q.put(1)
Q.put(2)
Q.put(3)
# Q.queue is read here only to peek at the pending items without removing them.
print(list(Q.queue))
# get() removes and returns the item that was put first.
Q.get()
print(list(Q.queue))

In [None]:
# Remove one more item from the Q built in the previous cell and show what is left.
Q.get()
print(list(Q.queue))