In [1]:
import http.client
import json
import csv

In [2]:
class Graph:
    
    def __init__(self, with_nodes_file=None, with_edges_file=None):
        
        self.nodes = []
        self.edges = []
        if with_nodes_file and with_edges_file:
            nodes_CSV = csv.reader(open(with_nodes_file))
            nodes_CSV = list(nodes_CSV)[1:]
            self.nodes = [(n[0],n[1]) for n in nodes_CSV]

            edges_CSV = csv.reader(open(with_edges_file))
            edges_CSV = list(edges_CSV)[1:]
            self.edges = [(e[0],e[1]) for e in edges_CSV]
            
    def add_node(self, id: str, name: str)->None:
        
        if id not in [i[0] for i in self.nodes]:
             self.nodes.append((id,name))
        return self.nodes
    
    def add_edge(self, source: str, target: str)->None:
        
        if ((source,target) not in [i for i in self.edges]) \
            and ((target,source) not in [i for i in self.edges]):
            
            self.edges.append((source,target))
        
        return self.edges
    
    def total_nodes(self)->int:
        
        return len(self.nodes)
    
    def total_edges(self)->int:

        return len(self.edges)
    
    def max_degree_nodes(self)->dict:
        """
        Return the node(s) with the highest degree
        Return multiple nodes in the event of a tie
        Format is a dict where the key is the node_id and the value is an integer for the node degree
        e.g. {'a': 8}
        or {'a': 22, 'b': 22}
        """
        # make a list of src and target nodes from edges file, and concatenate them
        tot_edges = [i[0] for i in self.edges] + [i[1] for i in self.edges]
        # find the frequency of each node, and then find the nodes with max freq
        unique_edges = list(set(tot_edges))
        freq_lst = []
        for i in unique_edges:
            freq_lst.append(tot_edges.count(i))
        # indices of the max degree nodes
        max_indices = [i for i, x in enumerate(freq_lst) if x == max(freq_lst)]
        max_deg_nodes = [unique_edges[i] for i in max_indices]
        # dict of max_deg nodes and their freq
        max_freq = max(freq_lst)
        max_deg_node_dict = dict.fromkeys(max_deg_nodes, max_freq)
        return max_deg_node_dict
    
    def max_degree_nodes(self)->dict:
        ####establish L1, contiaing all IDs in from the list of edge
        L1=[]
        for n in self.edges:
            L1.append(n[0])
            L1.append(n[1])
            
        #Dict for number of ocuurances of each IDs in edges 
        D1={}
        
        for s in set(L1):
            D1[s]= L1.count(s)
        D2={}
        for i in D1.keys():
            if D1[i] == max(D1.values()):
                D2[i] = max(D1.values())
        
        return D2
   
    def print_nodes(self):

        print(self.nodes)
        
    
    def print_edges(self):

        print(self.edges)
    
      
    def write_edges_file(self, path="edges.csv")->None:
        """
        write all edges out as .csv
        :param path: string
        :return: None
        """
        edges_path = path
        edges_file = open(edges_path, 'w', encoding='utf-8')

        edges_file.write("source" + "," + "target" + "\n")

        for e in self.edges:
            edges_file.write(e[0] + "," + e[1] + "\n")

        edges_file.close()
        print("finished writing edges to csv")



    def write_nodes_file(self, path="nodes.csv")->None:
        """
        write all nodes out as .csv
        :param path: string
        :return: None
        """
        nodes_path = path
        nodes_file = open(nodes_path, 'w', encoding='utf-8')

        nodes_file.write("id,name" + "\n")
        for n in self.nodes:
            nodes_file.write(n[0] + "," + n[1] + "\n")
        nodes_file.close()
        print("finished writing nodes to csv")

In [7]:
class  TMDBAPIUtils:

    # Do not modify
    def __init__(self, api_key:str):
        self.api_key='37f0c4c547c0266b916906fb1fd959a3'
        
    def get_movie_cast(self, movie_id:str, limit:int=None, exclude_ids:list=None) -> list:

        import urllib.request
        import json
        with urllib.request.urlopen('https://api.themoviedb.org/3/movie/'
                                    + movie_id
                                    + '/credits?api_key=' + self.api_key +
                                    '&language=en-US') as response:
            json_data = response.read().decode('utf-8')
        cast_data = json.loads(json_data)['cast']

        cast_lst = []
        for i in cast_data:
            if limit != None:
                if i['order'] in range(limit):
                    cast_lst.append(i)
            else:
                cast_lst.append(i)

        final_cast = []
        for i in cast_lst:
            if exclude_ids != None:
                if i['id'] not in exclude_ids:
                    final_cast.append(i)
            else:
                final_cast.append(i)
        return final_cast
    
    
    def get_movie_credits_for_person(self, person_id:str, vote_avg_threshold:float=None)->list:
        """
        Using the TMDb API, get the movie credits for a person serving in a cast role
        documentation url: https://developers.themoviedb.org/3/people/get-person-movie-credits
        :param string person_id: the id of a person
        :param vote_avg_threshold: optional parameter to return the movie credit if it is >=
            the specified threshold.
            e.g., if the vote_avg_threshold is 5.0, then only return credits with a vote_avg >= 5.0
        :rtype: list
            return a list of dicts, one dict per movie credit with the following structure:
                [{'id': '97909' # the id of the movie credit
                'title': 'Long, Stock and Two Smoking Barrels' # the title (not original title) of the credit
                'vote_avg': 5.0 # the float value of the vote average value for the credit}, ... ]
        """
        import urllib.request
        import json
        with urllib.request.urlopen('https://api.themoviedb.org/3/person/'+
                                    person_id+'/movie_credits?api_key='+
                                    self.api_key+'&language=en-US') as response:
            json_data = response.read()
        credit_data =  json.loads(json_data.decode('utf-8'))['cast']
#         print(credit_data)
#         print("this is credit data")
        credit_lst = []
        for i in credit_data:
            try:
                if i['vote_average'] >= vote_avg_threshold:
                    credit_lst.append(i)
            except:
                print("An exception occurred for:")
                print(i)
                
        return credit_lst

In [8]:
    graph = Graph()
    graph.add_node(id='2975', name='Laurence Fishburne')

[('2975', 'Laurence Fishburne')]

In [9]:
tmdb_api_utils = TMDBAPIUtils(api_key='37f0c4c547c0266b916906fb1fd959a3')

In [13]:
A_movies = tmdb_api_utils.get_movie_credits_for_person('2975', 8.0)
A_movie_id = []

In [14]:
# exclude Fishburne, limit = 3
coact_lst = []
for i in A_movie_id:
    coact_lst.append(tmdb_api_utils.get_movie_cast(i, 3, [2975]))
flat_coact = [item for sublist in coact_lst for item in sublist]
#   |   FOR each movie cast member:
#   |   |   using graph.add_node(), add the movie cast member as a node (keep track of all new nodes added to the graph)
tot_nodes = graph.total_nodes()
counter = 0
for e in flat_coact:
    graph.add_node(str(e['id']), e['name'])
    if graph.total_nodes() > tot_nodes:
        counter += 1
    #   |   |   using graph.add_edge(), add an edge between the Laurence Fishburne (actress) node
    #   |   |   and each new node (co-actor/co-actress)
    graph.add_edge('2975', str(e['id']))

In [15]:
# BEGIN LOOP - DO 2 TIMES:
iteration = 0
while iteration < 3:
    if iteration == 0:
        nodes = graph.nodes[:counter]
        st_counter = 1
        end_counter = graph.total_nodes()
    else:
        nodes = graph.nodes[st_counter: end_counter]
        print(nodes)
        for actor in nodes:
            A_movies = tmdb_api_utils.get_movie_credits_for_person(actor[0], 8.0)
            actor_id = actor[0]
            A_movie_id = []
            for e in A_movies:
                A_movie_id.append(str(e['id']))
            coact_lst = []
            for i in A_movie_id:
                coact_lst.append(tmdb_api_utils.get_movie_cast(i, 3, [int(actor_id)]))
                flat_coact = [item for sublist in coact_lst for item in sublist]
                #   |   FOR each movie cast member:
                #   |   |   using graph.add_node(), add the movie cast member as a node (keep track of all new nodes added to the graph)
                tot_nodes = graph.total_nodes()
                for e in flat_coact:
                    graph.add_node(str(e['id']), e['name'])
                    if graph.total_nodes() > tot_nodes:
                        counter += 1
                    #   |   |   using graph.add_edge(), add an edge between the Laurence Fishburne (actress) node
                    #   |   |   and each new node (co-actor/co-actress)
                    graph.add_edge(str(actor_id), str(e['id']))

        st_counter = end_counter
        end_counter = graph.total_nodes()
    iteration += 1

graph.write_edges_file()
graph.write_nodes_file()

[]
[]
finished writing edges to csv
finished writing nodes to csv


In [None]:
tmdb_api_utils.get_movie_credits_for_person(actor[0], 8.0)

In [None]:
class  TMDBAPIUtils:

    # Do not modify
    def __init__(self, api_key:str):
        self.api_key='37f0c4c547c0266b916906fb1fd959a3'


        
     
        
    def get_movie_cast(self, movie_id:str, limit:int=None, exclude_ids:list=None) -> list:
        """
        Get the movie cast for a given movie id, with optional parameters to exclude an cast member
        from being returned and/or to limit the number of returned cast members
        documentation url: https://developers.themoviedb.org/3/movies/get-movie-credits

        :param integer movie_id: a movie_id
        :param integer limit: maximum number of returned cast members by their 'order' attribute
            e.g., limit=5 will attempt to return the 5 cast members having 'order' attribute values between 0-4
            If after exluding, there are fewer cast members than the specified limit or the limit not specified, return all cast members.
            If cast members with 'order' attribute in the specified limit range have been excluded, do not include more cast members to reach the limit.
            e.g., if limit=5 and the actor whose id corresponds to cast member with order=1 is to be excluded, 
            return cast members with order values [0, 2, 3, 4], not [0, 2, 3, 4, 5]
        :param list exclude_ids: a list of ints containing ids (not cast_ids) of cast members  that should be excluded from the returned result
            e.g., if exclude_ids are [353, 455] then exclude these from any result.
        :rtype: list
            return a list of dicts, one dict per cast member with the following structure:
                [{'cast_id': '97909' # the id of the cast member
                'character': 'John Doe' # the name of the character played
                'credit_id': '52fe4249c3a36847f8012927' # id of the credit}, ... ]
        """
        
        def request(BASE_URL, api_key, end_point):
            conn = http.client.HTTPSConnection(BASE_URL)
            url = end_point.format(movie_id,api_key)
            conn.request("GET", url)
            response = conn.getresponse().read()
            return json.loads(response) 
        
        
        
        
        
        BASE_URL = r"api.themoviedb.org"
        #end_point = r"/3/discover/movie?api_key={}&language=en-US&sort_by=popularity.desc&with_genres=18&primary_release_date.gte=2003-12-31&page={}"
        end_point = r"/3/movie/{}/credits?api_key={}&language=en-US"
        readed= request(BASE_URL, self.api_key, end_point)
        #print(readed)
        #print(type(readed))
        
        
        
        r1=[]
        if limit == None:
            for r in readed['cast']:
                r1.append({'id':r['id'],'character': r['character'], 'credit_id': r['credit_id']  })
        else:
            for r in readed['cast']:
                if r['order'] <limit:
                    r1.append({'id':r['id'],'character': r['character'], 'credit_id': r['credit_id']  })
        
            if len(r1)<limit:
                for r in readed['cast']:
                    if r['order'] >= limit:
                        r1.append({'id':r['id'],'character': r['character'], 'credit_id': r['credit_id']  })

        if exclude_ids != None:
            for r in r1:
                if r['id'] in exclude_ids:
                    r1.remove(r)

        
        
        
        
        return r1        
        

    def get_movie_credits_for_person(self, person_id:str, vote_avg_threshold:float=None)->list:
        """
        Using the TMDb API, get the movie credits for a person serving in a cast role
        documentation url: https://developers.themoviedb.org/3/people/get-person-movie-credits

        :param string person_id: the id of a person
        :param vote_avg_threshold: optional parameter to return the movie credit if it is >=
            the specified threshold.
            e.g., if the vote_avg_threshold is 5.0, then only return credits with a vote_avg >= 5.0
        :rtype: list
            return a list of dicts, one dict per movie credit with the following structure:
                [{'id': '97909' # the id of the movie credit
                'title': 'Long, Stock and Two Smoking Barrels' # the title (not original title) of the credit
                'vote_avg': 5.0 # the float value of the vote average value for the credit}, ... ]
        """
        def request(BASE_URL, api_key, end_point):
            conn = http.client.HTTPSConnection(BASE_URL)
            url = end_point.format(person_id,api_key)
            conn.request("GET", url)
            response = conn.getresponse().read()
            return json.loads(response) 
        
        BASE_URL = r"api.themoviedb.org"
        #end_point = r"/3/discover/movie?api_key={}&language=en-US&sort_by=popularity.desc&with_genres=18&primary_release_date.gte=2003-12-31&page={}"
        #end_point = r"/3/movie/{}/credits?api_key={}&language=en-US"
                     
        end_point = r"/3/person/{}/movie_credits?api_key={}&language=en-US"   
        readed= request(BASE_URL, self.api_key, end_point)
        #print(readed)
        #print(type(readed))
        
        r1=[]
        if vote_avg_threshold == None:
            for r in readed['cast']:
                r1.append({'id':r['id'],'title': r['title'], 'vote_average': r['vote_average']  })
        else:
            for r in readed['cast']:
                if r['vote_average'] >=vote_avg_threshold:
                    r1.append({'id':r['id'],'title': r['title'], 'vote_average': r['vote_average']   })
        


        
        
        
        
        return r1        
               
        
        
        
        return NotImplemented

In [None]:
def request(movie_id,BASE_URL, end_point):
            api_key='37f0c4c547c0266b916906fb1fd959a3'
            conn = http.client.HTTPSConnection(BASE_URL)
            url = end_point.format(movie_id,api_key)
            conn.request("GET", url)
            response = conn.getresponse().read()
            return json.loads(response) 
    
    
    
    
def get_movie_cast_C(movie_id:str, limit:int=None, exclude_ids:list=None) -> list:
   # def request(BASE_URL, end_point):
   #     api_key='37f0c4c547c0266b916906fb1fd959a3'
   #     conn = http.client.HTTPSConnection(BASE_URL)
   #     url = end_point.format(movie_id,api_key)
   #     conn.request("GET", url)
   #     response = conn.getresponse().read()
   #     return json.loads(response) 


    BASE_URL = r"api.themoviedb.org"
    end_point = r"/3/movie/{}/credits?api_key={}&language=en-US"
    #readed= request(BASE_URL, api_key='37f0c4c547c0266b916906fb1fd959a3', end_point)
    readed= request(movie_id,BASE_URL, end_point)
    if 'cast' not in readed.keys():
        return None

    r1=[]
    if limit == None:
        for r in readed['cast']:
            r1.append({'id':r['id'],'character': r['character'], 'credit_id': r['credit_id'] ,'name': r['original_name']  })
    else:
        for r in readed['cast']:
            if r['order'] <limit:
                r1.append({'id':r['id'],'character': r['character'], 'credit_id': r['credit_id'],'name': r['original_name']  })

        #if len(r1)<limit:
        #   for r in readed['cast']:
        #        if r['order'] >= limit:
        #            r1.append({'id':r['id'],'character': r['character'], 'credit_id': r['credit_id'],'original_name': r['original_name']  })

    if exclude_ids != None:
        for r in r1:
            if str(r['id']) in exclude_ids:
                r1.remove(r)
    return r1


In [None]:
graph = Graph()
graph.add_node(id='2975', name='Laurence Fishburne')

In [None]:
tmdb_api_utils = TMDBAPIUtils(api_key='5e42aaa727d7bffb274a399c53213859')

In [None]:
#   Find all of Laurence Fishburne's movie credits that have a vote average >= 8.0
A_movies = tmdb_api_utils.get_movie_credits_for_person('2975', 8.0)

In [None]:
#   FOR each movie credit:
#   |   get the movie cast members having an 'order' value between 0-2 (these are the co-actors)
# get Actor  movie ids
A_movie_id =[]
for e in A_movies:
    A_movie_id.append (str(e['id']))
# exclude Fishburne, limit = 3
coact_lst = []
for i in A_movie_id:
    #coact_lst.append (tmdb_api_utils.get_movie_cast(i, 3, [2975]))
    coact_lst.append (get_movie_cast_C(i, 3, [2975]))
flat_coact = [item for sublist in coact_lst for item in sublist]    
#   |   FOR each movie cast member:
#   |   |   using graph.add_node(), add the movie cast member as a node (keep track of all new nodes added to the graph)
tot_nodes = graph.total_nodes()
counter = 0
for e in flat_coact:
    graph.add_node(str(e['id']), e['name'])
    if graph.total_nodes() > tot_nodes:
        counter += 1
#   |   |   using graph.add_edge(), add an edge between the Laurence Fishburne (actress) node
#   |   |   and each new node (co-actor/co-actress)
    graph.add_edge('2975', str(e['id']))