In [179]:
import http.client
import json
import csv

class Graph:
    """
    Simple undirected graph kept as two plain lists.

    self.nodes is a list of (id, name) tuples and self.edges is a list of
    (source_id, target_id) tuples.  An edge and its reverse are treated as the
    same edge, so at most one orientation is ever stored by add_edge().
    """

    # Do not modify
    def __init__(self, with_nodes_file=None, with_edges_file=None):
        """
        option 1:  init as an empty graph and add nodes
        option 2: init by specifying a path to nodes & edges files
        """
        self.nodes = []
        self.edges = []
        if with_nodes_file and with_edges_file:
            nodes_CSV = csv.reader(open(with_nodes_file))
            nodes_CSV = list(nodes_CSV)[1:]
            self.nodes = [(n[0],n[1]) for n in nodes_CSV]

            edges_CSV = csv.reader(open(with_edges_file))
            edges_CSV = list(edges_CSV)[1:]
            self.edges = [(e[0],e[1]) for e in edges_CSV]


    def add_node(self, id: str, name: str)->None:
        """
        add a tuple (id, name) representing a node to self.nodes if it does not already exist
        The graph should not contain any duplicate nodes

        Commas in the name are replaced by spaces before the duplicate check so
        that write_nodes_file() never emits an extra CSV column.
        """
        name = name.replace(","," ")
        if (id, name) not in self.nodes:
            self.nodes.append((id, name))


    def add_edge(self, source: str, target: str)->None:
        """
        Add an edge between two nodes if it does not already exist.
        An edge is represented by a tuple containing two strings: e.g.: ('source', 'target').
        Where 'source' is the id of the source node and 'target' is the id of the target node
        e.g., for two nodes with ids 'a' and 'b' respectively, add the tuple ('a', 'b') to self.edges

        The graph is undirected: ('b', 'a') is rejected when ('a', 'b') is already stored.
        """
        if (source, target) not in self.edges and (target, source) not in self.edges:
            self.edges.append((source, target))


    def total_nodes(self)->int:
        """
        Returns an integer value for the total number of nodes in the graph
        """
        return len(self.nodes)


    def total_edges(self)->int:
        """
        Returns an integer value for the total number of edges in the graph
        """
        return len(self.edges)


    def max_degree_nodes(self)->dict:
        """
        Return the node(s) with the highest degree
        Return multiple nodes in the event of a tie
        Format is a dict where the key is the node_id and the value is an integer for the node degree
        e.g. {'a': 8}
        or {'a': 22, 'b': 22}

        Only ids that appear in self.edges are considered.  A graph without any
        edges yields an empty dict instead of raising ValueError.  Each endpoint
        of an edge adds one to that node's degree, so a self-loop ('x', 'x')
        always contributes 2 to 'x'.
        """
        degrees = {}
        for source, target in self.edges:
            # Count both endpoints uniformly; dict.get() covers the first sighting.
            degrees[source] = degrees.get(source, 0) + 1
            degrees[target] = degrees.get(target, 0) + 1

        if not degrees:
            # max() on an empty sequence would raise; no edges means no winner.
            return {}

        max_val = max(degrees.values())
        return {node_id: degree for node_id, degree in degrees.items() if degree == max_val}


    def print_nodes(self):
        """
        No further implementation required
        May be used for de-bugging if necessary
        """
        print(self.nodes)


    def print_edges(self):
        """
        No further implementation required
        May be used for de-bugging if necessary
        """
        print(self.edges)


    # Do not modify
    def write_edges_file(self, path="edges.csv")->None:
        """
        write all edges out as .csv
        :param path: string
        :return: None
        """
        edges_path = path
        edges_file = open(edges_path, 'w')

        edges_file.write("source" + "," + "target" + "\n")

        for e in self.edges:
            edges_file.write(e[0] + "," + e[1] + "\n")

        edges_file.close()
        print("finished writing edges to csv")


    # Do not modify
    def write_nodes_file(self, path="nodes.csv")->None:
        """
        write all nodes out as .csv
        :param path: string
        :return: None
        """
        nodes_path = path
        nodes_file = open(nodes_path, 'w')

        nodes_file.write("id,name" + "\n")
        for n in self.nodes:
            nodes_file.write(n[0] + "," + n[1] + "\n")
        nodes_file.close()
        print("finished writing nodes to csv")



In [180]:
#max degree 
def sum(nodes):
    """
    Scratch version of Graph.max_degree_nodes() operating on a bare edge list.

    NOTE: the name shadows the builtin sum() for the rest of the notebook
    session; it is kept only because later cells call it by this name.

    :param nodes: iterable of (source, target) pairs (despite the name, these are edges)
    :return: dict mapping each highest-degree id to that degree; {} when
        no pairs are given (previously this raised ValueError from max()).
        A self-loop always adds 2 to its node.
    """
    dic = {}
    for source, target in nodes:
        # Each endpoint gains one degree; dict.get() handles first sightings.
        dic[source] = dic.get(source, 0) + 1
        dic[target] = dic.get(target, 0) + 1
    print(dic)
    if not dic:
        return {}
    max_val = max(dic.values())
    return {node_id: degree for node_id, degree in dic.items() if degree == max_val}

In [181]:
# Smoke test of the scratch degree counter: 'c' touches three of the four edges.
sum([('a','b'),('b','c'),('a','c'),('e','c')])

{'a': 2, 'b': 2, 'c': 3, 'e': 1}


{'c': 3}

In [182]:
#function 2

In [183]:
class TMDBAPIUtils:
    """
    Minimal client for the two TMDb v3 endpoints used to build the co-actor
    graph: movie credits and a person's movie credits.
    """

    # Do not modify
    def __init__(self, api_key:str):
        self.api_key=api_key


    def _get_json(self, path: str) -> dict:
        """
        Send a GET request for `path` to api.themoviedb.org and return the
        decoded JSON body.

        HTTPS is used because the API key travels in the query string, and the
        connection is always closed, even when the request or decoding fails.
        """
        connection = http.client.HTTPSConnection('api.themoviedb.org')
        try:
            connection.request('GET', path)
            response = connection.getresponse()
            return json.loads(response.read().decode())
        finally:
            connection.close()


    def get_movie_cast(self, movie_id:str, limit:int=None, exclude_ids:list=None) -> list:
        """
        Get the movie cast for a given movie id, with optional parameters to exclude an cast member
        from being returned and/or to limit the number of returned cast members
        documentation url: https://developers.themoviedb.org/3/movies/get-movie-credits

        :param integer movie_id: a movie_id
        :param integer limit: number of returned cast members by their 'order' attribute
            e.g., limit=5 will attempt to return the 5 cast members having 'order' attribute values between 0-4
            If there are fewer cast members than the specified limit or the limit not specified, return all cast members
        :param list exclude_ids: a list of ints containing ids (not cast_ids) of cast members  that should be excluded from the returned result
            e.g., if exclude_ids are [353, 455] then exclude these from any result.
        :rtype: list
            return a list of dicts, one dict per cast member, exactly as found in the
            'cast' array of the API response (callers in this notebook read 'id',
            'name' and 'order').  An empty list is returned when the response has
            no 'cast' entry, e.g. when the id refers to a collection.
        Important: the exclude_ids processing should occur prior to limiting output.
        """
        data = self._get_json(
            '/3/movie/{}/credits?api_key={}&language=en-US'.format(movie_id, self.api_key))
        # Responses without cast data are treated as "no cast".
        cast = data.get('cast') or []

        # Exclusion first.  Build a new list rather than calling remove() while
        # iterating, which skips the element following each removal.
        if exclude_ids is not None:
            cast = [member for member in cast if member['id'] not in exclude_ids]

        # Then the limit: keep members whose billing order is below `limit`.
        # When fewer members than `limit` remain, all of them are returned.
        if limit is not None and limit <= len(cast):
            return [member for member in cast if member['order'] < limit]
        return cast


    def get_movie_credits_for_person(self, person_id:str, vote_avg_threshold:float=None)->list:
        """
        Using the TMDb API, get the movie credits for a person serving in a cast role
        documentation url: https://developers.themoviedb.org/3/people/get-person-movie-credits

        :param string person_id: the id of a person
        :param vote_avg_threshold: optional parameter to return the movie credit if it is >=
            the specified threshold.
            e.g., if the vote_avg_threshold is 5.0, then only return credits with a vote_avg >= 5.0
        :rtype: list
            return a list of dicts, one dict per movie credit, exactly as found in the
            'cast' array of the API response (the threshold is compared against each
            credit's 'vote_average').  An empty list is returned when the response
            has no 'cast' entry.
        """
        data = self._get_json(
            '/3/person/{}/movie_credits?api_key={}&language=en-US'.format(person_id, self.api_key))
        credits = data.get('cast') or []

        if vote_avg_threshold is None:
            return credits
        return [credit for credit in credits if credit['vote_average'] >= vote_avg_threshold]


In [184]:
#function 2.1

In [185]:
# Inputs for a manual check of TMDBAPIUtils.get_movie_cast().
movie_id=5
# NOTE(review): a live API key is committed here in clear text; prefer reading
# it from the environment and rotating this one.
api_key = '1046609b4211aa3a0803e007440f9f8f'
limit=5
exclude_ids=[3129]

# `re`  : cast limited to order < 5, nothing excluded.
# `re2` : same request with person id 3129 excluded before the limit is applied.
# NOTE(review): `re` is also the name of the stdlib regex module; it is not
# imported in the visible cells, but the name is easy to confuse.
re=TMDBAPIUtils(api_key).get_movie_cast(movie_id,limit)
re2=TMDBAPIUtils(api_key).get_movie_cast(movie_id,limit,exclude_ids)

In [186]:
 # Number of cast members left after excluding id 3129 and applying the limit.
 len(re2)

4

In [199]:
# Show billing order and person id for each member of the unfiltered result.
for i in re:
    print(i['order'])
    print(i['id'])

0
3129
1
3130
2
3131
3
3124
4
2555


In [200]:
# Person ids in the filtered result; the excluded id 3129 should not appear.
for i in re2:
    print(i['id'])


3130
3131
3124
2555


In [None]:
# function 2.2

In [285]:
# Inputs for a manual check of TMDBAPIUtils.get_movie_credits_for_person().
person_id=5064
# NOTE(review): a live API key is committed here in clear text; prefer reading
# it from the environment and rotating this one.
api_key = '1046609b4211aa3a0803e007440f9f8f'
vote_avg_threshold = 8.0
# `da`  : credits with vote_average >= 8.0;  `da2` : all credits (no threshold).
# NOTE(review): the recorded output below this cell (dict_keys / <class 'list'>)
# is not printed by the method as currently written - presumably left over from
# an earlier version with debug prints; re-run to refresh.
da=TMDBAPIUtils(api_key).get_movie_credits_for_person(person_id, vote_avg_threshold)
da2=TMDBAPIUtils(api_key).get_movie_credits_for_person(person_id)

dict_keys(['cast', 'crew', 'id'])
<class 'list'>
dict_keys(['cast', 'crew', 'id'])
<class 'list'>


In [252]:
# Count of credits that passed the vote-average threshold.
# NOTE(review): the recorded output shows two numbers, so it was produced while
# the second print was still active; re-run to refresh.
print(len(da))
#print(len(da2))

12
121


In [254]:
# Movie ids of every credit that passed the vote-average threshold.
id_ls = [credit['id'] for credit in da]


In [None]:
# function 3

In [5]:
#############################################################################################################################
#
# BUILDING YOUR GRAPH
#
# Working with the API:  See use of http.request: https://docs.python.org/3/library/http.client.html#examples
#
# Using TMDb's API, build a co-actor network for the actor's/actress' highest rated movies
# In this graph, each node represents an actor
# An edge between any two nodes indicates that the two actors/actresses acted in a movie together
# i.e., they share a movie credit.
# e.g., An edge between Samuel L. Jackson and Robert Downey Jr. indicates that they have acted in one
# or more movies together.
#
# For this assignment, we are interested in a co-actor network of highly rated movies; specifically,
# we only want the top 3 co-actors in each movie credit of an actor having a vote average >= 8.0.
#
# You will need to add extra functions or code to accomplish this.  We will not directly call or explicitly grade your
# algorithm. We will instead measure the correctness of your output by evaluating the data in your argo-lite graph
# snapshot.
#
# Build your co-actor graph on the actress 'Meryl Streep' w/ person_id 5064.
# Initialize a Graph object with a single node representing Meryl Streep
# Find all of Meryl Streep's movie credits that have a vote average >= 8.0
#
# 1. For each movie credit:
#   get the movie cast members having an 'order' value between 0-2 (these are the co-actors)
#   for each movie cast member:
#       using graph.add_node(), add the movie cast member as a node (keep track of all new nodes added to the graph)
#       using graph.add_edge(), add an edge between the Meryl Streep (actress) node
#       and each new node (co-actor/co-actress)
#
#
# Using the nodes added in the first iteration (this excludes the original node of Meryl Streep!)
#
# 2. For each node (actor / actress) added in the previous iteration:
#   get the movie credits for the actor that have a vote average >= 8.0
#   for each movie credit:
#       try to get the 3 movie cast members having an 'order' value between 0-2
#       for each movie cast member:
#           if the node doesn't already exist:
#               add the node to the graph (track all new nodes added to the graph)
#               if the edge does not exist:
#                   add an edge between the node (actor) and the new node (co-actor/co-actress)
#
#
# - Repeat the steps from # 2. until you have iterated 3 times to build an appropriately sized graph.
# - Your graph should not have any duplicate edges or nodes
# - Write out your finished graph as a nodes file and an edges file using
#   graph.write_edges_file()
#   graph.write_nodes_file()
#
# Exception handling and best practices
# - You should use the param 'language=en-US' in all API calls to avoid encoding issues when writing data to file.
# - If the actor name has a comma char ',' it should be removed to prevent extra columns from being inserted into the .csv file
# - Some movie_credits may actually be collections and do not return cast data. Handle this situation by skipping these instances.
# - While The TMDb API does not have a rate-limiting scheme in place, consider that making hundreds / thousands of calls
#   can occasionally result in timeout errors. It may be necessary to insert periodic sleeps when you are building your graph.


def return_name()->str:
    """
    Report the submitter's GT Username (the login name such as gburdell3,
    never the 9 digit GTId).
    """
    gt_username = 'bli418'
    return gt_username


def return_argo_lite_snapshot()->str:
    """
    Shared URL of the published Argo-Lite graph.

    Still a placeholder: the NotImplemented sentinel is returned until a real
    snapshot URL is filled in.
    """
    snapshot_url = NotImplemented  # TODO: replace with the published URL string
    return snapshot_url


# First draft of the graph-building driver.  It seeds the graph with Meryl
# Streep and performs only step 1 (her direct co-actors).  A later cell defines
# a newer driver that reuses the names `graph`, `tmdb_api_utils` and
# `new_node_step1`.
if __name__ == "__main__":

    graph = Graph()
    graph.add_node(id='5064', name='Meryl Streep') # person id
    # NOTE(review): a live API key is committed here in clear text.
    tmdb_api_utils = TMDBAPIUtils(api_key='1046609b4211aa3a0803e007440f9f8f')
    
    def new_node_step1(person_id:int,vote_avg:float,limit:int,exclude_ids:list):
        """
        For every movie credit of `person_id` with vote average >= `vote_avg`,
        fetch the cast (filtered by `limit` and `exclude_ids`) and add each
        member as a node plus an edge from `person_id` to that member.

        Defined here but not called in this cell; the loop below repeats the
        same steps inline for person 5064.
        """
        # the id in get_movie_credits_for_person() is movie_id
        dat1=tmdb_api_utils.get_movie_credits_for_person(person_id=person_id, vote_avg_threshold=vote_avg)
        if dat1 !=[]:
            for i in dat1:
                # the id in get_movie_cast() is person_id
                dat = tmdb_api_utils.get_movie_cast(movie_id=i['id'], limit=limit, exclude_ids=exclude_ids) 
                for item in dat:
                    graph.add_node(str(item['id']), name=item['name']) 
                    graph.add_edge(source=str(person_id),target=str(item['id']))
     
        
    
    # Step 1, inline: credits rated >= 8.0, top-3 billed cast, Meryl Streep herself excluded.
    dat1=tmdb_api_utils.get_movie_credits_for_person(person_id=5064, vote_avg_threshold=8.0) 
    for i in dat1:
        dat = tmdb_api_utils.get_movie_cast(movie_id=i['id'], limit=3, exclude_ids=[5064]) # the id in get_movie_credits_for_person() is movie_id
        for item in dat:
            # Node ids are stored as strings so they can be written to CSV directly.
            graph.add_node(str(item['id']), name=item['name']) # the id in get_movie_cast() is person_id
            graph.add_edge(source='5064',target=str(item['id']))
            
                        
            
            
#     # 2
    
#     for i in range(4):
#          if graph.nodes[i][0]!='5064':
#                 df1=tmdb_api_utils.get_movie_credits_for_person(person_id=int(graph.nodes[i][0]), vote_avg_threshold=8.0)
#                 for k in df1:
#                     co_actor2=tmdb_api_utils.get_movie_cast(movie_id=k['id'], limit=3, exclude_ids=[int(graph.nodes[i][0])])
#                     for m in co_actor2:
#                         graph.add_node(id=str(m['id']), name=m['name'])
#                         graph.add_edge(source=graph.nodes[i][0],target=str(m['id']))        
        
       
#     ite=0
#     id_list=['5064'] 
#     while ite < 2: 
#         for ids,names in graph.nodes:
#             if ids not in id_list:
#                 df1=tmdb_api_utils.get_movie_credits_for_person(person_id=int(ids), vote_avg_threshold=8.0)
#                 for k in df1:
#                     if k['id'] not in [i for i,name in graph.nodes]:
#                         co_actor2=tmdb_api_utils.get_movie_cast(movie_id=k['id'], limit=3, exclude_ids=[int(ids)])
#                         for m in co_actor2:
#                             graph.add_node(id=str(m['id']), name=m['name'])
#                             graph.add_edge(source=ids,target=str(m['id']))
#                 id_list.append(ids) 
#             else:
#                 pass
                            
#         ite +=1            
  

    # call functions or place code here to build graph (graph building code not graded)

    #graph.write_edges_file()
    #graph.write_nodes_file()

SyntaxError: invalid syntax (<ipython-input-5-8fe8f7d3a652>, line 85)

In [187]:

def return_name()->str:
    """
    Report the submitter's GT Username (the login name such as gburdell3,
    never the 9 digit GTId).
    """
    gt_username = 'bli418'
    return gt_username


def return_argo_lite_snapshot()->str:
    """
    Shared URL of the published Argo-Lite graph.

    Still a placeholder: the NotImplemented sentinel is returned until a real
    snapshot URL is filled in.
    """
    snapshot_url = NotImplemented  # TODO: replace with the published URL string
    return snapshot_url


if __name__ == "__main__":

    graph = Graph()
    graph.add_node(id='5064', name='Meryl Streep') # person id
    # NOTE(review): a live API key is committed here in clear text; prefer
    # reading it from the environment and rotating this one.
    tmdb_api_utils = TMDBAPIUtils(api_key='1046609b4211aa3a0803e007440f9f8f')


    def new_node_step1(person_id:int,vote_avg:float,limit:int,exclude_ids:list=None):
        """
        Expand one actor: add the co-actors from their highly rated credits.

        :param person_id: TMDb person id of the actor being expanded
        :param vote_avg: only credits with vote average >= this value are used
        :param limit: forwarded to get_movie_cast() as its `limit`
        :param exclude_ids: person ids to drop from every cast list; defaults to
            [person_id] so an actor is never linked to themselves.  The argument
            is optional so that both the three-argument calls in
            new_nodes_step2() and the four-argument calls in new_nodes_step3()
            are valid.
        :return: list of person ids (ints) reached from this actor, without
            duplicates, in first-seen order
        """
        if exclude_ids is None:
            exclude_ids = [person_id]

        new_id = []
        # the id in get_movie_credits_for_person() is movie_id
        credits = tmdb_api_utils.get_movie_credits_for_person(person_id=person_id, vote_avg_threshold=vote_avg)
        for credit in credits:
            # the id in get_movie_cast() is person_id
            cast = tmdb_api_utils.get_movie_cast(movie_id=credit['id'], limit=limit, exclude_ids=exclude_ids)
            for member in cast:
                if member['id'] not in new_id:
                    # Graph ids are strings; add_node/add_edge ignore duplicates.
                    graph.add_node(str(member['id']), name=member['name'])
                    graph.add_edge(source=str(person_id), target=str(member['id']))
                    new_id.append(member['id'])

        return new_id


    def _expand_frontier(frontier:list, vote_avg:float, limit:int)->list:
        """Run new_node_step1() on every id in `frontier` and concatenate the results."""
        discovered = []
        for actor_id in frontier:
            discovered += new_node_step1(actor_id, vote_avg, limit)
        return discovered


    def new_nodes_step2(person_id:int,vote_avg:float,limit:int):
        """
        Build the graph from `person_id`: step 1 followed by two further
        expansion rounds.  After each round the size of the new frontier, the
        node count and the current max-degree node(s) are printed.

        `vote_avg` and `limit` are applied to every round (the later rounds
        previously hard-coded 8.0 and 3, which is what the notebook passes).
        """
        frontier = new_node_step1(person_id, vote_avg, limit)
        print(len(frontier))
        print(len(graph.nodes))
        print(graph.max_degree_nodes())
        # rounds "2 iter 1" and "2 iter 2"
        for _ in range(2):
            frontier = _expand_frontier(frontier, vote_avg, limit)
            print(len(frontier))
            print(len(graph.nodes))
            print(graph.max_degree_nodes())


    def new_nodes_step3(person_id:int,vote_avg:float,limit:int,exclude_ids:list):
        """
        Same as new_nodes_step2() but with three expansion rounds after step 1
        and without the max-degree printout.  `exclude_ids` applies to the
        first step only; later rounds exclude just the actor being expanded.
        """
        frontier = new_node_step1(person_id, vote_avg, limit, exclude_ids)
        print(len(frontier))
        print(len(graph.nodes))
        # rounds "2 iter 1" .. "2 iter 3"
        for _ in range(3):
            frontier = _expand_frontier(frontier, vote_avg, limit)
            print(len(frontier))
            print(len(graph.nodes))
        
        
            
            
                
       


    #graph.write_edges_file()
    #graph.write_nodes_file()

In [189]:
# Build the graph: step 1 plus two expansion rounds starting from Meryl Streep
# (person id 5064), credits rated >= 8.0, cast limit 3.
# step 1
#new_id=new_node_step1(5064,8.0,3)
new_nodes_step2(5064,8.0,3)
#new_nodes_step3(5064,8.0,3,[5064])    
    #step 2 
    
#     ls=['5064']
#     for a in range(1):
#         for ids,name in graph.nodes:
#             if ids not in ls:
#                 new_node_step1(int(ids),8.0,3,[int(ids)])
#                 ls.append(ids)

20
21
{'5064': 20}
290
226
{'2231': 36}
2757
1070
{'1032': 40, '116341': 40}


In [127]:
# Persist the finished graph to the default paths edges.csv and nodes.csv.
graph.write_edges_file()
graph.write_nodes_file()

finished writing edges to csv
finished writing nodes to csv


In [165]:
# NOTE(review): `new_id2` is not assigned in any visible cell.  The recorded
# TypeError shows it was None when this ran - presumably the result of a
# function that returns nothing.  Confirm or delete this cell.
len(new_id2)

TypeError: object of type 'NoneType' has no len()

In [166]:
# Final size of the graph: node count, then edge count.
print(graph.total_nodes())
print(graph.total_edges())

1070
1683


In [83]:
#graph.edges

In [190]:
# Highest-degree node id(s) mapped to their degree; ties are all reported.
ma=graph.max_degree_nodes()
ma

{'1032': 40, '116341': 40}

In [318]:
#graph.edges
