In [274]:
import csv
import os

import networkx as nx
import pandas as pd

from octopus import OctopusClient

In [275]:
def build_nx_graph(movies_csv_path="../utils/movies.csv"):
    """
    Builds and returns networkx graph for Movies.

    The CSV is read positionally. A row whose first column is non-empty is a
    node; every other row is a relationship. Columns used:

    - 0: node id, 1: node labels (the text after the first ":" is the type)
    - 2: born, 3: name                      (Person nodes)
    - 4: released, 5: tagline, 6: title     (Movie nodes)
    - 7: start node id, 8: end node id, 9: relationship type
    - 10: rating, 11: roles (ACTED_IN) / summary (REVIEWED)

    Nodes of any other type and relationships of any other type are skipped.

    Parameters :
    movies_csv_path : str, optional
        Path of the movies CSV file. Defaults to "../utils/movies.csv".

    Returns :
    networkx graph object
        A networkx graph object of Movies
    """
    def remove_escape_quotes(s):
        # Turn the backslash-escaped quotes (\") found in the file into plain quotes.
        return s.replace('\\"', '"')

    # Relationship types that carry no extra attributes besides their label.
    plain_relationships = ("WROTE", "PRODUCED", "DIRECTED")

    # NOTE: nx.Graph stores a single undirected edge per node pair, so when two
    # relationships connect the same pair (e.g. ACTED_IN and DIRECTED) the later
    # row overwrites the "label" written by the earlier one.
    G = nx.Graph()

    with open(movies_csv_path, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Skip the header row; an empty file yields an empty graph
        for row in reader:
            if not row:
                # csv.reader returns [] for blank lines; there is nothing to add
                continue

            # Check if it's a node or relationship
            if row[0]:  # Node
                node_type = row[1].split(":")[1]  # Extract node type from labels
                if node_type == "Person":
                    G.add_node(row[0], label="Person", born=row[2],
                               name=remove_escape_quotes(row[3]))
                elif node_type == "Movie":
                    G.add_node(row[0], label="Movie", released=row[4],
                               tagline=remove_escape_quotes(row[5]),
                               title=remove_escape_quotes(row[6]))
            else:  # Relationship
                rel_type = row[9]
                if rel_type == "ACTED_IN":
                    G.add_edge(row[7], row[8], label="ACTED_IN",
                               rating=remove_escape_quotes(row[10]),
                               roles=remove_escape_quotes(row[11]))
                elif rel_type in plain_relationships:
                    G.add_edge(row[7], row[8], label=rel_type)
                elif rel_type == "REVIEWED":
                    # NOTE(review): summary is read from column 11, the same column
                    # used for ACTED_IN roles - confirm against the CSV header.
                    G.add_edge(row[7], row[8], label="REVIEWED",
                               rating=remove_escape_quotes(row[10]),
                               summary=remove_escape_quotes(row[11]))

    return G
    

In [276]:
def task_1_nx(nx_graph):
    """
    Using networkx library, recommend new actors for "Tom Hanks".
    Find connections past an immediate neighborhood that are themselves well connected

    Every path Tom Hanks - movie - co-actor - movie - co-co-actor is counted for
    co-co-actors who share no movie with Tom Hanks. The co-co-actor with the
    highest count is printed as the recommendation, followed by the co-actors
    who could introduce them, the movies involved and each introduction path.
    If "Tom Hanks" is not in the graph, or nobody qualifies, a short message is
    printed instead.

    Parameters:
    nx_graph : networkx object
       Movies graph

    Returns :
    None
    """
    def get_node(name):
        # Node ids are opaque, so a person is located through the "name" attribute.
        for node, attrs in nx_graph.nodes(data=True):
            if attrs.get("name") == name:
                return node
        return None

    def get_name(in_node):
        return nx_graph.nodes[in_node].get("name")

    def get_title(in_node):
        return nx_graph.nodes[in_node].get("title")

    def is_movie(node):
        return nx_graph.nodes[node].get("label") == "Movie"

    def cast_of(movie, *excluded):
        # Neighbours joined to `movie` by an ACTED_IN edge, minus `excluded` nodes.
        return [
            person
            for person in nx_graph.neighbors(movie)
            if person not in excluded
            and nx_graph[movie][person].get("label") == "ACTED_IN"
        ]

    def connected(node1, node2):
        # True when node2 has a movie neighbour that is also linked to node1.
        return any(
            is_movie(movie) and nx_graph.has_edge(movie, node1)
            for movie in nx_graph.neighbors(node2)
        )

    th = get_node("Tom Hanks")
    if th is None:
        print("Recommendations: none (\"Tom Hanks\" is not in the graph)")
        return

    # Count, per co-co-actor, the number of paths leading to them.
    recommendations = {}
    for th_movie in nx_graph.neighbors(th):
        for co_actor in cast_of(th_movie, th):
            for co_actor_movie in nx_graph.neighbors(co_actor):
                for co_co_actor in cast_of(co_actor_movie, th, co_actor):
                    # Only keep people who have not worked with Tom Hanks
                    if not connected(th, co_co_actor):
                        recommendations[co_co_actor] = recommendations.get(co_co_actor, 0) + 1

    if not recommendations:
        print("Recommendations: none")
        return

    # Strongest recommendation; on a tie the one discovered first wins.
    candidate = max(recommendations, key=recommendations.get)
    candidate_name = get_name(candidate)

    print("Recommendations:", candidate_name)

    introducers = []
    co_actors = set()
    movies = set()
    # Co-actors of Tom Hanks who also share a movie with the candidate
    for movie1 in nx_graph.neighbors(th):
        for co_actor1 in cast_of(movie1, th):
            for movie2 in nx_graph.neighbors(co_actor1):
                if is_movie(movie2) and nx_graph.has_edge(movie2, candidate):
                    co_actor_name = get_name(co_actor1)
                    title1 = get_title(movie1)
                    title2 = get_title(movie2)
                    co_actors.add(co_actor_name)
                    movies.add(title1)
                    movies.add(title2)
                    introducers.append({
                        "Tom Hanks": "Tom Hanks",
                        "Movie1": title1,
                        "CoActor": co_actor_name,
                        "Movie2": title2,
                        # Label the far end with the actual candidate, not a fixed name
                        candidate_name: candidate_name,
                    })

    print(co_actors)
    print(movies)
    print("Introducers:")
    for introducer in introducers:
        print(introducer)


In [277]:
def task_1_octopus(oc):
    """
    Using Octopus, recommend new actors for "Tom Hanks".
    Find connections past an immediate neighborhood that are themselves well connected

    Two Cypher queries are run through the client. The first ranks co-co-actors
    who never acted with Tom Hanks and prints the top "Recommended" name. The
    second fetches the paths Tom Hanks - movie - co-actor - movie - Tom Cruise
    and prints the first result object.

    Parameters:
    oc : Octopus Client
        Octopus Client Object

    Returns :
    None
    """
    def first_result(cypher):
        # Submit the query, poll the execution, then take the first fetched result.
        # NOTE(review): the meaning of the second execute() argument (1) is not
        # visible here - confirm against the Octopus client.
        job = oc.execute(cypher, 1)
        job.poll()
        return job.output.fetch_all()[0]

    # Find co-co-actors who have not worked with Tom Hanks
    # Return the result ordered in descending order in terms of popularity (connectivity strength)
    recommend_clauses = [
        'MATCH (tom:Person {name:"Tom Hanks"})-[:ACTED_IN]->(m)<-[:ACTED_IN]-(coActors),',
        '(coActors)-[:ACTED_IN]->(m2)<-[:ACTED_IN]-(cocoActors)',
        'WHERE NOT (tom)-[:ACTED_IN]->()<-[:ACTED_IN]-(cocoActors) AND tom <> cocoActors',
        'RETURN cocoActors.name AS Recommended, count(*) AS Strength ORDER BY Strength DESC',
    ]
    # Clauses are separated by a run of 13 spaces in the query text.
    ranking = first_result((" " * 13).join(recommend_clauses)).to_pandas()
    print(ranking.iloc[0]["Recommended"])

    # Find someone who can introduce the candidate actor to Tom Hanks
    # Get actors who co-acted with Tom Hanks and Tom Cruise
    introducer_clauses = [
        'MATCH (tom:Person {name:"Tom Hanks"})-[:ACTED_IN]->(m)<-[:ACTED_IN]-(coActors),',
        '(coActors)-[:ACTED_IN]->(m2)<-[:ACTED_IN]-(cruise:Person {name:"Tom Cruise"})',
        'RETURN tom, m, coActors, m2, cruise',
    ]
    # Clauses are separated by a run of 14 spaces in the query text.
    introducer_paths = first_result((" " * 14).join(introducer_clauses))
    print(introducer_paths)

In [278]:
if __name__ == "__main__":
    # Build Networkx Graph
    nx_graph = build_nx_graph()

    # Instantiate Octopus Client Object
    # Connection settings are taken from the environment when present so that
    # credentials do not have to live in the notebook; the fallbacks are the
    # local development values, so an unconfigured setup behaves as before.
    # NOTE(review): the four arguments are presumably host, port, user and
    # password - confirm against OctopusClient.
    oc = OctopusClient(
        os.environ.get("OCTOPUS_HOST", "localhost"),
        os.environ.get("OCTOPUS_PORT", "8000"),
        os.environ.get("OCTOPUS_USER", "neo4j"),
        os.environ.get("OCTOPUS_PASSWORD", "password"),
    )

    # Task-1 (currently disabled; uncomment to run both implementations)
    #print("Task-1: Recommend new co-actors for Tom Hanks.")
    #print("networkx:")
    #task_1_nx(nx_graph)
    #print("Octopus:")
    #task_1_octopus(oc)
