# Movies Analysis

In [None]:
%%HTML
<!-- Load RequireJS 2.3.6 from the cdnjs CDN into the notebook page.
     Presumably needed by the neo4jupyter graph rendering at the end of this notebook (TODO confirm). -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.js"></script>

In [None]:
import os

import matplotlib.pyplot as plt
import neo4jupyter
import networkx as nx
import pandas as pd
from neo4j import GraphDatabase
from py2neo import Graph

# Connection settings. The defaults target the `neo4j` host used inside docker;
# set NEO4J_HOST=localhost (and NEO4J_PORT if needed) when running outside of it
# instead of editing this cell.
neo_host = os.environ.get("NEO4J_HOST", "neo4j")
neo_port = int(os.environ.get("NEO4J_PORT", "7687"))
neo_uri = f"bolt://{neo_host}:{neo_port}"

# py2neo handle used to run the Cypher queries of this notebook.
graph = Graph(neo_uri)
# Official neo4j driver pointed at the same bolt endpoint.
driver = GraphDatabase.driver(neo_uri)

## Questions

### Is there a correlation between the rating of a movie and the place where it was shot?


In [None]:
NB_SELECTED = 20

# Average rating per shooting location, keeping the NB_SELECTED best-rated ones.
# Unrated movies are filtered out first: Cypher sorts nulls FIRST under
# ORDER BY ... DESC, so a location whose movies all lack a vote_average would
# otherwise get a null average and occupy one of the top slots with an empty bar.
graph.run("""MATCH (m:Movie)-[:HAS_LOCATION]->(l:Location)
WHERE m.vote_average IS NOT NULL
RETURN l.name AS location, avg(m.vote_average) AS avg_rating, count(m) AS movie_count
ORDER BY avg_rating DESC
LIMIT $nb_selected""", parameters={"nb_selected": NB_SELECTED}).to_data_frame().plot.bar(
    x='location',
    y='avg_rating',
    title=f"Average movie rating per shooting location (top {NB_SELECTED})",
)

### What are the most popular places where movies are shot?

Our second question is to find which locations host the most movie shoots. We'll once again use a bar plot to visualize it:

In [None]:
NB_SELECTED = 10

# Count the movies attached to each location and keep the NB_SELECTED busiest ones.
top_locations_query = """MATCH (m:Movie)-[:HAS_LOCATION]->(l:Location)
RETURN l.name AS location, count(m) AS movie_count
ORDER BY movie_count DESC
LIMIT $nb_selected"""

top_locations = graph.run(
    top_locations_query,
    parameters={"nb_selected": NB_SELECTED},
).to_data_frame()

# One bar per location, bar height = number of movies shot there.
top_locations.plot.bar(x='location', y='movie_count')

### What are the most popular shooting locations among the best-rated movies?

In [None]:
NB_SELECTED = 10

# Same ranking as before, restricted to movies whose vote_average is at least 8.0.
best_rated_cursor = graph.run(
    """MATCH (m:Movie)-[:HAS_LOCATION]->(l:Location)
WHERE m.vote_average >= 8.0
RETURN l.name AS location, count(m) AS movie_count, avg(m.vote_average) AS avg_rating
ORDER BY movie_count DESC
LIMIT $nb_selected""",
    parameters=dict(nb_selected=NB_SELECTED),
)
best_rated_locations = best_rated_cursor.to_data_frame()

# avg_rating is returned by the query but only movie_count is plotted.
best_rated_locations.plot.bar(x='location', y='movie_count')

### What are the most popular shooting locations among the worst-rated movies?

In [None]:
NB_SELECTED = 10

# Locations ranked by how many movies rated 4.0 or lower were shot there.
WORST_RATED_QUERY = """MATCH (m:Movie)-[:HAS_LOCATION]->(l:Location)
WHERE m.vote_average <= 4.0
RETURN l.name AS location, count(m) AS movie_count, avg(m.vote_average) AS avg_rating
ORDER BY movie_count DESC
LIMIT $nb_selected"""

query_params = {"nb_selected": NB_SELECTED}
worst_rated_locations = graph.run(WORST_RATED_QUERY, parameters=query_params).to_data_frame()

# The bar chart shows the movie count per location (avg_rating is not plotted).
worst_rated_locations.plot.bar(x='location', y='movie_count')

### Do places specialize in a specific movie genre?

In [None]:

# Number of movies for every (genre, location) pair.
# The query has no $nb_selected placeholder, so no parameters are passed; this
# also removes the cell's hidden dependency on NB_SELECTED from an earlier cell.
data = graph.run("""MATCH (m:Movie)-[:HAS_LOCATION]->(l:Location)
MATCH (m)-[:HAS_GENRE]->(g:Genre)
WITH g.name AS genre, l.name AS location, COUNT(m) AS movie_count
RETURN genre, location, movie_count
ORDER BY genre, movie_count DESC""").to_data_frame()

# For each genre keep the row with the highest movie_count, i.e. the location
# where that genre is shot most often (idxmax keeps the first row on ties).
result = data.loc[data.groupby('genre')['movie_count'].idxmax()]

# Bare last expression so the notebook renders the frame as a rich table.
result[['genre', 'location', 'movie_count']]

## Additional visualization

NB: You need to restart the kernel and re-run all cells.

In [None]:
# Set up neo4jupyter for inline rendering, then draw part of the graph.
neo4jupyter.init_notebook_mode()

# `options` maps a node label to the property used as that node's caption.
# Location and Genre expose `name` (see the queries above); the Movie caption
# property is assumed to be `title` (TODO confirm against the import script).
neo4jupyter.draw(
    graph,
    options={"Movie": "title", "Location": "name", "Genre": "name"},
    limit=50,
)

# If you want to plot only part of the graph:

#sub_g = graph.run("""
#    MATCH (m:Movie)-[r:HAS_LOCATION]->(l:Location)
#    RETURN *""").to_subgraph()
#neo4jupyter.draw_subgraph(sub_g, options={"Movie": "title", "Location": "name"})
