### Extra Challenge

Complex queries with basic tables as challenge

In [1]:
import pandas as pd
import numpy as np

pd.set_option("display.max_rows",100)

- Get all movies (id, title and year) in which Keanu Reeves AND Laurence Fishburne starred together

```mysql
SELECT
	m.movie_id,
	m.title,
	m.year
FROM
	fm_movies_actors_test m_a
INNER JOIN
	fm_movies_test m
	ON m_a.movie_id = m.movie_id
INNER JOIN 
	fm_actors_test a
	ON m_a.actor_id = a.actor_id
WHERE
	a.name = "Keanu Reeves"
	OR a.name = "Laurence Fishburne"
GROUP BY 1
HAVING COUNT(m.movie_id) = 2
```

In [2]:
# import CSV
actors = pd.read_csv("data/fm_actors.csv")
movies = pd.read_csv("data/fm_movies.csv")
m_a = pd.read_csv("data/fm_movies_actors.csv")


In [3]:
# get movies where Keanu and Laurence acted together

# get dataframe with every movie and actor
ma=(m_a.merge(movies, left_on="movie_id", right_on="movie_id")
       .merge(actors, left_on="actor_id", right_on="actor_id"))

# get every movie where Keanu or Laurence has acted, group by title and store the count (eg in movie title)
ma_kl = (ma.loc[ma["name"].eq("Keanu Reeves")|ma["name"].eq("Laurence Fishburne")]
           .groupby("title")
           .count())

# if they has acted together, the count of the movie must be 2 (assuming and actor can't be casted twice in the same movie)
ma_kl.loc[ma_kl["movie_id"].eq(2)]

Unnamed: 0_level_0,movie_id,actor_id,year,name
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
The Matrix,2,2,2,2


- Get the actor/actress who had the second most appearances during the 90's

```mysql
WITH t AS
(SELECT
	name,
	count(name) AS amount_movies,
	RANK() OVER (ORDER BY COUNT(name) DESC) AS ranking
FROM
	fm_movies_actors_test m_a
INNER JOIN fm_movies_test m
	ON m_a.movie_id = m.movie_id
INNER JOIN fm_actors_test a
	ON m_a.actor_id = a.actor_id
WHERE
	m.year BETWEEN 1990 AND 1999
GROUP BY
	name)
SELECT * FROM t
WHERE ranking = 2
```

In [4]:
# filter "ma" by year, after group the merge of actors and titles by name, using count()
(ma.loc[ma["year"].ge(1990)&ma["year"].le(1999)]
   .groupby("name")
   .count()
   .sort_values(by="movie_id", ascending=False))

Unnamed: 0_level_0,movie_id,actor_id,title,year
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brad Pitt,2,2,2,2
Edward Norton,1,1,1,1
John Travolta,1,1,1,1
Keanu Reeves,1,1,1,1
Laurence Fishburne,1,1,1,1
Morgan Freeman,1,1,1,1
Samuel L. Jackson,1,1,1,1
