# SQL: Join Queries

## Setup

First, we install the packages needed to connect to the MySQL database and issue SQL queries from the notebook.

In [None]:
!sudo pip3 install -U -q PyMySQL sqlalchemy sql_magic

In [None]:
# (Re)load the sql_magic IPython extension used by the SQL cells in this notebook.
%reload_ext sql_magic

In [None]:
from sqlalchemy import create_engine

# Build the SQLAlchemy URL for the MySQL server (PyMySQL driver, utf8 charset).
# No database is named after the host; each section picks one with a USE statement.
# NOTE(review): credentials are hardcoded; presumably a shared read-only student
# account -- confirm before copying this pattern anywhere sensitive.
conn_string = 'mysql+pymysql://{user}:{password}@{host}/?charset=utf8'.format(
    host='db.ipeirotis.org',
    user='student',
    password='dwdstudent2015')
# The former encoding='utf-8' keyword was dropped: the template has no {encoding}
# placeholder, so str.format silently ignored it and the URL is unchanged.
engine = create_engine(conn_string)

In [None]:
# Point sql_magic at the connection object by variable name: the string 'engine'
# refers to the SQLAlchemy engine created in the previous cell.
%config SQL.conn_name = 'engine'

## Poor man's joins: Find the genres that Steven Spielberg typically directs

Use the table `directors_genres` to find all film genres, and the corresponding probabilities, for the director ID that corresponds to Steven Spielberg. Sort the results by probability.


#### Switch to IMDb

In [None]:
%%read_sql
/* Make imdb the default database for the queries that follow. */
USE imdb

#### Find the entry for Steven Spielberg to find his id

In [None]:
%%read_sql
/* Look up the directors row for Steven Spielberg in order to read off his id. */
SELECT *
FROM directors AS dir
WHERE dir.last_name = 'Spielberg'
  AND dir.first_name = 'Steven'

#### Query the `directors_genres` table for Spielberg movies (pick director id value from previous query)

In [None]:
%%read_sql
/* Genre rows for director id 75380, the value picked from the previous query,
   listed with the highest probability first. */
SELECT *
FROM directors_genres AS dg
WHERE dg.director_id = 75380
ORDER BY dg.prob DESC

## Simple Join Queries

#### Switch to IMDb

In [None]:
%%read_sql
/* Select imdb as the default database for the join queries in this section. */
USE imdb

#### List all the movies and their genres

In [None]:
%%read_sql
/* Pair every movie with its rows in movies_genres, matching on the movie id. */
SELECT *
FROM movies AS mov
  JOIN movies_genres AS mg
    ON mov.id = mg.movie_id


#### List the movie genres for the movies directed by Steven Spielberg and sort them in decreasing order of their probability (use the `directors_genres` table)



In [None]:
%%read_sql
/* Join directors to directors_genres, keep only Steven Spielberg, and return
   every column with the highest genre probability first. */
SELECT *
FROM directors AS dir
  JOIN directors_genres AS dg
    ON dir.id = dg.director_id
WHERE dir.last_name = 'Spielberg'
  AND dir.first_name = 'Steven'
ORDER BY dg.prob DESC

In [None]:
%%read_sql
/* Same join and filter as the previous cell, but projecting only the genre and
   its probability. */
SELECT dg.genre, dg.prob
FROM directors AS dir
  JOIN directors_genres AS dg
    ON dir.id = dg.director_id
WHERE dir.last_name = 'Spielberg'
  AND dir.first_name = 'Steven'
ORDER BY dg.prob DESC

#### List all the movies and their directors

In [None]:
%%read_sql
/* Three-table join: directors to movies through the movies_directors link table. */
SELECT *
FROM directors AS dir
  JOIN movies_directors AS md
    ON dir.id = md.director_id
  JOIN movies AS mov
    ON md.movie_id = mov.id

In [None]:
%%read_sql
/* Same three-table join as the previous cell, with an explicit column list and
   descriptive output names instead of every column. */
SELECT
  md.director_id,
  dir.first_name AS director_first_name,
  dir.last_name AS director_last_name,
  md.movie_id,
  mov.name AS movie_title,
  mov.year AS release_year,
  mov.rating AS movie_rating
FROM directors AS dir
  JOIN movies_directors AS md
    ON dir.id = md.director_id
  JOIN movies AS mov
    ON md.movie_id = mov.id

#### List all the movies directed by Steven Spielberg

In [None]:
%%read_sql
/* Movies joined to their directors, restricted to Steven Spielberg and ordered
   from the highest rating down. */
SELECT
  md.director_id,
  dir.first_name AS director_first_name,
  dir.last_name AS director_last_name,
  md.movie_id,
  mov.name AS movie_title,
  mov.year AS release_year,
  mov.rating AS movie_rating
FROM directors AS dir
  JOIN movies_directors AS md
    ON dir.id = md.director_id
  JOIN movies AS mov
    ON md.movie_id = mov.id
WHERE dir.last_name = 'Spielberg'
  AND dir.first_name = 'Steven'
ORDER BY mov.rating DESC

## Join Practice Queries: Drama movies from 2000


#### Switch to IMDb

In [None]:
%%read_sql
/* Select imdb as the default database for the Drama practice queries. */
USE imdb

#### List all the movies from year 2000



In [None]:
%%read_sql
/* Every movies row whose year is 2000. */
SELECT *
FROM movies AS mov
WHERE mov.year = 2000

#### List all the movies from year 2000 and their genres

In [None]:
%%read_sql
/* Movies from 2000 paired with their movies_genres rows. */
SELECT *
FROM movies AS mov
  JOIN movies_genres AS mg
    ON mov.id = mg.movie_id
WHERE mov.year = 2000

#### List all the Drama movies from year 2000

In [None]:
%%read_sql
/* Movies from 2000 that have a Drama row in movies_genres. */
SELECT *
FROM movies AS mov
  JOIN movies_genres AS mg
    ON mov.id = mg.movie_id
WHERE mg.genre = 'Drama'
  AND mov.year = 2000

#### List all the Drama movies from year 2000 with ratings

In [None]:
%%read_sql
/* Drama movies from 2000, keeping only those whose rating is not NULL. */
SELECT *
FROM movies AS mov
  JOIN movies_genres AS mg
    ON mov.id = mg.movie_id
WHERE mg.genre = 'Drama'
  AND mov.year = 2000
  AND mov.rating IS NOT NULL

#### List the top-50 Drama movies from year 2000, based on the ratings 


In [None]:
%%read_sql
/* The 50 rated Drama movies from 2000 that sort first when ordered by rating,
   highest first. */
SELECT *
FROM movies AS mov
  JOIN movies_genres AS mg
    ON mov.id = mg.movie_id
WHERE mg.genre = 'Drama'
  AND mov.year = 2000
  AND mov.rating IS NOT NULL
ORDER BY mov.rating DESC
LIMIT 50

## Join Practice Queries: James Bond movies

#### Switch to IMDb

In [None]:
%%read_sql
/* Select imdb as the default database for the James Bond practice queries. */
USE imdb

#### List all the movies where there is an actor with the role ‘James Bond’


In [None]:
%%read_sql
/* Movies joined to roles, keeping the rows whose role is exactly James Bond. */
SELECT *
FROM movies AS mov
  JOIN roles AS rol
    ON mov.id = rol.movie_id
WHERE rol.role = 'James Bond'

#### List the actors who played ‘James Bond’

In [None]:
%%read_sql
/* Actors joined to roles, keeping the rows whose role is exactly James Bond. */
SELECT *
FROM actors AS act
  JOIN roles AS rol
    ON act.id = rol.actor_id
WHERE rol.role = 'James Bond'

#### List the actors who played ‘James Bond’ and the name of the movie

In [None]:
%%read_sql
/* Actors, their James Bond roles, and the movie each role belongs to. */
SELECT *
FROM actors AS act
  JOIN roles AS rol
    ON act.id = rol.actor_id
  JOIN movies AS mov
    ON rol.movie_id = mov.id
WHERE rol.role = 'James Bond'

#### List the actors who played ‘James Bond’ and the name of the movie. Rank the result by rating

In [None]:
%%read_sql
/* James Bond actors with their movies, highest movie rating first. */
SELECT *
FROM actors AS act
  JOIN roles AS rol
    ON act.id = rol.actor_id
  JOIN movies AS mov
    ON rol.movie_id = mov.id
WHERE rol.role = 'James Bond'
ORDER BY mov.rating DESC

#### List the actors who played ‘James Bond’ and the name of the movie. Rank the result by year

In [None]:
%%read_sql
/* James Bond actors with their movies, in ascending order of movie year. */
SELECT *
FROM actors AS act
  JOIN roles AS rol
    ON act.id = rol.actor_id
  JOIN movies AS mov
    ON rol.movie_id = mov.id
WHERE rol.role = 'James Bond'
ORDER BY mov.year

## JOIN practice queries: Brad Pitt movies

#### List all the movies where Brad Pitt is playing

In [None]:
%%read_sql
/* All roles played by the actor named Brad Pitt, with the movie for each role. */
SELECT *
FROM actors AS act
  JOIN roles AS rol
    ON act.id = rol.actor_id
  JOIN movies AS mov
    ON rol.movie_id = mov.id
WHERE act.last_name = 'Pitt'
  AND act.first_name = 'Brad'


#### List all the movies where Brad Pitt is playing. Exclude the movies where he plays “himself”



In [None]:
%%read_sql
/* Brad Pitt roles and movies, dropping any role whose text contains himself.
   The wildcard characters in the pattern are doubled, as in the original cell. */
SELECT *
FROM actors AS act
  JOIN roles AS rol
    ON act.id = rol.actor_id
  JOIN movies AS mov
    ON rol.movie_id = mov.id
WHERE act.last_name = 'Pitt'
  AND act.first_name = 'Brad'
  AND rol.role NOT LIKE '%%himself%%'

#### List all the movies where Brad Pitt is playing. Exclude the movies where he plays “himself”. Rank the result by movie rating.

In [None]:
%%read_sql
/* Brad Pitt roles and movies, excluding roles that contain himself, ordered
   from the highest movie rating down. */
SELECT *
FROM actors AS act
  JOIN roles AS rol
    ON act.id = rol.actor_id
  JOIN movies AS mov
    ON rol.movie_id = mov.id
WHERE act.last_name = 'Pitt'
  AND act.first_name = 'Brad'
  AND rol.role NOT LIKE '%%himself%%'
ORDER BY mov.rating DESC

#### List all the movies where Brad Pitt is playing. Exclude the movies where he plays “himself”. Rank the result by year.

In [None]:
%%read_sql
/* Brad Pitt roles and movies, excluding roles that contain himself, in
   ascending order of movie year. */
SELECT *
FROM actors AS act
  JOIN roles AS rol
    ON act.id = rol.actor_id
  JOIN movies AS mov
    ON rol.movie_id = mov.id
WHERE act.last_name = 'Pitt'
  AND act.first_name = 'Brad'
  AND rol.role NOT LIKE '%%himself%%'
ORDER BY mov.year

## JOIN Practice Queries: Facebook

#### Switch to Facebook

In [None]:
%%read_sql
/* Switch the default database to facebook for the queries in this section. */
USE facebook;

#### List all the Single students



In [None]:
%%read_sql
/* Profiles joined to Relationship on ProfileID, keeping rows with Status Single. */
SELECT *
FROM Profiles AS prof
  JOIN Relationship AS rel
    ON prof.ProfileID = rel.ProfileID
WHERE rel.Status = 'Single'

#### List all the Single students who live in Palladium (as declared in the “Residence” attribute). Allow for flexible matching of the “Residence”, as people list Palladium in different ways (e.g., “Palladium 101” vs “Palladium”)






In [None]:
%%read_sql
/* Single students whose Residence starts with Palladium, so that entries with
   extra text after the name, such as a room number, also match. */
SELECT *
FROM Profiles AS prof
  JOIN Relationship AS rel
    ON prof.ProfileID = rel.ProfileID
WHERE prof.Residence LIKE 'Palladium%%'
  AND rel.Status = 'Single'

#### List all the Single students LookingFor “random play”. List their AIM and their gender in the results.



In [None]:
%%read_sql
/* AIM and Sex of Single students who have a LookingFor row equal to Random Play.
   Both Relationship and LookingFor are joined to Profiles on ProfileID. */
SELECT prof.AIM, prof.Sex
FROM Profiles AS prof
  JOIN Relationship AS rel
    ON prof.ProfileID = rel.ProfileID
  JOIN LookingFor AS looking
    ON prof.ProfileID = looking.ProfileID
WHERE looking.LookingFor = 'Random Play'
  AND rel.Status = 'Single'

#### List all the students who have “The Killers” as favorite Music




In [None]:
%%read_sql
/* Profile columns only, for students with a FavoriteMusic row equal to The Killers. */
SELECT prof.*
FROM Profiles AS prof
  JOIN FavoriteMusic AS fav
    ON prof.ProfileID = fav.ProfileID
WHERE fav.Music = 'The Killers'

#### List all the Finance students who like the book “1984”

In [None]:
%%read_sql
/* Profile columns for students who list 1984 as a favorite book and Finance as
   a concentration. The join to Concentration supplies the Finance restriction
   that the heading asks for and that the query previously lacked.
   NOTE(review): assumes the major is stored as the exact string Finance in
   Concentration.Concentration, to be confirmed against the data. */
SELECT P.*
FROM Profiles P
  INNER JOIN FavoriteBooks B ON B.ProfileID = P.ProfileID
  INNER JOIN Concentration C ON C.ProfileID = P.ProfileID
WHERE B.Book = '1984' AND C.Concentration = 'Finance'

## Self Joins

#### Switch to IMDb

In [None]:
%%read_sql
/* Switch the default database back to imdb for the first self-join example. */
USE imdb;  

#### List the movies in the database that have both drama and comedy listed among their genres

In [None]:
%%read_sql
/* movies_genres is joined to movies twice under two aliases: one alias is
   pinned to Drama and the other to Comedy, so a movie appears only when it has
   both genre rows. */
SELECT *
FROM movies AS mov
  JOIN movies_genres AS drama
    ON mov.id = drama.movie_id
  JOIN movies_genres AS comedy
    ON mov.id = comedy.movie_id
WHERE drama.genre = 'Drama'
  AND comedy.genre = 'Comedy'

#### Switch to Facebook

In [None]:
%%read_sql
/* Switch the default database to facebook for the Concentration self-join. */
USE facebook;  

#### List the Profile IDs for students majoring in computer science and another concentration (Concentration table); show the second concentration as well


In [None]:
%%read_sql
/* Self-join of Concentration on ProfileID: the first alias is pinned to
   Computer Science and the second carries any other concentration of the same
   profile. */
SELECT *
FROM Concentration AS cs
  JOIN Concentration AS other
    ON cs.ProfileID = other.ProfileID
WHERE cs.Concentration = 'Computer Science'
  AND other.Concentration != 'Computer Science'

## Outer Joins

#### Switch to IMDb

In [None]:
%%read_sql 
/* Select imdb as the default database for the outer-join examples. */
USE imdb;

#### List all the movies without actors

In [None]:
%%read_sql
/* The left join keeps every movie. Movies with no matching roles row get NULL
   in all roles columns, and the WHERE clause keeps exactly those movies, so the
   roles columns in the output are all NULL. */
SELECT mov.*, rol.*
FROM movies AS mov
  LEFT OUTER JOIN roles AS rol
    ON rol.movie_id = mov.id
WHERE rol.movie_id IS NULL

#### List all the movies without an associated genre

In [None]:
%%read_sql
/* Left join to movies_genres and keep only the movies for which no genre row
   matched. Only the movies columns are returned. */
SELECT mov.*
FROM movies AS mov
  LEFT OUTER JOIN movies_genres AS mg
    ON mg.movie_id = mov.id
WHERE mg.movie_id IS NULL

#### List all the Students that have not listed a Concentration

In [None]:
%%read_sql 
/* Switch the default database to facebook for the Concentration outer join. */
USE facebook;

In [None]:
%%read_sql
/* Left join Profiles to Concentration and keep the profiles with no matching
   Concentration row. The Concentration columns in the output are all NULL. */
SELECT prof.*, conc.*
FROM Profiles AS prof
  LEFT OUTER JOIN Concentration AS conc
    ON conc.ProfileID = prof.ProfileID
WHERE conc.ProfileID IS NULL