## SQL Queries

To be able to execute SQL queries from within a Jupyter notebook, we will use the `sql_magic` extension (https://github.com/pivotal/sql_magic)

    sudo pip3 install -U sql_magic

In [1]:
from sqlalchemy import create_engine

In [2]:
conn_string = 'mysql://{user}:{password}@{host}/?charset=utf8'.format(
    host = 'db.ipeirotis.org', 
    user = 'student',
    password = 'dwdstudent2015',
    encoding = 'utf-8')
engine = create_engine(conn_string)

In [3]:
%reload_ext sql_magic

In [4]:
%config SQL.conn_name = 'engine'

In [5]:
%%read_sql
show databases

Query started at 08:10:00 PM UTC; Query executed in 0.01 m

Unnamed: 0,Database
0,information_schema
1,bike_sharing
2,citibike
3,citibike_fall2017
4,facebook
5,imdb
6,music
7,nypd_complaints
8,public


In [6]:
%%read_sql
use imdb

Query started at 08:10:00 PM UTC; Query executed in 0.00 m

<sql_magic.exceptions.EmptyResult at 0x7f61034711d0>

In [7]:
%%read_sql
show tables

Query started at 08:10:00 PM UTC; Query executed in 0.00 m

Unnamed: 0,Tables_in_imdb
0,actors
1,directors
2,directors_genres
3,movies
4,movies_directors
5,movies_genres
6,roles


In [8]:
%%read_sql
describe actors

Query started at 08:10:00 PM UTC; Query executed in 0.00 m

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,id,int(11),NO,PRI,0.0,
1,first_name,varchar(100),YES,MUL,,
2,last_name,varchar(100),YES,MUL,,
3,gender,char(1),YES,,,


In [9]:
%%read_sql
describe roles

Query started at 08:10:00 PM UTC; Query executed in 0.00 m

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,actor_id,int(11),YES,MUL,,
1,movie_id,int(11),YES,MUL,,
2,role,varchar(100),YES,MUL,,


### SELECT Queries (IMDB database)

In [10]:
%%read_sql
USE imdb

Query started at 08:10:01 PM UTC; Query executed in 0.00 m

<sql_magic.exceptions.EmptyResult at 0x7f6103464d68>

#### Find all movie titles with id less than 100.

In [11]:
%%read_sql
SELECT * 
FROM movies 
WHERE id<100

Query started at 08:10:01 PM UTC; Query executed in 0.00 m

Unnamed: 0,id,name,year,rank
0,0,#28,2002,
1,1,"#7 Train: An Immigrant Journey, The",2000,
2,2,$,1971,6.4
3,3,"$1,000 Reward",1913,
4,4,"$1,000 Reward",1915,
...,...,...,...,...
91,95,'Cross the Mexican Line,1914,
92,96,'Den vide verden's billedbog nr. 1,1938,
93,97,'Deutschland' Leaving New York at Full Speed (...,1902,
94,98,"'Diamond S' Ranch, The",1912,


#### Find all information about movies that were released before 1895 (excl)

In [12]:
%%read_sql
SELECT *
FROM movies 
WHERE year<1895

Query started at 08:10:01 PM UTC; Query executed in 0.01 m

Unnamed: 0,id,name,year,rank
0,13261,"Amateur Gymnast, No. 2",1894,
1,17623,Annabelle Butterfly Dance,1894,5.3
2,17627,Annabelle Serpentine Dance,1894,6.1
3,17628,Annabelle Sun Dance,1894,
4,17770,Annie Oakley,1894,4.9
...,...,...,...,...
76,363640,Whirlwind Gun Spinning,1894,
77,364791,"Widder, The",1894,
78,368839,Wrestling,1892,2.0
79,368852,"Wrestling Dog, The",1894,


#### Find all information about movies that were released before 1895 and after 2006 (inclusive)

In [13]:
%%read_sql
SELECT *
FROM movies 
WHERE year<=1895 OR year>=2006

Query started at 08:10:01 PM UTC; Query executed in 0.00 m

Unnamed: 0,id,name,year,rank
0,899,10th & Wolf,2006,
1,1799,2176,2006,
2,2238,300,2006,
3,9086,Airborn,2006,
4,9495,Akrobatisches Potpourri,1895,5.2
...,...,...,...,...
351,369459,X-Men 3,2006,
352,372568,Young Griffo v. Battling Charles Barnett,1895,
353,374889,Zhmurki,2006,
354,400649,"""Pacific War, The""",2006,


#### Find all information about movies released between 1895 and 1898 (excl)

In [14]:
%%read_sql
SELECT *
FROM movies 
WHERE year>1895 AND year<1898

Query started at 08:10:01 PM UTC; Query executed in 0.00 m

Unnamed: 0,id,name,year,rank
0,56,"'Amphitrite', The",1897,
1,70,"'Brennus', Le",1897,
2,247,"'Standard' Picture Animated, A",1897,
3,1076,"13th Infantry, U.S. Army Marching Through Sall...",1897,
4,1077,"13th Infantry, U.S. Army, in Heavy Marching Or...",1897,
...,...,...,...,...
1093,372727,"Young Rivals, The",1897,
1094,375815,"Zoo van Antwerpen, De",1897,
1095,376589,"Z Pereira na Romaria de Santo Tirso, O",1896,
1096,377666,"cole des gendres, L'",1897,


#### Find all information about *actresses* who are have first name Skyler

In [15]:
%%read_sql
SELECT *
FROM actors
WHERE first_name = 'Skyler' AND gender = 'F';

Query started at 08:10:02 PM UTC; Query executed in 0.00 m

Unnamed: 0,id,first_name,last_name,gender
0,556133,Skyler,Blankenship,F
1,570689,Skyler,Campbell,F
2,595403,Skyler,Day,F
3,773680,Skyler,Rose,F
4,776752,Skyler,Russell,F
5,789243,Skyler,Shaye,F


#### Find the director ID of Steven Spielberg

In [16]:
%%read_sql
SELECT id
FROM directors
WHERE first_name = 'Steven' AND last_name = 'Spielberg';

Query started at 08:10:02 PM UTC; Query executed in 0.00 m

Unnamed: 0,id
0,75380


#### Find the director IDs and the first and last names of directors with the last name Spielberg and Hitchcock

In [17]:
%%read_sql
SELECT id, first_name, last_name
FROM directors
WHERE last_name = 'Spielberg' OR last_name = 'Hitchcock';

Query started at 08:10:02 PM UTC; Query executed in 0.00 m

Unnamed: 0,id,first_name,last_name
0,34658,Alfred (I),Hitchcock
1,34659,Bill (II),Hitchcock
2,34660,Charles Webster,Hitchcock
3,34661,Peter,Hitchcock
4,34662,Victress,Hitchcock
5,75380,Steven,Spielberg


#### Find all genres of films and the corresponding probabilities for the director ID that corresponds to Steven Spielberg. Sort the results by probability.

In [18]:
%%read_sql
SELECT genre, prob
FROM directors_genres
WHERE director_id = '75380' # the id of Spielberg FROM above
ORDER BY prob DESC

Query started at 08:10:02 PM UTC; Query executed in 0.00 m

Unnamed: 0,genre,prob
0,Drama,0.435897
1,Thriller,0.282051
2,Adventure,0.282051
3,Sci-Fi,0.25641
4,Action,0.230769
5,Horror,0.179487
6,Short,0.128205
7,Fantasy,0.102564
8,War,0.102564
9,Crime,0.102564


#### Find the id of the movie Schindler's List.

In [19]:
%%read_sql
SELECT *
FROM movies
WHERE name = "Schindler's List"

Query started at 08:10:02 PM UTC; Query executed in 0.00 m

Unnamed: 0,id,name,year,rank
0,290070,Schindler's List,1993,8.8


#### List all the roles for the movie with id 290070. Sort them alphabetically

In [20]:
%%read_sql
SELECT *
FROM roles
WHERE movie_id = '290070'

Query started at 08:10:02 PM UTC; Query executed in 0.00 m

Unnamed: 0,actor_id,movie_id,role
0,14688,290070,Auschwitz guard
1,18959,290070,Klaus Tauber (Gestapo clerk)
2,19022,290070,Julius Madritsch
3,21072,290070,Chaim Nowak
4,31611,290070,Himself/Schindler mourner
...,...,...,...
141,806842,290070,Ghetto woman
142,828128,290070,Brinnlitz girl
143,839849,290070,Clara Sternberg
144,841110,290070,Plaszow Jewish girl


### SELECT Queries (Facebook database)

In [21]:
%%read_sql
USE facebook

Query started at 08:10:02 PM UTC; Query executed in 0.00 m

<sql_magic.exceptions.EmptyResult at 0x7f6103437860>

#### Find all names of students FROM the Profiles table

In [22]:
%%read_sql
SELECT name
FROM Profiles

Query started at 08:10:02 PM UTC; Query executed in 0.00 m

Unnamed: 0,name
0,The Creator
1,Brian Whitton
2,Anita Nagwani
3,Sunny Kim
4,Ariel Podwal
...,...
25779,Jen Slovin
25780,Kumi Dikengil
25781,Sky Gaven
25782,Eli Brown


#### Get the names and sex of all liberal students

In [23]:
%%read_sql
SELECT name, sex
FROM Profiles
WHERE PoliticalViews ='Liberal'

Query started at 08:10:03 PM UTC; Query executed in 0.00 m

Unnamed: 0,name,sex
0,Stephanie Lai,Female
1,Josh Fern,Male
2,Blaine Davis,Male
3,Sacha Kenton,Female
4,Kelly Quann,Female
...,...,...
6456,Maggie Winterfeldt,
6457,Sonia Avila,
6458,Kat Redniss,Female
6459,Haby Barry,Female


#### Get the High Schools of the students in the database

In [24]:
%%read_sql
SELECT HighSchool
FROM Profiles

Query started at 08:10:03 PM UTC; Query executed in 0.00 m

Unnamed: 0,HighSchool
0,
1,George W.Hewlett High School
2,Ward Melville Senior High School '02
3,West Windsor Plainsboro High School South '03
4,Harrison High School '03
...,...
25779,Worcester Academy '04
25780,
25781,
25782,


#### Find all the possible political views, eliminating duplicate entries

In [25]:
%%read_sql
SELECT DISTINCT PoliticalViews
FROM Profiles

Query started at 08:10:03 PM UTC; Query executed in 0.00 m

Unnamed: 0,PoliticalViews
0,
1,Libertarian
2,Conservative
3,Liberal
4,Apathetic
5,Moderate
6,Very Liberal
7,Very Conservative
8,Other


#### Find all possible relationship statuses

In [26]:
%%read_sql
SELECT DISTINCT Status
FROM Relationship

Query started at 08:10:03 PM UTC; Query executed in 0.00 m

Unnamed: 0,Status
0,Single
1,In a Relationship
2,Married
3,In an Open Relationship
4,It's complicated
5,Engaged


#### Find all possible values for the “status” attribute in Profiles 

In [27]:
%%read_sql
SELECT DISTINCT Status
FROM Profiles

Query started at 08:10:03 PM UTC; Query executed in 0.00 m

Unnamed: 0,Status
0,Undergrad
1,Alumnus/Alumna
2,Faculty
3,Grad Student
4,
5,Staff
6,Summer Student


#### Find all possible values for the “Residence” attribute in Profiles, eliminating duplicates

In [28]:
%%read_sql
SELECT DISTINCT Residence
FROM Profiles

Query started at 08:10:03 PM UTC; Query executed in 0.00 m

Unnamed: 0,Residence
0,
1,Thirteenth Street 5AN
2,Palladium 524D
3,Carlyle Court
4,Palladium 524F
...,...
5084,Cliff Street 17F
5085,Lafayette Street 1203B
5086,Weinstein Hall 423
5087,Broome Street 706A


#### Find all students living in Palladium

In [29]:
%%read_sql
SELECT Name, Residence
FROM Profiles
WHERE Residence LIKE 'Palladium%%'

Query started at 08:10:03 PM UTC; Query executed in 0.00 m

Unnamed: 0,Name,Residence
0,Ariel Podwal,Palladium 524D
1,Josh Fern,Palladium 524F
2,Santosh Sateesh,Palladium 603
3,Holly Hamilton,Palladium
4,Yuriy Prilutskiy,Palladium 1114
...,...,...
776,David Laubach,Palladium
777,Michael Chang,Palladium 2302
778,Hiro Fukuda,Palladium
779,Elizabeth Demas,Palladium 827B


#### Find all students who attended Stuyvesant

In [30]:
%%read_sql
SELECT *
FROM Profiles
WHERE HighSchool LIKE 'Stuyvesant%%'

Query started at 08:10:03 PM UTC; Query executed in 0.00 m

Unnamed: 0,ProfileID,Name,MemberSince,LastUpdate,School,Status,Sex,Birthday,AIM,Website,PoliticalViews,Geography,HighSchool,HomeTown,HomeState,Residence,CurrentAddress,CurrentTown,CurrentState
0,800073,Jueyu Wang,2004-03-23,2006-01-08,NYU '07,Undergrad,Female,1985-06-02,myheadwuzbumped,,Moderate,,Stuyvesant High School '03,"Brooklyn, NY",,,,,
1,800077,Sean Brandt,2004-03-23,2005-09-23,NYU '07,Undergrad,Male,1985-07-17,yo soy el dorko,http://livejournal.com/~seanpictures,Liberal,,Stuyvesant High School '03,"Staten Island, NY",,,"105 Lincoln Rd., Apt. 3J Brooklyn, NY",,
2,800083,George Liu,2004-03-23,2005-12-30,NYU '07,Undergrad,Male,1985-11-11,MEGAmorphG,,Apathetic,,Stuyvesant High School '03,"New York, NY",,,,,
3,800092,Hong Li,2004-03-23,2006-01-08,NYU '07,Undergrad,Female,1985-12-28,lilhacp1228,,Liberal,,Stuyvesant High School '03,"Whitestone, NY",,,"195 E. 4th St New York, NY 10009",,
4,800356,Peter Naas,2004-03-26,2006-01-17,NYU,Undergrad,,1985-09-01,silent4042,http://en.wikipedia.org/wiki/Eddie_Slovi...,,,Stuyvesant High School '03,Cop Killer Queens,,Carlyle Court C310CA,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232,831501,Eliza Chan,2005-10-18,2005-11-02,NYU '09,Undergrad,Female,1987-11-05,BlackSerene,,,,Stuyvesant High School '05,"Brooklyn, NY 11229",,,,"Brooklyn, NY 11229",
233,831928,Jeff Strabone,2005-10-30,2006-01-12,NYU,Undergrad,,NaT,,http://www.lavasurfer.com/cereal-guide.h...,,,Stuyvesant High School,,,,,"Brooklyn, NY",
234,832061,Annie Chang,2005-11-03,2005-12-27,NYU,Undergrad,,NaT,,,,,Stuyvesant High School '05,,,,,,
235,832913,Elijah Kim,2005-11-26,2005-11-28,NYU '08,Grad Student,Male,1982-06-07,qcelij,,,"New York, NY",Stuyvesant High School '00,"Brooklyn, NY 11237",,,,,


#### Find all names that contain a digit

In [31]:
%%read_sql
SELECT * 
FROM Profiles 
WHERE name REGEXP '[0-9]+'

Query started at 08:10:04 PM UTC; Query executed in 0.00 m

Unnamed: 0,ProfileID,Name,MemberSince,LastUpdate,School,Status,Sex,Birthday,AIM,Website,PoliticalViews,Geography,HighSchool,HomeTown,HomeState,Residence,CurrentAddress,CurrentTown,CurrentState
0,817012,Tbone 3000,2004-12-12,NaT,NYU,Undergrad,,NaT,,,,,,,,,,,
1,817574,K 5,2004-12-26,2005-09-28,NYU '07,Undergrad,Male,NaT,kjobby12xxx,http://www.K5ive.com,Other,,Royal High '01,"Simi Valley, CA 93065",,,,,NY
2,818366,Arielle 4u,2005-01-19,2006-01-15,NYU '07,Undergrad,Female,1986-10-24,sexoholic blue u,http://five.flash-gear.com/npuz/puz.php?...,Very Liberal,,,,,Twentysixth Street 14F1,,,
3,819379,ML152,2005-02-14,2005-12-09,NYU '06,Grad Student,Male,1977-03-05,mattlobron,,,,,"Long Valley, NJ 07853",,,,"NY, NY",
4,819606,Panaflex 16,2005-02-21,2005-02-21,NYU,Alumnus/Alumna,,NaT,,http://www.panavision.com,,,,,,,,,
5,821186,g nupe the hazel eyed 1,2005-04-04,2006-01-18,NYU '05,Faculty,Male,NaT,g NUPE the hAZEL EYED,,Moderate,,,"new york, NY 10037",,,,,
6,823755,NYC Summer2005,2005-05-22,2006-01-18,NYU,Alumnus/Alumna,Male,NaT,mischa998,,,,,"New York City, NY",,,,"NYC, NY 10012",
7,827978,Ben 0viatt,2005-08-05,2006-01-13,NYU '09,Undergrad,Male,1986-08-17,TheCoolestMonkey,http://www.fotolog.net/bennigan/,Other,,Lander Valley High School '04,"Lander, WY 82520",,Hayden Hall 1011A,,,


#### Find all students with a name that contains a non-letter character, other than \- and \.

In [32]:
%%read_sql
SELECT * 
FROM Profiles 
WHERE name NOT REGEXP '^[A-z \-\.]+$'

Query started at 08:10:04 PM UTC; Query executed in 0.00 m

Unnamed: 0,ProfileID,Name,MemberSince,LastUpdate,School,Status,Sex,Birthday,AIM,Website,PoliticalViews,Geography,HighSchool,HomeTown,HomeState,Residence,CurrentAddress,CurrentTown,CurrentState
0,800794,Shula Ponet β™¥,2004-04-01,2006-01-15,NYU '06,Undergrad,Female,1983-10-24,shuls1024,,Liberal,,Hamden Hall Country Day School '02,"new haven, CT",,Alumni Hall,,,
1,802953,DontrΓ© L. Conerly,2004-04-20,2005-12-26,NYU '05,Alumnus/Alumna,Male,1983-02-07,majoliemoi,http://jadoremoi.easyjournal.com,Conservative,"New York, NY",Lawndale High School '01,"New Orleans, LA 70126",,,,,
2,803148,Rajiv/Jeeves Therealman,2004-04-21,2006-01-03,NYU '07,Undergrad,Male,1985-07-22,,,Moderate,,Boonton High School '03,"Boonton, NJ 07005",,,,,
3,803169,David NegrΓ³n,2004-04-21,2005-10-29,NYU '07,Undergrad,Male,1984-09-21,dbneg921,,Very Liberal,,Guilford High School '03,"Guilford, CT 06437",,,,"Brooklyn, NY 11222",
4,803349,Christopher T|M Burns,2004-04-22,2006-01-04,NYU '04,Alumnus/Alumna,Male,NaT,,http://www.sickabodsane.com,,"New York, NY",The Pennington Prep School '00,,,,,,
5,803696,β™¥ Rachel Doyle β™¥,2004-04-25,2006-01-09,NYU '05,Alumnus/Alumna,Female,1984-04-27,o doyle ru1ez,http://myspace.com/ambrosiaswirl,Liberal,"New York, NY",Albert Einstein High School,District of Corruption,,,"E. 5th & Avenue B, Manhattan",,
6,804467,ChloΓ© Bakalar,2004-04-29,2006-01-09,NYU '06,Undergrad,Female,1985-03-14,skittlesCLO,,Very Conservative,,Pine Crest School '03,,,,,,
7,805463,TrinΓ© Alimena,2004-05-07,2006-01-16,NYU '07,Undergrad,Female,1985-06-16,Quarterrican16,,Conservative,,,"Stamford, CT 06905",,Carlyle Court,,,
8,806663,Jeremy Greene (δΊΊ),2004-05-26,2006-01-16,NYU '07,Undergrad,Male,1984-11-26,Tirbz,,Liberal,,Winter Springs High School '03,"New York, NY 10011",,Thirteenth Street 3BS,47 W 13,,
9,807220,Margaux OpeΓ±a,2004-06-09,2005-10-23,NYU,Alumnus/Alumna,Female,1982-09-25,Roswelliscool,,,,Convent of the Sacred Heart High School '00,"San Francisco, CA",,,,,
