## Basic SQL SELECT queries

Queries are run against dvdrental tutorial database.

In [1]:
from sqlalchemy import create_engine
import pandas as pd

In [2]:
engine_str = "postgresql+psycopg2://docker:docker@0.0.0.0:25432/dvdrental"
engine = create_engine(engine_str)

In [3]:
def do_sql(sql):
    with engine.connect() as con:
        df = pd.read_sql(sql, con=con)
    return df

Selet one table's first 100 rows

In [4]:
sql = "SELECT * FROM actor LIMIT 100;"
do_sql(sql)

Unnamed: 0,actor_id,first_name,last_name,last_update
0,1,Penelope,Guiness,2013-05-26 14:47:57.620
1,2,Nick,Wahlberg,2013-05-26 14:47:57.620
2,3,Ed,Chase,2013-05-26 14:47:57.620
3,4,Jennifer,Davis,2013-05-26 14:47:57.620
4,5,Johnny,Lollobrigida,2013-05-26 14:47:57.620
...,...,...,...,...
95,95,Daryl,Wahlberg,2013-05-26 14:47:57.620
96,96,Gene,Willis,2013-05-26 14:47:57.620
97,97,Meg,Hawke,2013-05-26 14:47:57.620
98,98,Chris,Bridges,2013-05-26 14:47:57.620


All customers whose firname is Kelly or Tony

In [5]:
sql = (
    "SELECT * FROM customer "
    "WHERE "
    "   first_name = 'Tony' "
    "OR "
    "   first_name='Kelly';"
)
do_sql(sql)


Unnamed: 0,customer_id,store_id,first_name,last_name,email,address_id,activebool,create_date,last_update,active
0,67,1,Kelly,Torres,kelly.torres@sakilacustomer.org,71,True,2006-02-14,2013-05-26 14:49:45.738,1
1,401,2,Tony,Carranza,tony.carranza@sakilacustomer.org,406,True,2006-02-14,2013-05-26 14:49:45.738,1
2,546,1,Kelly,Knott,kelly.knott@sakilacustomer.org,552,True,2006-02-14,2013-05-26 14:49:45.738,1


All films longer than two hours.

In [6]:
sql = (
    "SELECT film_id, title, length FROM film "
    "WHERE length > 120;"
)
do_sql(sql)

Unnamed: 0,film_id,title,length
0,5,African Egg,130
1,6,Agent Truman,169
2,11,Alamo Videotape,126
3,12,Alaska Phantom,136
4,13,Ali Forever,150
...,...,...,...
452,991,Worst Banger,185
453,992,Wrath Mile,176
454,993,Wrong Behavior,178
455,996,Young Language,183


The titles of films whose description contains "Drama" or "Documentary"

In [7]:
# double up percent sign for SQLAlchemy
sql = (
    "SELECT film_id, title, description FROM film "
    "WHERE "
	"description SIMILAR TO '%%(Documentary|Drama)%%';"
)
do_sql(sql)

Unnamed: 0,film_id,title,description
0,384,Grosse Wonderful,A Epic Drama of a Cat And a Explorer who must ...
1,1,Academy Dinosaur,A Epic Drama of a Feminist And a Mad Scientist...
2,4,Affair Prejudice,A Fanciful Documentary of a Frisbee And a Lumb...
3,5,African Egg,A Fast-Paced Documentary of a Pastry Chef And ...
4,13,Ali Forever,A Action-Packed Drama of a Dentist And a Croco...
...,...,...,...
202,981,Wolves Desire,A Fast-Paced Drama of a Squirrel And a Robot w...
203,982,Women Dorado,A Insightful Documentary of a Waitress And a B...
204,983,Won Dares,A Unbelieveable Documentary of a Teacher And a...
205,991,Worst Banger,A Thrilling Drama of a Madman And a Dentist wh...


The total number and average length of drama and documentary films , respectively .

In [8]:
sql = """
SELECT COUNT(*), avg(length) as avg_length, 'Documentary' as film_type FROM film
WHERE 
	description SIMILAR TO '%%(Documentary)%%'
UNION
SELECT COUNT(*), avg(length) as avg_length, 'Drama' as film_type FROM film
WHERE 
	description SIMILAR TO '%%(Drama)%%';
"""
do_sql(sql)

Unnamed: 0,count,avg_length,film_type
0,101,115.564356,Documentary
1,106,118.160377,Drama


The most common first names of customers and the most common last names of actors.

In [9]:
sql = """
SELECT first_name, COUNT(*) as counted FROM customer
GROUP BY first_name
ORDER BY counted DESC;
"""
do_sql(sql)

Unnamed: 0,first_name,counted
0,Marion,2
1,Jamie,2
2,Tracy,2
3,Kelly,2
4,Leslie,2
...,...,...
586,Edward,1
587,Cindy,1
588,Amy,1
589,Earl,1


In [10]:
sql = """
SELECT last_name, COUNT(*) as counted FROM actor
GROUP BY last_name
ORDER BY counted DESC;
"""
do_sql(sql)

Unnamed: 0,last_name,counted
0,Kilmer,5
1,Temple,4
2,Nolte,4
3,Williams,3
4,Peck,3
...,...,...
116,Carrey,1
117,Astaire,1
118,Hope,1
119,Walken,1
