In [1]:
import pandas as pd
import numpy as np
import sqlite3 as sql
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In this Notebook I will practice more complicated SQL queries. To do this I wil, connect to [Chinook Database](https://github.com/lerocha/chinook-database). This is sample database that represent a digital media store. Database model is shown below.

![chinook_schema](https://user-images.githubusercontent.com/51002300/71028739-2e674500-210e-11ea-86c4-eff1258c738f.JPG)

In [88]:
connection = sql.connect('chinook.db')

In [89]:
query = '''SELECT * FROM sqlite_master'''
pd.read_sql_query(query, connection)

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,album,album,2,CREATE TABLE [album]\n(\n [album_id] INTEGE...
1,table,artist,artist,3,CREATE TABLE [artist]\n(\n [artist_id] INTE...
2,table,customer,customer,4,CREATE TABLE [customer]\n(\n [customer_id] ...
3,table,employee,employee,5,CREATE TABLE [employee]\n(\n [employee_id] ...
4,table,genre,genre,6,CREATE TABLE [genre]\n(\n [genre_id] INTEGE...
5,table,invoice,invoice,7,CREATE TABLE [invoice]\n(\n [invoice_id] IN...
6,table,invoice_line,invoice_line,8,CREATE TABLE [invoice_line]\n(\n [invoice_l...
7,table,media_type,media_type,9,CREATE TABLE [media_type]\n(\n [media_type_...
8,table,playlist,playlist,10,CREATE TABLE [playlist]\n(\n [playlist_id] ...
9,table,playlist_track,playlist_track,11,CREATE TABLE [playlist_track]\n(\n [playlis...


Query to gather some information on a specific purchase:
- id of the track
- name of the track
- name of media type of the track
- price that the customer paid for the track
- quantity of the track that was purchased

In [90]:
query = '''SELECT * FROM invoice_line'''
pd.read_sql_query(query, connection).head(3)

Unnamed: 0,invoice_line_id,invoice_id,track_id,unit_price,quantity
0,1,1,1158,0.99,1
1,2,1,1159,0.99,1
2,3,1,1160,0.99,1


In [91]:
query = '''SELECT * FROM track'''
pd.read_sql_query(query, connection).head(3)

Unnamed: 0,track_id,name,album_id,media_type_id,genre_id,composer,milliseconds,bytes,unit_price
0,1,For Those About To Rock (We Salute You),1,1,1,"Angus Young, Malcolm Young, Brian Johnson",343719,11170334,0.99
1,2,Balls to the Wall,2,2,1,,342562,5510424,0.99
2,3,Fast As a Shark,3,2,1,"F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...",230619,3990994,0.99


In [92]:
query = '''SELECT * FROM media_type'''
pd.read_sql_query(query, connection).head(3)

Unnamed: 0,media_type_id,name
0,1,MPEG audio file
1,2,Protected AAC audio file
2,3,Protected MPEG-4 video file


##### Find invoice id

In [93]:
query = '''SELECT * FROM invoice_line
            WHERE invoice_id == 3'''
pd.read_sql_query(query, connection).head(3)

Unnamed: 0,invoice_line_id,invoice_id,track_id,unit_price,quantity
0,27,3,2516,0.99,1
1,28,3,2646,0.99,1


##### Add tracks data

In [94]:
query = '''SELECT * FROM invoice_line il
            INNER JOIN track t ON t.track_id = il.track_id 
            WHERE invoice_id == 3'''
pd.read_sql_query(query, connection).head(3)

Unnamed: 0,invoice_line_id,invoice_id,track_id,unit_price,quantity,track_id.1,name,album_id,media_type_id,genre_id,composer,milliseconds,bytes,unit_price.1
0,27,3,2516,0.99,1,2516,Black Hole Sun,203,1,1,Soundgarden,320365,10425229,0.99
1,28,3,2646,0.99,1,2646,I Looked At You,214,1,1,"Robby Krieger, Ray Manzarek, John Densmore, Ji...",142080,4663988,0.99


##### Add media_type data

In [95]:
query = '''SELECT * FROM invoice_line il
            INNER JOIN track t ON t.track_id = il.track_id 
            INNER JOIN media_type mt ON mt.media_type_id = t.media_type_id
            WHERE invoice_id == 3'''
pd.read_sql_query(query, connection).head(3)

Unnamed: 0,invoice_line_id,invoice_id,track_id,unit_price,quantity,track_id.1,name,album_id,media_type_id,genre_id,composer,milliseconds,bytes,unit_price.1,media_type_id.1,name.1
0,27,3,2516,0.99,1,2516,Black Hole Sun,203,1,1,Soundgarden,320365,10425229,0.99,1,MPEG audio file
1,28,3,2646,0.99,1,2646,I Looked At You,214,1,1,"Robby Krieger, Ray Manzarek, John Densmore, Ji...",142080,4663988,0.99,1,MPEG audio file


Query that gathers data about the invoice with an invoice_id of 4
- id of the track, track_id.
- name of the track, track_name.
- name of media type of the track, track_type.
- price that the customer paid for the track, unit_price.
- quantity of the track that was purchased, quantity.

In [96]:
query = '''SELECT 
                t.track_id, 
                t.name track_name, 
                mt.name track_type,
                il.unit_price,
                il.quantity
            FROM invoice_line il
            INNER JOIN track t ON t.track_id = il.track_id
            INNER JOIN media_type mt ON mt.media_type_id = t.media_type_id
            WHERE il.invoice_id == 4'''
pd.read_sql_query(query, connection).head(3)

Unnamed: 0,track_id,track_name,track_type,unit_price,quantity
0,3448,"Lamentations of Jeremiah, First Set \ Incipit ...",Protected AAC audio file,0.99,1
1,2560,Violent Pornography,MPEG audio file,0.99,1
2,3336,War Pigs,Purchased AAC audio file,0.99,1


##### Adding the artist for each track

In [97]:
query = '''SELECT 
                t.track_id, 
                t.name track_name, 
                mt.name track_type,
                il.unit_price,
                il.quantity,
                ar.name
            FROM invoice_line il
            INNER JOIN track t ON t.track_id = il.track_id
            INNER JOIN media_type mt ON mt.media_type_id = t.media_type_id
            INNER JOIN album a ON a.album_id = t.album_id
            INNER JOIN artist ar ON ar.artist_id = a.artist_id
            WHERE il.invoice_id == 4'''
pd.read_sql_query(query, connection).head(3)

Unnamed: 0,track_id,track_name,track_type,unit_price,quantity,name
0,3448,"Lamentations of Jeremiah, First Set \ Incipit ...",Protected AAC audio file,0.99,1,The King's Singers
1,2560,Violent Pornography,MPEG audio file,0.99,1,System Of A Down
2,3336,War Pigs,Purchased AAC audio file,0.99,1,Cake


##### Query that lists the top 10 artists, calculated by the number of times a track by that artist has been purchased

##### Subquery

In [98]:
query = '''SELECT 
                t.track_id, 
                ar.name artist_name
            FROM track t
            INNER JOIN album al ON al.album_id = t.album_id
            INNER JOIN artist ar ON ar.artist_id = al.artist_id
            ORDER BY 1'''
pd.read_sql_query(query, connection).head(3)

Unnamed: 0,track_id,artist_name
0,1,AC/DC
1,2,Accept
2,3,Accept


In [99]:
query = '''SELECT
                ta.artist_name artist,
                COUNT(*) tracks_purchased
            FROM invoice_line il
            INNER JOIN (
                        SELECT 
                            t.track_id, 
                            ar.name artist_name
                        FROM track t
                        INNER JOIN album al ON al.album_id = t.album_id
                        INNER JOIN artist ar ON ar.artist_id = al.artist_id
                        ORDER BY 1
                        ) ta
                        ON ta.track_id = il.track_id
            GROUP BY 1
            ORDER BY 2 DESC
                        '''
pd.read_sql_query(query, connection).head(10)

Unnamed: 0,artist,tracks_purchased
0,Queen,192
1,Jimi Hendrix,187
2,Nirvana,130
3,Red Hot Chili Peppers,130
4,Pearl Jam,129
5,AC/DC,124
6,Guns N' Roses,124
7,Foo Fighters,121
8,The Rolling Stones,117
9,Metallica,106


##### Query that returns the top 5 albums, as calculated by the number of times a track from that album has been purchased

In [100]:
query = '''SELECT
                ta.album_name album,
                ta.artist_name artist,
                COUNT(*) tracks_purchased
            FROM invoice_line il
            INNER JOIN (
                        SELECT 
                            t.track_id, 
                            al.title album_name,
                            ar.name artist_name
                        FROM track t
                        INNER JOIN album al ON al.album_id = t.album_id
                        INNER JOIN artist ar ON ar.artist_id = al.artist_id
                        ORDER BY 1
                        ) ta
                        ON ta.track_id = il.track_id
            GROUP BY 1
            ORDER BY 3 DESC
                        '''
pd.read_sql_query(query, connection).head(5)

Unnamed: 0,album,artist,tracks_purchased
0,Are You Experienced?,Jimi Hendrix,187
1,Faceless,Godsmack,96
2,Mezmerize,System Of A Down,93
3,Get Born,JET,90
4,The Doors,The Doors,83


#### Recursive join

In [101]:
query = '''SELECT  * FROM employee 
                        '''
pd.read_sql_query(query, connection).head(2)

Unnamed: 0,employee_id,last_name,first_name,title,reports_to,birthdate,hire_date,address,city,state,country,postal_code,phone,fax,email
0,1,Adams,Andrew,General Manager,,1962-02-18 00:00:00,2016-08-14 00:00:00,11120 Jasper Ave NW,Edmonton,AB,Canada,T5K 2N1,+1 (780) 428-9482,+1 (780) 428-3457,andrew@chinookcorp.com
1,2,Edwards,Nancy,Sales Manager,1.0,1958-12-08 00:00:00,2016-05-01 00:00:00,825 8 Ave SW,Calgary,AB,Canada,T2P 2T3,+1 (403) 262-3443,+1 (403) 262-3322,nancy@chinookcorp.com


In [102]:
query = '''SELECT  e1.first_name|| " " || e1.last_name employee_name, 
                   e1.title employee_title, 
                   e2.first_name || " " || e2.last_name supervisor_name, 
                   e2. title supervisor_title
            FROM employee e1
            LEFT JOIN employee e2 ON e1.reports_to = e2.employee_id
            ORDER BY 1'''
pd.read_sql_query(query, connection).head(5)

Unnamed: 0,employee_name,employee_title,supervisor_name,supervisor_title
0,Andrew Adams,General Manager,,
1,Jane Peacock,Sales Support Agent,Nancy Edwards,Sales Manager
2,Laura Callahan,IT Staff,Michael Mitchell,IT Manager
3,Margaret Park,Sales Support Agent,Nancy Edwards,Sales Manager
4,Michael Mitchell,IT Manager,Andrew Adams,General Manager


##### Query to find "Jen"

In [103]:
query = '''SELECT
                first_name,
                last_name,
                phone
            FROM customer
            WHERE LOWER(first_name) LIKE LOWER("%Jen%")'''

pd.read_sql_query(query, connection).head(5)

Unnamed: 0,first_name,last_name,phone
0,Jennifer,Peterson,+1 (604) 688-2255


#### Case statement

In [104]:
query = '''SELECT * FROM media_type'''

pd.read_sql_query(query, connection).head(5)

Unnamed: 0,media_type_id,name
0,1,MPEG audio file
1,2,Protected AAC audio file
2,3,Protected MPEG-4 video file
3,4,Purchased AAC audio file
4,5,AAC audio file


In [105]:
query = '''SELECT 
            media_type_id,
            name,
            CASE
                WHEN name LIKE "%protected%" THEN 1
                ELSE 0
                END
                AS Protected
            FROM media_type'''

pd.read_sql_query(query, connection).head(5)

Unnamed: 0,media_type_id,name,Protected
0,1,MPEG audio file,0
1,2,Protected AAC audio file,1
2,3,Protected MPEG-4 video file,1
3,4,Purchased AAC audio file,0
4,5,AAC audio file,0


##### Summarizing each customer's purchases

In [106]:
query = '''SELECT
                c.first_name || " " || c.last_name customer_name,
                COUNT(*) number_of_purchases,
                SUM(i.total) total_spent,
                CASE
                    WHEN SUM(i.total) < 40 THEN "small spender"
                    WHEN SUM(i.total) > 100 THEN "big spender"
                    ELSE "regular"
                    END
                    AS customer_category
            FROM invoice i
            INNER JOIN customer c ON c.customer_id = i.customer_id
            group by i.customer_id
            order by customer_name
            '''

pd.read_sql_query(query, connection).head(10)

Unnamed: 0,customer_name,number_of_purchases,total_spent,customer_category
0,Aaron Mitchell,8,70.29,regular
1,Alexandre Rocha,10,69.3,regular
2,Astrid Gruber,9,69.3,regular
3,Bjørn Hansen,9,72.27,regular
4,Camille Bernard,9,79.2,regular
5,Daan Peeters,7,60.39,regular
6,Dan Miller,12,95.04,regular
7,Diego Gutiérrez,5,39.6,small spender
8,Dominique Lefebvre,9,72.27,regular
9,Eduardo Martins,12,60.39,regular


In [1]:
query = '''SELECT * FROM customer'''
pd.read_sql_query(query, connection).head(30)

NameError: name 'pd' is not defined

In [107]:
connection.close()