## Connecting to SQL databases

In [1]:
import pandas as pd
import numpy as np

In [7]:
from pathlib import Path

from sqlalchemy import create_engine

# Location of the Chinook sample database, relative to the working directory
# the notebook is run from.
DB_PATH = Path("../python_cookbook/data/chinook.db")

# SQLite creates a new, empty database file when the target path does not
# exist, so a wrong path would only show up later as a "table not found"
# error. Fail early with a clear message instead.
if not DB_PATH.is_file():
    raise FileNotFoundError(f"Chinook database not found at {DB_PATH.resolve()}")

# as_posix() keeps forward slashes in the URL on every platform; the resulting
# URL is "sqlite:///../python_cookbook/data/chinook.db".
engine = create_engine(f"sqlite:///{DB_PATH.as_posix()}")

In [9]:
# Load every column of the "tracks" table into a DataFrame through the engine.
tracks = pd.read_sql_table(table_name="tracks", con=engine)
tracks

Unnamed: 0,TrackId,Name,AlbumId,MediaTypeId,GenreId,Composer,Milliseconds,Bytes,UnitPrice
0,1,For Those About To Rock (We Salute You),1,1,1,"Angus Young, Malcolm Young, Brian Johnson",343719,11170334,0.99
1,2,Balls to the Wall,2,2,1,,342562,5510424,0.99
2,3,Fast As a Shark,3,2,1,"F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...",230619,3990994,0.99
3,4,Restless and Wild,3,2,1,"F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...",252051,4331779,0.99
4,5,Princess of the Dawn,3,2,1,Deaffy & R.A. Smith-Diesel,375418,6290521,0.99
...,...,...,...,...,...,...,...,...,...
3498,3499,Pini Di Roma (Pinien Von Rom) \ I Pini Della V...,343,2,24,,286741,4718950,0.99
3499,3500,"String Quartet No. 12 in C Minor, D. 703 ""Quar...",344,2,24,Franz Schubert,139200,2283131,0.99
3500,3501,"L'orfeo, Act 3, Sinfonia (Orchestra)",345,2,24,Claudio Monteverdi,66639,1189062,0.99
3501,3502,"Quintet for Horn, Violin, 2 Violas, and Cello ...",346,2,24,Wolfgang Amadeus Mozart,221331,3665114,0.99


In [10]:
# Left-join the genres table to the track durations on GenreId (keeping every
# genre row), then discard the join key so only Name and Milliseconds remain.
pd.merge(
    pd.read_sql_table("genres", engine),
    tracks[["GenreId", "Milliseconds"]],
    how="left",
    on="GenreId",
).drop(columns="GenreId")

Unnamed: 0,Name,Milliseconds
0,Rock,343719
1,Rock,342562
2,Rock,230619
3,Rock,252051
4,Rock,375418
...,...,...
3498,Classical,286741
3499,Classical,139200
3500,Classical,66639
3501,Classical,221331


In [26]:
# Mean track length per genre name, converted from milliseconds to a
# timedelta floored to whole seconds and listed from shortest to longest.
(
    pd.merge(
        pd.read_sql_table("genres", engine),
        tracks[["GenreId", "Milliseconds"]],
        how="left",
        on="GenreId",
    )
    .drop(columns="GenreId")
    .groupby("Name")["Milliseconds"]
    .agg("mean")
    .pipe(pd.to_timedelta, unit="ms")
    .dt.floor("s")
    .sort_values()
    .reset_index()
)

Unnamed: 0,Name,Milliseconds
0,Rock And Roll,0 days 00:02:14
1,Opera,0 days 00:02:54
2,Hip Hop/Rap,0 days 00:02:58
3,Easy Listening,0 days 00:03:09
4,Bossa Nova,0 days 00:03:39
5,R&B/Soul,0 days 00:03:40
6,World,0 days 00:03:44
7,Pop,0 days 00:03:49
8,Latin,0 days 00:03:52
9,Alternative & Punk,0 days 00:03:54


In [21]:
# Read only the columns needed from each of the three tables.
cust = pd.read_sql_table(
    "customers",
    engine,
    columns=["CustomerId", "FirstName", "LastName"],
)
invoice = pd.read_sql_table(
    "invoices",
    engine,
    columns=["InvoiceId", "CustomerId"],
)
invoice_items = pd.read_sql_table(
    "invoice_items",
    engine,
    columns=["InvoiceId", "UnitPrice", "Quantity"],
)

# customers -> invoices -> invoice_items: one row per purchased line item.
pd.merge(cust, invoice, on="CustomerId").merge(invoice_items, on="InvoiceId")

Unnamed: 0,CustomerId,FirstName,LastName,InvoiceId,UnitPrice,Quantity
0,1,Luís,Gonçalves,98,1.99,1
1,1,Luís,Gonçalves,98,1.99,1
2,1,Luís,Gonçalves,121,0.99,1
3,1,Luís,Gonçalves,121,0.99,1
4,1,Luís,Gonçalves,121,0.99,1
...,...,...,...,...,...,...
2235,59,Puja,Srivastava,284,0.99,1
2236,59,Puja,Srivastava,284,0.99,1
2237,59,Puja,Srivastava,284,0.99,1
2238,59,Puja,Srivastava,284,0.99,1


In [30]:
# Total spend per customer: price each line item, sum the line totals per
# customer, and list the customers from highest to lowest total.
(
    pd.merge(cust, invoice, on="CustomerId")
    .merge(invoice_items, on="InvoiceId")
    .assign(Total=lambda df_: df_["Quantity"] * df_["UnitPrice"])
    .groupby(["CustomerId", "FirstName", "LastName"])["Total"]
    .agg("sum")
    .sort_values(ascending=False)
    .reset_index()
)

Unnamed: 0,CustomerId,FirstName,LastName,Total
0,6,Helena,Holý,49.62
1,26,Richard,Cunningham,47.62
2,57,Luis,Rojas,46.62
3,45,Ladislav,Kovács,45.62
4,46,Hugh,O'Reilly,45.62
5,37,Fynn,Zimmermann,43.62
6,24,Frank,Ralston,43.62
7,28,Julia,Barnett,43.62
8,25,Victor,Stevens,42.62
9,7,Astrid,Gruber,42.62


In [27]:
# Average track length per genre name, computed in SQL.
# - The join uses the standard "=" comparison rather than SQLite's "==" alias.
# - Rows are ordered by the numeric average rather than by the formatted
#   HH:MM:SS text, so ordering does not depend on string comparison of the
#   display value.
# - time(seconds, 'unixepoch') is only used to format the result for display.
sql_string1 = """
SELECT
    g.Name AS Name,
    time(avg(t.Milliseconds) / 1000, 'unixepoch') AS avg_time
FROM
    genres AS g
JOIN
    tracks AS t
    ON g.GenreId = t.GenreId
GROUP BY
    g.Name
ORDER BY
    avg(t.Milliseconds)"""

pd.read_sql_query(sql_string1, engine)

Unnamed: 0,Name,avg_time
0,Rock And Roll,00:02:14
1,Opera,00:02:54
2,Hip Hop/Rap,00:02:58
3,Easy Listening,00:03:09
4,Bossa Nova,00:03:39
5,R&B/Soul,00:03:40
6,World,00:03:44
7,Pop,00:03:49
8,Latin,00:03:52
9,Alternative & Punk,00:03:54


In [29]:
# Total spend per customer, computed in SQL: customers joined to invoices and
# invoice_items, line totals (quantity * unit price) summed per customer and
# ordered from highest to lowest.
sql_string2 = """
SELECT
    c.customerId, c.FirstName, c.LastName, sum(ii.quantity * ii.unitprice) as Total
FROM
    customers as c
JOIN
    invoices as i
    on c.customerid = i.customerid
JOIN
    invoice_items as ii
    on i.invoiceid = ii.invoiceid
GROUP BY
    c.customerid, c.FirstName, c.LastName
ORDER BY
    Total desc"""

pd.read_sql_query(sql=sql_string2, con=engine)

Unnamed: 0,CustomerId,FirstName,LastName,Total
0,6,Helena,Holý,49.62
1,26,Richard,Cunningham,47.62
2,57,Luis,Rojas,46.62
3,45,Ladislav,Kovács,45.62
4,46,Hugh,O'Reilly,45.62
5,37,Fynn,Zimmermann,43.62
6,24,Frank,Ralston,43.62
7,28,Julia,Barnett,43.62
8,25,Victor,Stevens,42.62
9,7,Astrid,Gruber,42.62
