In [3]:
import pandas as pd

from sqlalchemy import (create_engine, 
                        inspect, 
                        text, 
                        select, 
                        MetaData, 
                        Table, 
                        and_,
                        or_,
                        desc,
                        asc,
                        func,
                        case,
                        cast,
                        Float,
                        )


# dialect+driver://username:password@host:port/database - connecting to an outside db

# dialect+driver:////filepath.db - connecting to a local SQLite file by absolute path

# Engine for the Chinook SQLite file (four slashes: the file path is absolute).
chinook_url = "sqlite+pysqlite:////workspaces/ERN-sessions/SQL Learning/chinook.db"
chinook_engine = create_engine(chinook_url)

# Show which tables the Chinook database contains.
chinook_inspector = inspect(chinook_engine)
chinook_inspector.get_table_names()

['albums',
 'artists',
 'customers',
 'employees',
 'genres',
 'invoice_items',
 'invoices',
 'media_types',
 'playlist_track',
 'playlists',
 'tracks']

In [4]:
# Engine for the gravity SQLite file (four slashes: the file path is absolute).
gravity_url = "sqlite+pysqlite:////workspaces/ERN-sessions/SQL Learning/gravity.db"
gravity_engine = create_engine(gravity_url)

# Show which tables the gravity database contains.
gravity_inspector = inspect(gravity_engine)
gravity_inspector.get_table_names()

['address',
 'address_status',
 'author',
 'book',
 'book_author',
 'book_language',
 'country',
 'cust_order',
 'customer',
 'customer_address',
 'order_history',
 'order_line',
 'order_status',
 'publisher',
 'shipping_method']

In [29]:
# Reflect the Chinook tables used in this notebook; autoload_with makes
# SQLAlchemy read each table's definition from the database itself.
chinook_metadata = MetaData()
tracks, albums, artists = (
    Table(table_name, chinook_metadata, autoload_with=chinook_engine)
    for table_name in ('tracks', 'albums', 'artists')
)

# Same again for gravity.db: a separate MetaData object plus the three tables
# we want from it (book, author, book_author).
gravity_metadata = MetaData()
books, authors, book_authors = (
    Table(table_name, gravity_metadata, autoload_with=gravity_engine)
    for table_name in ('book', 'author', 'book_author')
)

# Column names of the reflected author table.
authors.c.keys()

['author_id', 'author_name']

In [15]:
# Computed column: bytes per millisecond for each track. Printing the statement
# shows the SQL that SQLAlchemy generates for the division.
bytes_per_ms = (tracks.c.Bytes / tracks.c.Milliseconds).label('Bytes per Millisecond')

with chinook_engine.connect() as con:
    stmt = select(tracks.c.Name, bytes_per_ms)
    print(stmt)
    result_proxy = con.execute(stmt)
    # NOTE: these rows are replaced by the next query before anything reads them.
    result = result_proxy.fetchmany(size=10)


# Exercise: for the first 10 tracks returned, add a computed column holding the
# cost per unit of play time (UnitPrice divided by Milliseconds).
cost_per_ms = (tracks.c.UnitPrice / tracks.c.Milliseconds).label(
    'Cost per unit time (ms)'
)

with chinook_engine.connect() as con:
    stmt = select(tracks.c.Name, cost_per_ms)
    result_proxy = con.execute(stmt)
    result = result_proxy.fetchmany(size=10)

result_df = pd.DataFrame(result)
result_df

SELECT tracks."Name", tracks."Bytes" / CAST(tracks."Milliseconds" AS NUMERIC) AS "Bytes per Millisecond" 
FROM tracks


Unnamed: 0,Name,Cost per unit time (ms)
0,For Those About To Rock (We Salute You),2.8803e-06
1,Balls to the Wall,2.89e-06
2,Fast As a Shark,4.2928e-06
3,Restless and Wild,3.9278e-06
4,Princess of the Dawn,2.6371e-06
5,Put The Finger On You,4.8137e-06
6,Let's Get It Up,4.2321e-06
7,Inject The Venom,4.6956e-06
8,Snowballed,4.8744e-06
9,Evil Walks,3.7572e-06


In [19]:
# case((condition, value), else_=default) picks `value` for rows where the
# condition holds and `default` for the rest. Wrapping it in an aggregate such
# as func.sum() therefore aggregates only over the matching rows.

# Total play time of album 1, converted from milliseconds to minutes.
album_1_length = func.sum(
    case((tracks.c.AlbumId == 1, tracks.c.Milliseconds), else_=0)
)
ms_per_minute = 1000 * 60

with chinook_engine.connect() as con:
    stmt = select(album_1_length / ms_per_minute)
    result_proxy = con.execute(stmt)
    result = result_proxy.scalar()

# (This first result is not displayed; the next query overwrites it.)


# Exercise: length of album 1 as a percentage of the entire library.
#   - one clause for the length of album 1
#   - one clause for the length of the entire library
#   - a statement selecting album 1 over the entire library, as a percentage
album_1 = func.sum(
    case((tracks.c.AlbumId == 1, tracks.c.Milliseconds), else_=0)
)
entire_lib = func.sum(tracks.c.Milliseconds)
album_1_pct = (album_1 / entire_lib) * 100

with chinook_engine.connect() as con:
    stmt = select(album_1_pct)
    result_proxy = con.execute(stmt)
    result = result_proxy.scalar()

print(result)

0.1740972753


In [28]:
# Joining on a pre-determined relationship: when .join() is given no ON
# clause, SQLAlchemy derives it from the foreign keys reflected from the
# database (assumes chinook.db declares them -- otherwise .join() raises).
# tracks has no ArtistId column, so it reaches artists through albums:
# tracks -> albums -> artists.
# Selecting from artists and tracks with no join at all would pair every
# artist with every track (a cartesian product).
with chinook_engine.connect() as con:
    artist_tracks = tracks.join(albums).join(artists)
    stmt = select(artists.c.Name, tracks).select_from(artist_tracks)
    result_proxy = con.execute(stmt)
    result = result_proxy.fetchmany(size=10)

# result_df = pd.DataFrame(result)
# result_df

# No pre-determined relationship: spell out the ON clause yourself.
with chinook_engine.connect() as con:
    stmt = select(tracks.join(albums, tracks.c.AlbumId == albums.c.AlbumId))
    result_proxy = con.execute(stmt)
    result = result_proxy.fetchmany(size=20)

result_df = pd.DataFrame(result)
result_df

# 1. select a new table of book titles with author names - using the pre-determined relationship
# 2. make a big table joining books to book_authors to authors - specify the join conditions yourself




Unnamed: 0,TrackId,Name,AlbumId,MediaTypeId,GenreId,Composer,Milliseconds,Bytes,UnitPrice,AlbumId_1,Title,ArtistId
0,1,For Those About To Rock (We Salute You),1,1,1,"Angus Young, Malcolm Young, Brian Johnson",343719,11170334,0.99,1,For Those About To Rock We Salute You,1
1,6,Put The Finger On You,1,1,1,"Angus Young, Malcolm Young, Brian Johnson",205662,6713451,0.99,1,For Those About To Rock We Salute You,1
2,7,Let's Get It Up,1,1,1,"Angus Young, Malcolm Young, Brian Johnson",233926,7636561,0.99,1,For Those About To Rock We Salute You,1
3,8,Inject The Venom,1,1,1,"Angus Young, Malcolm Young, Brian Johnson",210834,6852860,0.99,1,For Those About To Rock We Salute You,1
4,9,Snowballed,1,1,1,"Angus Young, Malcolm Young, Brian Johnson",203102,6599424,0.99,1,For Those About To Rock We Salute You,1
5,10,Evil Walks,1,1,1,"Angus Young, Malcolm Young, Brian Johnson",263497,8611245,0.99,1,For Those About To Rock We Salute You,1
6,11,C.O.D.,1,1,1,"Angus Young, Malcolm Young, Brian Johnson",199836,6566314,0.99,1,For Those About To Rock We Salute You,1
7,12,Breaking The Rules,1,1,1,"Angus Young, Malcolm Young, Brian Johnson",263288,8596840,0.99,1,For Those About To Rock We Salute You,1
8,13,Night Of The Long Knives,1,1,1,"Angus Young, Malcolm Young, Brian Johnson",205688,6706347,0.99,1,For Those About To Rock We Salute You,1
9,14,Spellbound,1,1,1,"Angus Young, Malcolm Young, Brian Johnson",270863,8817038,0.99,1,For Those About To Rock We Salute You,1


In [33]:
# Build the three-table join step by step: book -> book_author uses an explicit
# ON clause, while the final hop to author is given none and is left for
# SQLAlchemy to work out.
books_to_links = books.join(book_authors, books.c.book_id == book_authors.c.book_id)
books_with_authors = books_to_links.join(authors)

with gravity_engine.connect() as con:
    stmt = select(books_with_authors)
    result_proxy = con.execute(stmt)
    result = result_proxy.fetchmany(size=10)

result_df = pd.DataFrame(result)
result_df

Unnamed: 0,book_id,title,isbn13,language_id,num_pages,publication_date,publisher_id,book_id_1,author_id,author_id_1,author_name
0,1570,Good Poems for Hard Times,9780143037675,1,344,2006-08-29,1476,1570,2823,2823,Garrison Keillor
1,10539,Baseball: a Literary Anthology,9781931082099,1,733,2002-03-04,1130,10539,6439,6439,Nicholas Dawidoff
2,8323,Plato: Complete Works,9780872203495,1,1838,1997-05-01,830,8323,7011,7011,Plato
3,7344,Thriller: Stories To Keep You Up All Night,9780778322993,1,568,2006-05-30,1270,7344,3760,3760,James Patterson
4,5762,The Best American Comics 2006,9780618718740,1,293,2006-10-01,984,5762,3237,3237,Harvey Pekar
5,4262,Masterpieces: The Best Science Fiction of the ...,9780441011339,1,422,2004-03-02,23,4262,6560,6560,Orson Scott Card
6,2217,Modern Classics of Science Fiction,9780312088477,1,672,1993-01-15,1878,2217,2819,2819,Gardner Dozois
7,10340,The Flying Sorcerers: More Comic Tales of Fantasy,9781857237252,1,383,1999-02-04,1403,10340,6857,6857,Peter Haining
8,7451,The Mammoth Book of New Jules Verne Adventures...,9780786714957,1,498,2005-02-15,1731,7451,6237,6237,Mike Ashley
9,9047,Everything I Needed to Know about Being a Girl...,9781416531043,1,275,2007-06-05,1564,9047,4000,4000,Jennifer O'Connell


In [None]:
# The task is to use SQLAlchemy and pandas and, any way you want, find out: 
# The name of the author who has the most books in the database, and what those books are.

# A stretch task for homework is to find out how many orders have been placed which contain books by that author
# for the stretch task, email me your answers.