In [1]:
import sqlite3
import pandas as pd

In [2]:
# Create Connection
# Open through a URI with mode=rw: if the path is wrong this raises
# sqlite3.OperationalError right away, instead of silently creating a new,
# empty database file whose queries later fail with "no such table".
con = sqlite3.connect('file:./data/Chinook_Sqlite.sqlite?mode=rw', uri=True)

In [3]:
# Create Cursor
# The cursor is the object used below to execute SQL statements on this
# connection and to fetch their result rows.
cursor = con.cursor()

In [4]:
# Create Query
# Counts every row in the Album table. COUNT(*) is the portable spelling;
# the bare COUNT() form only works because SQLite accepts it as a shorthand.
album_count = """
SELECT COUNT(*)
FROM Album
"""

In [5]:
# Execute Query
# execute() only runs the statement on the cursor; the rows themselves are
# retrieved afterwards with a fetch call.
cursor.execute(album_count)

<sqlite3.Cursor at 0x13e24f040>

In [6]:
# Return Query
# fetchall() collects every row of the executed query into a list of tuples.
count = cursor.fetchall()

In [7]:
# The result is a list holding one tuple per row, so the single COUNT value
# comes back wrapped as [(n,)] rather than as a bare integer.
print(type(count))
count

<class 'list'>


[(347,)]

In [8]:
# Quick helper function
def return_query(query, cursor, params=()):
    """Execute ``query`` on ``cursor`` and return every result row.

    Parameters
    ----------
    query : str
        SQL statement to run. It may contain ``?`` or ``:name`` placeholders.
    cursor : sqlite3.Cursor
        Cursor on which the statement is executed.
    params : sequence or dict, optional
        Values bound to the placeholders in ``query``. Defaults to an empty
        tuple, i.e. a statement without parameters. Binding values this way
        avoids building SQL by string formatting.

    Returns
    -------
    list of tuple
        One tuple per result row; an empty list when the query yields no rows.
    """
    cursor.execute(query, params)
    return cursor.fetchall()

In [9]:
# Query that counts every row in the Album table
album_query = """
SELECT COUNT(*)
FROM Album
"""

In [10]:
# Run the query through the helper; the result is a list of row tuples
al_list = return_query(album_query, cursor)
al_list

[(347,)]

# A reminder of how to read into pandas

In [11]:
# pd.read_sql is given the connection (not the cursor) and returns the
# query result as a DataFrame whose column is named after the SQL expression.
albums_df = pd.read_sql(album_query, con)

albums_df

Unnamed: 0,COUNT(*)
0,347


In [12]:
# First (and only) result row -- still a one-element tuple
al_list[0]

(347,)

In [13]:
# Index into the row tuple to get the bare integer count
al_list[0][0]

347

In [14]:
# Schema!
# sqlite_master is the catalog table of the database: one row per schema
# object, with its type, name, owning table, root page and CREATE statement.
schema_query = """

SELECT *
FROM sqlite_master

"""
schema_df = pd.read_sql(schema_query, con)

schema_df

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,Album,Album,19,CREATE TABLE [Album]\n(\n [AlbumId] INTEGER...
1,table,Artist,Artist,281,CREATE TABLE [Artist]\n(\n [ArtistId] INTEG...
2,table,Customer,Customer,386,CREATE TABLE [Customer]\n(\n [CustomerId] I...
3,table,Employee,Employee,392,CREATE TABLE [Employee]\n(\n [EmployeeId] I...
4,table,Genre,Genre,395,CREATE TABLE [Genre]\n(\n [GenreId] INTEGER...
5,table,Invoice,Invoice,396,CREATE TABLE [Invoice]\n(\n [InvoiceId] INT...
6,table,InvoiceLine,InvoiceLine,399,CREATE TABLE [InvoiceLine]\n(\n [InvoiceLin...
7,table,MediaType,MediaType,402,CREATE TABLE [MediaType]\n(\n [MediaTypeId]...
8,table,Playlist,Playlist,404,CREATE TABLE [Playlist]\n(\n [PlaylistId] I...
9,table,PlaylistTrack,PlaylistTrack,405,CREATE TABLE [PlaylistTrack]\n(\n [Playlist...


In [16]:
# Keep only the catalog rows that describe tables and collect their names
# into a plain Python list.
is_table = schema_df['type'] == 'table'
name_list = schema_df.loc[is_table, 'name'].tolist()
name_list

['Album',
 'Artist',
 'Customer',
 'Employee',
 'Genre',
 'Invoice',
 'InvoiceLine',
 'MediaType',
 'Playlist',
 'PlaylistTrack',
 'Track']

In [17]:
# Load every table of the database into its own DataFrame for easy exploration.
# For each table name:
#   1. build a SELECT * query, double-quoting the name so that tables whose
#      names contain spaces or reserved words still work
#   2. read the result into a DataFrame with pd.read_sql
#   3. store the frame in the table_dfs dict, keyed by the table name
#   4. also assign the frame to a global named '<table>_df' (e.g. Album_df)
#      via globals(), which later cells look up by that name

table_dfs = {}
for name in name_list:

    quoted_name = '"{}"'.format(name.replace('"', '""'))
    query = 'SELECT * FROM {}'.format(quoted_name)
    table_df = pd.read_sql(query, con)
    table_dfs[name] = table_df
    df_name = name + '_df'
    globals()[df_name] = table_df

In [18]:
# Names of the per-table DataFrames: each table name with a '_df' suffix
df_list = [f'{table}_df' for table in name_list]

In [19]:
# Print out the first rows of every table to provide a reference.
# Each frame is looked up by name in globals() instead of exec()-ing a
# generated code string: the lookup does the same job without evaluating
# arbitrary text as Python.

for df_name in df_list:
    print(f'Table {df_name}')
    display(globals()[df_name].head())

Table Album_df


Unnamed: 0,AlbumId,Title,ArtistId
0,1,For Those About To Rock We Salute You,1
1,2,Balls to the Wall,2
2,3,Restless and Wild,2
3,4,Let There Be Rock,1
4,5,Big Ones,3


Table Artist_df


Unnamed: 0,ArtistId,Name
0,1,AC/DC
1,2,Accept
2,3,Aerosmith
3,4,Alanis Morissette
4,5,Alice In Chains


Table Customer_df


Unnamed: 0,CustomerId,FirstName,LastName,Company,Address,City,State,Country,PostalCode,Phone,Fax,Email,SupportRepId
0,1,Luís,Gonçalves,Embraer - Empresa Brasileira de Aeronáutica S.A.,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,+55 (12) 3923-5566,luisg@embraer.com.br,3
1,2,Leonie,Köhler,,Theodor-Heuss-Straße 34,Stuttgart,,Germany,70174,+49 0711 2842222,,leonekohler@surfeu.de,5
2,3,François,Tremblay,,1498 rue Bélanger,Montréal,QC,Canada,H2G 1A7,+1 (514) 721-4711,,ftremblay@gmail.com,3
3,4,Bjørn,Hansen,,Ullevålsveien 14,Oslo,,Norway,0171,+47 22 44 22 22,,bjorn.hansen@yahoo.no,4
4,5,František,Wichterlová,JetBrains s.r.o.,Klanova 9/506,Prague,,Czech Republic,14700,+420 2 4172 5555,+420 2 4172 5555,frantisekw@jetbrains.com,4


Table Employee_df


Unnamed: 0,EmployeeId,LastName,FirstName,Title,ReportsTo,BirthDate,HireDate,Address,City,State,Country,PostalCode,Phone,Fax,Email
0,1,Adams,Andrew,General Manager,,1962-02-18 00:00:00,2002-08-14 00:00:00,11120 Jasper Ave NW,Edmonton,AB,Canada,T5K 2N1,+1 (780) 428-9482,+1 (780) 428-3457,andrew@chinookcorp.com
1,2,Edwards,Nancy,Sales Manager,1.0,1958-12-08 00:00:00,2002-05-01 00:00:00,825 8 Ave SW,Calgary,AB,Canada,T2P 2T3,+1 (403) 262-3443,+1 (403) 262-3322,nancy@chinookcorp.com
2,3,Peacock,Jane,Sales Support Agent,2.0,1973-08-29 00:00:00,2002-04-01 00:00:00,1111 6 Ave SW,Calgary,AB,Canada,T2P 5M5,+1 (403) 262-3443,+1 (403) 262-6712,jane@chinookcorp.com
3,4,Park,Margaret,Sales Support Agent,2.0,1947-09-19 00:00:00,2003-05-03 00:00:00,683 10 Street SW,Calgary,AB,Canada,T2P 5G3,+1 (403) 263-4423,+1 (403) 263-4289,margaret@chinookcorp.com
4,5,Johnson,Steve,Sales Support Agent,2.0,1965-03-03 00:00:00,2003-10-17 00:00:00,7727B 41 Ave,Calgary,AB,Canada,T3B 1Y7,1 (780) 836-9987,1 (780) 836-9543,steve@chinookcorp.com


Table Genre_df


Unnamed: 0,GenreId,Name
0,1,Rock
1,2,Jazz
2,3,Metal
3,4,Alternative & Punk
4,5,Rock And Roll


Table Invoice_df


Unnamed: 0,InvoiceId,CustomerId,InvoiceDate,BillingAddress,BillingCity,BillingState,BillingCountry,BillingPostalCode,Total
0,1,2,2009-01-01 00:00:00,Theodor-Heuss-Straße 34,Stuttgart,,Germany,70174,1.98
1,2,4,2009-01-02 00:00:00,Ullevålsveien 14,Oslo,,Norway,0171,3.96
2,3,8,2009-01-03 00:00:00,Grétrystraat 63,Brussels,,Belgium,1000,5.94
3,4,14,2009-01-06 00:00:00,8210 111 ST NW,Edmonton,AB,Canada,T6G 2C7,8.91
4,5,23,2009-01-11 00:00:00,69 Salem Street,Boston,MA,USA,2113,13.86


Table InvoiceLine_df


Unnamed: 0,InvoiceLineId,InvoiceId,TrackId,UnitPrice,Quantity
0,1,1,2,0.99,1
1,2,1,4,0.99,1
2,3,2,6,0.99,1
3,4,2,8,0.99,1
4,5,2,10,0.99,1


Table MediaType_df


Unnamed: 0,MediaTypeId,Name
0,1,MPEG audio file
1,2,Protected AAC audio file
2,3,Protected MPEG-4 video file
3,4,Purchased AAC audio file
4,5,AAC audio file


Table Playlist_df


Unnamed: 0,PlaylistId,Name
0,1,Music
1,2,Movies
2,3,TV Shows
3,4,Audiobooks
4,5,90’s Music


Table PlaylistTrack_df


Unnamed: 0,PlaylistId,TrackId
0,1,3402
1,1,3389
2,1,3390
3,1,3391
4,1,3392


Table Track_df


Unnamed: 0,TrackId,Name,AlbumId,MediaTypeId,GenreId,Composer,Milliseconds,Bytes,UnitPrice
0,1,For Those About To Rock (We Salute You),1,1,1,"Angus Young, Malcolm Young, Brian Johnson",343719,11170334,0.99
1,2,Balls to the Wall,2,2,1,,342562,5510424,0.99
2,3,Fast As a Shark,3,2,1,"F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...",230619,3990994,0.99
3,4,Restless and Wild,3,2,1,"F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...",252051,4331779,0.99
4,5,Princess of the Dawn,3,2,1,Deaffy & R.A. Smith-Diesel,375418,6290521,0.99


In [None]:
# Which CustomerId has the largest number of invoices?
# Count invoices per customer and keep the top row. Ties are broken by the
# lowest CustomerId; drop the LIMIT to see the full ranking.
query = """
SELECT CustomerId,
       COUNT(*) AS InvoiceCount
FROM Invoice
GROUP BY CustomerId
ORDER BY InvoiceCount DESC, CustomerId
LIMIT 1
"""
max_customer = pd.read_sql(query, con)
max_customer

In [None]:
# What song is most often purchased?
# "Purchased" is measured as the total Quantity over all invoice lines for a
# track. Ties are broken by the lowest TrackId; drop the LIMIT for the ranking.

query = """
SELECT t.TrackId,
       t.Name,
       SUM(il.Quantity) AS TimesPurchased
FROM InvoiceLine AS il
JOIN Track AS t
  ON t.TrackId = il.TrackId
GROUP BY t.TrackId, t.Name
ORDER BY TimesPurchased DESC, t.TrackId
LIMIT 1
"""


trackcount = pd.read_sql(query, con)
trackcount


In [None]:
# What is the average price of a track?
# Plain average of the catalog price (Track.UnitPrice) over all tracks.

query = """
SELECT AVG(UnitPrice) AS AvgUnitPrice
FROM Track
"""

avg_tracks = pd.read_sql(query, con)
avg_tracks


In [None]:
# Which employee has the most direct reports?
# Self-join Employee: each row e is matched to its manager m through
# e.ReportsTo, then reports are counted per manager. Employees with no
# ReportsTo value do not match anyone and are not counted as reports.

query = """
SELECT m.EmployeeId,
       m.FirstName,
       m.LastName,
       COUNT(*) AS DirectReports
FROM Employee AS e
JOIN Employee AS m
  ON m.EmployeeId = e.ReportsTo
GROUP BY m.EmployeeId, m.FirstName, m.LastName
ORDER BY DirectReports DESC, m.EmployeeId
LIMIT 1
"""

most_reports = pd.read_sql(query, con)
most_reports

In [None]:
# In what city does the customer with the largest purchase amount live?
# "Largest purchase amount" is read here as the highest total spend, i.e. the
# sum of Invoice.Total over all of a customer's invoices.
# NOTE(review): if the single largest invoice is meant instead, replace
# SUM(i.Total) with MAX(i.Total).

query = """
SELECT c.CustomerId,
       c.FirstName,
       c.LastName,
       c.City,
       SUM(i.Total) AS TotalSpent
FROM Customer AS c
JOIN Invoice AS i
  ON i.CustomerId = c.CustomerId
GROUP BY c.CustomerId, c.FirstName, c.LastName, c.City
ORDER BY TotalSpent DESC, c.CustomerId
LIMIT 1
"""

largest = pd.read_sql(query, con)
largest

In [None]:
# What country has the largest number of customers?
# `query` must be the SQL text itself: it is passed to pd.read_sql exactly
# once below (passing a DataFrame as the query would fail).
# Ties are broken alphabetically by country; drop the LIMIT for the ranking.

query = """
SELECT Country,
       COUNT(*) AS CustomerCount
FROM Customer
GROUP BY Country
ORDER BY CustomerCount DESC, Country
LIMIT 1
"""

country_largest_customers = pd.read_sql(query, con)
country_largest_customers

In [None]:
# What is the most popular song by country? Who is the artist?
# Step 1 (track_sales): total Quantity sold per (billing country, track),
#         with the artist reached through Track -> Album -> Artist.
# Step 2 (ranked): number the tracks within each country, best seller first;
#         ties are broken by the lowest TrackId.
# Step 3: keep the top-ranked track of every country.
# "Country" is the invoice's BillingCountry. Window functions (ROW_NUMBER)
# need SQLite 3.25 or newer.
query = """
WITH track_sales AS (
    SELECT i.BillingCountry AS Country,
           t.TrackId,
           t.Name           AS Track,
           ar.Name          AS Artist,
           SUM(il.Quantity) AS TimesPurchased
    FROM InvoiceLine AS il
    JOIN Invoice AS i
      ON i.InvoiceId = il.InvoiceId
    JOIN Track AS t
      ON t.TrackId = il.TrackId
    LEFT JOIN Album AS al
      ON al.AlbumId = t.AlbumId
    LEFT JOIN Artist AS ar
      ON ar.ArtistId = al.ArtistId
    GROUP BY i.BillingCountry, t.TrackId, t.Name, ar.Name
),
ranked AS (
    SELECT Country,
           Track,
           Artist,
           TimesPurchased,
           ROW_NUMBER() OVER (
               PARTITION BY Country
               ORDER BY TimesPurchased DESC, TrackId
           ) AS rn
    FROM track_sales
)
SELECT Country,
       Track,
       Artist,
       TimesPurchased
FROM ranked
WHERE rn = 1
ORDER BY Country
"""

most_pop = pd.read_sql(query, con)
most_pop

In [None]:
# What is the most common artist by playlist?

# What is the name of their most popular song?

# Step 1 (playlist_artist / top_artist): count the tracks each artist has in
#         each playlist and rank artists within a playlist; ties are broken by
#         the lowest ArtistId.
# Step 2 (artist_track_sales / top_track): for every artist, rank their tracks
#         by total Quantity sold across all invoices (0 when never sold); ties
#         are broken by the lowest TrackId.
# Step 3: join the top artist of each playlist to that artist's top track.
# Playlists without any track do not appear in the result.
# Window functions (ROW_NUMBER) need SQLite 3.25 or newer.
query = """
WITH playlist_artist AS (
    SELECT pt.PlaylistId,
           al.ArtistId,
           COUNT(*) AS TrackCount
    FROM PlaylistTrack AS pt
    JOIN Track AS t
      ON t.TrackId = pt.TrackId
    JOIN Album AS al
      ON al.AlbumId = t.AlbumId
    GROUP BY pt.PlaylistId, al.ArtistId
),
top_artist AS (
    SELECT PlaylistId,
           ArtistId,
           TrackCount,
           ROW_NUMBER() OVER (
               PARTITION BY PlaylistId
               ORDER BY TrackCount DESC, ArtistId
           ) AS rn
    FROM playlist_artist
),
artist_track_sales AS (
    SELECT al.ArtistId,
           t.TrackId,
           t.Name                        AS Track,
           COALESCE(SUM(il.Quantity), 0) AS TimesPurchased
    FROM Track AS t
    JOIN Album AS al
      ON al.AlbumId = t.AlbumId
    LEFT JOIN InvoiceLine AS il
      ON il.TrackId = t.TrackId
    GROUP BY al.ArtistId, t.TrackId, t.Name
),
top_track AS (
    SELECT ArtistId,
           Track,
           TimesPurchased,
           ROW_NUMBER() OVER (
               PARTITION BY ArtistId
               ORDER BY TimesPurchased DESC, TrackId
           ) AS rn
    FROM artist_track_sales
)
SELECT p.PlaylistId,
       p.Name            AS Playlist,
       ar.Name           AS Artist,
       ta.TrackCount     AS TracksInPlaylist,
       tt.Track          AS MostPopularTrack,
       tt.TimesPurchased AS TimesPurchased
FROM top_artist AS ta
JOIN Playlist AS p
  ON p.PlaylistId = ta.PlaylistId
JOIN Artist AS ar
  ON ar.ArtistId = ta.ArtistId
JOIN top_track AS tt
  ON tt.ArtistId = ta.ArtistId
 AND tt.rn = 1
WHERE ta.rn = 1
ORDER BY p.PlaylistId
"""

common_artist = pd.read_sql(query, con)
common_artist

In [None]:
# What song is in the most playlists?

# Who is the artist?

# Count the distinct playlists each track appears in and keep the top row;
# the artist is reached through Track -> Album -> Artist. Ties are broken by
# the lowest TrackId; drop the LIMIT to see the full ranking.
query = """
SELECT t.TrackId,
       t.Name                        AS Track,
       ar.Name                       AS Artist,
       COUNT(DISTINCT pt.PlaylistId) AS PlaylistCount
FROM PlaylistTrack AS pt
JOIN Track AS t
  ON t.TrackId = pt.TrackId
LEFT JOIN Album AS al
  ON al.AlbumId = t.AlbumId
LEFT JOIN Artist AS ar
  ON ar.ArtistId = al.ArtistId
GROUP BY t.TrackId, t.Name, ar.Name
ORDER BY PlaylistCount DESC, t.TrackId
LIMIT 1
"""

most_playlists = pd.read_sql(query, con)
most_playlists