# SQLite3

### Notes:
* [DB Browser for SQLite](http://sqlitebrowser.org/) is an excellent way to graphically browse the database

### Basic Concept

[Using sqlite3 Python module](https://docs.python.org/3.5/library/sqlite3.html?highlight=sqlite#module-sqlite3)

import sqlite3

In [2]:
# set up a connection with the database
conn = sqlite3.connect('sample_data/chinook.db')

In [3]:
# create a cursor object
c = conn.cursor()

In [4]:
# use the cursor object's execute() method to perform a query
x = c.execute("SELECT * FROM Album")

In [5]:
# print out all rows from the execute method
for row in x:
    print(row)

(1, 'For Those About To Rock We Salute You', 1)
(2, 'Balls to the Wall', 2)
(3, 'Restless and Wild', 2)
(4, 'Let There Be Rock', 1)
(5, 'Big Ones', 3)
(6, 'Jagged Little Pill', 4)
(7, 'Facelift', 5)
(8, 'Warner 25 Anos', 6)
(9, 'Plays Metallica By Four Cellos', 7)
(10, 'Audioslave', 8)
(11, 'Out Of Exile', 8)
(12, 'BackBeat Soundtrack', 9)
(13, 'The Best Of Billy Cobham', 10)
(14, 'Alcohol Fueled Brewtality Live! [Disc 1]', 11)
(15, 'Alcohol Fueled Brewtality Live! [Disc 2]', 11)
(16, 'Black Sabbath', 12)
(17, 'Black Sabbath Vol. 4 (Remaster)', 12)
(18, 'Body Count', 13)
(19, 'Chemical Wedding', 14)
(20, 'The Best Of Buddy Guy - The Millenium Collection', 15)
(21, 'Prenda Minha', 16)
(22, 'Sozinho Remix Ao Vivo', 16)
(23, 'Minha Historia', 17)
(24, 'Afrociberdelia', 18)
(25, 'Da Lama Ao Caos', 18)
(26, 'Acústico MTV [Live]', 19)
(27, 'Cidade Negra - Hits', 19)
(28, 'Na Pista', 20)
(29, 'Axé Bahia 2001', 21)
(30, 'BBC Sessions [Disc 1] [Live]', 22)
(31, 'Bongo Fury', 23)
(32, 'Carnaval 

In [6]:
# be sure to close the connection to the database when done
conn.close()

### Use-Case

##### Queries

In [7]:
# Use pandas to render table as a dataframe
import pandas as pd
import sqlite3

# create object to connect to database
conn = sqlite3.connect('sample_data/chinook.db')
conn

<sqlite3.Connection at 0x1173232d0>

In [8]:
# make any Query
query = "SELECT * FROM Album"

In [None]:
# render query as dataframe
df = pd.read_sql_query(query, conn)
df.head()

##### Commands - Adding rows

In [None]:
# create cursor object
c = conn.cursor()
c

In [None]:
# create a command to add something to the table
command = "INSERT INTO Album VALUES (3723,'grapes',273)"

# execute the command against the cursor object
c.execute(command)

In [None]:
# commit the changes to the database
conn.commit()

In [None]:
# render dataframe using the original query object
df = pd.read_sql(query, conn)
df.tail()

##### Commands - Delete rows

In [None]:
# create a command to delete something from the table
command = '''
DELETE FROM Album 
WHERE AlbumId in (3723, 353)
'''

# execute the command against the cursor object
c.execute(command)

In [None]:
# commit the changes to the database
conn.commit()

In [None]:
# render dataframe
df = pd.read_sql(query, conn)
df.tail()

In [None]:
# be sure to close the connection to the database when done
conn.close()

##### Commands - Add Schema (create a new database)

In [None]:
# set up the connection, import declarations and the cursor
import sqlite3
import pandas as pd

conn = sqlite3.connect('sample_data/UdaciousMusic.db')

c = conn.cursor()
c

In [None]:
# Create the table that the INSERT statements and the SELECT in the following
# cells operate on; its name has to be InvoiceLine2 to match them.
# NOTE(review): the FOREIGN KEY clauses reference Invoice and Track, which this
# notebook does not create in UdaciousMusic.db -- confirm they exist there or
# that the references are intentionally unenforced.
command = '''CREATE TABLE InvoiceLine2
(
    InvoiceLineId INTEGER PRIMARY KEY,
    InvoiceId INTEGER,
    TrackId INTEGER,
    UnitPrice NUMERIC(10,2),
    Quantity INTEGER,
    FOREIGN KEY (InvoiceId) REFERENCES Invoice (InvoiceId),
    FOREIGN KEY (TrackId) REFERENCES Track (TrackId)
);
'''

# run the command
c.execute(command)

Add data to the new table:

In [None]:
# one INSERT statement per sample row for the InvoiceLine2 table
sql_command_list = [
    "INSERT INTO [InvoiceLine2] ([InvoiceLineId], [InvoiceId], [TrackId], [UnitPrice], [Quantity]) VALUES (1, 1, 2, 0.99, 1);",
    "INSERT INTO [InvoiceLine2] ([InvoiceLineId], [InvoiceId], [TrackId], [UnitPrice], [Quantity]) VALUES (2, 1, 4, 0.99, 1);",
    "INSERT INTO [InvoiceLine2] ([InvoiceLineId], [InvoiceId], [TrackId], [UnitPrice], [Quantity]) VALUES (3, 2, 6, 0.99, 1);",
    "INSERT INTO [InvoiceLine2] ([InvoiceLineId], [InvoiceId], [TrackId], [UnitPrice], [Quantity]) VALUES (4, 2, 8, 0.99, 1);",
    "INSERT INTO [InvoiceLine2] ([InvoiceLineId], [InvoiceId], [TrackId], [UnitPrice], [Quantity]) VALUES (5, 2, 10, 0.99, 1);",
    "INSERT INTO [InvoiceLine2] ([InvoiceLineId], [InvoiceId], [TrackId], [UnitPrice], [Quantity]) VALUES (6, 2, 12, 0.99, 1);",
    "INSERT INTO [InvoiceLine2] ([InvoiceLineId], [InvoiceId], [TrackId], [UnitPrice], [Quantity]) VALUES (7, 3, 16, 0.99, 1);",
    "INSERT INTO [InvoiceLine2] ([InvoiceLineId], [InvoiceId], [TrackId], [UnitPrice], [Quantity]) VALUES (8, 4, 17, 1.99, 2);"
]

# Run the batch inside `with conn:` so the rows are committed when every INSERT
# succeeds and rolled back if one of them raises. Closing the connection does
# not commit, so without this the inserted rows would be lost on conn.close().
with conn:
    for command in sql_command_list:
        c.execute(command)

In [None]:
query = "SELECT * FROM InvoiceLine2"

df = pd.read_sql(query, conn)
df

In [None]:
# be sure to close the connection to the database when done
conn.close()

### Queries on the Chinook database

In [None]:
# set up the connection, import declarations and the cursor
import sqlite3
import pprint

# connect to database
conn = sqlite3.connect('sample_data/chinook.db')

# make cursor object
c = conn.cursor()
c

Create my own quick function for printing the rows from the queries:

In [None]:
def ppdb(query):
    """Print each row yielded by ``query`` on its own line.

    ``query`` is simply iterated, so any iterable of rows works; nothing is
    printed when it yields no rows.
    """
    for record in query:
        print(record)

In [None]:
##  First, you'd like to run a promotion targeting the 3 countries with the 
##  highest number of invoices.  

##  Write a query that returns the 3 countries with the highest number of invoices, along with the number 
##  of invoices for these countries.

# COUNT(*) counts the rows that fall into each group
# GROUP BY collapses all rows sharing a BillingCountry into a single row
# ORDER BY ... DESC sorts on the aggregated count, largest first
top_countries_sql = """
SELECT
    Invoice.BillingCountry,
    COUNT(*) AS Total
FROM Invoice
GROUP BY Invoice.BillingCountry
ORDER BY Total DESC
LIMIT 3
"""
query = c.execute(top_countries_sql)

ppdb(query)

In [None]:
##  The customer who has spent the most money will be declared your best customer.
##  They definitely deserve an email thanking them for their patronage :)  

##  Build a query that returns the person who has the highest sum of all invoices,
##  along with their email, first name, and last name.

'''
---VISUAL GUIDE---

Before Query...

###############         ####################   
#  Customer   #         #     Invoice      #  
###############         ####################   
|  CustomerId | = ON  = | CustomerId       |  <-----  FROM/JOIN
+=============+         +==================+  
|  FirstName  |         | Total            |  <-----  sum Total and limit
+=============+         +==================+          to highest sum
|  LastName   |    
+=============+    
|  Email      |               
+=============+

After Query...

###################################################   
#             CustomerInvoice                     #   <-----  RESULT!
###################################################   
|  Email  |  FirstName | LastName    |    Total   |
+=========+============+=============+============+

'''

# sum every customer's invoice totals and keep only the largest sum
best_customer_sql = """
SELECT
    Customer.Email,
    Customer.FirstName,
    Customer.LastName,
    SUM(Invoice.Total) AS Total
FROM Customer
    LEFT JOIN Invoice
        ON Customer.CustomerId = Invoice.CustomerId
GROUP BY Customer.CustomerId
ORDER BY Total DESC
LIMIT 1
"""
query = c.execute(best_customer_sql)

ppdb(query)

In [None]:
##  Rock Music Lives on!  After the success of your recent email campaign,  
##  you're interested in targeting your long standing Rock Music audience!
##  You'll need to collect a list of emails containing each of your Rock Music listeners.

##  Use your query to return the email, first name, last name, and Genre of all Rock Music listeners!
##  Return your list ordered alphabetically by email address starting with A.
##  Can you find a way to deal with duplicate email addresses so no one receives multiple emails?

# Plain (inner) JOINs: the WHERE clause keeps only rows that have a matching
# Genre row, so the unmatched rows a LEFT JOIN would add are filtered out anyway.
# GROUP BY Customer.CustomerId collapses each customer to a single row, so
# nobody appears in the mailing list twice.
# NOTE(review): assumes GenreId 1 is the Rock genre -- confirm against the Genre table.
query = c.execute("""
SELECT
Customer.Email,
Customer.FirstName,
Customer.LastName,
Genre.Name AS Genre
FROM Customer
    JOIN Invoice
        ON Customer.CustomerId=Invoice.CustomerId
    JOIN InvoiceLine
        ON Invoice.InvoiceId=InvoiceLine.InvoiceId
    JOIN Track
        ON InvoiceLine.TrackId=Track.TrackId
    JOIN Genre
        ON Track.GenreId=Genre.GenreId
WHERE Genre.GenreId = 1
GROUP BY Customer.CustomerId
ORDER BY Customer.Email ASC
""")

ppdb(query)

In [None]:
##  Let's throw a promotional Music Festival in the city with the best customers!
##  Which city have you made the most money from?

##  Write a query that returns the 1 city that has the highest sum of invoice totals.
##  Return both the city name and the sum of all invoice totals.

'''
---VISUAL GUIDE---

Before Query...

#######################
#        Invoice      #            <--- FROM 
#######################
| BillingCity | Total |           <--- SELECT 
+-------------+-------+

After Query...

##################################   
#          Invoice               #   <-----  RESULT!
##################################   
|  BillingCity  |  sum(Total)    |
+===============+================+
|   Top City      Total Invoices |
+---------------+----------------+
'''

# sum the invoice totals per billing city and keep the city with the largest sum
top_city_sql = """
SELECT 
BillingCity, SUM(Total ) 
FROM Invoice 
GROUP BY BillingCity 
ORDER BY SUM(Total) DESC
limit 1
"""
query = c.execute(top_city_sql)

ppdb(query)

In [None]:
##  It would be really helpful to know what type of music everyone likes before 
##  throwing this festival.
##  Lucky for us we've got the data to find out!  
##  We should be able to tell what music people like by figuring out what music they're buying.

##  Write a query that returns the BillingCity, the total number of invoices 
##  associated with that particular genre, and the genre Name.

##  Return the top 3 most popular music genres for the city Prague
##  with the highest invoice total (you found this in the previous quiz!)

'''
---Visual Guide---

Before Query...

###############       #################       #############      #############
#  Invoice    #       #  InvoiceLine  #       #   Track   #      #   Genre   #
###############       #################       #############      #############
|  InvoiceId  | --->  |  InvoiceId    |       |  GenreId  | ---> |  GenreId  |
+-------------+       +---------------+       +-----------+      +-----------+
|  BillingCity|       |  TrackId      |  ---> |  TrackId  |      |  Name     |  
+-------------+       +---------------+       +-----------+      +-----------+

After Query..

#######################################
#            InvoiceGenre             #
#######################################
|  BillingCity  |  COUNT(*)  |  Name  |
+---------------+------------+--------+

'''

# for invoices billed to Prague, count the joined rows per genre name and keep
# the three genres with the highest counts
prague_genres_sql = """
SELECT 
Invoice.BillingCity,
COUNT(Genre.Name),
Genre.Name
FROM Invoice 
    JOIN InvoiceLine
        ON Invoice.InvoiceId=InvoiceLine.InvoiceId
    JOIN Track
        ON InvoiceLine.TrackId=Track.TrackID
    JOIN Genre
        ON Track.GenreId=Genre.GenreId
WHERE Invoice.BillingCity = 'Prague'
GROUP BY Genre.Name
ORDER BY COUNT(Genre.Name) DESC
limit 3
"""
query = c.execute(prague_genres_sql)

ppdb(query)

In [None]:
##  Now that we know that our customers love rock music, we can decide which musicians to 
##  invite to play at the concert. 

##  Let's invite the artists who have written the most rock music in our dataset.
##  Write a query that returns the Artist name and total track count of the top 10 rock bands. 

'''
---Visual Guide---

Before Query...

#############      #############      #############      ############
#    Genre  #      #   Track   #      #   Album   #      #  Artist  #
#############      #############      #############      ############
|  GenreId  | ---> |  GenreId  |      |  ArtistId  | --->| ArtistId |
+-----------+      +-----------+      +-----------+      +----------+
|  Name     |      |  AlbumId   |---> |  AlbumId  |      |  Name    |
+-----------+      +-----------+      +-----------+      +----------+

After Query...

#######################################
#             GenreArtist             #
#######################################
|  Artist.Name  |  COUNT(Genre.Name)  |
+---------------+---------------------+

'''

# restrict to GenreId 1, count the joined Track rows per artist name and keep
# the ten artists with the highest counts
rock_artists_sql = """
SELECT 
Artist.Name as Artist, 
COUNT(Genre.Name) as count 
FROM Genre
    JOIN Track
        ON Genre.GenreId=Track.GenreId
    JOIN Album
        ON Track.AlbumId=Album.AlbumId
    JOIN Artist
        ON Album.ArtistId=Artist.ArtistId
WHERE Genre.GenreId = 1
GROUP BY Artist.Name
ORDER BY COUNT(Genre.Name) DESC
limit 10
"""
query = c.execute(rock_artists_sql)

ppdb(query)

In [None]:
##  The show was a huge hit! Congratulations on all your hard work :)  
##  After the popularity of your first show you've decided to jump on the
##  railway for an Alternative & Punk tour through France!  

##  What does the alternative punk scene look like throughout French 
##  cities in your dataset?

##  Return the BillingCities in France, followed by the total number of 
##  tracks purchased for Alternative & Punk music.

##  Order your output so that the city with the highest total number of
##  tracks purchased is on top.

'''
---Visual Guide---

Before Query...

#################       #################       #############      #############
#    Invoice    #       #  InvoiceLine  #       #   Track   #      #   Genre   #
#################       #################       #############      #############
|  InvoiceId    | --->  |  InvoiceId    |       |  GenreId  | ---> |  GenreId  |
+---------------+       +---------------+       +-----------+      +-----------+
|  BillingCity| |       |  TrackId      |  ---> |  TrackId  |      |  Name     |  
+---------------+       +---------------+       +-----------+      +-----------+
| BillingCountry|
+---------------+

After Query..

###############################
#        InvoiceGenre         #
###############################
|  BillingCity  |  NumTracks  |
+---------------+-------------+

'''

# for invoices billed to France, count the joined rows with GenreId 4 per
# billing city, listing the city with the most purchases first
french_punk_sql = """
SELECT
Invoice.BillingCity,
Count(Genre.Name) AS NumTracks
FROM Invoice
    JOIN InvoiceLine
        ON Invoice.InvoiceId=InvoiceLine.InvoiceId
    JOIN Track
        ON InvoiceLine.TrackId=Track.TrackId
    JOIN Genre
        ON Track.GenreId=Genre.GenreId
WHERE Genre.GenreId = 4 and Invoice.BillingCountry = 'France'
GROUP BY Invoice.BillingCity
ORDER BY NumTracks DESC
"""
query = c.execute(french_punk_sql)

ppdb(query)