In [1]:
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy import inspect
import pandas as pd

In [2]:
# Database connection.
# `connectionDB` is a SQLAlchemy URL for a SQLite file; the path after
# `sqlite:///` is relative (presumably to the notebook's working directory).
connectionDB = 'sqlite:///../../../datasets/publications.db'
# Engine used by every pd.read_sql_query call in the notebook.
engineDB = sqlalchemy.create_engine(connectionDB)
# Inspector used to list the tables and columns of the database.
inspector_db = sqlalchemy.inspect(engineDB)

In [3]:
# List the names of all tables the inspector finds in the database.
inspector_db.get_table_names()

['authors',
 'discounts',
 'employee',
 'jobs',
 'pub_info',
 'publishers',
 'roysched',
 'sales',
 'stores',
 'titleauthor',
 'titles']

## Challenge 1 - Who Has Published What, and Where?

In [4]:
# Print the column names of each table joined in Challenge 1, one list per table.
for table_name in ('authors', 'titleauthor', 'titles', 'publishers'):
    column_names = [column['name'] for column in inspector_db.get_columns(table_name)]
    print(column_names)

['au_id', 'au_lname', 'au_fname', 'phone', 'address', 'city', 'state', 'zip', 'contract']
['au_id', 'title_id', 'au_ord', 'royaltyper']
['title_id', 'title', 'type', 'pub_id', 'price', 'advance', 'royalty', 'ytd_sales', 'notes', 'pubdate']
['pub_id', 'pub_name', 'city', 'state', 'country']


In [5]:
# Challenge 1: one row per (author, title) link in `titleauthor`, together
# with the publisher of that title.
# Output columns:
#   AUTHOR ID  - the ID of the author
#   LAST NAME  - author last name
#   FIRST NAME - author first name
#   TITLE      - name of the published title
#   PUBLISHER  - name of the publisher where the title was published

query_1 = '''
SELECT a.au_id "AUTHOR ID", a.au_lname "LAST NAME", a.au_fname "FIRST NAME", 
    t.title "TITLE", p.pub_name "PUBLISHER"
FROM authors a
    INNER JOIN titleauthor ta ON a.au_id = ta.au_id
    INNER JOIN titles t ON ta.title_id = t.title_id
    INNER JOIN publishers p ON t.pub_id = p.pub_id
'''

# Run the query through the shared engine and display the resulting frame.
df_1 = pd.read_sql_query(sql=query_1, con=engineDB)
df_1

Unnamed: 0,AUTHOR ID,LAST NAME,FIRST NAME,TITLE,PUBLISHER
0,172-32-1176,White,Johnson,Prolonged Data Deprivation: Four Case Studies,New Moon Books
1,213-46-8915,Green,Marjorie,The Busy Executive's Database Guide,Algodata Infosystems
2,213-46-8915,Green,Marjorie,You Can Combat Computer Stress!,New Moon Books
3,238-95-7766,Carson,Cheryl,But Is It User Friendly?,Algodata Infosystems
4,267-41-2394,O'Leary,Michael,Cooking with Computers: Surreptitious Balance ...,Algodata Infosystems
5,267-41-2394,O'Leary,Michael,"Sushi, Anyone?",Binnet & Hardley
6,274-80-9391,Straight,Dean,Straight Talk About Computers,Algodata Infosystems
7,409-56-7008,Bennet,Abraham,The Busy Executive's Database Guide,Algodata Infosystems
8,427-17-2319,Dull,Ann,Secrets of Silicon Valley,Algodata Infosystems
9,472-27-2349,Gringlesby,Burt,"Sushi, Anyone?",Binnet & Hardley


In [6]:
# Count the non-null values in each column of the Challenge 1 result.
df_1.count()

AUTHOR ID     25
LAST NAME     25
FIRST NAME    25
TITLE         25
PUBLISHER     25
dtype: int64

In [7]:
# Load the whole `titleauthor` table and count the non-null values per column.
query_count = "SELECT * FROM titleauthor"
df_count = pd.read_sql_query(sql=query_count, con=engineDB)
df_count.count()

au_id         25
title_id      25
au_ord        25
royaltyper    25
dtype: int64

## Challenge 2 - Who Has Published How Many, and Where?

In [8]:
# Challenge 2: number of titles per (author, publisher) pair, largest first.
# Output columns:
#   AUTHOR ID / LAST NAME / FIRST NAME - the author
#   PUBLISHER                          - publisher name
#   TITLE CONT                         - number of joined rows in the group
query_2 = '''
SELECT a.au_id "AUTHOR ID", a.au_lname "LAST NAME", a.au_fname "FIRST NAME", 
    p.pub_name "PUBLISHER", COUNT(*) "TITLE CONT"
FROM authors a
    INNER JOIN titleauthor ta ON a.au_id = ta.au_id
    INNER JOIN titles t ON ta.title_id = t.title_id
    INNER JOIN publishers p ON t.pub_id = p.pub_id
GROUP BY a.au_id, a.au_lname, a.au_fname, p.pub_name
ORDER BY "TITLE CONT" DESC
'''

# Run the query through the shared engine and display the resulting frame.
df_2 = pd.read_sql_query(sql=query_2, con=engineDB)
df_2

Unnamed: 0,AUTHOR ID,LAST NAME,FIRST NAME,PUBLISHER,TITLE CONT
0,998-72-3567,Ringer,Albert,New Moon Books,2
1,172-32-1176,White,Johnson,New Moon Books,1
2,213-46-8915,Green,Marjorie,Algodata Infosystems,1
3,213-46-8915,Green,Marjorie,New Moon Books,1
4,238-95-7766,Carson,Cheryl,Algodata Infosystems,1
5,267-41-2394,O'Leary,Michael,Algodata Infosystems,1
6,267-41-2394,O'Leary,Michael,Binnet & Hardley,1
7,274-80-9391,Straight,Dean,Algodata Infosystems,1
8,409-56-7008,Bennet,Abraham,Algodata Infosystems,1
9,427-17-2319,Dull,Ann,Algodata Infosystems,1


In [9]:
# Sanity check for Challenge 2: add up the per-(author, publisher) counts.
# The Challenge 2 query (`query_2`) is wrapped as a subquery instead of being
# pasted a second time, so this check always verifies the query that was
# actually run above and cannot drift from it.
query_sum = f'''
SELECT SUM("TITLE CONT") "SUM TITLE CONT"
FROM
(
{query_2}
)
'''

# Single-row frame holding the grand total of "TITLE CONT".
df_sum = pd.read_sql_query(query_sum, engineDB)
df_sum

Unnamed: 0,SUM TITLE CONT
0,25


## Challenge 3 - Best Selling Authors

In [10]:
# List the names of all tables the inspector finds in the database.
inspector_db.get_table_names()

['authors',
 'discounts',
 'employee',
 'jobs',
 'pub_info',
 'publishers',
 'roysched',
 'sales',
 'stores',
 'titleauthor',
 'titles']

In [11]:
# Print the column names of each table joined in the sales challenges, one list per table.
for table_name in ('authors', 'sales', 'titleauthor'):
    column_names = [column['name'] for column in inspector_db.get_columns(table_name)]
    print(column_names)

['au_id', 'au_lname', 'au_fname', 'phone', 'address', 'city', 'state', 'zip', 'contract']
['stor_id', 'ord_num', 'ord_date', 'qty', 'payterms', 'title_id']
['au_id', 'title_id', 'au_ord', 'royaltyper']


In [16]:
# Challenge 3: the three authors with the most units sold.
# Output columns:
#   AUTHOR ID   - the ID of the author
#   LAST NAME   - author last name
#   FIRST NAME  - author first name
#   TOTAL       - number of `sales` rows joined to the author's titles
#   TOTAL UNITS - sum of `sales.qty` over those rows (the ranking key)
#
# `a.au_id` is a secondary sort key: several authors can have the same
# TOTAL UNITS, and without a tie-breaker the rows kept by LIMIT 3 would be
# arbitrary whenever the tie straddles the cut-off.

query_3 = '''
SELECT a.au_id "AUTHOR ID", a.au_lname "LAST NAME", a.au_fname "FIRST NAME",
    COUNT(*) "TOTAL", SUM(s.qty) "TOTAL UNITS"
FROM authors a
    INNER JOIN titleauthor ta ON a.au_id = ta.au_id
    INNER JOIN sales s ON s.title_id = ta.title_id
GROUP BY a.au_id, a.au_lname, a.au_fname
ORDER BY "TOTAL UNITS" DESC, a.au_id
LIMIT 3
'''

# Run the query through the shared engine and display the resulting frame.
df_3 = pd.read_sql_query(query_3, engineDB)
df_3

Unnamed: 0,AUTHOR ID,LAST NAME,FIRST NAME,TOTAL,TOTAL UNITS
0,899-46-2035,Ringer,Anne,6,148
1,998-72-3567,Ringer,Albert,5,133
2,213-46-8915,Green,Marjorie,3,50


## Challenge 4 - Best Selling Authors Ranking

In [17]:
# Challenge 4: every author ranked by units sold, including authors with no sales.
# Output columns:
#   AUTHOR ID   - the ID of the author
#   LAST NAME   - author last name
#   FIRST NAME  - author first name
#   TOTAL       - number of `sales` rows joined to the author's titles
#   TOTAL UNITS - sum of `sales.qty` over those rows, 0 when there are none
#
# The LEFT JOINs keep authors that have no title or no sale; for them the
# `sales` columns are NULL. COUNT(s.title_id) skips those NULLs and yields 0,
# whereas COUNT(*) would count the NULL-padded row and report at least 1.
# `a.au_id` breaks ties so authors with equal TOTAL UNITS keep a stable order.

query_4 = '''
SELECT a.au_id "AUTHOR ID", a.au_lname "LAST NAME", a.au_fname "FIRST NAME",
    COUNT(s.title_id) "TOTAL", IFNULL(SUM(s.qty),0) "TOTAL UNITS"
FROM authors a
    LEFT JOIN titleauthor ta ON a.au_id = ta.au_id
    LEFT JOIN sales s ON s.title_id = ta.title_id
GROUP BY a.au_id, a.au_lname, a.au_fname
ORDER BY "TOTAL UNITS" DESC, a.au_id
'''

# Run the query through the shared engine and display the resulting frame.
df_4 = pd.read_sql_query(query_4, engineDB)
df_4

Unnamed: 0,AUTHOR ID,LAST NAME,FIRST NAME,TOTAL,TOTAL UNITS
0,899-46-2035,Ringer,Anne,6,148
1,998-72-3567,Ringer,Albert,5,133
2,213-46-8915,Green,Marjorie,3,50
3,427-17-2319,Dull,Ann,1,50
4,846-92-7186,Hunter,Sheryl,1,50
5,267-41-2394,O'Leary,Michael,2,45
6,724-80-9391,MacFeather,Stearns,2,45
7,722-51-5454,DeFrance,Michel,2,40
8,807-91-6654,Panteley,Sylvia,1,40
9,238-95-7766,Carson,Cheryl,1,30
