# Lecture 1 - Relating


In [1]:
# Let's import the required libraries
import sqlite3
import pandas as pd

# Open longlist.db (the SQLite database used in CS50) and keep the handle in
# `conn`, which the query cells below pass to pandas.
conn = sqlite3.connect(database="longlist.db")


In [7]:
# Show one row per table registered in sqlite_master, including its CREATE statement.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM sqlite_master
    WHERE type = 'table';
    """,
    con=conn,
)



Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,authors,authors,2,"CREATE TABLE ""authors"" (\n ""id"" INTEGER,\n ..."
1,table,authored,authored,3,"CREATE TABLE ""authored"" (\n ""author_id"" INT..."
2,table,books,books,4,"CREATE TABLE ""books"" (\n ""id"" INTEGER,\n ..."
3,table,publishers,publishers,5,"CREATE TABLE ""publishers"" (\n ""id"" INTEGER,..."
4,table,ratings,ratings,6,"CREATE TABLE ""ratings"" (\n ""book_id"" INTEGE..."
5,table,translators,translators,7,"CREATE TABLE ""translators"" (\n ""id"" INTEGER..."
6,table,translated,translated,8,"CREATE TABLE ""translated"" (\n ""translator_i..."


### Tables

In [13]:
# Peek at the first three rows of the authors table.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM authors
    LIMIT 3;
    """,
    con=conn,
)

Unnamed: 0,id,name,country,birth
0,1,Adania Shibli,Palestine,1974
1,2,Ahmed Saadawi,Iraq,1973
2,3,Alia Trabucco Zerán,Chile,1983


In [14]:
# Peek at the first three rows of the books table.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM books
    LIMIT 3;
    """,
    con=conn,
)

Unnamed: 0,id,isbn,title,publisher_id,format,pages,published,year
0,1,9788439736967,Boulder,10,paperback,112,2022-08-02,2023
1,2,9781628971538,Whale,3,paperback,368,2023-01-19,2023
2,3,9781642861181,The Gospel According to the New World,32,paperback,184,2023-03-07,2023


In [16]:
# Peek at the first three rows of authored, which pairs author_id with book_id.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM authored
    LIMIT 3;
    """,
    con=conn,
)

Unnamed: 0,author_id,book_id
0,23,1
1,13,2
2,49,3


### Keys

#### Primary key: an identifier that is unique for every item in a table

#### Foreign Keys: a primary key taken from a different table

It is worth noting that foreign key values can be repeated within a table, but primary key values are always unique.

### Subqueries or Nested queries

A subquery is a query inside another query. These are also called nested queries.

In [17]:
# Fetch a single books row to inspect its columns (note publisher_id).
pd.read_sql_query(
    sql="""
    SELECT *
    FROM books
    LIMIT 1;
    """,
    con=conn,
)

Unnamed: 0,id,isbn,title,publisher_id,format,pages,published,year
0,1,9788439736967,Boulder,10,paperback,112,2022-08-02,2023


In the books table, we have an ID to indicate the publisher, which is a foreign key taken from the publishers table

In [18]:
# Fetch a single publishers row to inspect its columns.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM "publishers"
    LIMIT 1;
    """,
    con=conn,
)

Unnamed: 0,id,publisher
0,1,And Other Stories


#### - Task 1: to find out the books published by Fitzcarraldo Editions

In [23]:
# Step 1 of Task 1: look up the publishers row for Fitzcarraldo Editions.
pd.read_sql_query(
    sql="""
SELECT "id" ,"publisher"
FROM "publishers" 
WHERE "publisher"='Fitzcarraldo Editions';
""",
    con=conn,
)

Unnamed: 0,id,publisher
0,5,Fitzcarraldo Editions


In [22]:
# Step 2 of Task 1: the subquery resolves the publisher's id, and the outer
# query selects the books carrying that publisher_id.
pd.read_sql_query(
    sql="""
SELECT "isbn","title", "publisher_id" 
FROM "books" 
WHERE "publisher_id"= (SELECT "id" FROM "publishers" WHERE "publisher"='Fitzcarraldo Editions');
""",
    con=conn,
)

Unnamed: 0,isbn,title,publisher_id
0,9781804270288,While We Were Dreaming,5
1,9781913097660,Still Born,5
2,9781913097875,Paradais,5
3,9781913097721,A New Name: Septology VI-VII,5
4,9781910695593,The Books of Jacob,5
5,9781913097530,In Memory of Memory,5
6,9781913097172,Minor Detail,5
7,9781910695913,The Other Name: Septology I-II,5
8,9781913097097,Hurricane Season,5
9,9781910695715,Drive Your Plow Over the Bones of the Dead,5


#### - Task 2: To find all the ratings for the book In Memory of Memory

In [24]:
# Fetch a single ratings row to inspect its columns.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM "ratings"
    LIMIT 1;
    """,
    con=conn,
)

Unnamed: 0,book_id,rating
0,1,3


In [27]:
# Task 2: the subquery finds the book's id by title; the outer query returns
# every rating recorded for that book_id.
pd.read_sql_query(
    sql="""
    SELECT "rating","book_id"
    FROM "ratings"
    WHERE "book_id"=(
        SELECT "id"
        FROM "books"
        WHERE "title"='In Memory of Memory'
    );
    """,
    con=conn,
)

Unnamed: 0,rating,book_id
0,3,33
1,4,33
2,4,33
3,4,33
4,3,33
...,...,...
1549,4,33
1550,4,33
1551,4,33
1552,4,33


In [29]:
# Run the inner lookup on its own to see which id the title resolves to.
pd.read_sql_query(
    sql="""
SELECT "id","title"
FROM "books"
WHERE "title"='In Memory of Memory';
""",
    con=conn,
)

Unnamed: 0,id,title
0,33,In Memory of Memory


#### - Task 3: To compute the average rating for the book In Memory of Memory

In [31]:
# Task 3: same filter as Task 2, but aggregate the ratings with AVG.
pd.read_sql_query(
    sql="""
    SELECT AVG("rating")
    FROM "ratings"
    WHERE "book_id"=(
        SELECT "id"
        FROM "books"
        WHERE "title"='In Memory of Memory'
    );
    """,
    con=conn,
)

Unnamed: 0,"AVG(""rating"")"
0,3.86036


#### - Task 4: To find the author(s) who wrote the book Flights

In [32]:
# Fetch a single authored row to recall its (author_id, book_id) columns.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM "authored"
    LIMIT 1;
    """,
    con=conn,
)

Unnamed: 0,author_id,book_id
0,23,1


In [35]:
# Task 4: list every author of the book 'Flights'.
# Innermost query: title -> book id. Middle query: book id -> author id(s).
# The outer filter uses IN rather than "=": a scalar "=" comparison only looks
# at the first row of the subquery, so co-authors of a multi-author book would
# be silently dropped.
pd.read_sql_query(
    """
    SELECT "name"
    FROM "authors"
    WHERE "id" IN (
        SELECT "author_id"
        FROM "authored"
        WHERE "book_id"=(
            SELECT "id"
            FROM "books"
            WHERE "title"='Flights'
        )
    );
    """,
    conn
)

Unnamed: 0,name
0,Olga Tokarczuk


### IN - check whether a value is in a given list or set of values.

Task: To find the names of all books in the database written by Fernanda Melchor

In [36]:
# Collect the book_id values linked to Fernanda Melchor: the subquery resolves
# her author id, and the outer query filters authored by it.
pd.read_sql_query(
    sql="""
SELECT "book_id"
FROM "authored"
WHERE "author_id" = (
    SELECT "id"
    FROM "authors"
    WHERE "name" = 'Fernanda Melchor'
);
""",
    con=conn,
)

Unnamed: 0,book_id
0,14
1,48


We have to use the IN keyword as follows:

In [41]:
# Return the full books rows for Fernanda Melchor's books.
# The nested query yields several book_id values, so the outer filter uses IN
# to match any of them.
pd.read_sql_query(
    sql="""
SELECT *
FROM "books"
WHERE "id" IN (
    SELECT "book_id"
    FROM "authored"
    WHERE "author_id" = (
        SELECT "id"
        FROM "authors"
        WHERE "name" = 'Fernanda Melchor'
    )
);
""",
    con=conn,
)

Unnamed: 0,id,isbn,title,publisher_id,format,pages,published,year
0,14,9781913097875,Paradais,5,paperback,118,2022-03-23,2022
1,48,9781913097097,Hurricane Season,5,paperback,229,2020-02-19,2020


47   -- Using IN to search for multiple authors
48   SELECT "title" FROM "books" WHERE "id" IN (
49       SELECT "book_id" FROM "authored" WHERE "author_id" IN (
50           SELECT "id" FROM "authors" WHERE "name" IN ('Fernanda Melchor', 'Annie Ernaux')
51       )
52   );
nested.sql


In [42]:
# Close the longlist.db connection now that the subquery examples are done.
conn.close()


### JOIN

#### Tables

In [44]:
# Point `conn` at sea_lions.db, the database used for the JOIN examples.
conn = sqlite3.connect(database="sea_lions.db")

# List the tables this database contains.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM sqlite_master
    WHERE type = 'table';
    """,
    con=conn,
)

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,sea_lions,sea_lions,3,"CREATE TABLE ""sea_lions"" (\n ""id"" INTEGER,\..."
1,table,migrations,migrations,2,"CREATE TABLE ""migrations"" (\n ""id"" INTEGER,..."


In [48]:
# Display the whole sea_lions table.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM "sea_lions"
    """,
    con=conn,
)

Unnamed: 0,id,name,species
0,10484,Ayah,Zalophus californianus
1,11728,Spot,Zalophus californianus
2,11729,Tiger,Zalophus californianus
3,11732,Mabel,Zalophus californianus
4,11734,Rick,Zalophus californianus
5,11790,Jolee,Zalophus californianus


In [47]:
# Display the whole migrations table.
pd.read_sql_query(
    sql="""
    SELECT *
    FROM "migrations"
    """,
    con=conn,
)

Unnamed: 0,id,distance,days
0,10484,1000,107
1,11728,1531,56
2,11729,1370,37
3,11732,1622,62
4,11734,1491,58
5,11735,2723,82
6,11736,1571,52
7,11737,1957,92


#### JOIN (INNER) - keeps only the rows whose id appears in both tables

In [50]:
# Inner join: combine sea_lions and migrations rows that share the same id.
join_inner = """
SELECT * FROM "sea_lions"
JOIN migrations ON "migrations"."id" = "sea_lions"."id";
"""
df = pd.read_sql_query(sql=join_inner, con=conn)
print(df)



      id   name                 species     id  distance  days
0  10484   Ayah  Zalophus californianus  10484      1000   107
1  11728   Spot  Zalophus californianus  11728      1531    56
2  11729  Tiger  Zalophus californianus  11729      1370    37
3  11732  Mabel  Zalophus californianus  11732      1622    62
4  11734   Rick  Zalophus californianus  11734      1491    58


#### LEFT JOIN - keeps every row of the left table (sea_lions); unmatched migrations columns are empty (NaN)

In [52]:
# Left join: start from sea_lions and attach migrations columns where the id matches.
join = """
SELECT * FROM "sea_lions"
LEFT JOIN migrations ON "migrations"."id" = "sea_lions"."id";
"""
df = pd.read_sql_query(sql=join, con=conn)
print(df)



      id   name                 species       id  distance   days
0  10484   Ayah  Zalophus californianus  10484.0    1000.0  107.0
1  11728   Spot  Zalophus californianus  11728.0    1531.0   56.0
2  11729  Tiger  Zalophus californianus  11729.0    1370.0   37.0
3  11732  Mabel  Zalophus californianus  11732.0    1622.0   62.0
4  11734   Rick  Zalophus californianus  11734.0    1491.0   58.0
5  11790  Jolee  Zalophus californianus      NaN       NaN    NaN


#### RIGHT JOIN - keeps every row of the right table (migrations); unmatched sea_lions columns are empty (NaN/None)

In [53]:
# Right join: start from migrations and attach sea_lions columns where the id matches.
# The query lives in its own `join_right` variable so it no longer overwrites
# the inner-join query stored in `join_inner`.
# NOTE: RIGHT JOIN requires SQLite 3.39.0 or newer; older builds reject this statement.
join_right = '''
SELECT * FROM "sea_lions"
RIGHT JOIN migrations ON "migrations"."id" = "sea_lions"."id";
'''
df = pd.read_sql_query(join_right, conn)
print(df)



        id   name                 species     id  distance  days
0  10484.0   Ayah  Zalophus californianus  10484      1000   107
1  11728.0   Spot  Zalophus californianus  11728      1531    56
2  11729.0  Tiger  Zalophus californianus  11729      1370    37
3  11732.0  Mabel  Zalophus californianus  11732      1622    62
4  11734.0   Rick  Zalophus californianus  11734      1491    58
5      NaN   None                    None  11735      2723    82
6      NaN   None                    None  11736      1571    52
7      NaN   None                    None  11737      1957    92


#### FULL JOIN (OUTER)  

In [55]:
# Full join: combine sea_lions and migrations on id using FULL JOIN.
join = """
SELECT * FROM "sea_lions"
FULL JOIN migrations ON "migrations"."id" = "sea_lions"."id";
"""
df = pd.read_sql_query(sql=join, con=conn)
print(df)



        id   name                 species       id  distance   days
0  10484.0   Ayah  Zalophus californianus  10484.0    1000.0  107.0
1  11728.0   Spot  Zalophus californianus  11728.0    1531.0   56.0
2  11729.0  Tiger  Zalophus californianus  11729.0    1370.0   37.0
3  11732.0  Mabel  Zalophus californianus  11732.0    1622.0   62.0
4  11734.0   Rick  Zalophus californianus  11734.0    1491.0   58.0
5  11790.0  Jolee  Zalophus californianus      NaN       NaN    NaN
6      NaN   None                    None  11735.0    2723.0   82.0
7      NaN   None                    None  11736.0    1571.0   52.0
8      NaN   None                    None  11737.0    1957.0   92.0


#### NATURAL JOIN

In [56]:
# Natural join: no ON clause is written; the tables are joined with NATURAL JOIN.
join = """
SELECT * FROM "sea_lions"
NATURAL JOIN migrations;
"""
df = pd.read_sql_query(sql=join, con=conn)
print(df)

      id   name                 species  distance  days
0  10484   Ayah  Zalophus californianus      1000   107
1  11728   Spot  Zalophus californianus      1531    56
2  11729  Tiger  Zalophus californianus      1370    37
3  11732  Mabel  Zalophus californianus      1622    62
4  11734   Rick  Zalophus californianus      1491    58


Notice that the result does not have a duplicate id column in this case. Also, this join works similarly to an INNER JOIN.

21   -- Use WHERE after joining a table
22   SELECT * FROM "sea_lions"
23   JOIN "migrations" ON "migrations"."id" = "sea_lions"."id"
24   WHERE "migrations"."distance" > 1500;
joins.sql

In [57]:
# Close the sea_lions.db connection; the JOIN examples are finished.
conn.close()

### SETS

On running a query, the results we see are called a "result set". This is a kind of set in SQL.

In [59]:
# Reopen longlist.db (the SQLite database used in CS50) for the set-operation examples.
conn = sqlite3.connect(database="longlist.db")

INTERSECT

In [62]:
# INTERSECT of the translators' names and the authors' names.
i = """
SELECT "name" FROM "translators"
INTERSECT 
SELECT "name" FROM "authors";
"""
df = pd.read_sql_query(sql=i, con=conn)
print(df)

                name
0  Ngũgĩ wa Thiong'o


UNION

In [64]:
# UNION of the translators' names and the authors' names.
u = """
SELECT "name" FROM "translators"
UNION 
SELECT "name" FROM "authors";
"""
df = pd.read_sql_query(sql=u, con=conn)
print(df)

                    name
0          Adania Shibli
1     Adrian Nathan West
2          Ahmed Saadawi
3    Alia Trabucco Zerán
4      Alison L. Strayer
..                   ...
140         Willem Anker
141           Wu Ming-Yi
142           Yōko Ogawa
143          Zou Jingzhi
144        Éric Vuillard

[145 rows x 1 columns]


Every author and every translator is included in this result set, but only once!

EXCEPT

In [66]:
# EXCEPT: the authors' names minus the translators' names.
e = """
SELECT "name" FROM "authors"
EXCEPT 
SELECT "name" FROM "translators";
"""
df = pd.read_sql_query(sql=e, con=conn)
print(df)

                   name
0         Adania Shibli
1         Ahmed Saadawi
2   Alia Trabucco Zerán
3       Amanda Svensson
4         Andrey Kurkov
..                  ...
66         Willem Anker
67           Wu Ming-Yi
68           Yōko Ogawa
69          Zou Jingzhi
70        Éric Vuillard

[71 rows x 1 columns]


1:23:00

1   -- UNION
 2  
 3   -- Select all authors, labeling as authors
 4   SELECT 'author' AS "profession", "name" FROM "authors";
 5  
 6   -- Select all translators, labeling as translators
 7   SELECT 'translator' AS "profession", "name" FROM "translators";
 8  
 9   -- Combine authors and translators into one result set
10   SELECT 'author' AS "profession", "name" FROM "authors"
11   UNION
12   SELECT 'translator' AS "profession", "name" FROM "translators";
13

### GROUP BY

HAVING