<a href="https://colab.research.google.com/github/engineer-nicolas/cs50sql/blob/master/lecture_4_Viewing/lecture_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lecture 4 - Viewing - Harvard CS50 SQL


## Views

A view is a virtual table defined by a query.

Views are useful for:


* simplifying: putting together data from different tables to be queried more simply

* aggregating: running aggregate functions, like finding the sum, and storing the results

* partitioning: dividing data into logical pieces

* securing: hiding columns that should be kept secure



In [1]:
# Let's import the required libraries
import sqlite3
import pandas as pd
import os

In [2]:
longlist_db_string_format="""
BEGIN TRANSACTION;
CREATE TABLE "authored" (
    "author_id" INTEGER,
    "book_id" INTEGER,
    FOREIGN KEY("author_id") REFERENCES "authors"("id"),
    FOREIGN KEY("book_id") REFERENCES "books"("id")
);
INSERT INTO "authored" VALUES(23,1);
INSERT INTO "authored" VALUES(13,2);
INSERT INTO "authored" VALUES(9,3);
INSERT INTO "authored" VALUES(27,4);
INSERT INTO "authored" VALUES(29,5);
INSERT INTO "authored" VALUES(66,6);
INSERT INTO "authored" VALUES(5,7);
INSERT INTO "authored" VALUES(9,8);
INSERT INTO "authored" VALUES(16,9);
INSERT INTO "authored" VALUES(60,10);
INSERT INTO "authored" VALUES(9,11);
INSERT INTO "authored" VALUES(4,12);
INSERT INTO "authored" VALUES(72,13);
INSERT INTO "authored" VALUES(24,14);
INSERT INTO "authored" VALUES(52,15);
INSERT INTO "authored" VALUES(62,16);
INSERT INTO "authored" VALUES(56,17);
INSERT INTO "authored" VALUES(15,18);
INSERT INTO "authored" VALUES(67,19);
INSERT INTO "authored" VALUES(19,20);
CREATE TABLE "authors" (
    "id" INTEGER,
    "name" TEXT,
    "country" TEXT,
    "birth" INTEGER,
    PRIMARY KEY("id")
);
INSERT INTO "authors" VALUES(1,'Adania Shibli','Palestine',1974);
INSERT INTO "authors" VALUES(2,'Ahmed Saadawi','Iraq',1973);
INSERT INTO "authors" VALUES(3,'Alia Trabucco Zerán','Chile',1983);
INSERT INTO "authors" VALUES(4,'Amanda Svensson','Sweden',1987);
INSERT INTO "authors" VALUES(5,'Andrey Kurkov','Ukraine',1961);
INSERT INTO "authors" VALUES(6,'Andrzej Tichy','Sweden',1978);
INSERT INTO "authors" VALUES(7,'Annie Ernaux','France',1940);
INSERT INTO "authors" VALUES(8,'Antonio Muñoz Molina','Spain',1956);
INSERT INTO "authors" VALUES(9,'Fernanda Melchor','Argentina',1977);
INSERT INTO "authors" VALUES(10,'Benajamín Labatut','Chile',1980);
INSERT INTO "authors" VALUES(11,'Bora Chung','South Korea',1976);
INSERT INTO "authors" VALUES(12,'Can Xue','China',1953);
INSERT INTO "authors" VALUES(13,'Cheon Myeong-Kwan','South Korea',1964);
INSERT INTO "authors" VALUES(14,'Christoph Ransmayr','Austria',1954);
INSERT INTO "authors" VALUES(15,'Claudia Piñeiro','Argentina',1960);
INSERT INTO "authors" VALUES(16,'Clemens Meyer','Germany',1977);
INSERT INTO "authors" VALUES(17,'Daniel Kehlmann','Germany',1975);
INSERT INTO "authors" VALUES(18,'David Diop','France',1966);
INSERT INTO "authors" VALUES(19,'David Grossman','Israel',1954);
INSERT INTO "authors" VALUES(20,'Emmanuelle Pagano','France',1969);
CREATE TABLE "books" (
    "id" INTEGER,
    "isbn" TEXT,
    "title" TEXT,
    "publisher_id" INTEGER,
    "format" TEXT,
    "pages" INTEGER,
    "published" TEXT,
    "year" INTEGER,
    PRIMARY KEY("id"),
    FOREIGN KEY("publisher_id") REFERENCES "publishers"("id")
);
INSERT INTO "books" VALUES(1,'9788439736967','Boulder',10,'paperback',112,'2022-08-02',2023);
INSERT INTO "books" VALUES(2,'9781628971538','Whale',3,'paperback',368,'2023-01-19',2023);
INSERT INTO "books" VALUES(3,'9781642861181','The Gospel According to the New World',32,'paperback',184,'2023-03-07',2023);
INSERT INTO "books" VALUES(4,'9781529414431','Standing Heavy',12,'paperback',252,'2022-05-26',2023);
INSERT INTO "books" VALUES(5,'9781474623025','Time Shelter',30,'hardcover',304,'2022-04-21',2023);
INSERT INTO "books" VALUES(6,'9781839764318','Is Mother Dead',29,'paperback',330,'2022-09-27',2023);
INSERT INTO "books" VALUES(7,'9781529427820','Jimi Hendrix Live in Lviv',12,'hardcover',416,'2023-04-27',2023);
INSERT INTO "books" VALUES(8,'9781945492655','The Birthday Party',27,'paperback',454,'2023-01-24',2023);
INSERT INTO "books" VALUES(9,'9781804270288','While We Were Dreaming',5,'paperback',528,'2023-03-30',2023);
INSERT INTO "books" VALUES(10,'9781782278627','Pyre',17,'paperback',224,'2022-04-08',2023);
INSERT INTO "books" VALUES(11,'9781913097660','Still Born',5,'paperback',200,'2022-06-22',2023);
INSERT INTO "books" VALUES(12,'9781914484872','A System So Magnificent It Is Blinding',20,'paperback',544,'2023-03-09',2023);
INSERT INTO "books" VALUES(13,'9781739822507','Ninth Building',8,'paperback',272,'2022-05-16',2023);
INSERT INTO "books" VALUES(14,'9781913097875','Paradais',5,'paperback',118,'2022-03-23',2022);
INSERT INTO "books" VALUES(15,'9781509898251','Heaven',15,'paperback',167,'2022-05-12',2022);
INSERT INTO "books" VALUES(16,'9781911284659','Love in the Big City',26,'paperback',217,'2021-10-29',2022);
INSERT INTO "books" VALUES(17,'9781911284635','Happy Stories, Mostly',26,'paperback',151,'2021-12-02',2022);
INSERT INTO "books" VALUES(18,'9781999368432','Elena Knows',2,'paperback',143,'2021-07-01',2022);
INSERT INTO "books" VALUES(19,'9781982108786','The Book of Mother',21,'paperback',224,'2021-09-19',2022);
INSERT INTO "books" VALUES(20,'9781787332935','More Than I Love My Life',9,'paperback',288,'2021-08-26',2022);
CREATE TABLE "publishers" (
    "id" INTEGER,
    "publisher" TEXT,
    PRIMARY KEY("id")
);
INSERT INTO "publishers" VALUES(1,'And Other Stories');
INSERT INTO "publishers" VALUES(2,'Charco Press');
INSERT INTO "publishers" VALUES(3,'Europa Editions');
INSERT INTO "publishers" VALUES(4,'Faber & Faber');
INSERT INTO "publishers" VALUES(5,'Fitzcarraldo Editions');
INSERT INTO "publishers" VALUES(6,'Granta Books');
INSERT INTO "publishers" VALUES(7,'Harvill Secker');
INSERT INTO "publishers" VALUES(8,'Honford Star');
INSERT INTO "publishers" VALUES(9,'Jonathan Cape');
INSERT INTO "publishers" VALUES(10,'Literatura Random House');
INSERT INTO "publishers" VALUES(11,'Lolli Editions');
INSERT INTO "publishers" VALUES(12,'MacLehose Press');
INSERT INTO "publishers" VALUES(13,'Oneworld');
INSERT INTO "publishers" VALUES(14,'Peirene Press');
INSERT INTO "publishers" VALUES(15,'Picador');
INSERT INTO "publishers" VALUES(16,'Portobello Books');
INSERT INTO "publishers" VALUES(17,'Pushkin Press');
INSERT INTO "publishers" VALUES(18,'Riverrun');
INSERT INTO "publishers" VALUES(19,'Sandstone Press');
INSERT INTO "publishers" VALUES(20,'Scribe UK');
CREATE TABLE "ratings" (
    "book_id" INTEGER,
    "rating" INTEGER,
    FOREIGN KEY("book_id") REFERENCES "books"("id")
);
INSERT INTO "ratings" VALUES(1,3);
INSERT INTO "ratings" VALUES(1,5);
INSERT INTO "ratings" VALUES(2,4);
INSERT INTO "ratings" VALUES(1,3);
INSERT INTO "ratings" VALUES(5,3);
INSERT INTO "ratings" VALUES(1,4);
INSERT INTO "ratings" VALUES(9,4);
INSERT INTO "ratings" VALUES(1,4);
INSERT INTO "ratings" VALUES(1,4);
INSERT INTO "ratings" VALUES(1,4);
INSERT INTO "ratings" VALUES(12,5);
INSERT INTO "ratings" VALUES(13,5);
INSERT INTO "ratings" VALUES(6,3);
INSERT INTO "ratings" VALUES(18,4);
INSERT INTO "ratings" VALUES(4,3);
INSERT INTO "ratings" VALUES(5,3);
INSERT INTO "ratings" VALUES(6,3);
INSERT INTO "ratings" VALUES(17,4);
INSERT INTO "ratings" VALUES(6,4);
INSERT INTO "ratings" VALUES(1,4);
CREATE TABLE "translated" (
    "translator_id" INTEGER,
    "book_id" INTEGER,
    FOREIGN KEY("translator_id") REFERENCES "translators"("id"),
    FOREIGN KEY("book_id") REFERENCES "books"("id")
);
INSERT INTO "translated" VALUES(53,1);
INSERT INTO "translated" VALUES(33,2);
INSERT INTO "translated" VALUES(56,3);
INSERT INTO "translated" VALUES(29,4);
INSERT INTO "translated" VALUES(3,5);
INSERT INTO "translated" VALUES(12,6);
INSERT INTO "translated" VALUES(55,7);
CREATE TABLE "translators" (
    "id" INTEGER,
    "name" TEXT,
    PRIMARY KEY("id")
);
INSERT INTO "translators" VALUES(1,'Adrian Nathan West');
INSERT INTO "translators" VALUES(2,'Alison L. Strayer');
INSERT INTO "translators" VALUES(3,'Angela Rodel');
INSERT INTO "translators" VALUES(4,'Aniruddhan Vasudevan');
INSERT INTO "translators" VALUES(5,'Anna Moschovakis');
INSERT INTO "translators" VALUES(6,'Anne McLean');
INSERT INTO "translators" VALUES(7,'Annelise Finegan Wasmoen');
INSERT INTO "translators" VALUES(8,'Anton Hur');
INSERT INTO "translators" VALUES(9,'Antonia Lloyd-Jones');
INSERT INTO "translators" VALUES(10,'Camilo A. Ramirez');
COMMIT;
"""

In [3]:
file="longlist.db"

# Release a connection left over from an earlier run of this cell before
# deleting the file; some platforms (e.g. Windows) refuse to remove a file
# that is still held open.
try:
    conn.close()
except NameError:
    pass  # first run: no earlier connection exists yet

# Start from a clean file so the cell gives the same result on every run
if os.path.exists(file):
    os.remove(file)

# Let's connect to a smaller version of the database used in CS50
conn = sqlite3.connect(file)
# Recreate the database from the SQL dump string
conn.executescript(longlist_db_string_format)
# Persist changes; the connection stays open because the following cells query through it
conn.commit()
size_bytes = os.path.getsize(file)
print(f"Initial Size - Database {file}: {size_bytes / (1024**2):.2f} MB")

Initial Size - Database longlist.db: 0.03 MB


In [4]:
# Load the schema catalogue: one row per table, view, index or trigger
df = pd.read_sql_query(sql="""
    SELECT *
    FROM sqlite_master;
    """, con=conn)
df

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,authored,authored,2,"CREATE TABLE ""authored"" (\n ""author_id"" INT..."
1,table,authors,authors,3,"CREATE TABLE ""authors"" (\n ""id"" INTEGER,\n ..."
2,table,books,books,4,"CREATE TABLE ""books"" (\n ""id"" INTEGER,\n ..."
3,table,publishers,publishers,5,"CREATE TABLE ""publishers"" (\n ""id"" INTEGER,..."
4,table,ratings,ratings,6,"CREATE TABLE ""ratings"" (\n ""book_id"" INTEGE..."
5,table,translated,translated,7,"CREATE TABLE ""translated"" (\n ""translator_i..."
6,table,translators,translators,8,"CREATE TABLE ""translators"" (\n ""id"" INTEGER..."


In [5]:
# Print the complete CREATE statement of every schema object.
# Iterating over the column's values avoids looking rows up by label, so the
# loop keeps working even when the frame's index is not the default 0..n-1.
for statement in df["sql"]:
    print(statement)

CREATE TABLE "authored" (
    "author_id" INTEGER,
    "book_id" INTEGER,
    FOREIGN KEY("author_id") REFERENCES "authors"("id"),
    FOREIGN KEY("book_id") REFERENCES "books"("id")
)
CREATE TABLE "authors" (
    "id" INTEGER,
    "name" TEXT,
    "country" TEXT,
    "birth" INTEGER,
    PRIMARY KEY("id")
)
CREATE TABLE "books" (
    "id" INTEGER,
    "isbn" TEXT,
    "title" TEXT,
    "publisher_id" INTEGER,
    "format" TEXT,
    "pages" INTEGER,
    "published" TEXT,
    "year" INTEGER,
    PRIMARY KEY("id"),
    FOREIGN KEY("publisher_id") REFERENCES "publishers"("id")
)
CREATE TABLE "publishers" (
    "id" INTEGER,
    "publisher" TEXT,
    PRIMARY KEY("id")
)
CREATE TABLE "ratings" (
    "book_id" INTEGER,
    "rating" INTEGER,
    FOREIGN KEY("book_id") REFERENCES "books"("id")
)
CREATE TABLE "translated" (
    "translator_id" INTEGER,
    "book_id" INTEGER,
    FOREIGN KEY("translator_id") REFERENCES "translators"("id"),
    FOREIGN KEY("book_id") REFERENCES "books"("id")
)
CR

## Simplifying

To select the books written by Fernanda Melchor, we would write this nested query.

In [6]:
# Books written by Fernanda Melchor, resolved with nested subqueries:
# author name -> author id -> book ids -> titles
pd.read_sql_query(sql="""
SELECT "title"
FROM "books"
WHERE "id" IN (
    SELECT "book_id"
    FROM "authored"
    WHERE "author_id" = (
        SELECT "id"
        FROM "authors"
        WHERE "name" = 'Fernanda Melchor'
    )
);
""", con=conn)

Unnamed: 0,title
0,The Gospel According to the New World
1,The Birthday Party
2,Still Born


To simplify this, let us first use JOIN to create a view containing authors and their books

In [7]:
# Pair each author with their books by joining through the "authored" link table
joined_tables = pd.read_sql_query(sql="""
SELECT "name", "title" FROM "authors"
JOIN "authored" ON "authors"."id" = "authored"."author_id"
JOIN "books" ON "books"."id" = "authored"."book_id";
""", con=conn)
joined_tables

Unnamed: 0,name,title
0,Cheon Myeong-Kwan,Whale
1,Fernanda Melchor,The Gospel According to the New World
2,Andrey Kurkov,Jimi Hendrix Live in Lviv
3,Fernanda Melchor,The Birthday Party
4,Clemens Meyer,While We Were Dreaming
5,Fernanda Melchor,Still Born
6,Amanda Svensson,A System So Magnificent It Is Blinding
7,Claudia Piñeiro,Elena Knows
8,David Grossman,More Than I Love My Life


## CREATE VIEW ___ AS + DROP VIEW IF EXISTS


To save the virtual table created in the previous step as a view, we need to change the query.

In [8]:
# Rebuild the view: discard any earlier definition, then define it again
for statement in (
    '''
DROP VIEW IF EXISTS "view_author_and_title";
''',
    '''
CREATE VIEW "view_author_and_title" AS
SELECT "authors"."name", "books"."title"
FROM "authors"
JOIN "authored" ON "authors"."id" = "authored"."author_id"
JOIN "books" ON "books"."id" = "authored"."book_id";
''',
):
    conn.execute(statement)
conn.commit()

# Show only the views recorded in the schema catalogue
pd.read_sql_query(sql="""
    SELECT *
    FROM sqlite_master
    WHERE "type"='view';
    """, con=conn)


Unnamed: 0,type,name,tbl_name,rootpage,sql
0,view,view_author_and_title,view_author_and_title,0,"CREATE VIEW ""view_author_and_title"" AS\nSELECT..."


Using this view, we can considerably simplify the query needed to find the books written by Fernanda Melchor

In [9]:
# Same question as the nested query, now answered with a flat filter on the view
simplified_query_with_view = pd.read_sql_query(sql='''
SELECT "title"
FROM "view_author_and_title"
WHERE "name" = 'Fernanda Melchor';
''', con=conn)
simplified_query_with_view


Unnamed: 0,title
0,The Gospel According to the New World
1,The Birthday Party
2,Still Born


A view, being a virtual table, consumes almost no additional disk space when it is created. The data within a view is still stored in the underlying tables, but it is accessible through this simplified view.


## Aggregating

In lecture 1, we saw how to find the average rating of every book, rounded to 2 decimal places.

In [10]:
# Average rating per book, rounded to two decimals
pd.read_sql_query(sql="""
SELECT "book_id", ROUND(AVG("rating"), 2) AS "average rating" 
FROM "ratings"
GROUP BY "book_id";
""", con=conn)

Unnamed: 0,book_id,average rating
0,1,3.88
1,2,4.0
2,4,3.0
3,5,3.0
4,6,3.33
5,9,4.0
6,12,5.0
7,13,5.0
8,17,4.0
9,18,4.0


We could also display the title and year of every book
> **Note:** SQL clauses must be written in a specific syntactic order. The correct order is:
>
> `SELECT → FROM → JOIN → WHERE → GROUP BY → HAVING → ORDER BY → LIMIT`

In [11]:
# Average rating per book, enriched with the title and year from "books"
pd.read_sql_query(sql="""
SELECT 
    "book_id", 
    "title", 
    "year", 
    ROUND(AVG("rating"), 2) AS "average rating" 
FROM "ratings"
JOIN "books" ON "ratings"."book_id" = "books"."id"
GROUP BY "book_id";
""", con=conn)

Unnamed: 0,book_id,title,year,average rating
0,1,Boulder,2023,3.88
1,2,Whale,2023,4.0
2,4,Standing Heavy,2023,3.0
3,5,Time Shelter,2023,3.0
4,6,Is Mother Dead,2023,3.33
5,9,While We Were Dreaming,2023,4.0
6,12,A System So Magnificent It Is Blinding,2023,5.0
7,13,Ninth Building,2023,5.0
8,17,"Happy Stories, Mostly",2022,4.0
9,18,Elena Knows,2022,4.0


This aggregated data can be stored in a view:

In [12]:
# Rebuild the aggregate view: discard any earlier definition, then define it again
for statement in (
    '''
DROP VIEW IF EXISTS "view_book_ratings";
''',
    '''
CREATE VIEW "view_book_ratings" AS
SELECT 
    "book_id" AS "id", 
    "title", 
    "year", 
    ROUND(AVG("rating"), 2) AS "average rating" 
FROM "ratings"
JOIN "books" ON "ratings"."book_id" = "books"."id"
GROUP BY "book_id";
''',
):
    conn.execute(statement)
conn.commit()

# Reload the schema catalogue to see the new view listed next to the tables
df = pd.read_sql_query(sql="""
    SELECT *
    FROM sqlite_master;
    """, con=conn)
df

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,authored,authored,2,"CREATE TABLE ""authored"" (\n ""author_id"" INT..."
1,table,authors,authors,3,"CREATE TABLE ""authors"" (\n ""id"" INTEGER,\n ..."
2,table,books,books,4,"CREATE TABLE ""books"" (\n ""id"" INTEGER,\n ..."
3,table,publishers,publishers,5,"CREATE TABLE ""publishers"" (\n ""id"" INTEGER,..."
4,table,ratings,ratings,6,"CREATE TABLE ""ratings"" (\n ""book_id"" INTEGE..."
5,table,translated,translated,7,"CREATE TABLE ""translated"" (\n ""translator_i..."
6,table,translators,translators,8,"CREATE TABLE ""translators"" (\n ""id"" INTEGER..."
7,view,view_author_and_title,view_author_and_title,0,"CREATE VIEW ""view_author_and_title"" AS\nSELECT..."
8,view,view_book_ratings,view_book_ratings,0,"CREATE VIEW ""view_book_ratings"" AS\nSELECT \n ..."


Each time a view is created, it gets added to the schema. We can verify this by running `.schema` in the `sqlite3` shell or, as in the cell above, by querying `sqlite_master`, and observing that `view_author_and_title` and `view_book_ratings` are now part of this database’s schema.

In [13]:
# Read the aggregated ratings straight from the view
aggregating_query_with_view = pd.read_sql_query(sql='''
SELECT * FROM "view_book_ratings";
''', con=conn)
aggregating_query_with_view

Unnamed: 0,id,title,year,average rating
0,1,Boulder,2023,3.88
1,2,Whale,2023,4.0
2,4,Standing Heavy,2023,3.0
3,5,Time Shelter,2023,3.0
4,6,Is Mother Dead,2023,3.33
5,9,While We Were Dreaming,2023,4.0
6,12,A System So Magnificent It Is Blinding,2023,5.0
7,13,Ninth Building,2023,5.0
8,17,"Happy Stories, Mostly",2022,4.0
9,18,Elena Knows,2022,4.0


In [14]:
# Column metadata SQLite reports for the view
pd.read_sql_query(sql="""
PRAGMA table_info("view_book_ratings");
""", con=conn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,INTEGER,0,,0
1,1,title,TEXT,0,,0
2,2,year,INTEGER,0,,0
3,3,average rating,,0,,0


## CREATE TEMPORARY VIEW ___ AS and DROP VIEW IF EXISTS

To create temporary views that are not stored in the database schema, we can use CREATE TEMPORARY VIEW. 

This command creates a view that exists only for the duration of our connection with the database.



To find the average rating of books per year, we can use the view we already created and store the results in a temporary view:

In [15]:
# Rebuild the temporary view on top of "view_book_ratings"
for statement in (
    """
DROP VIEW IF EXISTS "average_ratings_by_year";
""",
    """
CREATE TEMPORARY VIEW "average_ratings_by_year" AS
SELECT 
    "year", 
    AVG("average rating") AS "rating" 
FROM "view_book_ratings" 
GROUP BY "year";
""",
):
    conn.execute(statement)
conn.commit()

# List the main schema catalogue after creating the temporary view
pd.read_sql_query(sql="""
    SELECT *
    FROM sqlite_master;
    """, con=conn)


Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,authored,authored,2,"CREATE TABLE ""authored"" (\n ""author_id"" INT..."
1,table,authors,authors,3,"CREATE TABLE ""authors"" (\n ""id"" INTEGER,\n ..."
2,table,books,books,4,"CREATE TABLE ""books"" (\n ""id"" INTEGER,\n ..."
3,table,publishers,publishers,5,"CREATE TABLE ""publishers"" (\n ""id"" INTEGER,..."
4,table,ratings,ratings,6,"CREATE TABLE ""ratings"" (\n ""book_id"" INTEGE..."
5,table,translated,translated,7,"CREATE TABLE ""translated"" (\n ""translator_i..."
6,table,translators,translators,8,"CREATE TABLE ""translators"" (\n ""id"" INTEGER..."
7,view,view_author_and_title,view_author_and_title,0,"CREATE VIEW ""view_author_and_title"" AS\nSELECT..."
8,view,view_book_ratings,view_book_ratings,0,"CREATE VIEW ""view_book_ratings"" AS\nSELECT \n ..."


In [16]:
# List the objects recorded in the temporary schema catalogue
pd.read_sql_query(sql="""
SELECT *
FROM "sqlite_temp_master";
""", con=conn)

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,view,average_ratings_by_year,average_ratings_by_year,0,"CREATE VIEW ""average_ratings_by_year"" AS\nSELE..."


In [17]:
# Query the temporary view through its "temp" schema qualifier
pd.read_sql_query(sql="""
SELECT *
FROM "temp"."average_ratings_by_year";
""", con=conn)


Unnamed: 0,year,rating
0,2022,4.0
1,2023,3.90125


## Common Table Expression (CTE)

- A regular view persists in our database schema until it is explicitly dropped. 
- A temporary view exists for the duration of our connection with the database. 
- A CTE is a view that exists for a single query alone.


Let us recreate the view containing average book ratings per year using a CTE instead of a temporary view. 

In [18]:
# Make sure no stored view named "average_book_ratings" exists before the CTE
# below reuses that name. The trailing semicolon keeps the cell from echoing
# the returned Cursor object, whose repr contains a memory address that
# changes on every run.
conn.execute("""
DROP VIEW IF EXISTS "average_book_ratings";
"""
);

<sqlite3.Cursor at 0x277a72d5a40>

Next, we create a CTE containing the average ratings per book. We then use the average ratings per book to calculate the average ratings per year, in much the same way as we did before.

## WITH ___ AS - CTE

In [19]:
# The CTE computes per-book averages; the outer query then averages those by year
pd.read_sql_query(sql="""
WITH "average_book_ratings" AS (
    SELECT 
        "book_id", 
        "title", 
        "year", 
        ROUND(AVG("rating"), 2) AS "rating" 
    FROM "ratings"
    JOIN "books" ON "ratings"."book_id" = "books"."id"
    GROUP BY "book_id"
) 
             
SELECT "year", ROUND(AVG("rating"), 2) AS "rating" FROM "average_book_ratings"
GROUP BY "year";
""", con=conn)

Unnamed: 0,year,rating
0,2022,4.0
1,2023,3.9


## Partitioning

Views can be used to partition data. Let us create a view to store books longlisted in 2023.

In [20]:
# Rebuild the partition view holding only the books whose "year" is 2023
for statement in (
    """
DROP VIEW IF EXISTS "2023";
""",
    """
CREATE VIEW "2023" AS
SELECT "id", "title" FROM "books"
WHERE "year" = 2023;
""",
):
    conn.execute(statement)
conn.commit()

# Read the partition back
pd.read_sql_query(sql="""
    SELECT * FROM "2023";
    """, con=conn)

Unnamed: 0,id,title
0,1,Boulder
1,2,Whale
2,3,The Gospel According to the New World
3,4,Standing Heavy
4,5,Time Shelter
5,6,Is Mother Dead
6,7,Jimi Hendrix Live in Lviv
7,8,The Birthday Party
8,9,While We Were Dreaming
9,10,Pyre


In [21]:
# Finished with longlist.db: release the connection before the next section
# opens a different database under the same `conn` name
conn.close()

## Securing

Views can be used to enhance database security by limiting access to certain data. Consider a rideshare company’s database with a table rides that looks like the following:



In [22]:
# SQL dump that builds the example "rides" table.
# "destination" is declared TEXT (the original declared INTEGER) because the
# column stores place names, exactly like "origin".
rideshare_db_string_format="""
BEGIN TRANSACTION;
CREATE TABLE "rides" (
    "id" INTEGER,
    "origin" TEXT NOT NULL,
    "destination" TEXT NOT NULL,
    "rider" TEXT NOT NULL,
    PRIMARY KEY("id")
);
INSERT INTO "rides" VALUES(1,'Good Egg Galaxy','Honeyhive Galaxy','Peach');
INSERT INTO "rides" VALUES(2,'Castle Courtyard','Cascade Kingdom','Mario');
INSERT INTO "rides" VALUES(3,'Metro Kingdom','Mushroom Kingdom','Luigi');
INSERT INTO "rides" VALUES(4,'Seaside Kingdom','Deep Woods','Bowser');
COMMIT;
"""

file="rideshare.db"

# Release a connection left over from an earlier run before deleting the
# file; some platforms (e.g. Windows) refuse to remove a file that is still
# held open. Closing an already-closed connection is harmless.
try:
    conn.close()
except NameError:
    pass  # no earlier connection exists

# Start from a clean file so the cell gives the same result on every run
if os.path.exists(file):
    os.remove(file)

# Connect to the fresh rideshare database
conn = sqlite3.connect(file)
# Recreate the database from the SQL dump string
conn.executescript(rideshare_db_string_format)
# Persist changes; the connection stays open because the following cells query through it
conn.commit()

pd.read_sql_query(
    """
    SELECT * FROM "rides";
    """,
    conn
)

Unnamed: 0,id,origin,destination,rider
0,1,Good Egg Galaxy,Honeyhive Galaxy,Peach
1,2,Castle Courtyard,Cascade Kingdom,Mario
2,3,Metro Kingdom,Mushroom Kingdom,Luigi
3,4,Seaside Kingdom,Deep Woods,Bowser


Rider names are likely categorized as Personally Identifiable Information (PII) which companies are not allowed to share indiscriminately. Views can be handy in this situation — we can share with the analyst a view containing the origin and destination of rides, but not the rider names:

In [23]:
# Drop any earlier definition first so the cell can be re-run without a
# "view already exists" error, as the other view-creating cells do.
conn.execute("""
DROP VIEW IF EXISTS "analysis";
""")

# The view exposes origin and destination but replaces every rider name with
# the constant 'Anonymous', so the real names never leave the base table.
conn.execute("""
CREATE VIEW "analysis" AS
SELECT "origin", "destination", 'Anonymous' AS "rider" 
FROM "rides";
""")

conn.commit()
pd.read_sql_query(
    """
    SELECT * FROM "analysis";
    """,
    conn
)

Unnamed: 0,origin,destination,rider
0,Good Egg Galaxy,Honeyhive Galaxy,Anonymous
1,Castle Courtyard,Cascade Kingdom,Anonymous
2,Metro Kingdom,Mushroom Kingdom,Anonymous
3,Seaside Kingdom,Deep Woods,Anonymous


## TRIGGER + INSTEAD OF + VIEW and Soft Deletions

It is not possible to insert data into or delete data from a view. However, we can set up a trigger that inserts into or deletes from the underlying table! The `INSTEAD OF` trigger allows us to do this.

In [24]:
# The schema changes are guarded so the cell can be re-run: adding a column
# twice, or renaming a column that has already been renamed, raises an error.
existing_columns = {row[1] for row in conn.execute('PRAGMA table_info("rides");')}

if "deleted" not in existing_columns:
    # Soft-delete flag: 0 = active ride, 1 = hidden from "current_rides"
    conn.execute('ALTER TABLE "rides" ADD COLUMN "deleted" INTEGER DEFAULT 0;')

if "rider" in existing_columns and "driver" not in existing_columns:
    conn.execute('ALTER TABLE "rides" RENAME COLUMN "rider" TO "driver";')

# IF NOT EXISTS leaves an already-defined view or trigger untouched on re-run.
# The view shows only rides that are not soft-deleted; the INSTEAD OF trigger
# turns a DELETE on the view into an UPDATE that flags the row in "rides".
conn.executescript("""
CREATE VIEW IF NOT EXISTS "current_rides" AS
SELECT "id","origin","destination","driver"
FROM "rides"
WHERE "deleted" = 0;

CREATE TRIGGER IF NOT EXISTS "delete"
INSTEAD OF DELETE ON "current_rides"
FOR EACH ROW
BEGIN
    UPDATE "rides" SET "deleted" = 1
    WHERE "id" = OLD."id";
END;
""")

conn.commit()
pd.read_sql_query(
    """
    SELECT * FROM "sqlite_master";
    """,
    conn
)

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,rides,rides,2,"CREATE TABLE ""rides"" (\n ""id"" INTEGER,\n ..."
1,view,analysis,analysis,0,"CREATE VIEW ""analysis"" AS\nSELECT ""origin"", ""d..."
2,view,current_rides,current_rides,0,"CREATE VIEW ""current_rides"" AS\nSELECT ""id"",""o..."
3,trigger,delete,current_rides,0,"CREATE TRIGGER ""delete""\nINSTEAD OF DELETE ON ..."


In [25]:
# Base table after the schema change, including the "deleted" flag
pd.read_sql_query(sql="""
    SELECT * FROM "rides";
    """, con=conn)

Unnamed: 0,id,origin,destination,driver,deleted
0,1,Good Egg Galaxy,Honeyhive Galaxy,Peach,0
1,2,Castle Courtyard,Cascade Kingdom,Mario,0
2,3,Metro Kingdom,Mushroom Kingdom,Luigi,0
3,4,Seaside Kingdom,Deep Woods,Bowser,0


Let's test the trigger:

In [26]:
# Issue a DELETE against the view (not the base table) for Mario's ride
delete_mario_sql = """    
DELETE FROM "current_rides" 
WHERE "driver" = 'Mario';                               
"""
conn.execute(delete_mario_sql)
conn.commit()

Every time we try to delete rows from the view, this trigger will instead update the `deleted` column of the corresponding row in the underlying table `rides`, thus completing the soft deletion.

In [27]:
# What the view shows after the delete
pd.read_sql_query(sql="""
    SELECT * FROM "current_rides";
    """, con=conn)

Unnamed: 0,id,origin,destination,driver
0,1,Good Egg Galaxy,Honeyhive Galaxy,Peach
1,3,Metro Kingdom,Mushroom Kingdom,Luigi
2,4,Seaside Kingdom,Deep Woods,Bowser


In [28]:
# What the base table holds after the delete
pd.read_sql_query(sql="""
    SELECT * FROM "rides";
    """, con=conn)

Unnamed: 0,id,origin,destination,driver,deleted
0,1,Good Egg Galaxy,Honeyhive Galaxy,Peach,0
1,2,Castle Courtyard,Cascade Kingdom,Mario,1
2,3,Metro Kingdom,Mushroom Kingdom,Luigi,0
3,4,Seaside Kingdom,Deep Woods,Bowser,0


## TRIGGER + INSTEAD OF + VIEW + WHEN 

Similarly, we can create a trigger that inserts data into the underlying table when we try to insert it into a view.

There are 2 situations to consider here.

1. We could be trying to insert into a view a row that already exists in the underlying table, but was soft deleted. In this situation, we can write the following trigger to handle it:

In [29]:
# Drop any earlier definition first so the cell can be re-run, matching the
# "insert_when_new" cell.
# NOTE(review): the trigger matches rows on "driver" alone, so an insert for a
# driver who already has rows un-deletes all of that driver's rides rather
# than adding a new one — confirm this is the intended simplification.
conn.executescript("""
DROP TRIGGER IF EXISTS "insert_when_exists";

CREATE TRIGGER "insert_when_exists"
INSTEAD OF INSERT ON "current_rides"
FOR EACH ROW 
WHEN NEW."driver" IN (
    SELECT "driver" FROM "rides"
)
BEGIN
    UPDATE "rides" 
    SET "deleted" = 0 
    WHERE "driver" = NEW."driver";
END;
""")

conn.commit()
pd.read_sql_query(
    """
    SELECT * FROM "sqlite_master";
    """,
    conn
)

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,rides,rides,2,"CREATE TABLE ""rides"" (\n ""id"" INTEGER,\n ..."
1,view,analysis,analysis,0,"CREATE VIEW ""analysis"" AS\nSELECT ""origin"", ""d..."
2,view,current_rides,current_rides,0,"CREATE VIEW ""current_rides"" AS\nSELECT ""id"",""o..."
3,trigger,delete,current_rides,0,"CREATE TRIGGER ""delete""\nINSTEAD OF DELETE ON ..."
4,trigger,insert_when_exists,current_rides,0,"CREATE TRIGGER ""insert_when_exists""\nINSTEAD O..."


In [30]:
# Insert Mario's ride through the view; the driver already exists in "rides"
reinsert_mario_sql = """    
    INSERT INTO "current_rides" ("id","origin","destination","driver")
    VALUES (2,'Castle Courtyard','Cascade Kingdom','Mario');                             
"""
conn.execute(reinsert_mario_sql)
conn.commit()

In [31]:
# What the view shows after the insert
pd.read_sql_query(sql="""
    SELECT * FROM "current_rides";
    """, con=conn)


Unnamed: 0,id,origin,destination,driver
0,1,Good Egg Galaxy,Honeyhive Galaxy,Peach
1,2,Castle Courtyard,Cascade Kingdom,Mario
2,3,Metro Kingdom,Mushroom Kingdom,Luigi
3,4,Seaside Kingdom,Deep Woods,Bowser


In [32]:
# What the base table holds after the insert
pd.read_sql_query(sql="""
    SELECT * FROM "rides";
    """, con=conn)

Unnamed: 0,id,origin,destination,driver,deleted
0,1,Good Egg Galaxy,Honeyhive Galaxy,Peach,0
1,2,Castle Courtyard,Cascade Kingdom,Mario,0
2,3,Metro Kingdom,Mushroom Kingdom,Luigi,0
3,4,Seaside Kingdom,Deep Woods,Bowser,0


2. The second situation occurs when we are trying to insert a row that does not exist in the underlying table. The following trigger handles this situation:

In [33]:
# Trigger for drivers not yet present in "rides": the insert aimed at the view
# is redirected to the base table. Any earlier definition is dropped first.
insert_when_new_script = """
DROP TRIGGER IF EXISTS "insert_when_new";
                                                            
CREATE TRIGGER "insert_when_new"
INSTEAD OF INSERT ON "current_rides"
FOR EACH ROW 
WHEN NEW."driver" NOT IN (
    SELECT "driver" FROM "rides"
)
BEGIN
    INSERT INTO "rides" ("origin","destination","driver")
    VALUES (NEW."origin", NEW."destination", NEW."driver");
END;                            
"""
conn.executescript(insert_when_new_script)
conn.commit()

# List the schema catalogue, which now includes the new trigger
pd.read_sql_query(sql="""
    SELECT * FROM "sqlite_master";
    """, con=conn)

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,rides,rides,2,"CREATE TABLE ""rides"" (\n ""id"" INTEGER,\n ..."
1,view,analysis,analysis,0,"CREATE VIEW ""analysis"" AS\nSELECT ""origin"", ""d..."
2,view,current_rides,current_rides,0,"CREATE VIEW ""current_rides"" AS\nSELECT ""id"",""o..."
3,trigger,delete,current_rides,0,"CREATE TRIGGER ""delete""\nINSTEAD OF DELETE ON ..."
4,trigger,insert_when_exists,current_rides,0,"CREATE TRIGGER ""insert_when_exists""\nINSTEAD O..."
5,trigger,insert_when_new,current_rides,0,"CREATE TRIGGER ""insert_when_new""\nINSTEAD OF I..."


In [34]:
# Insert a ride for a driver that does not exist in "rides" yet
insert_donkey_kong_sql = """    
    INSERT INTO "current_rides"("origin","destination","driver")
    VALUES ('Castle','Cascade','Donkey Kong');                             
"""
conn.execute(insert_donkey_kong_sql)
conn.commit()

In [35]:
# What the view shows after inserting the new driver's ride
pd.read_sql_query(sql="""
    SELECT * FROM "current_rides";
    """, con=conn)

Unnamed: 0,id,origin,destination,driver
0,1,Good Egg Galaxy,Honeyhive Galaxy,Peach
1,2,Castle Courtyard,Cascade Kingdom,Mario
2,3,Metro Kingdom,Mushroom Kingdom,Luigi
3,4,Seaside Kingdom,Deep Woods,Bowser
4,5,Castle,Cascade,Donkey Kong


In [36]:
# What the base table holds after inserting the new driver's ride
pd.read_sql_query(sql="""
    SELECT * FROM "rides";
    """, con=conn)

Unnamed: 0,id,origin,destination,driver,deleted
0,1,Good Egg Galaxy,Honeyhive Galaxy,Peach,0
1,2,Castle Courtyard,Cascade Kingdom,Mario,0
2,3,Metro Kingdom,Mushroom Kingdom,Luigi,0
3,4,Seaside Kingdom,Deep Woods,Bowser,0
4,5,Castle,Cascade,Donkey Kong,0


In [37]:
# Release the database connection held by `conn`
conn.close()