<img src="images/banner.png" style="width: 100%;">

# Lecture Demonstrations Notebook

In [1]:
import pandas as pd
import sqlalchemy as sqla

## 0 Creating the SQL Tables

The SQL script for creating the tables for this example is located at `sql/create_frieren.sql`. Run the code cell below to create the tables, which will be saved in a database file at `data/frieren_characters.db`.

In [2]:
# Build the database by feeding the SQL script to the sqlite3 command-line shell.
# The leading `!` runs this line in the system shell, so the `sqlite3` CLI must be on PATH.
# NOTE(review): re-running may fail or duplicate rows unless the script drops/recreates
# its tables -- confirm against sql/create_frieren.sql.
!sqlite3 data/frieren_characters.db < sql/create_frieren.sql

Now, let's establish a connection with this database using `sqlalchemy`:

In [3]:
# SQLAlchemy engine pointing at the SQLite file; passed as the connection to every
# pd.read_sql call in this notebook.
db = sqla.create_engine(
    "sqlite:///data/frieren_characters.db",
)

## 1 Inspecting the `journeys_end` and `demon_king` tables

We query the database to inspect the `journeys_end` and `demon_king` tables. We also save each result in a variable so that we can demonstrate how to perform the same joins using `pandas.merge`.

In [4]:
# Read the whole journeys_end table into a DataFrame; the frame is reused by the
# pd.merge cells further down.
query = """
        SELECT *
        FROM journeys_end
        """
journeys_end = pd.read_sql(sql=query, con=db)
journeys_end

Unnamed: 0,name,str,dex,int
0,Frieren,3,2,5
1,Fern,2,2,4
2,Stark,4,4,1


In [5]:
# Read the whole demon_king table into a DataFrame; the frame is reused by the
# pd.merge cells further down.
query = """
        SELECT *
        FROM demon_king
        """
demon_king = pd.read_sql(sql=query, con=db)
demon_king

Unnamed: 0,name,str,dex,int
0,Frieren,3,2,5
1,Himmel,4,4,2
2,Heiter,2,2,4
3,Eisen,5,4,3


## 2 Using SQL Joins

### Inner Join

In [6]:
# INNER JOIN: keep only the (je, dk) row pairs whose `int` values are equal.
# SELECT * returns the columns of both tables side by side.
query = """
        SELECT *
        FROM journeys_end AS je
        INNER JOIN demon_king AS dk
            ON je.int = dk.int
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,name,str,dex,int,name.1,str.1,dex.1,int.1
0,Frieren,3,2,5,Frieren,3,2,5
1,Fern,2,2,4,Heiter,2,2,4


In [7]:
# pandas counterpart of the INNER JOIN: match rows on the shared `int` column.
# The join key appears once; every other overlapping column gets a suffix.
pd.merge(
    left=journeys_end,
    right=demon_king,
    how='inner',
    on='int',
    suffixes=('_je', '_dk'),
)

Unnamed: 0,name_je,str_je,dex_je,int,name_dk,str_dk,dex_dk
0,Frieren,3,2,5,Frieren,3,2
1,Fern,2,2,4,Heiter,2,2


### Left Join

In [8]:
# LEFT JOIN: every journeys_end row is kept; demon_king columns are NULL where
# no row has a matching `int`.
query = """
        SELECT *
        FROM journeys_end AS je
        LEFT JOIN demon_king AS dk
            ON je.int = dk.int
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,name,str,dex,int,name.1,str.1,dex.1,int.1
0,Frieren,3,2,5,Frieren,3.0,2.0,5.0
1,Fern,2,2,4,Heiter,2.0,2.0,4.0
2,Stark,4,4,1,,,,


In [9]:
# pandas counterpart of the LEFT JOIN: all rows of the left frame survive and
# unmatched right-hand columns are filled with missing values.
pd.merge(
    left=journeys_end,
    right=demon_king,
    how='left',
    on='int',
    suffixes=('_je', '_dk'),
)

Unnamed: 0,name_je,str_je,dex_je,int,name_dk,str_dk,dex_dk
0,Frieren,3,2,5,Frieren,3.0,2.0
1,Fern,2,2,4,Heiter,2.0,2.0
2,Stark,4,4,1,,,


### Right Join

In [10]:
# RIGHT JOIN: every demon_king row is kept; journeys_end columns are NULL where
# no row has a matching `int`.
# NOTE(review): RIGHT JOIN needs a reasonably recent SQLite (3.39+, to be confirmed
# for the lecture environment).
query = """
        SELECT *
        FROM journeys_end AS je
        RIGHT JOIN demon_king AS dk
            ON je.int = dk.int
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,name,str,dex,int,name.1,str.1,dex.1,int.1
0,Frieren,3.0,2.0,5.0,Frieren,3,2,5
1,Fern,2.0,2.0,4.0,Heiter,2,2,4
2,,,,,Himmel,4,4,2
3,,,,,Eisen,5,4,3


In [11]:
# pandas counterpart of the RIGHT JOIN: all rows of the right frame survive and
# unmatched left-hand columns are filled with missing values.
pd.merge(
    left=journeys_end,
    right=demon_king,
    how='right',
    on='int',
    suffixes=('_je', '_dk'),
)

Unnamed: 0,name_je,str_je,dex_je,int,name_dk,str_dk,dex_dk
0,Frieren,3.0,2.0,5,Frieren,3,2
1,,,,2,Himmel,4,4
2,Fern,2.0,2.0,4,Heiter,2,2
3,,,,3,Eisen,5,4


### Full Outer Join

In [12]:
# FULL OUTER JOIN: rows from both tables are kept; whichever side has no match
# on `int` is returned as NULL.
# NOTE(review): FULL OUTER JOIN needs a reasonably recent SQLite (3.39+, to be
# confirmed for the lecture environment).
query = """
        SELECT *
        FROM journeys_end AS je
        FULL OUTER JOIN demon_king AS dk
            ON je.int = dk.int
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,name,str,dex,int,name.1,str.1,dex.1,int.1
0,Frieren,3.0,2.0,5.0,Frieren,3.0,2.0,5.0
1,Fern,2.0,2.0,4.0,Heiter,2.0,2.0,4.0
2,Stark,4.0,4.0,1.0,,,,
3,,,,,Himmel,4.0,4.0,2.0
4,,,,,Eisen,5.0,4.0,3.0


In [13]:
# pandas counterpart of the FULL OUTER JOIN: rows from both frames are kept and
# the side without a match on `int` is filled with missing values.
pd.merge(
    left=journeys_end,
    right=demon_king,
    how='outer',
    on='int',
    suffixes=('_je', '_dk'),
)

Unnamed: 0,name_je,str_je,dex_je,int,name_dk,str_dk,dex_dk
0,Stark,4.0,4.0,1,,,
1,,,,2,Himmel,4.0,4.0
2,,,,3,Eisen,5.0,4.0
3,Fern,2.0,2.0,4,Heiter,2.0,2.0
4,Frieren,3.0,2.0,5,Frieren,3.0,2.0


## 3 Other Types of Joins

### Cross Joins

A cross join pairs each row of the left table with every row of the right table; no join condition is used.

In [14]:
# CROSS JOIN: no ON clause -- each journeys_end row is combined with each
# demon_king row.
query = """
        SELECT *
        FROM journeys_end AS je
        CROSS JOIN demon_king AS dk
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,name,str,dex,int,name.1,str.1,dex.1,int.1
0,Frieren,3,2,5,Frieren,3,2,5
1,Frieren,3,2,5,Himmel,4,4,2
2,Frieren,3,2,5,Heiter,2,2,4
3,Frieren,3,2,5,Eisen,5,4,3
4,Fern,2,2,4,Frieren,3,2,5
5,Fern,2,2,4,Himmel,4,4,2
6,Fern,2,2,4,Heiter,2,2,4
7,Fern,2,2,4,Eisen,5,4,3
8,Stark,4,4,1,Frieren,3,2,5
9,Stark,4,4,1,Himmel,4,4,2


In [15]:
 pd.merge(journeys_end, demon_king, how='cross', suffixes=('_je', '_dk'))

Unnamed: 0,name_je,str_je,dex_je,int_je,name_dk,str_dk,dex_dk,int_dk
0,Frieren,3,2,5,Frieren,3,2,5
1,Frieren,3,2,5,Himmel,4,4,2
2,Frieren,3,2,5,Heiter,2,2,4
3,Frieren,3,2,5,Eisen,5,4,3
4,Fern,2,2,4,Frieren,3,2,5
5,Fern,2,2,4,Himmel,4,4,2
6,Fern,2,2,4,Heiter,2,2,4
7,Fern,2,2,4,Eisen,5,4,3
8,Stark,4,4,1,Frieren,3,2,5
9,Stark,4,4,1,Himmel,4,4,2


### Using conditions other than equality

In [16]:
# Join on an inequality instead of equality: a (je, dk) pair matches when
# je.int <= dk.int. Being a FULL OUTER JOIN, rows from either table without any
# match would still be returned, padded with NULLs.
# NOTE(review): FULL OUTER JOIN needs a reasonably recent SQLite (3.39+, to be
# confirmed for the lecture environment).
query = """
        SELECT *
        FROM journeys_end AS je
        FULL OUTER JOIN demon_king AS dk
            ON je.int <= dk.int
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,name,str,dex,int,name.1,str.1,dex.1,int.1
0,Frieren,3,2,5,Frieren,3,2,5
1,Fern,2,2,4,Frieren,3,2,5
2,Fern,2,2,4,Heiter,2,2,4
3,Stark,4,4,1,Frieren,3,2,5
4,Stark,4,4,1,Himmel,4,4,2
5,Stark,4,4,1,Heiter,2,2,4
6,Stark,4,4,1,Eisen,5,4,3


In [17]:
# pd.merge only matches on equality, so emulate the inequality join in two steps:
# build every (je, dk) pairing, then keep the pairs that satisfy the condition.
# Only matching pairs survive the filter (inner-join semantics); a row with no
# match at all would be dropped rather than padded with missing values.
res = pd.merge(
    left=journeys_end,
    right=demon_king,
    how='cross',
    suffixes=('_je', '_dk'),
)
res[res['int_je'] <= res['int_dk']].reset_index(drop=True)

Unnamed: 0,name_je,str_je,dex_je,int_je,name_dk,str_dk,dex_dk,int_dk
0,Frieren,3,2,5,Frieren,3,2,5
1,Fern,2,2,4,Frieren,3,2,5
2,Fern,2,2,4,Heiter,2,2,4
3,Stark,4,4,1,Frieren,3,2,5
4,Stark,4,4,1,Himmel,4,4,2
5,Stark,4,4,1,Heiter,2,2,4
6,Stark,4,4,1,Eisen,5,4,3


### Self Joins

In some cases, we want to join a table with itself. When doing so, we need to give each copy of the table its own alias to avoid ambiguity.

In [18]:
# Self join: journeys_end appears twice, so each copy gets its own alias
# (je1 = left copy, je2 = right copy). A pair matches when je1.int <= je2.int.
query = """
        SELECT *
        FROM journeys_end AS je1
        LEFT JOIN journeys_end AS je2
            ON je1.int <= je2.int
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,name,str,dex,int,name.1,str.1,dex.1,int.1
0,Frieren,3,2,5,Frieren,3,2,5
1,Fern,2,2,4,Frieren,3,2,5
2,Fern,2,2,4,Fern,2,2,4
3,Stark,4,4,1,Frieren,3,2,5
4,Stark,4,4,1,Fern,2,2,4
5,Stark,4,4,1,Stark,4,4,1


In [19]:
# pandas counterpart of the self join: pair every journeys_end row with every
# journeys_end row, then keep the pairs that satisfy the condition.
# Both sides are journeys_end, so the suffixes mirror the SQL aliases
# (je1 = left copy, je2 = right copy).
# Cross-then-filter keeps only matching pairs; it agrees with the LEFT JOIN here
# because every row satisfies `int <= int` against itself, so no row is unmatched.
res = pd.merge(journeys_end, journeys_end, how='cross', suffixes=('_je1', '_je2'))
res.loc[res.int_je1 <= res.int_je2].reset_index(drop=True)

Unnamed: 0,name_je,str_je,dex_je,int_je,name_dk,str_dk,dex_dk,int_dk
0,Frieren,3,2,5,Frieren,3,2,5
1,Fern,2,2,4,Frieren,3,2,5
2,Fern,2,2,4,Fern,2,2,4
3,Stark,4,4,1,Frieren,3,2,5
4,Stark,4,4,1,Fern,2,2,4
5,Stark,4,4,1,Stark,4,4,1


## 4 Using `GROUP BY`

Here we show how `GROUP BY` works, using the example from the lecture. Let's first inspect the `characters` table.

In [20]:
# Show the full characters table that the aggregation examples operate on.
query = """
        SELECT *
        FROM characters
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,name,class,str,dex,int
0,Frieren,Mage,3,2,5
1,Himmel,Paladin,4,4,2
2,Eisen,Barbarian,5,4,3
3,Fern,Mage,2,2,4
4,Stark,Barbarian,4,4,2
5,Flamme,Mage,3,3,5


Here we have the name, class, and attributes of each character. We can summarize this table using the following query for data aggregation:

In [21]:
# Aggregate without GROUP BY: the whole table is treated as one group, so the
# result is a single row of averages.
query = """
        SELECT
            AVG(str) AS avg_str,
            AVG(dex) AS avg_dex,
            AVG(int) AS avg_int
        FROM characters
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,avg_str,avg_dex,avg_int
0,3.5,3.166667,3.5


If we want to summarize the same table at a more specific level, say by `class`, we can use the `GROUP BY` clause.

In [22]:
# Same averages, computed once per class. ORDER BY COUNT(*) DESC puts the classes
# with the most characters first.
query = """
        SELECT
            class,
            AVG(str) AS avg_str,
            AVG(dex) AS avg_dex,
            AVG(int) AS avg_int
        FROM characters
        GROUP BY class
        ORDER BY COUNT(*) DESC
        """
pd.read_sql(sql=query, con=db)

Unnamed: 0,class,avg_str,avg_dex,avg_int
0,Mage,2.666667,2.333333,4.666667
1,Barbarian,4.5,4.0,2.5
2,Paladin,4.0,4.0,2.0


<img src="images/banner-down.png" style="width: 100%;">