# SQL Tutorial 4: Fill in the Blanks

-   Let's create a very small *in-memory database*

In [3]:
%load_ext sql
%config SqlMagic.autolimit = 0
%config SqlMagic.displaylimit = 0
%sql sqlite://

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [4]:
%%sql
-- Rebuild the three tutorial tables from scratch so this cell can be re-run safely.
drop table if exists person;
create table person(
    person_id integer not null primary key,
    person_name text not null
);

drop table if exists job;
create table job(
    job_id integer not null primary key,
    job_name text not null
);

-- Join table. Each row records that one person did one job.
drop table if exists did;
create table did(
    person_fk integer not null,
    job_fk integer not null
);

-- String literals use single quotes. Double quotes denote identifiers in SQL, and
-- SQLite only accepts them as strings through a legacy fallback.
-- Column lists are spelled out so the inserts do not depend on column order.
insert into person (person_id, person_name) values (1, 'Alice'), (2, 'Bob');
insert into job (job_id, job_name) values (20, 'cook'), (30, 'clean');
-- Bob (2) has no row for clean (30). That gap is the blank the later queries fill in.
insert into did (person_fk, job_fk) values (1, 20), (1, 30), (2, 20);

In [5]:
%%sql
-- Show every row of person.
select
    person_id,
    person_name
from person;

person_id,person_name
1,Alice
2,Bob


In [7]:
%%sql
    -- Show every row of job.
    select
        job_id,
        job_name
    from job;

job_id,job_name
20,cook
30,clean


In [8]:
%%sql
-- Show every row of the did join table.
select
    person_fk,
    job_fk
from did;

person_fk,job_fk
1,20
1,30
2,20


-   We want a table that looks like this (with a count of 0 for Bob cleaning)

| person_name | job_name | num |
|-------------|----------|-----|
| Alice       | clean    | 1   |
| Alice       | cook     | 1   |
| Bob         | clean    | 0   |
| Bob         | cook     | 1   |

-   Let's use the join table `did` as in the previous lesson
-   Joining all three tables with no `on` condition gives the full cross product: 2×2×3=12 rows

In [9]:
%%sql
-- No on clause, so every person row is paired with every did row and every job row.
select *
from
    person
    join did
    join job;

person_id,person_name,person_fk,job_fk,job_id,job_name
1,Alice,1,20,20,cook
1,Alice,1,20,30,clean
1,Alice,1,30,20,cook
1,Alice,1,30,30,clean
1,Alice,2,20,20,cook
1,Alice,2,20,30,clean
2,Bob,1,20,20,cook
2,Bob,1,20,30,clean
2,Bob,1,30,20,cook
2,Bob,1,30,30,clean
2,Bob,2,20,20,cook
2,Bob,2,20,30,clean


-   Filter to keep only the rows that make sense (`person_id` matches `person_fk`, `job_id` matches `job_fk`), then count
-   Only gives us three rows

In [23]:
%%sql
-- Keep only the combinations whose keys line up, then count rows per (person, job) pair.
-- Pairs that never occur in did produce no row at all.
select
    person_name,
    job_name,
    count(*)
from person
join did
    on person_id = person_fk
join job
    on job_id = job_fk
group by
    person_id,
    job_id
order by
    person_id,
    job_id;

person_name,job_name,count(*)
Alice,cook,1
Alice,clean,1
Bob,cook,1


> Since all the columns in this database have distinct names,
> I'm saving a bit of typing by using `column` rather than `table.column`.
> Doing this is unsafe, and I should be ashamed of myself.

-   Trick is to join `person` and `job` first to create all (person, job) pairs
-   And then *left join* that with `did` to create a table with blanks

In [24]:
%%sql
-- Build every (person, job) pair first, then attach the matching did row if there is one.
-- Pairs with no match keep null in the did columns.
select *
from person
cross join job
left join did
    on person_id = person_fk
    and job_id = job_fk;

person_id,person_name,job_id,job_name,person_fk,job_fk
1,Alice,20,cook,1.0,20.0
1,Alice,30,clean,1.0,30.0
2,Bob,20,cook,2.0,20.0
2,Bob,30,clean,,


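-   If we `count(*)` we just count rows, so every pair gets 1 — including Bob/clean, because the left join still produces a row for it (with `null` in the `did` columns)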

In [25]:
%%sql
-- count(*) counts rows, so a pair whose did side is all null still counts as 1.
select
    person_name,
    job_name,
    count(*) as num
from person
cross join job
left join did
    on person_id = person_fk
    and job_id = job_fk
group by
    person_name,
    job_name
order by
    person_name,
    job_name;

person_name,job_name,num
Alice,clean,1
Alice,cook,1
Bob,clean,1
Bob,cook,1


-   But if we count a column from `did`, which is `null` (displayed as `None` or blank) when there is no match, we get 0 where we want, because `count(column)` skips nulls

In [26]:
%%sql
-- count(person_fk) skips nulls, so a pair with no matching did row counts as 0.
select
    person_name,
    job_name,
    count(person_fk) as num
from
    (person cross join job)
    left join did
        on person_fk = person_id
        and job_fk = job_id
group by
    person_name,
    job_name
order by
    person_name,
    job_name;

person_name,job_name,num
Alice,clean,1
Alice,cook,1
Bob,clean,0
Bob,cook,1


-   We can make this a little clearer with a *common table expression* (CTE)
    -   Like a temporary convenience variable in a program

In [15]:
%%sql
-- The CTE person_job holds every (person, job) pair.
with person_job as (
    select
        person_id,
        person_name,
        job_id,
        job_name
    from person
    cross join job
)
select
    person_id,
    person_name,
    job_id,
    job_name
from person_job;

person_id,person_name,job_id,job_name
1,Alice,20,cook
1,Alice,30,clean
2,Bob,20,cook
2,Bob,30,clean


-   Let's use that

In [21]:
%%sql
-- NOTE this query has no group by clause, so the aggregate collapses all joined rows
-- into a single output row. count(job_fk) then covers every non-null job_fk, while
-- person_name and job_name are bare columns taken from an arbitrary row.
with
-- Every (person, job) pair.
person_job as (
    select person_id, person_name, job_id, job_name
    from person cross join job
)
select person_name, job_name, count(job_fk) as num
from person_job left join did
on person_job.person_id = did.person_fk and person_job.job_id = did.job_fk;

person_name,job_name,num
Alice,cook,3


-   Whoops: we forgot the `group by`, so `count` runs over all the joined rows at once and the database is free to take `person_name` and `job_name` from whichever row it likes
-   Let's try again

In [22]:
%%sql
-- Count the did rows matching each (person, job) pair. Pairs with no match count as 0
-- because count(job_fk) skips the nulls produced by the left join.
with person_job as (
    select
        person_id,
        person_name,
        job_id,
        job_name
    from person
    cross join job
)
select
    person_name,
    job_name,
    count(job_fk) as num
from person_job
left join did
    on person_id = person_fk
    and job_id = job_fk
group by
    person_name,
    job_name
order by
    person_name,
    job_name;

person_name,job_name,num
Alice,clean,1
Alice,cook,1
Bob,clean,0
Bob,cook,1
