In [16]:
%reload_ext sql

#Connect the notebook with the database
%sql postgresql://user:password@localhost:5432/postgres

### For these exercises you have to use the students table that was created in the notebooks 02, 04 and 06


In [17]:
%%sql 
SELECT * FROM students;

 * postgresql://user:***@localhost:5432/postgres
6 rows affected.


student_id,name,age,major
1,Alice,20,Computer Science
2,Bob,22,Economics
3,Charlie,19,Chemistry
4,Diana,21,Computer Science
5,Eve,18,
6,Mark,25,History


In [18]:
%%sql
SELECT * FROM alumni;

 * postgresql://user:***@localhost:5432/postgres
6 rows affected.


alumni_id,name,graduation_year,major
7,Grace,2019,Arts
8,Hannah,2021,History
9,Ian,2020,Biology
10,Diana,2023,Biology
11,Alice,2022,Civil Engineering
12,Frank,2020,Social Work


In [19]:
%%sql
SELECT * FROM mentorship;

 * postgresql://user:***@localhost:5432/postgres
7 rows affected.


mentor_name,mentee_name,mentee_major
Alice,Bob,Economics
Bob,Charlie,Chemistry
Hannah,Diana,Computer Science
Grace,Mark,History
Alice,Grace,Arts
Frank,Ian,Biology
Eve,Frank,Social Work


### 1. List all students with their age and show their rank (1 = youngest). Use a ranking function.

In [22]:
%%sql
SELECT name, age, RANK() OVER(ORDER BY age ASC) AS rank_position
FROM students;

 * postgresql://user:***@localhost:5432/postgres
6 rows affected.


name,age,rank_position
Eve,18,1
Charlie,19,2
Alice,20,3
Diana,21,4
Bob,22,5
Mark,25,6


### 2. List alumni with their graduation year and assign a dense rank (1 = earliest year).

In [24]:
%%sql
SELECT name, graduation_year, DENSE_RANK() OVER(ORDER BY graduation_year DESC)
FROM alumni;

 * postgresql://user:***@localhost:5432/postgres
6 rows affected.


name,graduation_year,dense_rank
Diana,2023,1
Alice,2022,2
Hannah,2021,3
Ian,2020,4
Frank,2020,4
Grace,2019,5


### 3. List students and rank them by age within each major.

In [27]:
%%sql
SELECT name, age, major, RANK() OVER(PARTITION BY major ORDER BY age ASC)
FROM students;

 * postgresql://user:***@localhost:5432/postgres
6 rows affected.


name,age,major,rank
Charlie,19,Chemistry,1
Alice,20,Computer Science,1
Diana,21,Computer Science,2
Bob,22,Economics,1
Mark,25,History,1
Eve,18,,1


### 4. Assign a rank, a dense rank and a sequential row number to all alumni ordered by graduation year. Tell the difference

In [35]:
%%sql
SELECT name, graduation_year, 
RANK() OVER (ORDER BY graduation_year) AS rank,
DENSE_RANK() OVER (ORDER BY graduation_year) AS dense_rank,
ROW_NUMBER() OVER (ORDER BY graduation_year) AS seq_row_number
FROM alumni;

 * postgresql://user:***@localhost:5432/postgres
6 rows affected.


name,graduation_year,rank,dense_rank,seq_row_number
Grace,2019,1,1,1
Ian,2020,2,2,2
Frank,2020,2,2,3
Hannah,2021,4,3,4
Alice,2022,5,4,5
Diana,2023,6,5,6


1. Rank() assigns a rank to each row, if there are some with same value, it labels them with the same value and skips the correspondent same amount of number until next label.
2. Dense_rank() assigns a rank to each row without gaps in ranking values.
3. Row_number() assigns a unique row number to each row.

### 5. Show all students ordered by age and calculate a running total of their ages.



In [38]:
%%sql
SELECT name, age, SUM(age) OVER(ORDER BY age) AS running_total_age
FROM students;

 * postgresql://user:***@localhost:5432/postgres
6 rows affected.


name,age,running_total_age
Eve,18,18
Charlie,19,37
Alice,20,57
Diana,21,78
Bob,22,100
Mark,25,125


### 6. Show each student’s age together with the average age of the previous and next student (a 3-row moving average). The calculate the avg number of that column.

In [48]:
%%sql
    SELECT name, age, 
    LEAD(age) OVER(ORDER BY age) - LAG(age) OVER(ORDER BY age) AS avg_ages
FROM students

 * postgresql://user:***@localhost:5432/postgres
6 rows affected.


name,age,avg_ages
Eve,18,
Charlie,19,2.0
Alice,20,2.0
Diana,21,2.0
Bob,22,4.0
Mark,25,


In [58]:
%%sql
WITH diff AS(
    SELECT name, age, 
    LEAD(age) OVER(ORDER BY age) - LAG(age) OVER(ORDER BY age) AS avg_ages
FROM students)

SELECT  ROUND(AVG(avg_ages)) AS total_average
FROM diff;

 * postgresql://user:***@localhost:5432/postgres
1 rows affected.


total_average
3


### 7. List all students with their percentile rank based on age.

In [59]:
%%sql
SELECT name, PERCENT_RANK() OVER (ORDER BY age)
FROM students;

 * postgresql://user:***@localhost:5432/postgres
6 rows affected.


name,percent_rank
Eve,0.0
Charlie,0.2
Alice,0.4
Diana,0.6
Bob,0.8
Mark,1.0


### 8. Create a table with students and alumni names, majors and ages (alumni age is current age - 2000). For each major, show the first and last student name ordered by age. 

Hint: Use PARTITION BY major with FIRST_VALUE and LAST_VALUE.

In [70]:
%%sql

    SELECT name, major, age
    FROM students
    UNION ALL
    SELECT name, major, graduation_year -2000 AS age
    FROM alumni
    ORDER BY major;

 * postgresql://user:***@localhost:5432/postgres
12 rows affected.


name,major,age
Grace,Arts,19
Ian,Biology,20
Diana,Biology,23
Charlie,Chemistry,19
Alice,Civil Engineering,22
Alice,Computer Science,20
Diana,Computer Science,21
Bob,Economics,22
Hannah,History,21
Mark,History,25


In [89]:
%%sql
-- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING to define LAST_VALUE properly, otherwise
-- it goes until the current row
WITH summary AS (
    SELECT name, major, age
    FROM students
    UNION ALL
    SELECT name, major, graduation_year -2000 AS age
    FROM alumni
)   
SELECT
    DISTINCT major,
    name,
    age,
    FIRST_VALUE(name) OVER (PARTITION BY major ORDER BY age) AS first_student,
    LAST_VALUE(name) OVER (
        PARTITION BY major 
        ORDER BY age 
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_student
FROM summary
ORDER BY major, age;


 * postgresql://user:***@localhost:5432/postgres
12 rows affected.


major,name,age,first_student,last_student
Arts,Grace,19,Grace,Grace
Biology,Ian,20,Ian,Diana
Biology,Diana,23,Ian,Diana
Chemistry,Charlie,19,Charlie,Charlie
Civil Engineering,Alice,22,Alice,Alice
Computer Science,Alice,20,Alice,Diana
Computer Science,Diana,21,Alice,Diana
Economics,Bob,22,Bob,Bob
History,Hannah,21,Hannah,Mark
History,Mark,25,Hannah,Mark


### 9.Continue using the table formed by students + alumni. For each major, show the 2nd youngest student.



In [103]:
%%sql
WITH summary AS (
    SELECT name, major, age
    FROM students
    UNION ALL
    SELECT name, major, graduation_year -2000 AS age
    FROM alumni
)   
SELECT major, 
    NTH_VALUE(age, 2) OVER (PARTITION BY major ORDER BY age ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as second_youngest
FROM summary


 * postgresql://user:***@localhost:5432/postgres
12 rows affected.


major,second_youngest
Arts,
Biology,23.0
Biology,23.0
Chemistry,
Civil Engineering,
Computer Science,21.0
Computer Science,21.0
Economics,
History,25.0
History,25.0


### 10. For each student, show their name, major, and the total number of students in the same major (without grouping).

In [111]:

%%sql
WITH summary AS (
    SELECT name, major, age
    FROM students
    UNION ALL
    SELECT name, major, graduation_year -2000 AS age
    FROM alumni
)   
SELECT name, major, COUNT(*) OVER(PARTITION BY major) AS total_students_in_major
FROM summary
ORDER BY major, name;

 * postgresql://user:***@localhost:5432/postgres
12 rows affected.


name,major,total_students_in_major
Grace,Arts,1
Diana,Biology,2
Ian,Biology,2
Charlie,Chemistry,1
Alice,Civil Engineering,1
Alice,Computer Science,2
Diana,Computer Science,2
Bob,Economics,1
Hannah,History,2
Mark,History,2
