# **Setup**

In [1]:
# One-time environment setup, kept commented out so a normal run does not reinstall anything.
# ipython-sql provides the %sql / %%sql magics; psycopg2 is the PostgreSQL driver.
# When enabling these lines, prefer `%pip` over `!pip` so the install targets the kernel's environment.
# !pip install ipython-sql
# !pip install psycopg2

In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
import regex as re
from IPython.display import clear_output, display
from sqlalchemy import create_engine

In [3]:
# Exploratory check: list the line and cell magics available in this kernel.
%lsmagic

Available line magics:
%alias  %alias_magic  %autoawait  %autocall  %automagic  %autosave  %bookmark  %cd  %clear  %cls  %code_wrap  %colors  %conda  %config  %connect_info  %copy  %ddir  %debug  %dhist  %dirs  %doctest_mode  %echo  %ed  %edit  %env  %gui  %hist  %history  %killbgscripts  %ldir  %less  %load  %load_ext  %loadpy  %logoff  %logon  %logstart  %logstate  %logstop  %ls  %lsmagic  %macro  %magic  %mamba  %matplotlib  %micromamba  %mkdir  %more  %notebook  %page  %pastebin  %pdb  %pdef  %pdoc  %pfile  %pinfo  %pinfo2  %pip  %popd  %pprint  %precision  %prun  %psearch  %psource  %pushd  %pwd  %pycat  %pylab  %qtconsole  %quickref  %recall  %rehashx  %reload_ext  %ren  %rep  %rerun  %reset  %reset_selective  %rmdir  %run  %save  %sc  %set_env  %store  %sx  %system  %tb  %time  %timeit  %unalias  %unload_ext  %who  %who_ls  %whos  %xdel  %xmode

Available cell magics:
%%!  %%HTML  %%SVG  %%bash  %%capture  %%cmd  %%code_wrap  %%debug  %%file  %%html  %%javascript  %%js  %%latex 

# **Connect Database**

In [2]:
# Database Connection Details
PGSQL_Server = 'localhost:5432'  # Use Your Server Name
PGSQL_Username = 'postgres'     # Use Your Username
PGSQL_Password = os.getenv('PGAdmin_Password') # Use Your Password
PGSQL_Database = 'olympic'

# Create Database Connection
connection_string = f'postgresql://{PGSQL_Username}:{PGSQL_Password}@{PGSQL_Server}/{PGSQL_Database}'

# Activate SQL Extension in Jupyter Notebook
%load_ext sql
%sql $connection_string

# Create Database Engine
Query_Engine = create_engine(connection_string)
Connection = Query_Engine.connect()

# **Check Extensions**

In [4]:
%%sql

-- Extensions currently installed in the connected database
SELECT
    extname
FROM
    pg_extension;

 * postgresql://postgres:***@localhost:5432/olympic
1 rows affected.


extname
plpgsql


In [5]:
%%sql

-- Extensions the server offers for installation
SELECT
    name
FROM
    pg_available_extensions;

 * postgresql://postgres:***@localhost:5432/olympic
113 rows affected.


name
address_standardizer
address_standardizer_data_us
adminpack
amcheck
autoinc
bloom
bool_plperl
bool_plperlu
btree_gin
btree_gist


# **Data Import**

In [5]:
# Collect the entries of the input folder in alphabetical order
Folder_Input = 'Input Data'
Files_List = sorted(os.listdir(Folder_Input))

# Report how many entries were found, followed by their names
print('No. Of Files Found:', len(Files_List))
print(Files_List, '\n')

No. Of Files Found: 1
['summer.csv'] 



In [6]:
# Filter CSV Files
# Match on the file extension (case-insensitive) rather than on a substring, so names such as
# 'data.csv.bak' or 'my.csv_notes.txt' are not mistaken for CSV files.
CSV_Files_List = [filename for filename in Files_List if filename.lower().endswith('.csv')]

print('No. Of CSV Files:', len(CSV_Files_List))
print(CSV_Files_List,'\n')

No. Of CSV Files: 1
['summer.csv'] 



In [7]:
# Load Data To SQL Database
# The load is guarded by a flag instead of being commented out, so running the whole notebook
# does not rewrite the tables. Set RELOAD_DATA = True for the initial load or to refresh the data.
RELOAD_DATA = False

if RELOAD_DATA:
    for filename in CSV_Files_List:
        clear_output(wait=True)
        print('Reading & Loading: ', filename)
        df = pd.read_csv(f'{Folder_Input}/{filename}')
        # Table name is the trailing word of the file name before the .csv extension.
        # Raw string avoids the invalid-escape warning for \w, and the flag keeps the
        # pattern case-insensitive like the CSV filter.
        table_name = re.findall(r'(\w+)\.csv$', filename, re.I)[0]
        # if_exists='replace' drops and recreates the table on every load
        df.to_sql(name=table_name, con=Connection, if_exists='replace', index=False)

Reading & Loading:  summer.csv


# **01 Window Function** 

## **Query 01**

* Use Window Function To Create Row Number Column

In [8]:
%%sql

-- Number the gold-medal rows. The empty OVER() imposes no ordering on the numbering.
-- The top-level ORDER BY makes LIMIT keep row numbers 1 to 10 instead of an arbitrary ten rows.
SELECT "Year","Event","Country","Medal",
    ROW_NUMBER() OVER() AS "Row_N"
FROM summer
WHERE "Medal" = 'Gold'
ORDER BY "Row_N" ASC
LIMIT 10;

 * postgresql://postgres:***@localhost:5432/olympic
10 rows affected.


Year,Event,Country,Medal,Row_N
1896,100M Freestyle,HUN,Gold,1
1896,100M Freestyle For Sailors,GRE,Gold,2
1896,1200M Freestyle,HUN,Gold,3
1896,400M Freestyle,AUT,Gold,4
1896,100M,USA,Gold,5
1896,110M Hurdles,USA,Gold,6
1896,1500M,AUS,Gold,7
1896,400M,USA,Gold,8
1896,800M,AUS,Gold,9
1896,Discus Throw,USA,Gold,10


## **Query 02**

* Use ORDER BY Clause In Window Function

In [9]:
%%sql

-- Number the gold-medal rows starting from the most recent year.
-- The top-level ORDER BY makes LIMIT keep row numbers 1 to 10 instead of an arbitrary ten rows.
SELECT "Year","Event","Country","Medal",
    ROW_NUMBER() OVER(ORDER BY "Year" DESC) AS "Row_N"
FROM summer
WHERE "Medal" = 'Gold'
ORDER BY "Row_N" ASC
LIMIT 10;

 * postgresql://postgres:***@localhost:5432/olympic
10 rows affected.


Year,Event,Country,Medal,Row_N
2012,10M Platform,USA,Gold,1
2012,Wg 84 KG,RUS,Gold,2
2012,Wg 74 KG,RUS,Gold,3
2012,Wg 66 KG,KOR,Gold,4
2012,Wg 60 KG,IRI,Gold,5
2012,Wg 55 KG,IRI,Gold,6
2012,Wg 120 KG,CUB,Gold,7
2012,Wf 96 KG,USA,Gold,8
2012,Wf 84 KG,AZE,Gold,9
2012,Wf 74 KG,USA,Gold,10


## **Query 03**
* Use ORDER BY Clause For Multiple Columns

In [10]:
%%sql

-- Number the gold-medal rows by year (newest first), then by event name within a year.
-- The top-level ORDER BY makes LIMIT keep row numbers 1 to 10 instead of an arbitrary ten rows.
SELECT "Year","Event","Country","Medal",
    ROW_NUMBER() OVER(ORDER BY "Year" DESC, "Event" ASC) AS "Row_N"
FROM summer
WHERE "Medal" = 'Gold'
ORDER BY "Row_N" ASC
LIMIT 10;

 * postgresql://postgres:***@localhost:5432/olympic
10 rows affected.


Year,Event,Country,Medal,Row_N
2012,- 48 KG,BRA,Gold,1
2012,- 49 KG,CHN,Gold,2
2012,- 58 KG,ESP,Gold,3
2012,- 60 KG,RUS,Gold,4
2012,-56KG,PRK,Gold,5
2012,+ 100KG,FRA,Gold,6
2012,+ 67 KG,SRB,Gold,7
2012,+ 78KG,CUB,Gold,8
2012,+ 80 KG,ITA,Gold,9
2012,+ 91KG,GBR,Gold,10


## **LAG Function**

In [11]:
%%sql

-- Preview ten rows of the summer table with all of its columns
SELECT
    *
FROM
    summer
LIMIT 10;

 * postgresql://postgres:***@localhost:5432/olympic
10 rows affected.


Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
1896,Athens,Aquatics,Swimming,HAJOS Alfred,HUN,Men,100M Freestyle,Gold
1896,Athens,Aquatics,Swimming,HERSCHMANN Otto,AUT,Men,100M Freestyle,Silver
1896,Athens,Aquatics,Swimming,DRIVAS Dimitrios,GRE,Men,100M Freestyle For Sailors,Bronze
1896,Athens,Aquatics,Swimming,MALOKINIS Ioannis,GRE,Men,100M Freestyle For Sailors,Gold
1896,Athens,Aquatics,Swimming,CHASAPIS Spiridon,GRE,Men,100M Freestyle For Sailors,Silver
1896,Athens,Aquatics,Swimming,CHOROPHAS Efstathios,GRE,Men,1200M Freestyle,Bronze
1896,Athens,Aquatics,Swimming,HAJOS Alfred,HUN,Men,1200M Freestyle,Gold
1896,Athens,Aquatics,Swimming,ANDREOU Joannis,GRE,Men,1200M Freestyle,Silver
1896,Athens,Aquatics,Swimming,CHOROPHAS Efstathios,GRE,Men,400M Freestyle,Bronze
1896,Athens,Aquatics,Swimming,NEUMANN Paul,AUT,Men,400M Freestyle,Gold


In [12]:
%%sql

-- Compare Reigning Champion
-- Gold-medal country of the Men Discus Throw event, listed chronologically.
-- Without the ORDER BY the database is free to return the rows in any order.
SELECT "Year","Country" AS "Champion"
FROM summer
WHERE "Gender" = 'Men'
    AND "Medal" = 'Gold'
    AND "Event" = 'Discus Throw'
ORDER BY "Year" ASC;

 * postgresql://postgres:***@localhost:5432/olympic
27 rows affected.


Year,Champion
1896,USA
1900,HUN
1904,USA
1908,USA
1912,FIN
1920,FIN
1924,USA
1928,USA
1932,USA
1936,USA


In [13]:
%%sql

-- CTE with the gold-medal country of the Men Discus Throw event per year
WITH champions AS (
    SELECT
        "Year",
        "Country" AS "Champion"
    FROM summer
    WHERE "Event" = 'Discus Throw'
        AND "Medal" = 'Gold'
        AND "Gender" = 'Men'
)

-- Put each champion next to the champion of the preceding row in year order.
-- The earliest year has no preceding row, so its value is NULL.
SELECT
    "Year",
    "Champion",
    LAG("Champion") OVER (ORDER BY "Year") AS "Previous Champion"
FROM champions
ORDER BY "Year";

 * postgresql://postgres:***@localhost:5432/olympic
27 rows affected.


Year,Champion,Previous Champion
1896,USA,
1900,HUN,USA
1904,USA,HUN
1908,USA,USA
1912,FIN,USA
1920,FIN,FIN
1924,USA,FIN
1928,USA,USA
1932,USA,USA
1936,USA,USA


## **PARTITION BY**

In [15]:
%%sql

-- CTE Table
-- Named after the event it filters on (Javelin Throw).
WITH Javelin_Gold AS (
  SELECT DISTINCT
    "Gender", "Year", "Country"
  FROM summer
  WHERE
    "Year" >= 2000 AND
    "Event" = 'Javelin Throw' AND
    "Medal" = 'Gold')

-- Explicit ordering, because DISTINCT alone does not guarantee sorted output
SELECT * FROM Javelin_Gold
ORDER BY "Gender" ASC, "Year" ASC;

 * postgresql://postgres:***@localhost:5432/olympic
8 rows affected.


Gender,Year,Country
Men,2000,CZE
Men,2004,NOR
Men,2008,NOR
Men,2012,TTO
Women,2000,NOR
Women,2004,CUB
Women,2008,CZE
Women,2012,CZE


In [22]:
%%sql

-- CTE Table
-- Named after the event it filters on (Javelin Throw).
WITH Javelin_Gold AS (
  SELECT DISTINCT
    "Gender", "Year", "Country"
  FROM summer
  WHERE
    "Year" >= 2000 AND
    "Event" = 'Javelin Throw' AND
    "Medal" = 'Gold')

-- Use LAG To Get Previous Year Champion & Partition By Gender
SELECT
  "Gender", "Year",
  "Country" AS "Champion",
  -- Fetch the champion of the preceding row within the same gender.
  -- PARTITION BY restarts the window per gender, so the first year of each gender is NULL.
  -- The alias is unquoted, so it appears in lowercase in the result header.
  LAG("Country") OVER (PARTITION BY "Gender"
            ORDER BY "Year" ASC) AS Last_Champion
FROM Javelin_Gold
ORDER BY "Gender" ASC, "Year" ASC;

 * postgresql://postgres:***@localhost:5432/olympic
8 rows affected.


Gender,Year,Champion,last_champion
Men,2000,CZE,
Men,2004,NOR,CZE
Men,2008,NOR,NOR
Men,2012,TTO,NOR
Women,2000,NOR,
Women,2004,CUB,NOR
Women,2008,CZE,CUB
Women,2012,CZE,CZE


# **02 Fetch, Rank & Page**

# **03 Aggregate Window Function**

In [8]:
%%sql

-- Gold medals won by Brazil per year, from 1992 onwards
WITH Brazil_Medals AS (
    SELECT
    "Year", COUNT(*) AS Medals
    FROM summer
    WHERE
    "Country" = 'BRA'
    AND "Medal" = 'Gold'
    AND "Year" >= 1992
    GROUP BY "Year")

-- The ordering belongs on the outer query, because an ORDER BY inside the CTE
-- does not guarantee the order of the final result.
SELECT * FROM Brazil_Medals
ORDER BY "Year" ASC;

 * postgresql://postgres:***@localhost:5432/olympic
5 rows affected.


Year,medals
1992,13
1996,5
2004,18
2008,14
2012,14


## **MAX Aggregate**

In [14]:
%%sql

/*
Return the year, country, medals, and the maximum medals earned so far for each country,
ordered by year in ascending order.
*/

-- Gold medals per year for the three selected countries, from 2000 onwards
WITH Country_Medals AS (
  SELECT
    "Year", "Country", COUNT(*) AS "Medals"
  FROM summer
  WHERE
    "Country" IN ('CHN', 'KOR', 'JPN')
    AND "Medal" = 'Gold' AND "Year" >= 2000
  GROUP BY "Year", "Country")

SELECT
  "Year",
  "Country",
  "Medals",
  -- Running maximum per country. With ORDER BY in the window, the default frame runs from
  -- the first row of the partition up to the current row.
  MAX("Medals") OVER (PARTITION BY "Country"
                ORDER BY "Year" ASC) AS Max_Medals
FROM Country_Medals
ORDER BY "Country" ASC, "Year" ASC;

 * postgresql://postgres:***@localhost:5432/olympic
12 rows affected.


Year,Country,Medals,max_medals
2000,CHN,39,39
2004,CHN,52,52
2008,CHN,74,74
2012,CHN,56,74
2000,JPN,5,5
2004,JPN,21,21
2008,JPN,23,23
2012,JPN,7,23
2000,KOR,12,12
2004,KOR,14,14


## **Moving AVG**