In [3]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import sqlite3

conn = sqlite3.connect('example.db')
def sql(query: str) -> pd.DataFrame:
    return pd.read_sql_query(query, conn)

In [4]:
sql("""-- Pull athlete_name and gold_medals for summer games

SELECT  
    a.name AS athlete_name, 
    sum(gold) AS gold_medals
FROM summer_games AS s
JOIN athletes AS a
ON s.athlete_id = a.id
GROUP BY a.name
HAVING sum(gold) >= 3
ORDER BY sum(gold) DESC;""")

Unnamed: 0,athlete_name,gold_medals
0,"Michael Fred Phelps, II",5.0
1,Simone Arianne Biles,4.0
2,"Kathleen Genevieve ""Katie"" Ledecky",4.0
3,Usain St. Leo Bolt,3.0
4,Ryan Murphy,3.0
5,Katinka Hossz,3.0


In [5]:
sql("""-- Query season, country, and events for all summer events
SELECT 
	'summer' AS season, 
    country, 
    COUNT(DISTINCT event) AS events
FROM summer_games AS s
JOIN countries AS c
ON s.country_id = c.id
GROUP BY country
-- Combine the queries
UNION ALL
-- Query season, country, and events for all winter events
SELECT 
	'winter' AS season, 
    country, 
    COUNT(DISTINCT event) AS events
FROM winter_games AS w
JOIN countries AS c
ON w.country_id = c.id
GROUP BY country
-- Sort the results to show most events at the top
ORDER BY events DESC;""")

Unnamed: 0,season,country,events
0,summer,USA - United States,94
1,summer,BRA - Brazil,79
2,summer,GBR - Great Britain,78
3,summer,GER - Germany,77
4,summer,JPN - Japan,74
...,...,...,...
276,winter,NEP - Nepal,1
277,winter,MEX - Mexico,1
278,winter,LUX - Luxembourg,1
279,winter,DMA - Dominica,1


In [6]:
sql("""-- Add outer layer to pull season, country and unique events
SELECT 
	season, 
    country, 
    COUNT(DISTINCT event) AS events
FROM
    -- Pull season, country_id, and event for both seasons
    (SELECT 
     	'summer' AS season, 
     	country_id, 
     	event
    FROM summer_games
    UNION ALL
    SELECT 
     	'winter' AS season, 
     	country_id, 
     	event
    FROM winter_games) AS subquery
JOIN countries AS c
ON subquery.country_id = c.id
-- Group by any unaggregated fields
GROUP BY season, country
-- Order to show most events at the top
ORDER BY events DESC;""")

Unnamed: 0,season,country,events
0,summer,USA - United States,94
1,summer,BRA - Brazil,79
2,summer,GBR - Great Britain,78
3,summer,GER - Germany,77
4,summer,JPN - Japan,74
...,...,...,...
276,winter,NEP - Nepal,1
277,winter,PAK - Pakistan,1
278,winter,TJK - Tajikistan,1
279,winter,TLS - Timor Leste,1


In [7]:
sql("""SELECT 
	name,
	CASE 
		WHEN height >= 175 AND gender = 'F' THEN 'Tall Female'
		WHEN height >= 190 AND gender = 'M' THEN 'Tall Male'
		ELSE 'Other'
	END AS segment
FROM athletes;""")

Unnamed: 0,name,segment
0,Nstor Abad Sanjun,Other
1,Antonio Abadia Beci,Other
2,Abubakar Abbas Abbas,Other
3,Forough Abbasi,Other
4,Bashir Abdi,Other
...,...,...
4211,Vaida sinait,Other
4212,Stepan Olegovich Zuyev,Other
4213,Anastasiya Valeryevna Zuyeva-Fesikova,Tall Female
4214,Kristaps Zvejnieks,Other


In [None]:
-- Pull in sport, bmi_bucket, and athletes
SELECT 
	sport,
    -- Bucket BMI in three groups: <.25, .25-.30, and >.30	
    CASE WHEN 100*weight/height^2 <.25 THEN '<.25'
    WHEN 100*weight/height^2 <=.30 THEN '.25-.30'
    WHEN 100*weight/height^2 >.30 THEN '>.30' END AS bmi_bucket,
    COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games AS s
JOIN athletes AS a
ON s.athlete_id = a.id
-- GROUP BY non-aggregated fields
GROUP BY sport, bmi_bucket
-- Sort by sport and then by athletes in descending order
ORDER BY sport, athletes DESC;

In [18]:
sql("""-- Pull in sport, bmi_bucket, and athletes
SELECT 
	sport,
    -- Bucket BMI in three groups: <.25, .25-.30, and >.30	
    CASE WHEN 100*(weight/POWER(height, 2)) <.25 THEN '<.25'
    WHEN 100*(weight/POWER(height, 2)) <=.30 THEN '.25-.30'
    WHEN 100*(weight/POWER(height, 2)) >.30 THEN '>.30' 
    ELSE 'no weight recorded' END AS bmi_bucket,
    COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games AS s
JOIN athletes AS a
ON s.athlete_id = a.id
-- GROUP BY non-aggregated fields
GROUP BY sport, bmi_bucket
-- Sort by sport and then by athletes in descending order
ORDER BY sport, athletes DESC;""")

Unnamed: 0,sport,bmi_bucket,athletes
0,Gymnastics,<.25,190
1,Gymnastics,.25-.30,4
2,Gymnastics,no weight recorded,1
3,Gymnastics,>.30,1
4,Swimming,<.25,864
5,Swimming,.25-.30,46
6,Swimming,no weight recorded,32
7,Track and Field,<.25,1880
8,Track and Field,.25-.30,168
9,Track and Field,>.30,117


In [None]:
-- Query from last exercise shown below.  Comment it out.
/*SELECT 
	sport,
    CASE WHEN weight/height^2*100 <.25 THEN '<.25'
    WHEN weight/height^2*100 <=.30 THEN '.25-.30'
    WHEN weight/height^2*100 >.30 THEN '>.30' END AS bmi_bucket,
    COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games AS s
JOIN athletes AS a
ON s.athlete_id = a.id
GROUP BY sport, bmi_bucket
ORDER BY sport, athletes DESC;*/

-- Show height, weight, and bmi for all athletes
SELECT 
	height, 
    weight, 
    weight/height^2*100 AS bmi
FROM athletes
-- Filter for NULL bmi values
WHERE weight/height^2*100 IS NULL;

In [20]:
sql("""-- Show height, weight, and bmi for all athletes
SELECT 
	height, 
    weight, 
    100*(weight/POWER(height, 2)) AS bmi
FROM athletes
-- Filter for NULL bmi values
WHERE 100*(weight/POWER(height, 2)) IS NULL;""")

Unnamed: 0,height,weight,bmi
0,,,
1,,,
2,,,
3,,,
4,182.0,,
...,...,...,...
162,,,
163,,66.0,
164,,,
165,163.0,,


In [21]:
sql("""-- Pull summer bronze_medals, silver_medals, and gold_medals
SELECT 
    SUM(s.bronze) AS bronze_medals, 
    SUM(s.silver) AS silver_medals, 
    SUM(s.gold) AS gold_medals
FROM summer_games AS s
JOIN athletes AS a
ON s.athlete_id = a.id
-- Filter for athletes age 16 or below
WHERE a.age <= 16;""")

Unnamed: 0,bronze_medals,silver_medals,gold_medals
0,8.0,3.0,2.0


In [22]:
sql("""-- Pull summer bronze_medals, silver_medals, and gold_medals
SELECT 
	sum(bronze) AS bronze_medals, 
    sum(silver) AS silver_medals, 
    sum(gold) AS gold_medals
FROM summer_games
-- Add the WHERE statement below
WHERE athlete_id IN
    -- Create subquery list for athlete_ids age 16 or below    
    (SELECT id
     FROM athletes
     WHERE age <= 16); """)

Unnamed: 0,bronze_medals,silver_medals,gold_medals
0,8.0,3.0,2.0


In [23]:
sql("""-- Pull event and unique athletes from summer_games 
SELECT 
    event,
    -- Add the gender field below
    CASE WHEN event LIKE '%Women%' THEN 'female' 
   	ELSE 'male' END AS gender,
    COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games
GROUP BY event;""")

Unnamed: 0,event,gender,athletes
0,Gymnastics Men's Floor Exercise,male,72
1,Gymnastics Men's Horizontal Bar,male,71
2,Gymnastics Men's Horse Vault,male,17
3,Gymnastics Men's Individual All-Around,male,50
4,Gymnastics Men's Parallel Bars,male,67
...,...,...,...
90,Women's Long Jump,female,38
91,Women's Marathon,female,156
92,Women's Pole Vault,female,36
93,Women's Shot Put,female,36


In [24]:
sql("""-- Pull event and unique athletes from summer_games 
SELECT 
    event,
    -- Add the gender field below
    CASE WHEN event LIKE '%Women%' THEN 'female' 
    ELSE 'male' END AS gender,
    COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games
-- Only include countries that won a nobel prize
WHERE country_id IN 
	(SELECT country_id
    FROM country_stats
    WHERE nobel_prize_winners>0)
GROUP BY event;""")

Unnamed: 0,event,gender,athletes
0,Gymnastics Men's Floor Exercise,male,37
1,Gymnastics Men's Horizontal Bar,male,37
2,Gymnastics Men's Horse Vault,male,6
3,Gymnastics Men's Individual All-Around,male,24
4,Gymnastics Men's Parallel Bars,male,34
...,...,...,...
90,Women's Long Jump,female,15
91,Women's Marathon,female,28
92,Women's Pole Vault,female,19
93,Women's Shot Put,female,12


In [25]:
sql("""-- Pull event and unique athletes from summer_games 
SELECT 
    event,
    -- Add the gender field below
    CASE WHEN event LIKE '%Women%' THEN 'female' 
    ELSE 'male' END AS gender,
    COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games
-- Only include countries that won a nobel prize
WHERE country_id IN 
	(SELECT country_id 
    FROM country_stats 
    WHERE nobel_prize_winners > 0)
GROUP BY event
-- Add the second query below and combine with a UNION
UNION
SELECT 
    event,
    -- Add the gender field below
    CASE WHEN event LIKE '%Women%' THEN 'female' 
    ELSE 'male' END AS gender,
    COUNT(DISTINCT athlete_id) AS athletes
FROM winter_games
-- Only include countries that won a nobel prize
WHERE country_id IN 
	(SELECT country_id 
    FROM country_stats 
    WHERE nobel_prize_winners > 0)
GROUP BY event
-- Order and limit the final output
ORDER BY athletes desc
LIMIT 10;""")

Unnamed: 0,event,gender,athletes
0,Swimming Women's 4 x 100 metres Medley Relay,female,56
1,Swimming Women's 4 x 100 metres Freestyle Relay,female,55
2,Swimming Women's 4 x 200 metres Freestyle Relay,female,52
3,Gymnastics Women's Team All-Around,female,50
4,Swimming Men's 4 x 200 metres Freestyle Relay,male,49
5,Gymnastics Men's Team All-Around,male,45
6,Swimming Men's 4 x 100 metres Medley Relay,male,45
7,Swimming Men's 4 x 100 metres Freestyle Relay,male,44
8,Gymnastics Women's Floor Exercise,female,43
9,Gymnastics Women's Uneven Bars,female,43
