In [2]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import sqlite3

conn = sqlite3.connect('example.db')
def sql(query: str) -> pd.DataFrame:
    return pd.read_sql_query(query, conn)

In [None]:
-- Pull column_name & data_type from the columns table
SELECT 
	column_name, 
    data_type
FROM information_schema.columns
-- Filter for the table 'country_stats'
WHERE table_name = 'country_stats';

In [6]:
sql("""-- Comment out the previous query
SELECT AVG(CAST(population AS float)) AS avg_population
FROM country_stats;

""")

Unnamed: 0,avg_population
0,33935340.0


In [5]:
sql("""-- Uncomment the following block & run the query
SELECT 
	s.country_id, 
    COUNT(DISTINCT s.athlete_id) AS summer_athletes, 
    COUNT(DISTINCT w.athlete_id) AS winter_athletes
FROM summer_games AS s
JOIN winter_games AS w
-- Fix the error by making both columns integers
ON s.country_id = cast(w.country_id as int)
GROUP BY s.country_id;""")

Unnamed: 0,country_id,summer_athletes,winter_athletes
0,2,4,1
1,5,2,5
2,8,20,7
3,9,9,4
4,11,99,11
...,...,...,...
73,195,185,43
74,196,5,1
75,198,20,2
76,199,20,1


In [None]:
SELECT 
    year, -- Convert year to integer 
    -- Pull decade using DATE_PART and DATE_TRUNC
    DATE_PART('decade', CAST(year AS date)) AS decade,
    DATE_TRUNC('decade', CAST(year AS date)) AS decade_truncated,
    -- Compute world GDP by summing gdp
    SUM(gdp) AS world_gdp
FROM country_stats
GROUP BY 1 -- Group by year
ORDER BY 1 DESC; -- Order by year in descending order

In [8]:
sql("""SELECT 
    year, -- Convert year to integer 
    -- Pull decade using strftime
    CAST((strftime('%Y', CAST(year AS date)) / 10) AS INTEGER) * 10 AS decade,
    -- Compute world GDP by summing gdp
    SUM(gdp) AS world_gdp
FROM country_stats
GROUP BY 1 -- Group by year
ORDER BY 1 DESC; -- Order by year in descending order""")

Unnamed: 0,year,decade,world_gdp
0,2016-01-01,-4700,74350390000000.0
1,2015-01-01,-4700,73358270000000.0
2,2014-01-01,-4700,77976300000000.0
3,2013-01-01,-4700,75917400000000.0
4,2012-01-01,-4700,73872630000000.0
5,2011-01-01,-4700,72288370000000.0
6,2010-01-01,-4700,65085410000000.0
7,2009-01-01,-4700,59377300000000.0
8,2008-01-01,-4700,62580540000000.0
9,2007-01-01,-4700,57119690000000.0


In [10]:
-- Output all characters starting with position 7
SELECT 
	country, 
    SUBSTR(country, 7)  AS country_altered,
    left(country,3) as country_altered1,
    right(country,3) as country_altered2,
    upper(country) as country_altered3
FROM countries
GROUP BY country;

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 4)

In [12]:
sql("""-- Output all characters starting with position 7
SELECT 
	country, 
    SUBSTR(country, 7)  AS country_altered,
    SUBSTR(country, 1, 3) as country_altered1,
    SUBSTR(country, -3) as country_altered2,
    upper(country) as country_altered3
FROM countries
GROUP BY country;""")

Unnamed: 0,country,country_altered,country_altered1,country_altered2,country_altered3
0,AFG - Afghanistan,Afghanistan,AFG,tan,AFG - AFGHANISTAN
1,ALB - Albania,Albania,ALB,nia,ALB - ALBANIA
2,ALG - Algeria,Algeria,ALG,ria,ALG - ALGERIA
3,AND - Andorra,Andorra,AND,rra,AND - ANDORRA
4,ANG - Angola,Angola,ANG,ola,ANG - ANGOLA
...,...,...,...,...,...
198,VIE - Vietnam,Vietnam,VIE,nam,VIE - VIETNAM
199,VIN - Saint Vincent and the Grenadines,Saint Vincent and the Grenadines,VIN,nes,VIN - SAINT VINCENT AND THE GRENADINES
200,YEM - Yemen,Yemen,YEM,men,YEM - YEMEN
201,ZAM - Zambia,Zambia,ZAM,bia,ZAM - ZAMBIA


In [18]:
sql("""SELECT 
	region, 
    -- Replace all '&' characters with the string 'and'
    REPLACE(region,'&','and') AS character_swap,
    -- Remove all periods
    REPLACE(region,'.','') AS character_remove,
    -- Combine the functions to run both changes at once
    REPLACE(REPLACE(region, '.', ''), '&', 'and')  AS character_swap_and_remove
FROM countries
WHERE region like '%LATIN AMER. & CARIB%'
GROUP BY region;""")

Unnamed: 0,region,character_swap,character_remove,character_swap_and_remove
0,LATIN AMER. & CARIB,LATIN AMER. and CARIB,LATIN AMER & CARIB,LATIN AMER and CARIB


In [20]:
sql("""-- Pull event and unique athletes from summer_games_messy 
SELECT 
	event, 
    COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games
-- Group by the non-aggregated field
GROUP BY event;""")

Unnamed: 0,event,athletes
0,Gymnastics Men's Floor Exercise,72
1,Gymnastics Men's Horizontal Bar,71
2,Gymnastics Men's Horse Vault,17
3,Gymnastics Men's Individual All-Around,50
4,Gymnastics Men's Parallel Bars,67
...,...,...
90,Women's Long Jump,38
91,Women's Marathon,156
92,Women's Pole Vault,36
93,Women's Shot Put,36


In [23]:
sql("""-- Pull event and unique athletes from summer_games_messy 
SELECT
    -- Remove dashes from all event values
	REPLACE(TRIM(event),'-',' ') AS event_fixed, 
    COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games
-- Update the group by accordingly
GROUP BY event_fixed;""")

Unnamed: 0,event_fixed,athletes
0,Gymnastics Men's Floor Exercise,72
1,Gymnastics Men's Horizontal Bar,71
2,Gymnastics Men's Horse Vault,17
3,Gymnastics Men's Individual All Around,50
4,Gymnastics Men's Parallel Bars,67
...,...,...
90,Women's Long Jump,38
91,Women's Marathon,156
92,Women's Pole Vault,36
93,Women's Shot Put,36


In [25]:
sql("""-- Show total gold_medals by country
SELECT 
	country, 
    SUM(gold) AS gold_medals
FROM winter_games AS w
JOIN countries AS c
ON w.country_id = c.id
GROUP BY country
-- Order by gold_medals in descending order
ORDER BY gold_medals DESC;""")

Unnamed: 0,country,gold_medals
0,NOR - Norway,13.0
1,SWE - Sweden,8.0
2,RUS - Russia,5.0
3,UKR - Ukraine,4.0
4,SUI - Switzerland,4.0
...,...,...
73,AUS - Australia,
74,ARM - Armenia,
75,ARG - Argentina,
76,AND - Andorra,


In [27]:
sql("""-- Show total gold_medals by country
SELECT 
	country, 
    SUM(gold) AS gold_medals
FROM winter_games AS w
JOIN countries AS c
ON w.country_id = c.id
-- Comment out the WHERE statement
--WHERE gold IS NOT NULL
GROUP BY country
-- Replace WHERE statement with equivalent HAVING statement
HAVING SUM(gold) IS NOT NULL
-- Order by gold_medals in descending order
ORDER BY gold_medals DESC;""")

Unnamed: 0,country,gold_medals
0,NOR - Norway,13.0
1,SWE - Sweden,8.0
2,RUS - Russia,5.0
3,UKR - Ukraine,4.0
4,SUI - Switzerland,4.0
5,BLR - Belarus,3.0
6,AUT - Austria,3.0
7,USA - United States,2.0
8,SLO - Slovenia,2.0
9,FRA - France,2.0


In [28]:
sql("""-- Pull events and golds by athlete_id for summer events
-- Pull events and golds by athlete_id for summer events
SELECT 
    athlete_id, 
    -- Replace all null gold values with 0
    AVG(COALESCE(gold,0)) AS avg_golds,
    COUNT(event) AS total_events, 
    SUM(gold) AS gold_medals
FROM summer_games
GROUP BY athlete_id
-- Order by total_events descending and athlete_id ascending
ORDER BY total_events DESC, athlete_id;""")

Unnamed: 0,athlete_id,avg_golds,total_events,gold_medals
0,84138,0.000,8,
1,105918,0.000,8,
2,126096,0.125,8,1.0
3,5948,0.000,7,
4,8114,0.000,7,
...,...,...,...,...
3402,135312,0.000,1,
3403,135318,0.000,1,
3404,135410,0.000,1,
3405,135431,0.000,1,


In [30]:
sql("""SELECT SUM(gold) AS gold_medals
FROM winter_games;""")

Unnamed: 0,gold_medals
0,51.0


In [29]:
sql("""-- Comment out the query after noting the gold medal count
/*SELECT SUM(gold) AS gold_medals
FROM winter_games;*/
-- TOTAL GOLD MEDALS: 47 

-- Calculate the total gold_medals in your query
SELECT sum(gold_medals)
FROM
	(SELECT 
        w.country_id, 
     	SUM(gold) AS gold_medals, 
        AVG(gdp) AS avg_gdp
    FROM winter_games AS w
    JOIN country_stats AS c
    ON c.country_id = w.country_id
    -- Alias your query as subquery
    GROUP BY w.country_id) AS subquery;""")

Unnamed: 0,sum(gold_medals)
0,867.0


In [31]:
sql("""SELECT SUM(gold_medals) AS gold_medals
FROM
	(SELECT 
     	w.country_id, 
     	SUM(gold) AS gold_medals, 
     	AVG(gdp) AS avg_gdp
    FROM winter_games AS w
    JOIN country_stats AS c
    -- Update the subquery to join on a second field
    ON c.country_id = w.country_id AND w.year = CAST(c.year AS date)
    GROUP BY w.country_id) AS subquery;""")

Unnamed: 0,gold_medals
0,


In [34]:
sql("""SELECT 
	c.country,
    -- Pull in pop_in_millions and medals_per_million 
	population,
    -- Add the three medal fields using one sum function
	SUM(COALESCE(bronze,0) + COALESCE(silver,0) + COALESCE(gold,0)) AS medals,
	SUM(COALESCE(bronze,0) + COALESCE(silver,0) + COALESCE(gold,0)) / CAST(cs.population AS float) AS medals_per_million
FROM summer_games AS s
JOIN countries AS c 
ON s.country_id = c.id
-- Add a join
JOIN country_stats AS cs 
ON s.country_id = cs.country_id
GROUP BY c.country, population
ORDER BY medals DESC;""")

Unnamed: 0,country,population,medals,medals_per_million
0,USA - United States,282162411.0,133.0,4.713597e-07
1,USA - United States,284968955.0,133.0,4.667175e-07
2,USA - United States,287625193.0,133.0,4.624073e-07
3,USA - United States,290107933.0,133.0,4.584501e-07
4,USA - United States,292805298.0,133.0,4.542268e-07
...,...,...,...,...
3358,ZIM - Zimbabwe,14710826.0,0.0,0.000000e+00
3359,ZIM - Zimbabwe,15054506.0,0.0,0.000000e+00
3360,ZIM - Zimbabwe,15411675.0,0.0,0.000000e+00
3361,ZIM - Zimbabwe,15777451.0,0.0,0.000000e+00


In [50]:
sql("""SELECT 
-- Clean the country field to only show country_code
SUBSTR(REPLACE(UPPER(TRIM(c.country)), '.', ''), 1, 3) AS country_code,
-- Pull in population and medals_per_million 
cs.population,
-- Add the three medal fields using one sum function
SUM(COALESCE(bronze,0) + COALESCE(silver,0) + COALESCE(gold,0)) AS medals,
SUM(COALESCE(bronze,0) + COALESCE(silver,0) + COALESCE(gold,0)) / CAST(cs.population AS float) AS medals_per_million
FROM summer_games AS s
JOIN countries AS c 
ON s.country_id = c.id
JOIN country_stats AS cs 
ON c.id = cs.country_id AND CAST(s.year AS date) = CAST(cs.year AS date)
GROUP BY country_code, cs.population
-- Keep only the top 25 medals_per_million rows
ORDER BY medals_per_million DESC
LIMIT 25
""")

Unnamed: 0,country_code,population,medals,medals_per_million
0,BAH,391232.0,6.0,1.533617e-05
1,JAM,2881355.0,30.0,1.041177e-05
2,GRN,107317.0,1.0,9.318188e-06
3,AUS,24210809.0,34.0,1.404331e-06
4,BRN,1425171.0,2.0,1.40334e-06
5,DEN,5728010.0,6.0,1.047484e-06
6,NZL,4693200.0,4.0,8.522969e-07
7,HUN,9814023.0,8.0,8.151601e-07
8,TTO,1364962.0,1.0,7.326211e-07
9,CRO,4174349.0,3.0,7.186749e-07
