# Analyze Olympics Using SQL

<img width="983" alt="Olympics_db" src="https://user-images.githubusercontent.com/50973416/59984266-349ace00-9663-11e9-985a-0739d6d3ceb4.png">


다음 KPI를 분석하겠다.
1. Number of events in each sport
2. Top athletes in Nobel Prize-winning countries
3. Countries with high medal rates
4. Most decorated athlete per region
5. Percentage of GDP per country
6. GDP per capita performance index
7. Month-over-month comparison
8. Week-over-week comparison

### Number of events in each sport

In [1]:
sql = '''

-- Number of distinct events per sport, listing the sports found in
-- summer_games followed/mixed with those found in winter_games.
SELECT
    sport,
    COUNT(DISTINCT event) AS events
FROM summer_games
GROUP BY sport

-- UNION ALL instead of UNION: each branch already returns one row per
-- sport, so the implicit de-duplication of plain UNION adds nothing and
-- would silently drop a row if both tables contained a sport with the
-- same name and the same event count.
UNION ALL

SELECT
    sport,
    COUNT(DISTINCT event) AS events
FROM winter_games
GROUP BY sport

-- Applies to the combined result: sports with the most events come first.
ORDER BY events DESC;

'''

### Top athletes in Nobel Prize-winning countries

In [2]:
sql = '''


-- Ten events with the most distinct athletes, counting only rows whose
-- country has at least one Nobel Prize winner recorded in country_stats.
SELECT
    event,
    -- Gender is derived from the event name: names containing 'Women'
    -- are labelled 'female', everything else falls through to 'male'.
    CASE
        WHEN event LIKE '%Women%' THEN 'female'
        ELSE 'male'
    END AS gender,
    COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games
WHERE country_id IN (
    SELECT country_id
    FROM country_stats
    WHERE nobel_prize_winners > 0
)
GROUP BY event

-- UNION ALL instead of UNION: each branch is already one row per event,
-- and plain UNION would collapse a summer and a winter event that happen
-- to share the same name and athlete count into a single row.
UNION ALL

SELECT
    event,
    CASE
        WHEN event LIKE '%Women%' THEN 'female'
        ELSE 'male'
    END AS gender,
    COUNT(DISTINCT athlete_id) AS athletes
FROM winter_games
WHERE country_id IN (
    SELECT country_id
    FROM country_stats
    WHERE nobel_prize_winners > 0
)
GROUP BY event

-- The secondary sort on event makes the LIMIT cut-off deterministic when
-- several events tie on athlete count.
ORDER BY athletes DESC, event
LIMIT 10;

'''

### Countries with high medal rates

In [3]:
sql = '''

-- Top 25 countries by summer-games medals per million inhabitants.
SELECT
    -- Build a three-character code from the country name:
    -- trim whitespace, upper-case, strip periods, keep the first 3 chars.
    LEFT(REPLACE(UPPER(TRIM(c.country)), '.', ''), 3) AS country_code,
    cs.pop_in_millions,
    -- Missing medal values count as zero so one NULL does not null out
    -- the whole row's contribution.
    SUM(COALESCE(bronze, 0) + COALESCE(silver, 0) + COALESCE(gold, 0)) AS medals,
    SUM(COALESCE(bronze, 0) + COALESCE(silver, 0) + COALESCE(gold, 0))
        / CAST(cs.pop_in_millions AS float) AS medals_per_million
FROM summer_games AS s
INNER JOIN countries AS c
    ON s.country_id = c.id
-- Stats are matched on both country and year, so each games row is paired
-- only with the stats row for its own year.
INNER JOIN country_stats AS cs
    ON s.country_id = cs.country_id
    AND s.year = CAST(cs.year AS date)
WHERE cs.pop_in_millions IS NOT NULL
    -- A zero population would make the per-million division fail with a
    -- division-by-zero error; such rows have no meaningful rate, so skip them.
    AND CAST(cs.pop_in_millions AS float) <> 0
GROUP BY c.country, cs.pop_in_millions
ORDER BY medals_per_million DESC
LIMIT 25;

'''

### Most decorated athlete per region

In [4]:
sql = '''

-- Athlete with the highest gold-medal total in each region
-- (exactly one row per region).
SELECT
    region,
    athlete_name,
    total_golds
FROM (
    SELECT
        region,
        name AS athlete_name,
        SUM(gold) AS total_golds,
        -- Rank athletes inside their region by gold total.
        -- COALESCE is needed in the sort key: SUM(gold) is NULL for an
        -- athlete with no non-NULL gold values, and PostgreSQL places
        -- NULLs first under DESC, which would rank such an athlete above
        -- every real gold medallist. The name tie-breaker makes the
        -- choice among equal totals deterministic.
        ROW_NUMBER() OVER (
            PARTITION BY region
            ORDER BY COALESCE(SUM(gold), 0) DESC, name
        ) AS row_num
    FROM summer_games_clean AS s
    INNER JOIN athletes AS a
        ON a.id = s.athlete_id
    INNER JOIN countries AS c
        ON s.country_id = c.id
    GROUP BY region, athlete_name
) AS subquery
-- Keep only the top-ranked athlete of each region.
WHERE row_num = 1;

'''

### Percentage of GDP per country

In [5]:
sql = '''

-- Each country's GDP as a share of the global total and of its region's total.
WITH country_totals AS (
    -- One row per (region, country) with the summed GDP; rows with a
    -- NULL gdp are excluded before aggregating.
    SELECT
        region,
        country,
        SUM(gdp) AS country_gdp
    FROM country_stats AS cs
    INNER JOIN countries AS c
        ON cs.country_id = c.id
    WHERE gdp IS NOT NULL
    GROUP BY region, country
)
SELECT
    region,
    country,
    country_gdp,
    -- Empty OVER () sums across every country in the result.
    SUM(country_gdp) OVER () AS global_gdp,
    country_gdp / SUM(country_gdp) OVER () AS perc_global_gdp,
    -- Partitioning by region restricts the denominator to that region.
    country_gdp / SUM(country_gdp) OVER (PARTITION BY region) AS perc_region_gdp
FROM country_totals
ORDER BY country_gdp DESC;

'''

### GDP per capita performance index

In [6]:
sql = '''

-- GDP per million inhabitants for each country, compared against the same
-- ratio computed over all countries in the result (performance index).
WITH country_totals AS (
    -- One row per (region, country) for the 2016-01-01 stats rows that
    -- have a non-NULL gdp.
    SELECT
        region,
        country,
        SUM(gdp) AS total_gdp,
        SUM(pop_in_millions) AS total_pop
    FROM country_stats_clean AS cs
    INNER JOIN countries AS c
        ON cs.country_id = c.id
    WHERE year = '2016-01-01'
        AND gdp IS NOT NULL
    GROUP BY region, country
)
SELECT
    region,
    country,
    total_gdp / total_pop AS gdp_per_million,
    -- Overall ratio: total GDP of all rows over total population of all rows.
    SUM(total_gdp) OVER () / SUM(total_pop) OVER () AS gdp_per_million_total,
    -- Index > 1 means the country's ratio is above the overall ratio.
    (total_gdp / total_pop)
        / (SUM(total_gdp) OVER () / SUM(total_pop) OVER ()) AS performance_index
FROM country_totals
ORDER BY gdp_per_million DESC;

'''

### Month-over-month comparison

In [7]:
sql = '''

-- Month-over-month change in total views per country, for rows dated
-- on or before 2018-05-31.
-- NOTE(review): DATE_PART('month', ...) keeps only the month number, so if
-- web_data spans more than one calendar year, equal months of different
-- years are summed together; confirm the table's date range.
SELECT
    DATE_PART('month', date) AS month,
    country_id,
    SUM(views) AS month_views,
    LAG(SUM(views)) OVER by_country_month AS previous_month_views,
    -- NULLIF turns a zero previous-month total into NULL so the ratio is
    -- NULL instead of raising a division-by-zero error. The first month of
    -- each country has no predecessor and is NULL as well.
    SUM(views) / NULLIF(LAG(SUM(views)) OVER by_country_month, 0) - 1 AS perc_change
FROM web_data
WHERE date <= '2018-05-31'
GROUP BY month, country_id
-- Shared window: months in ascending order within each country.
WINDOW by_country_month AS (
    PARTITION BY country_id
    ORDER BY DATE_PART('month', date)
)
-- Explicit ordering so the report reads country by country, month by month.
ORDER BY country_id, month;

'''

### Week-over-week comparison

In [8]:
sql = '''

-- Week-over-week change in the rolling 7-row average of daily views,
-- newest date first.
WITH daily AS (
    -- Total views per date.
    SELECT
        date,
        SUM(views) AS daily_views
    FROM web_data
    GROUP BY date
),
rolling AS (
    -- Average over the current row and up to six preceding rows. The frame
    -- is row-based, so the earliest rows average fewer than seven values.
    SELECT
        date,
        daily_views,
        AVG(daily_views) OVER (
            ORDER BY date
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ) AS weekly_avg
    FROM daily
)
SELECT
    date,
    weekly_avg,
    -- Value of the rolling average seven rows earlier in date order.
    LAG(weekly_avg, 7) OVER (ORDER BY date) AS weekly_avg_previous,
    weekly_avg / LAG(weekly_avg, 7) OVER (ORDER BY date) - 1 AS perc_change
FROM rolling
ORDER BY date DESC;

'''