In [117]:
# Import Dependencies
import pandas as pd
import os as os
import numpy as np

### Athletes Data

In [118]:
# Open Athletes Data File
athletes_file_to_load = os.path.join("athletes.csv")
athletes_df = pd.read_csv(athletes_file_to_load)
athletes_df

Unnamed: 0,Name,Sex,NOC,Year,Sport,Event,Medal
0,Nikolaos Andriakopoulos,M,GRE,1896,Gymnastics,Gymnastics Men's Rope Climbing,Gold
1,Conrad Helmut Fritz Bcker,M,GER,1896,Gymnastics,"Gymnastics Men's Parallel Bars, Teams",Gold
2,Conrad Helmut Fritz Bcker,M,GER,1896,Gymnastics,"Gymnastics Men's Horizontal Bar, Teams",Gold
3,John Mary Pius Boland,M,GBR,1896,Tennis,Tennis Men's Singles,Gold
4,John Mary Pius Boland,M,GBR,1896,Tennis,Tennis Men's Doubles,Gold
...,...,...,...,...,...,...,...
12235,PIJETLOVIC Dusko,M,SRB,2020,Water Polo,Men,Gold
12236,PIJETLOVIC Gojko,M,SRB,2020,Water Polo,Men,Gold
12237,PRLAINOVIC Andrija,M,SRB,2020,Water Polo,Men,Gold
12238,RANDELOVIC Sava,M,SRB,2020,Water Polo,Men,Gold


In [119]:
# Rename Headers
athletes_df = athletes_df.rename(columns={"NOC": "Code"})

In [120]:
# Group By Year and Country
athletes = athletes_df.groupby(["Year", "Code"]).size().reset_index(name="Medals")

In [121]:
# New Dataframe
athlete_df = pd.DataFrame(athletes)
athlete_df

Unnamed: 0,Year,Code,Medals
0,1896,AUS,2
1,1896,AUT,2
2,1896,DEN,1
3,1896,FRA,5
4,1896,GBR,3
...,...,...,...
901,2020,UGA,2
902,2020,UKR,1
903,2020,USA,112
904,2020,UZB,3


In [122]:
# Rename Headers
athlete_df = athlete_df.rename(columns={"Code": "Country Code"})
athlete_df

Unnamed: 0,Year,Country Code,Medals
0,1896,AUS,2
1,1896,AUT,2
2,1896,DEN,1
3,1896,FRA,5
4,1896,GBR,3
...,...,...,...
901,2020,UGA,2
902,2020,UKR,1
903,2020,USA,112
904,2020,UZB,3


In [123]:
athlete_df.loc['1960-01-01':'2021-02-01']

Unnamed: 0,Year,Country Code,Medals
197,1948,HUN,15
198,1948,IND,20
199,1948,ITA,21
200,1948,JAM,1
201,1948,MEX,4
202,1948,NED,8


### Country Data

In [124]:
# Open Country Data File
country_file_to_load = os.path.join("country.csv")
country_df = pd.read_csv(country_file_to_load)

In [125]:
# Reorder Columns
country_df = country_df.reindex(['Year', 'Country Code','Country Name','GDP','Population'], axis=1)
country_df

Unnamed: 0,Year,Country Code,Country Name,GDP,Population
0,1960,ABW,Aruba,,54211.0
1,1960,AFE,Africa Eastern and Southern,1.929193e+10,
2,1960,AFG,Afghanistan,5.377778e+08,8996973.0
3,1960,AFW,Africa Western and Central,1.040732e+10,
4,1960,AGO,Angola,,5454933.0
...,...,...,...,...,...
16221,2020,XKX,Kosovo,7.611402e+09,
16222,2020,YEM,"Yemen, Rep.",,
16223,2020,ZAF,South Africa,3.019236e+11,59308690.0
16224,2020,ZMB,Zambia,1.932005e+10,18383955.0


### New Dataframe: Medals Data

In [126]:
# Merge DataFrame
medals_df = pd.merge(country_df,athlete_df, on=['Year','Country Code'], how="left")

In [127]:
# Replace NaN with 0
medals_df['Medals'] = medals_df['Medals'].replace(np.nan, 0)
medals_df

Unnamed: 0,Year,Country Code,Country Name,GDP,Population,Medals
0,1960,ABW,Aruba,,54211.0,0.0
1,1960,AFE,Africa Eastern and Southern,1.929193e+10,,0.0
2,1960,AFG,Afghanistan,5.377778e+08,8996973.0,0.0
3,1960,AFW,Africa Western and Central,1.040732e+10,,0.0
4,1960,AGO,Angola,,5454933.0,0.0
...,...,...,...,...,...,...
16221,2020,XKX,Kosovo,7.611402e+09,,0.0
16222,2020,YEM,"Yemen, Rep.",,,0.0
16223,2020,ZAF,South Africa,3.019236e+11,59308690.0,0.0
16224,2020,ZMB,Zambia,1.932005e+10,18383955.0,0.0


In [128]:
# Export CSV
medals_df.to_csv('medals.csv', index=False)