In [1]:
import os

import pandas as pd

## Load and Clean Data

In [2]:
# Read the athlete events CSV into a DataFrame and preview its first rows
athlete_df = pd.read_csv(filepath_or_buffer='athlete_events.csv')
athlete_df.head(n=5)

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
0,1,A Dijiang,M,24.0,180.0,80.0,China,CHN,1992 Summer,1992,Summer,Barcelona,Basketball,Basketball Men's Basketball,
1,2,A Lamusi,M,23.0,170.0,60.0,China,CHN,2012 Summer,2012,Summer,London,Judo,Judo Men's Extra-Lightweight,
2,3,Gunnar Nielsen Aaby,M,24.0,,,Denmark,DEN,1920 Summer,1920,Summer,Antwerpen,Football,Football Men's Football,
3,4,Edgar Lindenau Aabye,M,34.0,,,Denmark/Sweden,DEN,1900 Summer,1900,Summer,Paris,Tug-Of-War,Tug-Of-War Men's Tug-Of-War,Gold
4,5,Christine Jacoba Aaftink,F,21.0,185.0,82.0,Netherlands,NED,1988 Winter,1988,Winter,Calgary,Speed Skating,Speed Skating Women's 500 metres,


In [3]:
# Keep only these columns:
# NOC, Name, Sex, Age, Height, Weight, Season, Year, Sport, Event, Medal
# (missing values are still present at this stage)
nan_sports_df = athlete_df.loc[
    :,
    ['NOC', 'Name', 'Sex', 'Age', 'Height', 'Weight',
     'Season', 'Year', 'Sport', 'Event', 'Medal']
]
nan_sports_df.head(n=5)

Unnamed: 0,NOC,Name,Sex,Age,Height,Weight,Season,Year,Sport,Event,Medal
0,CHN,A Dijiang,M,24.0,180.0,80.0,Summer,1992,Basketball,Basketball Men's Basketball,
1,CHN,A Lamusi,M,23.0,170.0,60.0,Summer,2012,Judo,Judo Men's Extra-Lightweight,
2,DEN,Gunnar Nielsen Aaby,M,24.0,,,Summer,1920,Football,Football Men's Football,
3,DEN,Edgar Lindenau Aabye,M,34.0,,,Summer,1900,Tug-Of-War,Tug-Of-War Men's Tug-Of-War,Gold
4,NED,Christine Jacoba Aaftink,F,21.0,185.0,82.0,Winter,1988,Speed Skating,Speed Skating Women's 500 metres,


In [4]:
# Number of non-missing values in each column
nan_sports_df.notna().sum()

NOC       271116
Name      271116
Sex       271116
Age       261642
Height    210945
Weight    208241
Season    271116
Year      271116
Sport     271116
Event     271116
Medal      39783
dtype: int64

In [5]:
# Discard every row that has a missing value in at least one column.
# Medal is one of the columns, so rows with a missing Medal value are
# removed along with rows lacking Age, Height or Weight.
sports_df = nan_sports_df.dropna(axis='index', how='any')
sports_df.notna().sum()

NOC       30181
Name      30181
Sex       30181
Age       30181
Height    30181
Weight    30181
Season    30181
Year      30181
Sport     30181
Event     30181
Medal     30181
dtype: int64

In [6]:
# Preview the first rows that remain after dropping incomplete rows
sports_df.head(n=5)

Unnamed: 0,NOC,Name,Sex,Age,Height,Weight,Season,Year,Sport,Event,Medal
40,FIN,Juhamatti Tapio Aaltonen,M,28.0,184.0,85.0,Winter,2014,Ice Hockey,Ice Hockey Men's Ice Hockey,Bronze
41,FIN,Paavo Johannes Aaltonen,M,28.0,175.0,64.0,Summer,1948,Gymnastics,Gymnastics Men's Individual All-Around,Bronze
42,FIN,Paavo Johannes Aaltonen,M,28.0,175.0,64.0,Summer,1948,Gymnastics,Gymnastics Men's Team All-Around,Gold
44,FIN,Paavo Johannes Aaltonen,M,28.0,175.0,64.0,Summer,1948,Gymnastics,Gymnastics Men's Horse Vault,Gold
48,FIN,Paavo Johannes Aaltonen,M,28.0,175.0,64.0,Summer,1948,Gymnastics,Gymnastics Men's Pommelled Horse,Gold


## Female Sports

### Gold

In [7]:
# Rows of sports_df where Sex is 'F' and Medal is 'Gold' (all columns kept)
female_sports_df = sports_df[sports_df['Sex'].eq('F') & sports_df['Medal'].eq('Gold')]
female_sports_df.head(n=5)



Unnamed: 0,NOC,Name,Sex,Age,Height,Weight,Season,Year,Sport,Event,Medal
1494,GBR,Nicola Virginia Adams,F,29.0,164.0,51.0,Summer,2012,Boxing,Boxing Women's Flyweight,Gold
1495,GBR,Nicola Virginia Adams,F,33.0,164.0,51.0,Summer,2016,Boxing,Boxing Women's Flyweight,Gold
1527,NZL,Valerie Kasanita Adams-Vili (-Price),F,23.0,193.0,120.0,Summer,2008,Athletics,Athletics Women's Shot Put,Gold
1528,NZL,Valerie Kasanita Adams-Vili (-Price),F,27.0,193.0,120.0,Summer,2012,Athletics,Athletics Women's Shot Put,Gold
1668,BRA,Adenzia Aparecida Ferreira da Silva,F,25.0,187.0,65.0,Summer,2012,Volleyball,Volleyball Women's Volleyball,Gold


In [8]:
# Write the current female_sports_df (the Gold subset when the notebook is run
# in order) to CSV, leaving out the index column.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('Female-Sports', exist_ok=True)
female_sports_df.to_csv('Female-Sports/female_athletes_gold.csv', index=False)

### Silver

In [9]:
# Rows of sports_df where Sex is 'F' and Medal is 'Silver' (all columns kept)
female_sports_df = sports_df[sports_df['Sex'].eq('F') & sports_df['Medal'].eq('Silver')]
female_sports_df.head(n=5)


Unnamed: 0,NOC,Name,Sex,Age,Height,Weight,Season,Year,Sport,Event,Medal
163,RUS,Mariya Vasilyevna Abakumova (-Tarabina),F,22.0,179.0,80.0,Summer,2008,Athletics,Athletics Women's Javelin Throw,Silver
195,RUS,Tamila Rashidovna Abasova,F,21.0,163.0,60.0,Summer,2004,Cycling,Cycling Women's Sprint,Silver
286,USA,Monica Cecilia Abbott,F,23.0,191.0,88.0,Summer,2008,Softball,Softball Women's Softball,Silver
311,USA,Nia Nicole Abdallah,F,20.0,175.0,56.0,Summer,2004,Taekwondo,Taekwondo Women's Featherweight,Silver
677,GDR,Irene Abel,F,19.0,160.0,48.0,Summer,1972,Gymnastics,Gymnastics Women's Team All-Around,Silver


In [10]:
# Write the current female_sports_df (the Silver subset when the notebook is
# run in order) to CSV, leaving out the index column.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('Female-Sports', exist_ok=True)
female_sports_df.to_csv('Female-Sports/female_athletes_silver.csv', index=False)

### Bronze

In [11]:
# Rows of sports_df where Sex is 'F' and Medal is 'Bronze' (all columns kept)
female_sports_df = sports_df[sports_df['Sex'].eq('F') & sports_df['Medal'].eq('Bronze')]
female_sports_df.head(n=5)


Unnamed: 0,NOC,Name,Sex,Age,Height,Weight,Season,Year,Sport,Event,Medal
105,NOR,Ann Kristin Aarnes,F,23.0,182.0,64.0,Summer,1996,Football,Football Women's Football,Bronze
161,AZE,Patimat Abakarova,F,21.0,165.0,49.0,Summer,2016,Taekwondo,Taekwondo Women's Flyweight,Bronze
476,CAN,Reema Abdo,F,21.0,173.0,59.0,Summer,1984,Swimming,Swimming Women's 4 x 100 metres Medley Relay,Bronze
684,CAN,Jennifer Abel,F,20.0,160.0,62.0,Summer,2012,Diving,Diving Women's Synchronized Springboard,Bronze
974,RUS,Yekaterina Konstantinovna Abramova,F,23.0,167.0,65.0,Winter,2006,Speed Skating,Speed Skating Women's Team Pursuit (6 laps),Bronze


In [12]:
# Write the current female_sports_df (the Bronze subset when the notebook is
# run in order) to CSV, leaving out the index column.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('Female-Sports', exist_ok=True)
female_sports_df.to_csv('Female-Sports/female_athletes_bronze.csv', index=False)

## Female Sport Categories

In [13]:
# Distinct sports among the female rows of sports_df, as a one-column
# DataFrame ('Sport').
# Built from sports_df directly rather than from female_sports_df: at this
# point in the notebook female_sports_df holds only the Bronze subset, so it
# would miss any sport that appears only with a Gold or Silver medal.
unique_f_sports_df = (
    sports_df.loc[sports_df['Sex'] == 'F', ['Sport']]
    .drop_duplicates()          # one row per sport, matching the "unique" name
    .reset_index(drop=True)
)
unique_f_sports_df

## Female Sport Events

In [14]:
# Distinct events among the female rows of sports_df, as a one-column
# DataFrame ('Event').
# Built from sports_df directly rather than from female_sports_df: at this
# point in the notebook female_sports_df holds only the Bronze subset, so it
# would miss any event that appears only with a Gold or Silver medal.
unique_f_events_df = (
    sports_df.loc[sports_df['Sex'] == 'F', ['Event']]
    .drop_duplicates()          # one row per event, matching the "unique" name
    .reset_index(drop=True)
)
unique_f_events_df

## Male Sports

### Gold

In [15]:
# Rows of sports_df where Sex is 'M' and Medal is 'Gold' (all columns kept)
male_sports_df = sports_df[sports_df['Sex'].eq('M') & sports_df['Medal'].eq('Gold')]
male_sports_df.head(n=5)

Unnamed: 0,NOC,Name,Sex,Age,Height,Weight,Season,Year,Sport,Event,Medal
42,FIN,Paavo Johannes Aaltonen,M,28.0,175.0,64.0,Summer,1948,Gymnastics,Gymnastics Men's Team All-Around,Gold
44,FIN,Paavo Johannes Aaltonen,M,28.0,175.0,64.0,Summer,1948,Gymnastics,Gymnastics Men's Horse Vault,Gold
48,FIN,Paavo Johannes Aaltonen,M,28.0,175.0,64.0,Summer,1948,Gymnastics,Gymnastics Men's Pommelled Horse,Gold
60,NOR,Kjetil Andr Aamodt,M,20.0,176.0,85.0,Winter,1992,Alpine Skiing,Alpine Skiing Men's Super G,Gold
73,NOR,Kjetil Andr Aamodt,M,30.0,176.0,85.0,Winter,2002,Alpine Skiing,Alpine Skiing Men's Super G,Gold


In [16]:
# Write the current male_sports_df (the Gold subset when the notebook is run
# in order) to CSV, leaving out the index column.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('Male-Sports', exist_ok=True)
male_sports_df.to_csv('Male-Sports/male_athletes_gold.csv', index=False)

### Silver

In [17]:
# Rows of sports_df where Sex is 'M' and Medal is 'Silver' (all columns kept)
male_sports_df = sports_df[sports_df['Sex'].eq('M') & sports_df['Medal'].eq('Silver')]
male_sports_df.head(n=5)

Unnamed: 0,NOC,Name,Sex,Age,Height,Weight,Season,Year,Sport,Event,Medal
63,NOR,Kjetil Andr Aamodt,M,22.0,176.0,85.0,Winter,1994,Alpine Skiing,Alpine Skiing Men's Downhill,Silver
67,NOR,Kjetil Andr Aamodt,M,22.0,176.0,85.0,Winter,1994,Alpine Skiing,Alpine Skiing Men's Combined,Silver
92,NED,Pepijn Aardewijn,M,26.0,189.0,72.0,Summer,1996,Rowing,Rowing Men's Lightweight Double Sculls,Silver
175,FRA,Luc Abalo,M,31.0,182.0,86.0,Summer,2016,Handball,Handball Men's Handball,Silver
186,USA,Stephen Anthony Abas,M,26.0,165.0,55.0,Summer,2004,Wrestling,"Wrestling Men's Featherweight, Freestyle",Silver


In [18]:
# Write the current male_sports_df (the Silver subset when the notebook is run
# in order) to CSV, leaving out the index column.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('Male-Sports', exist_ok=True)
male_sports_df.to_csv('Male-Sports/male_athletes_silver.csv', index=False)

### Bronze

In [19]:
# Rows of sports_df where Sex is 'M' and Medal is 'Bronze' (all columns kept)
male_sports_df = sports_df[sports_df['Sex'].eq('M') & sports_df['Medal'].eq('Bronze')]
male_sports_df.head(n=5)

Unnamed: 0,NOC,Name,Sex,Age,Height,Weight,Season,Year,Sport,Event,Medal
40,FIN,Juhamatti Tapio Aaltonen,M,28.0,184.0,85.0,Winter,2014,Ice Hockey,Ice Hockey Men's Ice Hockey,Bronze
41,FIN,Paavo Johannes Aaltonen,M,28.0,175.0,64.0,Summer,1948,Gymnastics,Gymnastics Men's Individual All-Around,Bronze
50,FIN,Paavo Johannes Aaltonen,M,32.0,175.0,64.0,Summer,1952,Gymnastics,Gymnastics Men's Team All-Around,Bronze
61,NOR,Kjetil Andr Aamodt,M,20.0,176.0,85.0,Winter,1992,Alpine Skiing,Alpine Skiing Men's Giant Slalom,Bronze
64,NOR,Kjetil Andr Aamodt,M,22.0,176.0,85.0,Winter,1994,Alpine Skiing,Alpine Skiing Men's Super G,Bronze


In [20]:
# Write the current male_sports_df (the Bronze subset when the notebook is run
# in order) to CSV, leaving out the index column.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('Male-Sports', exist_ok=True)
male_sports_df.to_csv('Male-Sports/male_athletes_bronze.csv', index=False)

## Male Sport Categories

In [21]:
# Distinct sports among the male rows of sports_df, as a one-column
# DataFrame ('Sport').
# Built from sports_df directly rather than from male_sports_df: at this
# point in the notebook male_sports_df holds only the Bronze subset, so it
# would miss any sport that appears only with a Gold or Silver medal.
unique_m_sports_df = (
    sports_df.loc[sports_df['Sex'] == 'M', ['Sport']]
    .drop_duplicates()          # one row per sport, matching the "unique" name
    .reset_index(drop=True)
)
unique_m_sports_df

## Male Sport Events

In [22]:
# Distinct events among the male rows of sports_df, as a one-column
# DataFrame ('Event').
# Built from sports_df directly rather than from male_sports_df: at this
# point in the notebook male_sports_df holds only the Bronze subset, so it
# would miss any event that appears only with a Gold or Silver medal.
unique_m_events_df = (
    sports_df.loc[sports_df['Sex'] == 'M', ['Event']]
    .drop_duplicates()          # one row per event, matching the "unique" name
    .reset_index(drop=True)
)
unique_m_events_df