## Olympic Athlete Data Analysis

In [None]:
import pandas as pd
import numpy as np
import re

In [None]:
# Load the raw athlete biographical table; the path is relative to the
# notebook's working directory.
bio_info=pd.read_csv("Biological Data.csv")

In [None]:
# Preview the raw biographical frame (rendered because it is the cell's last expression).
bio_info

Unnamed: 0,Athlete ID,Roles,Sex,Full Name,Used Name,Other Names,Born,Died,Height (cm),Weight (kg),Measurements,Affiliations,NOC
0,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,,"12 December 1886 in Bordeaux, Gironde (FRA)","2 October 1960 in Saint-Jean-de-Luz, Pyrénées-...",,,,,France
1,2.0,Competed in Olympic Games,Male,Arnaud Benjamin•Boetsch,Arnaud•Boetsch,,"1 April 1969 in Meulan, Yvelines (FRA)",,183 cm,76 kg,,"Racing Club de France, Paris (FRA)",France
2,3.0,Competed in Olympic Games • Administrator,Male,Jean Laurent Robert•Borotra,Jean•Borotra,,"13 August 1898 in Biarritz, Pyrénées-Atlantiqu...","17 July 1994 in Arbonne, Pyrénées-Atlantiques ...",183 cm,76 kg,,"TCP, Paris (FRA)",France
3,4.0,Competed in Olympic Games,Male,Jacques Marie Stanislas Jean•Brugnon,Jacques•Brugnon,,"11 May 1895 in Paris VIIIe, Paris (FRA)","20 March 1978 in Monaco, Monaco (MON)",168 cm,64 kg,,"Sporting club de Paris, Paris (FRA)",France
4,5.0,Competed in Olympic Games,Male,Henry Albert•Canet,Albert•Canet,,"17 April 1878 in Wandsworth, England (GBR)","25 July 1930 in Paris VIIe, Paris (FRA)",,,,"TCP, Paris (FRA)",France
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49762,49996.0,Competed in Olympic Games,Male,Aubrey Martin•Bürer,Aubrey•Bürer,,"13 March 1939 in Johannesburg, Gauteng (RSA)",,183 cm,72 kg,,"SMU Mustangs, Dallas (USA)",South Africa
49763,49997.0,Competed in Olympic Games,Male,Kenneth Philip•Cawood,Kenneth•Cawood,,05-Jul-71,,,,,,South Africa
49764,49998.0,Competed in Olympic Games,Male,"Desmond Vernon ""Des""•Cohen",Des•Cohen,,8 August 1927 in ? (RSA),24-Feb-12,,,,,South Africa
49765,49999.0,Competed in Olympic Games,Male,Darryl Melwyn•Cronje,Darryl•Cronje,,"4 August 1967 in Durban, KwaZulu-Natal (RSA)",,,,,,South Africa


In [None]:
# Step 3: Helper function to split birth/death info
def split_birth_death_info(column):
    """Split ``"<date> in <city>, <region> (<country>)"`` strings into parts.

    Parameters
    ----------
    column : pandas.Series
        Raw ``Born`` / ``Died`` values. Missing values are treated as empty.

    Returns
    -------
    tuple of list
        ``(dates, cities, regions, countries)``; each list is positionally
        aligned with ``column``. Any part that is absent or cannot be parsed
        is ``np.nan``.

    Notes
    -----
    * Date and location are separated on the standalone word ``in``
      (surrounded by whitespace, or at the very start of the entry), not on
      the bare substring, so text that merely contains the letters "in" is
      not cut in half.
    * An entry that holds only a date (e.g. ``"05-Jul-71"``) keeps that date
      and gets ``np.nan`` for city, region and country.
    * A location that does not follow ``"city, region (country)"`` yields
      ``np.nan`` for all three location parts while the date is kept.
    """
    dates, cities, regions, countries = [], [], [], []
    for entry in column.fillna("").astype(str):
        # At most one split: everything after the first " in " is the location.
        parts = re.split(r'(?:^|\s+)in\s+', entry.strip(), maxsplit=1)

        date = parts[0].strip()
        # An empty date is missing data, not an empty string.
        dates.append(date if date else np.nan)

        city = region = country = np.nan
        if len(parts) == 2:
            try:
                city_part, rest = parts[1].split(',', 1)
                region_part, country_part = rest.rsplit('(', 1)
            except ValueError:
                # Location lacks the comma or the "(country)" suffix.
                pass
            else:
                city = city_part.strip()
                region = region_part.strip()
                country = country_part.replace(')', '').strip()

        cities.append(city)
        regions.append(region)
        countries.append(country)
    return dates, cities, regions, countries

# Step 4: Apply to Born and Died columns
born_dates, born_cities, born_regions, born_countries = split_birth_death_info(bio_info['Born'])
died_dates, died_cities, died_regions, died_countries = split_birth_death_info(bio_info['Died'])

# Step 5: Create new columns
bio_info['Date of Birth'] = born_dates
bio_info['City of Birth'] = born_cities
bio_info['Region of Birth'] = born_regions
bio_info['Country of Birth'] = born_countries

bio_info['Date of Death'] = died_dates
bio_info['City of Death'] = died_cities
bio_info['Region of Death'] = died_regions
bio_info['Country of Death'] = died_countries

# Step 6: Drop the original columns now that their content lives in the
# derived columns. Reassignment is used instead of inplace=True so the
# statement reads as "produce a new frame".
bio_info = bio_info.drop(columns=['Born', 'Died'])

# Step 7: Display the derived columns. This must be the last expression in
# the cell: a bare expression followed by another statement renders nothing.
bio_info[
    [
        'Date of Birth', 'City of Birth', 'Region of Birth', 'Country of Birth',
        'Date of Death', 'City of Death', 'Region of Death', 'Country of Death'
    ]
]

In [None]:
# 1. Drop rows missing any of the fields this analysis requires:
#    Athlete ID, NOC and Date of Birth. The fills below never touch these
#    columns, so dropping them in one pass selects the same rows as dropping
#    them one at a time. .copy() makes the result an independent frame before
#    columns are assigned to it.
bio_info = bio_info.dropna(subset=['Athlete ID', 'NOC', 'Date of Birth']).copy()

# 2. Fill missing Full Name from Used Name where available
bio_info['Full Name'] = bio_info['Full Name'].fillna(bio_info['Used Name'])

# 3. Fill missing Roles, Sex and Affiliations with 'Unknown'
for col in ['Roles', 'Sex', 'Affiliations']:
    bio_info[col] = bio_info[col].fillna('Unknown')

# 4. Fill missing City/Region/Country of Birth and of Death with 'Unknown'
for col in [
    'City of Birth', 'Region of Birth', 'Country of Birth',
    'City of Death', 'Region of Death', 'Country of Death',
]:
    if col in bio_info.columns:
        bio_info[col] = bio_info[col].fillna('Unknown')

# 5. Fill missing Date of Death with the sentinel 'Alive'.
# NOTE(review): a missing death date is treated as "alive" here; athletes whose
# death was simply not recorded will also be flagged alive - confirm this is
# acceptable for the analysis.
bio_info['Date of Death'] = bio_info['Date of Death'].fillna('Alive')

# 6. Create the 'is_alive' flag with a vectorized comparison (no per-row lambda)
bio_info['is_alive'] = bio_info['Date of Death'].eq('Alive')

In [None]:
# Count the missing values remaining in each column.
bio_info.isnull().sum()

Athlete ID              0
Roles                   0
Sex                     0
Full Name               0
Used Name               0
Height (cm)             0
Weight (kg)             0
Measurements        38347
Affiliations            0
NOC                     0
Date of Birth           0
City of Birth           0
Region of Birth         0
Country of Birth        0
Date of Death           0
City of Death           0
Region of Death         0
Country of Death        0
is_alive                0
Height              38581
Weight              39472
dtype: int64

In [None]:
import pandas as pd
import numpy as np
import re

# --- Step 1: Extract height and weight from Measurements column ---

def _extract_quantity(measurement, unit):
    """Return the number written immediately before ``unit`` as a float.

    Looks for a 2-3 digit number (optionally with a decimal part) followed by
    optional whitespace and ``unit``. Returns ``np.nan`` when ``measurement``
    is missing or contains no such number. Non-string values are converted
    with ``str`` first, so they yield ``np.nan`` instead of raising.
    """
    if pd.isna(measurement):
        return np.nan
    pattern = r'(\d{2,3}(?:\.\d+)?)\s*' + re.escape(unit)
    match = re.search(pattern, str(measurement))
    return float(match.group(1)) if match else np.nan


def extract_height(measurement):
    """Return the height in cm found in a Measurements string, or ``np.nan``."""
    return _extract_quantity(measurement, 'cm')


def extract_weight(measurement):
    """Return the weight in kg found in a Measurements string, or ``np.nan``."""
    return _extract_quantity(measurement, 'kg')

# Pull candidate height/weight values out of the free-text Measurements column.
measurements = bio_info['Measurements']
bio_info['Height_from_Measurements'] = measurements.apply(extract_height)
bio_info['Weight_from_Measurements'] = measurements.apply(extract_weight)

# --- Steps 2-4, applied identically to height and weight ---
# For each quantity: strip the unit suffix and convert to numbers, fall back to
# the value extracted from Measurements, then fill what is still missing with
# the column median (computed after the fallback fill).
for column, unit_suffix, fallback in (
    ('Height (cm)', ' cm', 'Height_from_Measurements'),
    ('Weight (kg)', ' kg', 'Weight_from_Measurements'),
):
    stripped = bio_info[column].astype(str).str.replace(unit_suffix, '', regex=False)
    bio_info[column] = pd.to_numeric(stripped, errors='coerce')
    bio_info[column] = bio_info[column].fillna(bio_info[fallback])
    bio_info[column] = bio_info[column].fillna(bio_info[column].median())

# --- Step 5: Reconstruct Measurements from Height and Weight for missing values ---

def reconstruct_measurement(row):
    """Return a Measurements string for ``row``, rebuilding it when absent.

    An existing value (anything other than missing or ``'Unknown'``) is
    returned unchanged. Otherwise the value is rebuilt from the row's
    ``'Height (cm)'`` and ``'Weight (kg)'`` as ``"<h> cm / <w> kg"`` with both
    numbers truncated to integers, or ``'Unknown'`` if either is missing.
    """
    current = row['Measurements']
    if not (pd.isna(current) or current == 'Unknown'):
        return current
    if pd.isna(row['Height (cm)']) or pd.isna(row['Weight (kg)']):
        return 'Unknown'
    return f"{int(row['Height (cm)'])} cm / {int(row['Weight (kg)'])} kg"

# Rebuild Measurements row by row wherever it is missing or 'Unknown'.
bio_info['Measurements'] = bio_info.apply(reconstruct_measurement, axis='columns')

# --- Step 6: Drop helper columns ---

bio_info = bio_info.drop(['Height_from_Measurements', 'Weight_from_Measurements'], axis=1)

# --- Optional: Check for remaining missing values ---

remaining_missing = bio_info[['Height (cm)', 'Weight (kg)', 'Measurements']].isna().sum()
print("Missing Height (cm):", remaining_missing['Height (cm)'])
print("Missing Weight (kg):", remaining_missing['Weight (kg)'])
print("Missing Measurements:", remaining_missing['Measurements'])


Missing Height (cm): 0
Missing Weight (kg): 0
Missing Measurements: 0


In [None]:
# Remove stray 'Height' / 'Weight' columns if they exist. No cell visible in
# this notebook creates them and the raw table's header does not list them, so
# on a fresh Restart & Run All a plain drop would raise KeyError;
# errors='ignore' makes this cell safe to run either way.
bio_info = bio_info.drop(columns=['Height', 'Weight'], errors='ignore')

In [None]:
# Preview the cleaned biographical frame.
bio_info

Unnamed: 0,Athlete ID,Roles,Sex,Full Name,Used Name,Height (cm),Weight (kg),Measurements,Affiliations,NOC,Date of Birth,City of Birth,Region of Birth,Country of Birth,Date of Death,City of Death,Region of Death,Country of Death,is_alive
0,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,12 December 1886,Bordeaux,Gironde,FRA,2 October 1960,Saint-Jean-de-Luz,Pyrénées-Atlantiques,FRA,False
1,2.0,Competed in Olympic Games,Male,Arnaud Benjamin•Boetsch,Arnaud•Boetsch,183.0,76.0,183 cm / 76 kg,"Racing Club de France, Paris (FRA)",France,1 April 1969,Meulan,Yvelines,FRA,Alive,Unknown,Unknown,Unknown,True
2,3.0,Competed in Olympic Games • Administrator,Male,Jean Laurent Robert•Borotra,Jean•Borotra,183.0,76.0,183 cm / 76 kg,"TCP, Paris (FRA)",France,13 August 1898,Biarritz,Pyrénées-Atlantiques,FRA,17 July 1994,Arbonne,Pyrénées-Atlantiques,FRA,False
3,4.0,Competed in Olympic Games,Male,Jacques Marie Stanislas Jean•Brugnon,Jacques•Brugnon,168.0,64.0,168 cm / 64 kg,"Sporting club de Paris, Paris (FRA)",France,11 May 1895,Paris VIIIe,Paris,FRA,20 March 1978,Monaco,Monaco,MON,False
4,5.0,Competed in Olympic Games,Male,Henry Albert•Canet,Albert•Canet,177.0,72.0,177 cm / 72 kg,"TCP, Paris (FRA)",France,17 April 1878,Wandsworth,England,GBR,25 July 1930,Paris VIIe,Paris,FRA,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49761,49995.0,Competed in Olympic Games,Male,"Anthony Wellesley ""Tony""•Briscoe",Tony•Briscoe,177.0,72.0,177 cm / 72 kg,Unknown,South Africa,22 April 1939,Johannesburg,Gauteng,RSA,1 December 2018,Unknown,Unknown,Unknown,False
49762,49996.0,Competed in Olympic Games,Male,Aubrey Martin•Bürer,Aubrey•Bürer,183.0,72.0,183 cm / 72 kg,"SMU Mustangs, Dallas (USA)",South Africa,13 March 1939,Johannesburg,Gauteng,RSA,Alive,Unknown,Unknown,Unknown,True
49764,49998.0,Competed in Olympic Games,Male,"Desmond Vernon ""Des""•Cohen",Des•Cohen,177.0,72.0,177 cm / 72 kg,Unknown,South Africa,8 August 1927,Unknown,Unknown,Unknown,Alive,Unknown,Unknown,Unknown,True
49765,49999.0,Competed in Olympic Games,Male,Darryl Melwyn•Cronje,Darryl•Cronje,177.0,72.0,177 cm / 72 kg,Unknown,South Africa,4 August 1967,Durban,KwaZulu-Natal,RSA,Alive,Unknown,Unknown,Unknown,True


In [None]:
# Persist the cleaned biographical table; index=False leaves the row index out of the file.
bio_info.to_csv('cleaned_bio_info.csv', index=False)


In [None]:
# Bind a second name to the same DataFrame object. This is an alias, not a
# copy: changes made through either name are visible through the other.
biographical_data=bio_info

In [None]:
# Preview the biographical table under its new name (same object as bio_info).
biographical_data

Unnamed: 0,Athlete ID,Roles,Sex,Full Name,Used Name,Height (cm),Weight (kg),Measurements,Affiliations,NOC,Date of Birth,City of Birth,Region of Birth,Country of Birth,Date of Death,City of Death,Region of Death,Country of Death,is_alive
0,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,12 December 1886,Bordeaux,Gironde,FRA,2 October 1960,Saint-Jean-de-Luz,Pyrénées-Atlantiques,FRA,False
1,2.0,Competed in Olympic Games,Male,Arnaud Benjamin•Boetsch,Arnaud•Boetsch,183.0,76.0,183 cm / 76 kg,"Racing Club de France, Paris (FRA)",France,1 April 1969,Meulan,Yvelines,FRA,Alive,Unknown,Unknown,Unknown,True
2,3.0,Competed in Olympic Games • Administrator,Male,Jean Laurent Robert•Borotra,Jean•Borotra,183.0,76.0,183 cm / 76 kg,"TCP, Paris (FRA)",France,13 August 1898,Biarritz,Pyrénées-Atlantiques,FRA,17 July 1994,Arbonne,Pyrénées-Atlantiques,FRA,False
3,4.0,Competed in Olympic Games,Male,Jacques Marie Stanislas Jean•Brugnon,Jacques•Brugnon,168.0,64.0,168 cm / 64 kg,"Sporting club de Paris, Paris (FRA)",France,11 May 1895,Paris VIIIe,Paris,FRA,20 March 1978,Monaco,Monaco,MON,False
4,5.0,Competed in Olympic Games,Male,Henry Albert•Canet,Albert•Canet,177.0,72.0,177 cm / 72 kg,"TCP, Paris (FRA)",France,17 April 1878,Wandsworth,England,GBR,25 July 1930,Paris VIIe,Paris,FRA,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49761,49995.0,Competed in Olympic Games,Male,"Anthony Wellesley ""Tony""•Briscoe",Tony•Briscoe,177.0,72.0,177 cm / 72 kg,Unknown,South Africa,22 April 1939,Johannesburg,Gauteng,RSA,1 December 2018,Unknown,Unknown,Unknown,False
49762,49996.0,Competed in Olympic Games,Male,Aubrey Martin•Bürer,Aubrey•Bürer,183.0,72.0,183 cm / 72 kg,"SMU Mustangs, Dallas (USA)",South Africa,13 March 1939,Johannesburg,Gauteng,RSA,Alive,Unknown,Unknown,Unknown,True
49764,49998.0,Competed in Olympic Games,Male,"Desmond Vernon ""Des""•Cohen",Des•Cohen,177.0,72.0,177 cm / 72 kg,Unknown,South Africa,8 August 1927,Unknown,Unknown,Unknown,Alive,Unknown,Unknown,Unknown,True
49765,49999.0,Competed in Olympic Games,Male,Darryl Melwyn•Cronje,Darryl•Cronje,177.0,72.0,177 cm / 72 kg,Unknown,South Africa,4 August 1967,Durban,KwaZulu-Natal,RSA,Alive,Unknown,Unknown,Unknown,True


In [None]:
# Confirm that no column of the biographical table still has missing values.
biographical_data.isnull().sum()

Athlete ID          0
Roles               0
Sex                 0
Full Name           0
Used Name           0
Height (cm)         0
Weight (kg)         0
Measurements        0
Affiliations        0
NOC                 0
Date of Birth       0
City of Birth       0
Region of Birth     0
Country of Birth    0
Date of Death       0
City of Death       0
Region of Death     0
Country of Death    0
is_alive            0
dtype: int64

In [None]:
# Load the per-event results workbook; the path is relative to the notebook's
# working directory.
# NOTE(review): the preview of this frame shows 'Ã—' inside some Event values
# (e.g. "4 Ã— 100 metres"), which looks like a mis-decoded '×' in the source
# data - confirm and repair if so.
result_info=pd.read_excel("Olympic_performance_result.xlsx")

In [None]:
# Preview the raw results frame.
result_info

Unnamed: 0,Athlete_ID,Games,Discipline,Event,Team,NOC,Pos,Medal,As
0,1,1912 Summer Olympics,Tennis,"Singles, Men(Olympic)",,FRA,17,,Jean-François Blanchy
1,1,1912 Summer Olympics,Tennis,"Doubles, Men(Olympic)",Jean Montariol,FRA,DNS,,Jean-François Blanchy
2,1,1920 Summer Olympics,Tennis,"Singles, Men(Olympic)",,FRA,32,,Jean-François Blanchy
3,1,1920 Summer Olympics,Tennis,"Doubles, Mixed(Olympic)",Jeanne Vaussard,FRA,8,,Jean-François Blanchy
4,1,1920 Summer Olympics,Tennis,"Doubles, Men(Olympic)",Jacques Brugnon,FRA,4,,Jean-François Blanchy
...,...,...,...,...,...,...,...,...,...
109385,49999,1992 Summer Olympics,Swimming(Aquatics),"4 Ã— 100 metres Medley Relay, Men(Olympic)",South Africa,RSA,14,,Darryl Cronje
109386,50000,1996 Summer Olympics,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",,RSA,5,,Brendon Dedekind
109387,50000,1996 Summer Olympics,Swimming(Aquatics),"100 metres Freestyle, Men(Olympic)",,RSA,26,,Brendon Dedekind
109388,50000,2000 Summer Olympics,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",,RSA,9,,Brendon Dedekind


In [None]:
# Count the missing values in each column of the raw results.
result_info.isnull().sum()

Athlete_ID        0
Games          1167
Discipline        0
Event             0
Team          58862
NOC               0
Pos               0
Medal         92765
As              921
dtype: int64

In [None]:

# Derive Year (four digits) and Season (the following word) from the Games label.
year_and_season = result_info['Games'].str.extract(r'(\d{4})\s+(\w+)', expand=True)
result_info[['Year', 'Season']] = year_and_season

# Rows whose Games label yielded no year are removed, after which Year can be
# stored as an integer.
result_info.dropna(subset=['Year'], inplace=True)
result_info['Year'] = result_info['Year'].astype(int)
result_info

Unnamed: 0,Athlete_ID,Games,Discipline,Event,Team,NOC,Pos,Medal,As,Year,Season
0,1,1912 Summer Olympics,Tennis,"Singles, Men(Olympic)",Unknown,FRA,17.0,Failed,Jean-François Blanchy,1912,Summer
1,1,1912 Summer Olympics,Tennis,"Doubles, Men(Olympic)",Jean Montariol,FRA,-1.0,Failed,Jean-François Blanchy,1912,Summer
2,1,1920 Summer Olympics,Tennis,"Singles, Men(Olympic)",Unknown,FRA,32.0,Failed,Jean-François Blanchy,1920,Summer
3,1,1920 Summer Olympics,Tennis,"Doubles, Mixed(Olympic)",Jeanne Vaussard,FRA,8.0,Failed,Jean-François Blanchy,1920,Summer
4,1,1920 Summer Olympics,Tennis,"Doubles, Men(Olympic)",Jacques Brugnon,FRA,4.0,Failed,Jean-François Blanchy,1920,Summer
...,...,...,...,...,...,...,...,...,...,...,...
109385,49999,1992 Summer Olympics,Swimming(Aquatics),"4 Ã— 100 metres Medley Relay, Men(Olympic)",South Africa,RSA,14.0,Failed,Darryl Cronje,1992,Summer
109386,50000,1996 Summer Olympics,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",Unknown,RSA,5.0,Failed,Brendon Dedekind,1996,Summer
109387,50000,1996 Summer Olympics,Swimming(Aquatics),"100 metres Freestyle, Men(Olympic)",Unknown,RSA,26.0,Failed,Brendon Dedekind,1996,Summer
109388,50000,2000 Summer Olympics,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",Unknown,RSA,9.0,Failed,Brendon Dedekind,2000,Summer


In [None]:
# Create a new column 'Tied': True if 'Pos' starts with '=', else False.
# This has to run while 'Pos' still holds the raw text: once the leading '='
# is stripped and 'Pos' is converted to numbers, every value here would be False.
result_info['Tied'] = result_info['Pos'].astype(str).str.startswith('=')

In [None]:
# Remove leading '=' from Pos column (lstrip removes every leading '=' character).
# After this line 'Pos' holds strings, including the text 'nan' for values that were missing.
result_info['Pos'] = result_info['Pos'].astype(str).str.lstrip('=')

In [None]:

# Convert Pos to numeric: errors='coerce' turns anything that does not parse
# as a number (e.g. 'DNF', 'DNS', '') into NaN instead of raising.
result_info['Pos'] = pd.to_numeric(result_info['Pos'], errors='coerce')

In [None]:
# Replace the remaining gaps in the text columns with explicit placeholder labels.
for column, placeholder in (
    ('Games', "Unknown"),
    ('Team', "Unknown"),
    ('Medal', "Failed"),
    ('As', "Unknown"),
):
    result_info[column] = result_info[column].fillna(placeholder)


In [None]:
# NOTE(review): -1 is a sentinel stored in a numeric column, so it will take
# part in any mean/min/sort over 'Pos' unless filtered out first.
result_info['Pos'] = result_info['Pos'].fillna(-1)  # Use -1 as "did not finish/rank"

In [None]:
# Confirm that no column of the results table still has missing values.
result_info.isnull().sum()

Athlete_ID    0
Games         0
Discipline    0
Event         0
Team          0
NOC           0
Pos           0
Medal         0
As            0
Year          0
Season        0
Tied          0
dtype: int64

In [None]:
# Bind a second name to the same DataFrame object (an alias, not a copy).
athlete_performance_results=result_info

In [None]:
# Preview the cleaned results table under its new name.
athlete_performance_results

Unnamed: 0,Athlete_ID,Games,Discipline,Event,Team,NOC,Pos,Medal,As,Year,Season,Tied
0,1,1912 Summer Olympics,Tennis,"Singles, Men(Olympic)",Unknown,FRA,17.0,Failed,Jean-François Blanchy,1912,Summer,False
1,1,1912 Summer Olympics,Tennis,"Doubles, Men(Olympic)",Jean Montariol,FRA,-1.0,Failed,Jean-François Blanchy,1912,Summer,False
2,1,1920 Summer Olympics,Tennis,"Singles, Men(Olympic)",Unknown,FRA,32.0,Failed,Jean-François Blanchy,1920,Summer,False
3,1,1920 Summer Olympics,Tennis,"Doubles, Mixed(Olympic)",Jeanne Vaussard,FRA,8.0,Failed,Jean-François Blanchy,1920,Summer,False
4,1,1920 Summer Olympics,Tennis,"Doubles, Men(Olympic)",Jacques Brugnon,FRA,4.0,Failed,Jean-François Blanchy,1920,Summer,False
...,...,...,...,...,...,...,...,...,...,...,...,...
109385,49999,1992 Summer Olympics,Swimming(Aquatics),"4 Ã— 100 metres Medley Relay, Men(Olympic)",South Africa,RSA,14.0,Failed,Darryl Cronje,1992,Summer,False
109386,50000,1996 Summer Olympics,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",Unknown,RSA,5.0,Failed,Brendon Dedekind,1996,Summer,False
109387,50000,1996 Summer Olympics,Swimming(Aquatics),"100 metres Freestyle, Men(Olympic)",Unknown,RSA,26.0,Failed,Brendon Dedekind,1996,Summer,False
109388,50000,2000 Summer Olympics,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",Unknown,RSA,9.0,Failed,Brendon Dedekind,2000,Summer,False


In [None]:
# Persist the cleaned results; index=False leaves the row index out of the file.
# NOTE(review): 'Athelete' in the file name looks like a typo for 'Athlete';
# left as is because changing it would change the output path.
athlete_performance_results.to_csv('Athelete_performance_result_cleaned.csv', index=False)


In [None]:
# Summarize column dtypes and non-null counts of the results table.
athlete_performance_results.info()

<class 'pandas.core.frame.DataFrame'>
Index: 108223 entries, 0 to 109389
Data columns (total 12 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   Athlete_ID  108223 non-null  int64  
 1   Games       108223 non-null  object 
 2   Discipline  108223 non-null  object 
 3   Event       108223 non-null  object 
 4   Team        108223 non-null  object 
 5   NOC         108223 non-null  object 
 6   Pos         108223 non-null  float64
 7   Medal       108223 non-null  object 
 8   As          108223 non-null  object 
 9   Year        108223 non-null  int32  
 10  Season      108223 non-null  object 
 11  Tied        108223 non-null  bool   
dtypes: bool(1), float64(1), int32(1), int64(1), object(8)
memory usage: 9.6+ MB


In [None]:
# Summarize column dtypes and non-null counts of the biographical table.
biographical_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 39706 entries, 0 to 49766
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Athlete ID        39706 non-null  float64
 1   Roles             39706 non-null  object 
 2   Sex               39706 non-null  object 
 3   Full Name         39706 non-null  object 
 4   Used Name         39706 non-null  object 
 5   Height (cm)       39706 non-null  float64
 6   Weight (kg)       39706 non-null  float64
 7   Measurements      39706 non-null  object 
 8   Affiliations      39706 non-null  object 
 9   NOC               39706 non-null  object 
 10  Date of Birth     39706 non-null  object 
 11  City of Birth     39706 non-null  object 
 12  Region of Birth   39706 non-null  object 
 13  Country of Birth  39706 non-null  object 
 14  Date of Death     39706 non-null  object 
 15  City of Death     39706 non-null  object 
 16  Region of Death   39706 non-null  object 
 17

In [None]:
# Give the key column the same name as in biographical_data so the two tables
# can be joined on it. rename() returns a new frame, so result_info itself
# keeps the original 'Athlete_ID' column name.
athlete_performance_results=athlete_performance_results.rename(columns={'Athlete_ID': 'Athlete ID'})

In [None]:
# Inner-join biography and results on 'Athlete ID': only athletes present in
# both tables are kept. Both inputs carry a 'NOC' column, so the result holds
# them as 'NOC_x' (from biographical_data) and 'NOC_y' (from the results).
merged_df = biographical_data.merge(
    athlete_performance_results,
    how='inner',
    on='Athlete ID',
)


In [None]:
# Preview the joined biography + results table.
merged_df

Unnamed: 0,Athlete ID,Roles,Sex,Full Name,Used Name,Height (cm),Weight (kg),Measurements,Affiliations,NOC_x,...,Discipline,Event,Team,NOC_y,Pos,Medal,As,Year,Season,Tied
0,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Singles, Men(Olympic)",Unknown,FRA,17.0,Failed,Jean-François Blanchy,1912,Summer,False
1,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Doubles, Men(Olympic)",Jean Montariol,FRA,-1.0,Failed,Jean-François Blanchy,1912,Summer,False
2,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Singles, Men(Olympic)",Unknown,FRA,32.0,Failed,Jean-François Blanchy,1920,Summer,False
3,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Doubles, Mixed(Olympic)",Jeanne Vaussard,FRA,8.0,Failed,Jean-François Blanchy,1920,Summer,False
4,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Doubles, Men(Olympic)",Jacques Brugnon,FRA,4.0,Failed,Jean-François Blanchy,1920,Summer,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90822,49999.0,Competed in Olympic Games,Male,Darryl Melwyn•Cronje,Darryl•Cronje,177.0,72.0,177 cm / 72 kg,Unknown,South Africa,...,Swimming(Aquatics),"4 Ã— 100 metres Medley Relay, Men(Olympic)",South Africa,RSA,14.0,Failed,Darryl Cronje,1992,Summer,False
90823,50000.0,Competed in Olympic Games,Male,Brendon•Dedekind,Brendon•Dedekind,186.0,90.0,186 cm / 90 kg,Pietermaritzburg Seals,South Africa,...,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",Unknown,RSA,5.0,Failed,Brendon Dedekind,1996,Summer,False
90824,50000.0,Competed in Olympic Games,Male,Brendon•Dedekind,Brendon•Dedekind,186.0,90.0,186 cm / 90 kg,Pietermaritzburg Seals,South Africa,...,Swimming(Aquatics),"100 metres Freestyle, Men(Olympic)",Unknown,RSA,26.0,Failed,Brendon Dedekind,1996,Summer,False
90825,50000.0,Competed in Olympic Games,Male,Brendon•Dedekind,Brendon•Dedekind,186.0,90.0,186 cm / 90 kg,Pietermaritzburg Seals,South Africa,...,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",Unknown,RSA,9.0,Failed,Brendon Dedekind,2000,Summer,False


In [None]:
# Bind a second name to the merged frame (an alias, not a copy).
All_athelete_performance_result=merged_df

In [None]:
# Preview the merged table under its final name (same object as merged_df).
All_athelete_performance_result

Unnamed: 0,Athlete ID,Roles,Sex,Full Name,Used Name,Height (cm),Weight (kg),Measurements,Affiliations,NOC_x,...,Discipline,Event,Team,NOC_y,Pos,Medal,As,Year,Season,Tied
0,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Singles, Men(Olympic)",Unknown,FRA,17.0,Failed,Jean-François Blanchy,1912,Summer,False
1,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Doubles, Men(Olympic)",Jean Montariol,FRA,-1.0,Failed,Jean-François Blanchy,1912,Summer,False
2,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Singles, Men(Olympic)",Unknown,FRA,32.0,Failed,Jean-François Blanchy,1920,Summer,False
3,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Doubles, Mixed(Olympic)",Jeanne Vaussard,FRA,8.0,Failed,Jean-François Blanchy,1920,Summer,False
4,1.0,Competed in Olympic Games,Male,"François Joseph Marie Antoine ""Jean-François""•...",Jean-François•Blanchy,177.0,72.0,177 cm / 72 kg,Unknown,France,...,Tennis,"Doubles, Men(Olympic)",Jacques Brugnon,FRA,4.0,Failed,Jean-François Blanchy,1920,Summer,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90822,49999.0,Competed in Olympic Games,Male,Darryl Melwyn•Cronje,Darryl•Cronje,177.0,72.0,177 cm / 72 kg,Unknown,South Africa,...,Swimming(Aquatics),"4 Ã— 100 metres Medley Relay, Men(Olympic)",South Africa,RSA,14.0,Failed,Darryl Cronje,1992,Summer,False
90823,50000.0,Competed in Olympic Games,Male,Brendon•Dedekind,Brendon•Dedekind,186.0,90.0,186 cm / 90 kg,Pietermaritzburg Seals,South Africa,...,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",Unknown,RSA,5.0,Failed,Brendon Dedekind,1996,Summer,False
90824,50000.0,Competed in Olympic Games,Male,Brendon•Dedekind,Brendon•Dedekind,186.0,90.0,186 cm / 90 kg,Pietermaritzburg Seals,South Africa,...,Swimming(Aquatics),"100 metres Freestyle, Men(Olympic)",Unknown,RSA,26.0,Failed,Brendon Dedekind,1996,Summer,False
90825,50000.0,Competed in Olympic Games,Male,Brendon•Dedekind,Brendon•Dedekind,186.0,90.0,186 cm / 90 kg,Pietermaritzburg Seals,South Africa,...,Swimming(Aquatics),"50 metres Freestyle, Men(Olympic)",Unknown,RSA,9.0,Failed,Brendon Dedekind,2000,Summer,False


In [None]:
# Persist the merged table; index=False leaves the row index out of the file.
# NOTE(review): the file name contains 'athelete' and 'resultcleaned' (no
# separator), which look like typos; left as is because changing them would
# change the output path.
All_athelete_performance_result.to_csv('All athelete performance resultcleaned.csv', index=False)
