In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
def weight_classes_count(sex,df):
    """Plot how many distinct athletes of one sex appear in each weight class.

    An athlete is counted once per weight class: rows are de-duplicated on
    ('Name', 'WeightClassKg'). Only the weight classes listed below for the
    requested sex are kept; rows in any other class are dropped.

    Parameters
    ----------
    sex : str
        'M' or 'F'.
    df : pandas.DataFrame
        Results table; the 'Name', 'Sex' and 'WeightClassKg' columns are read.
        Weight classes are matched as strings (e.g. '93', '120+').

    Returns
    -------
    The object returned by ``sns.countplot`` drawn on a new 20x10 figure.

    Raises
    ------
    ValueError
        If ``sex`` is neither 'M' nor 'F'.
    """
    weight_classes_by_sex = {
        'M': ['53','59','66','74','83','93','105','120','120+'],
        'F': ['43','47','52','57','63','69','72','76','84','84+'],
    }
    # Validate up front: previously an unknown sex skipped both branches and
    # failed later with an UnboundLocalError.
    if sex not in weight_classes_by_sex:
        raise ValueError(f"sex must be 'M' or 'F', got {sex!r}")
    weight_classes = weight_classes_by_sex[sex]

    in_scope = (df['Sex'] == sex) & df['WeightClassKg'].isin(weight_classes)
    unique_athletes = df[in_scope].drop_duplicates(subset=['Name', 'WeightClassKg'])

    # assign() builds a new frame instead of writing a column onto a filtered
    # slice, so the caller's df is never touched and no SettingWithCopyWarning
    # is raised. The ordered categorical makes the bars follow class order
    # ('53' < '59' < ... < '120+') rather than string order.
    unique_athletes = unique_athletes.assign(
        WeightClassKg=pd.Categorical(
            unique_athletes['WeightClassKg'],
            categories=weight_classes,
            ordered=True
        )
    )

    plt.figure(figsize=(20,10))
    return sns.countplot(x='WeightClassKg',data = unique_athletes.sort_values(by='WeightClassKg'))

In [11]:
def lifter_profile(name,df):
    """Summarise one lifter's records as a dictionary.

    Parameters
    ----------
    name : str
        Value matched exactly against the 'Name' column.
    df : pandas.DataFrame
        Results table; the 'Name', 'Sex', 'Age', 'WeightClassKg',
        'Best3SquatKg', 'Best3BenchKg', 'Best3DeadliftKg' and 'TotalKg'
        columns are read.

    Returns
    -------
    dict
        Keys: 'name', 'sex' (taken from the lifter's first row), 'division',
        'weight classes' (unique non-null classes in order of appearance),
        'best_squat', 'best_bench', 'best_deadlift', 'best_total' (column
        maxima over all of the lifter's rows).

    Raises
    ------
    ValueError
        If no row of ``df`` has this name.
    """
    lifter = df[df['Name'] == name]
    # Fail with a clear message instead of the IndexError that .iloc[0] would
    # raise further down on an empty selection.
    if lifter.empty:
        raise ValueError(f"no records found for lifter {name!r}")

    # The division is derived from the highest age recorded for the lifter.
    oldest_age = lifter['Age'].max()
    if oldest_age <= 18:
        division = 'Sub-Junior'
    elif oldest_age <= 23:
        division = 'Junior'
    else:
        # Also reached when every Age is missing: NaN fails both comparisons.
        division = 'Open'

    profile = {
        'name': name,
        'sex': lifter['Sex'].iloc[0],
        'division': division,
        'weight classes': lifter['WeightClassKg'].dropna().unique().tolist(),
        'best_squat': lifter['Best3SquatKg'].max(),
        'best_bench': lifter['Best3BenchKg'].max(),
        'best_deadlift': lifter['Best3DeadliftKg'].max(),
        'best_total': lifter['TotalKg'].max(),
    }
    return profile

In [13]:
def plot_lift_evolution(name,lift,df):
    """Plot a lifter's best result per year for one lift.

    The line is drawn with ``plt.plot`` on whatever figure is current; no new
    figure is created here.

    Parameters
    ----------
    name : str
        Value matched exactly against the 'Name' column.
    lift : str
        One of 'squat', 'bench' or 'deadlift'.
    df : pandas.DataFrame
        Results table; the 'Name' and 'Year' columns plus the 'Best3...Kg'
        column of the chosen lift are read.

    Returns
    -------
    The ``matplotlib.pyplot`` module (``plt``), as before.

    Raises
    ------
    ValueError
        If ``lift`` is not one of the supported names. Previously such a call
        silently returned None without drawing anything.
    """
    # lift name -> (column holding the best attempt, label used in the texts)
    lift_specs = {
        'squat': ('Best3SquatKg', 'Squat'),
        'bench': ('Best3BenchKg', 'Bench'),
        'deadlift': ('Best3DeadliftKg', 'Deadlift'),
    }
    if lift not in lift_specs:
        raise ValueError(
            f"lift must be one of {sorted(lift_specs)}, got {lift!r}"
        )
    column, label = lift_specs[lift]

    yearly_best = df[df['Name'] == name].groupby('Year')[column].max().reset_index()
    plt.plot(yearly_best['Year'], yearly_best[column])
    plt.title(f"{name}'s {label} Progression Over the Years")
    plt.xlabel('Year')
    plt.ylabel(f'{label} (kg)')
    plt.grid(True)
    return plt
    
    

In [17]:
def plot_total_evolution(name, df=None):
    """Plot a lifter's best total per year.

    The line is drawn with ``plt.plot`` on whatever figure is current; no new
    figure is created here.

    Parameters
    ----------
    name : str
        Value matched exactly against the 'Name' column.
    df : pandas.DataFrame, optional
        Results table; the 'Name', 'Year' and 'TotalKg' columns are read.
        When omitted, the module/notebook-level variable ``df`` is used, which
        is what this function always did before the parameter existed.

    Returns
    -------
    The ``matplotlib.pyplot`` module (``plt``).

    Raises
    ------
    NameError
        If ``df`` is omitted and no global ``df`` is defined.
    """
    if df is None:
        # Backward-compatible fallback for existing one-argument calls. The
        # parameter shadows the global name, hence the globals() lookup.
        try:
            df = globals()['df']
        except KeyError:
            raise NameError(
                "name 'df' is not defined; pass the results frame as df="
            ) from None

    total_yearly_best = df[df['Name'] == name].groupby('Year')['TotalKg'].max().reset_index()
    plt.plot(total_yearly_best['Year'], total_yearly_best['TotalKg'])
    plt.title(f"{name}'s Total Progression Over the Years")
    plt.xlabel('Year')
    plt.ylabel('Total (kg)')
    plt.grid(True)
    return plt

In [19]:
def best_year_lifts(name, df=None):
    """Return a lifter's best squat, bench, deadlift and total for each year.

    Parameters
    ----------
    name : str
        Value matched exactly against the 'Name' column.
    df : pandas.DataFrame, optional
        Results table; the 'Name', 'Year', 'Best3SquatKg', 'Best3BenchKg',
        'Best3DeadliftKg' and 'TotalKg' columns are read. When omitted, the
        module/notebook-level variable ``df_final`` is used, which is what
        this function always did before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        One row per year, sorted by year, with columns 'Year',
        'Best3SquatKg', 'Best3BenchKg', 'Best3DeadliftKg' and 'TotalKg'. Each
        value is the maximum of that column over the lifter's rows that year,
        so the four bests in a row may come from different meets.

    Raises
    ------
    NameError
        If ``df`` is omitted and no global ``df_final`` is defined.
    """
    if df is None:
        # Backward-compatible fallback for existing one-argument calls.
        try:
            df = globals()['df_final']
        except KeyError:
            raise NameError(
                "name 'df_final' is not defined; pass the results frame as df="
            ) from None

    lift_columns = ['Best3SquatKg', 'Best3BenchKg', 'Best3DeadliftKg', 'TotalKg']
    records = df[df['Name'] == name]
    # One grouped max over all four columns yields the same table as four
    # separate per-column groupbys outer-merged on 'Year': every per-column
    # result has exactly the same set of years.
    return records.groupby('Year')[lift_columns].max().reset_index()

In [21]:
def open_world_records(sex,w_class,df):
    """Find the heaviest squat, bench, deadlift and total in one weight class.

    The maxima are taken over every row of ``df`` that matches the given sex
    and weight class; no other filtering is applied here.

    Parameters
    ----------
    sex : str
        Sex as stored in the 'Sex' column, e.g. 'F' or 'M'.
    w_class : str
        Weight class as stored in the 'WeightClassKg' column, e.g. '66' or
        '120+'. Both arguments must be strings.
    df : pandas.DataFrame
        Results table; the 'Sex', 'WeightClassKg', 'Name', 'Year',
        'MeetName', 'Best3SquatKg', 'Best3BenchKg', 'Best3DeadliftKg' and
        'TotalKg' columns are read.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Squat WR', 'Bench WR', 'Deadlift WR', 'Total WR' with
        columns 'Name', 'Year', 'MeetName' and 'Lift in kg'. On ties the first
        matching row wins. A lift with no recorded result in the class gets a
        row of NaN.

    Raises
    ------
    ValueError
        If no row matches the given sex and weight class.
    """
    record_labels = {
        'Best3SquatKg': 'Squat WR',
        'Best3BenchKg': 'Bench WR',
        'Best3DeadliftKg': 'Deadlift WR',
        'TotalKg': 'Total WR',
    }
    holder_columns = ['Name', 'Year', 'MeetName']

    # Renumber the rows: the idxmax labels are only usable for lookup when the
    # index is unique, which a caller's frame (e.g. built with concat) need
    # not guarantee.
    athletes = df[(df['Sex'] == sex) & (df['WeightClassKg'] == w_class)].reset_index(drop=True)
    if athletes.empty:
        raise ValueError(
            f"no results for sex {sex!r} in weight class {w_class!r}"
        )

    rows = []
    for lift_column in record_labels:
        results = athletes[lift_column].dropna()
        if results.empty:
            # Nobody in this class has a result for this lift.
            row = {column: np.nan for column in holder_columns}
            row['Lift in kg'] = np.nan
        else:
            best = athletes.loc[results.idxmax()]
            row = {column: best[column] for column in holder_columns}
            row['Lift in kg'] = results.max()
        rows.append(row)

    return pd.DataFrame(
        rows,
        index=list(record_labels.values()),
        columns=holder_columns + ['Lift in kg'],
    )


In [48]:
def get_avg_weight_class_total(weight_class,df):
    """Average, over lifters, of each lifter's best total in a weight class.

    Rows whose 'WeightClassKg' equals ``weight_class`` are sorted by 'TotalKg'
    from highest to lowest and reduced to the first row per 'Name', so every
    lifter contributes a single total. The mean of those totals is returned;
    it is NaN when no row matches.
    """
    in_class = df['WeightClassKg'] == weight_class
    ranked = df[in_class].sort_values(by='TotalKg', ascending=False)
    # After the descending sort the first row kept per name is the best one.
    personal_bests = ranked.drop_duplicates(subset='Name')['TotalKg']
    return personal_bests.mean()
    
    

In [25]:
def compute_competition_years(df):
    """Count the distinct years in which each athlete appears.

    Returns a DataFrame with one row per 'Name' and two columns: 'Name' and
    'YearsCompeted', the number of unique 'Year' values for that athlete.
    """
    # Distinct years per athlete, indexed by name.
    years_per_athlete = df.groupby('Name')['Year'].nunique()
    # Turn the name index back into a column and label the counts.
    return years_per_athlete.reset_index(name='YearsCompeted')