In [20]:
import pandas as pd
import ast
import json

# Read the CSV file
df = pd.read_csv('../eda/athletes.csv')

# Function to safely convert the disciplines to a list
def convert_to_list(x):
    if isinstance(x, list):
        return x
    try:
        # Try parsing as JSON
        return json.loads(x.replace("'", '"'))
    except:
        try:
            # Try with ast.literal_eval
            return ast.literal_eval(x)
        except:
            # If all else fails, return as a single-item list
            return [x]

# Apply the conversion to the disciplines column
df['disciplines'] = df['disciplines'].apply(convert_to_list)

# Create a mask for Wrestling athletes
wrestling_mask = df['disciplines'].apply(lambda x: 'Athletics' in x)

# Count total Wrestling athletes
wrestling_count = wrestling_mask.sum()

# Count Wrestling athletes by gender
wrestling_men_count = df[wrestling_mask & (df['gender'] == 'Male')].shape[0]
wrestling_women_count = df[wrestling_mask & (df['gender'] == 'Female')].shape[0]
wrestling_unspecified_count = df[wrestling_mask & (~df['gender'].isin(['Male', 'Female']))].shape[0]

# Print the results
print(f"Total number of Wrestling athletes: {wrestling_count}")
print(f"Number of male Wrestling athletes: {wrestling_men_count}")
print(f"Number of female Wrestling athletes: {wrestling_women_count}")
print(f"Number of Wrestling athletes with unspecified gender: {wrestling_unspecified_count}")


Total number of Wrestling athletes: 2018
Number of male Wrestling athletes: 1036
Number of female Wrestling athletes: 982
Number of Wrestling athletes with unspecified gender: 0


In [None]:
# If discipline is stored as a string representation of a list, we need to convert it first
# Try to evaluate if it's stored as a string representation of a list
try:
    # First row as a test to see if conversion is needed
    if isinstance(df['disciplines'].iloc[0], str) and ('[' in df['disciplines'].iloc[0] or "'" in df['disciplines'].iloc[0]):
        df['disciplines'] = df['disciplines'].apply(eval)
except:
    pass  # Already in the correct format

# Count athletes who have 'Wrestling' in their disciplines
wrestling_athletes_count = df[df['disciplines'].apply(lambda x: 'Wrestling' in x if isinstance(x, list) else x == 'Wrestling')].shape[0]

# Print the result
print(f"Number of Wrestling athletes: {wrestling_athletes_count}")