In [None]:
import pandas as pd

# Load the marathon dataset from its CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="/content/marathon.csv")
# Preview the first row (a notebook renders this when it is the cell's last expression).
df.head(n=1)

In [None]:
import pandas as pd
from datetime import datetime
import io

import warnings
# Suppress every warning category from here on (blanket 'ignore' filter).
warnings.simplefilter('ignore')

# Parse the day.month.year strings in 'Date' into pandas datetimes.
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')

# Order rows by gender first, then chronologically within each gender,
# so consecutive rows of one gender are successive records.
df_sorted = df.sort_values(by=['Gender', 'Date'])

# Calculate record duration for each athlete
def calculate_record_duration(group, today=None):
    """Compute how long each record in ``group`` stood before being replaced.

    Rows are ordered by ``'Date'``; each row's record is considered to end on
    the ``'Date'`` of the next row. The chronologically last row has no
    successor, so its end date is ``today``.

    Args:
        group: DataFrame with a datetime ``'Date'`` column (one row per
            record). It is not modified; a sorted copy is returned.
        today: Optional end date for the last record. Anything accepted by
            ``pd.Timestamp`` works. Defaults to ``datetime.now()``, which
            matches the previous hard-coded behaviour; pass an explicit value
            for reproducible results.

    Returns:
        A copy of ``group`` sorted by ``'Date'`` with two added columns:
        ``'Record_End_Date'`` and ``'Duration_Days'`` (whole days between
        ``'Date'`` and ``'Record_End_Date'``).
    """
    if today is None:
        today = datetime.now()

    # Work on a sorted copy so the caller's frame is left untouched.
    ordered = group.sort_values('Date').copy()

    # A record ends when the next one is set; the last row gets NaT from the
    # shift, which is replaced by the reference date.
    ordered['Record_End_Date'] = ordered['Date'].shift(-1).fillna(pd.Timestamp(today))

    # Whole days the record was held (any partial day is truncated by .dt.days).
    ordered['Duration_Days'] = (ordered['Record_End_Date'] - ordered['Date']).dt.days

    return ordered

# Apply the function to each gender group.
# The groups are iterated explicitly instead of using groupby(...).apply():
# iterating a groupby always yields the full sub-frame (including 'Gender'),
# whereas apply() on the grouping column is deprecated and newer pandas
# releases drop that column from the frames handed to the function.
duration_frames = [
    calculate_record_duration(gender_group)
    for _, gender_group in df_sorted.groupby('Gender')
]
# pd.concat raises on an empty list, so fall back to the (empty) input frame.
df_with_duration = (
    pd.concat(duration_frames, ignore_index=True)
    if duration_frames
    else df_sorted.copy()
)

for gender in df_with_duration['Gender'].unique():
    gender_data = df_with_duration[df_with_duration['Gender'] == gender]

    print(f"\n{gender.upper()} MARATHON RECORDS:")
    print("-" * 60)

    # Sort by duration (descending) and take top 20
    gender_data_sorted = gender_data.sort_values('Duration_Days', ascending=False).head(20)

    for _, row in gender_data_sorted.iterrows():
        print(f"{row['Name']:<20} | {row['Time']:<8} | {row['Date'].strftime('%Y-%m-%d')} | {row['Duration_Days']:>4} days")

In [None]:
import pandas as pd
from datetime import datetime

df = pd.read_csv("/content/marathon.csv")
# Keep only athletes whose Country is 'RUS'. The explicit .copy() makes the
# filtered frame independent of the full table, so the column assignments
# below are not chained assignments on a slice (SettingWithCopyWarning).
df = df[df['Country'] == 'RUS'].copy()

# Convert Date column to datetime and extract year
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')
df['Event_Year'] = df['Date'].dt.year

# Group athletes by Country and Event_Year to find potential training groups
# (athletes of one country who competed in the same calendar year).
training_groups = df.groupby(['Country', 'Event_Year']).agg({
    'Name': list,
    # Unique cities; missing values are dropped and the result is sorted so
    # the printed order is the same on every run (set order is not).
    'City': lambda x: sorted(x.dropna().astype(str).unique()),
    # Unique dates in chronological order, formatted as day.month.year.
    'Date': lambda x: list(
        x.dropna().drop_duplicates().sort_values().dt.strftime('%d.%m.%Y')
    ),
    'Gender': list,
    'Time': list
}).reset_index()

# Add group size
training_groups['Group_Size'] = training_groups['Name'].apply(len)

# Filter for groups with more than 1 athlete (potential training partners)
potential_training_groups = training_groups[training_groups['Group_Size'] > 1]

print("POTENTIAL TRAINING GROUPS:")
print("Athletes from the same country competing during the same year")
print("="*80)

if not potential_training_groups.empty:
    # Number the groups sequentially; the frame's own index still refers to
    # positions before filtering and would produce gaps (e.g. #2, #5).
    for group_number, (_, group) in enumerate(potential_training_groups.iterrows(), 1):
        print(f"\nTraining Group #{group_number}:")
        print(f"Country: {group['Country']}")
        print(f"Year: {group['Event_Year']}")
        print(f"Group Size: {group['Group_Size']} athletes")
        print(f"Cities competed in: {', '.join(group['City'])}")
        print(f"Competition dates: {', '.join(group['Date'])}")
        print("Athletes:")
        for i, (name, gender, time) in enumerate(zip(group['Name'], group['Gender'], group['Time']), 1):
            print(f"  {i}. {name} ({gender}) - {time}")
        print("-" * 50)
else:
    print("No training groups found with the current criteria.")

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 80, "DETAILED ANALYSIS BY COUNTRY AND EVENT:", "=" * 80, sep="\n")

# One row per (Country, Event) pair; every other column is collected into a
# list with one entry per athlete row of that pair.
detailed_groups = df.groupby(['Country', 'Event']).agg({
    'Name': list,
    'Event_Year': list,
    'City': list,
    'Date': lambda dates: list(dates.dt.strftime('%d.%m.%Y')),
    'Gender': list,
    'Time': list
}).reset_index()

for _, entry in detailed_groups.iterrows():
    athlete_names = entry['Name']
    # Distinct years in which this country/event pair has results, ascending.
    active_years = sorted(set(entry['Event_Year']))

    print(f"\n{entry['Country']} - {entry['Event']}:")
    print("Active years: " + ', '.join(str(season) for season in active_years))
    print(f"Total athletes: {len(athlete_names)}")
    print("Athletes details:")

    # The per-column lists are parallel, so zipping them rebuilds each athlete row.
    roster = zip(
        athlete_names, entry['Event_Year'], entry['City'],
        entry['Date'], entry['Gender'], entry['Time']
    )
    for name, year, city, date, gender, time in roster:
        print(f"  • {name} ({gender}) - {time} in {city} on {date} ({year})")
    print("-" * 50)

# Additional analysis: Same year, same country clusters
print("\n" + "="*80)
print("SAME YEAR CLUSTERS (Regardless of exact date):")
print("="*80)

# One row per (Country, Event_Year); the remaining columns become parallel
# lists with one entry per athlete row in that cluster.
same_year_groups = df.groupby(['Country', 'Event_Year']).agg({
    'Name': list,
    'Event': list,
    'City': list,
    'Date': lambda x: list(x.dt.strftime('%d.%m.%Y')),
    'Gender': list,
    'Time': list
}).reset_index()

same_year_groups['Group_Size'] = same_year_groups['Name'].apply(len)
multi_athlete_years = same_year_groups[same_year_groups['Group_Size'] > 1]

if not multi_athlete_years.empty:
    for idx, group in multi_athlete_years.iterrows():
        print(f"\n{group['Country']} - {group['Event_Year']}:")
        print(f"Athletes: {group['Group_Size']}")
        # Distinct events, sorted: plain set iteration order for strings
        # changes between interpreter runs, and a missing Event (NaN) would
        # make str.join raise, so missing values are skipped.
        events = sorted({str(event) for event in group['Event'] if pd.notna(event)})
        print(f"Events: {', '.join(events)}")
        for name, event, city, date, gender, time in zip(
            group['Name'], group['Event'], group['City'],
            group['Date'], group['Gender'], group['Time']
        ):
            print(f"  • {name} ({gender}) - {event} - {time} in {city} on {date}")
        print("-" * 50)
else:
    print("No multi-athlete clusters found in the same year.")

# Summary statistics
print("\n" + "="*80)
print("SUMMARY STATISTICS:")
print("="*80)
print(f"Total athletes: {len(df)}")
print(f"Countries represented: {df['Country'].nunique()}")
# Skip missing events and stringify the rest so str.join cannot fail.
event_names = [str(event) for event in df['Event'].dropna().unique()]
print(f"Events: {', '.join(event_names)}")
# With no usable years, min()/max() would be NaN; report that explicitly.
known_years = df['Event_Year'].dropna()
if known_years.empty:
    print("Years covered: n/a")
else:
    print(f"Years covered: {int(known_years.min())} - {int(known_years.max())}")
# The groups counted here are built per country and year (no event key),
# so the label states exactly that.
print(f"Potential training groups (same country, year): {len(potential_training_groups)}")