In [6]:
import pandas as pd #type: ignore
import numpy as np

# Load the ordered data
df = pd.read_csv('processed_data/ordered_times.csv')

# Define the order lists (same as in data_wrangler.ipynb)
sheet_order = [
    'CMS All Time Top 10',
    'CMS Axelrood Pool Records',
    'CMS Frosh Swimming & Diving Records',
    'Development of Team Records (October 2001 to March 2025)', 
    'CMS at UCSD',
    'CMS at Cal Baptist Distance Meet',
    'CMS at PP',
    'CMS at PP Combined', 
    'CMS SCIAC Champions',
    'SCIAC All Time Top 10 Performers',
    'SCIAC Records',
    'NCAA TOP 20'
]

sex_order = ['Athena', 'Stag', 'Women', 'Men']

event_order = [
    # FREE
    '50 FREE', '100 FREE', '200 FREE', '500 FREE', '1000 FREE', '1650 FREE',
    # BACK
    '50 BACK', '100 BACK', '200 BACK', '300 BACK',
    # BREAST
    '50 BREAST', '100 BREAST', '200 BREAST', '300 BREAST',
    # FLY
    '100 FLY', '200 FLY', '300 FLY',
    # IM
    '200 IM', '300 IM', '400 IM',
    # DIVING (METER)
    '1-METER (6 dives)', '1-METER (11 dives)', '1-METER',
    '3-METER (6 dives)', '3-METER (11 dives)', '3-METER',
    # RELAY
    '200 FREE RELAY', '400 FREE RELAY', '500 FREE RELAY- (50-100-150-200)',
    '800 FREE RELAY', '200 MEDLEY RELAY', '400 MEDLEY RELAY',
    '500 MEDLEY RELAY - (200 BACK-150 BR-100 FL-50 FS)',
    # Spl.
    '50 FREE - RELAY Spl.', '50 FREE Spl.', '100 FREE - RELAY Spl.',
    '100 FREE Spl.', '200 FREE - RELAY Spl.', '200 FREE Spl.',
    '50 BACK - RELAY Spl.', '50 BACK Spl.', '50 BREAST - RELAY Spl.',
    '50 BREAST Spl.', '100 BREAST - RELAY Spl.', '100 BREAST Spl.',
    '50 FLY - RELAY Spl.', '50 FLY Spl.', '100 FLY - RELAY Spl.', '100 FLY Spl.'
]

print(f"Loaded {len(df)} records")
print(f"Unique sheets: {df['SHEET'].nunique()}")
print(f"Unique events: {df['EVENT'].nunique()}")


Loaded 4173 records
Unique sheets: 12
Unique events: 49


In [7]:
class EnhancedSwimTableGenerator:
    def __init__(self, df, sheet_order, sex_order, event_order):
        self.df = df
        self.sheet_order = sheet_order
        self.sex_order = sex_order
        self.event_order = event_order
        
        # Override system for special cases
        self.overrides = {
            'RELAY': {'layout': 'single_column', 'max_entries': None},
            'Spl.': {'layout': 'single_column', 'max_entries': None},
            'METER': {'layout': 'single_column', 'max_entries': None},
            'CMS All Time Top 10': {'max_entries': 10},
            'CMS at PP': {'max_entries': 10}
        }
    
    def time_to_seconds(self, time_str):
        """Convert time string to seconds for sorting"""
        if pd.isna(time_str):
            return float('inf')
        try:
            time_str = str(time_str).strip()
            if ':' in time_str:
                parts = time_str.split(':')
                minutes = float(parts[0])
                seconds = float(parts[1])
                return minutes * 60 + seconds
            else:
                return float(time_str)
        except (ValueError, AttributeError):
            return float('inf')
    
    def get_available_columns(self, data):
        """Determine which columns have data and select appropriate macro"""
        available_cols = []
        for col in ['TIME', 'NAME', 'YEAR', 'TEAM', 'RANK', 'SITE', 'MEET', 'CONTEXT']:
            if col in data.columns and not data[col].isna().all():
                available_cols.append(col)
        
        # Map to macro names
        macro_map = {
            ('TIME', 'NAME', 'YEAR'): 'eventtableTIMENAMEYEARMini',
            ('TIME', 'NAME', 'YEAR', 'TEAM'): 'eventtableTIMENAMEYEARTEAMMini',
            ('TIME', 'NAME', 'YEAR', 'TEAM', 'RANK'): 'eventtableTIMENAMEYEARTEAMRANKMini',
            ('TIME', 'YEAR', 'TEAM', 'RANK'): 'eventtableTIMEYEARTEAMRANKMini',
            ('NAME', 'YEAR'): 'eventtableNAMEYEARMini',
            ('TIME', 'NAME', 'YEAR', 'SITE'): 'eventtableTIMENAMEYEARSITEMini',
            ('TIME', 'NAME', 'YEAR', 'SITE', 'MEET', 'CONTEXT'): 'eventtableTIMENAMEYEARSITEMEETCONTEXTMini',
            ('TIME', 'NAME', 'YEAR', 'TEAM', 'MEET'): 'eventtableTIMENAMEYEARTEAMMEETMini'
        }
        
        return available_cols, macro_map.get(tuple(available_cols), 'eventtableTIMENAMEYEARMini')
    
    def should_use_single_column(self, event_name, num_entries):
        """Determine if event should use single column layout"""
        # Check overrides first
        for key, config in self.overrides.items():
            if key in event_name:
                if 'layout' in config and config['layout'] == 'single_column':
                    return True
        
        # Default logic
        if 'RELAY' in event_name or 'Spl.' in event_name or 'METER' in event_name:
            return True
        if num_entries > 15:
            return True
        return False
    
    def group_events_for_layout(self, events_data):
        """Group events into pairs for side-by-side layout"""
        groups = []
        i = 0
        while i < len(events_data):
            event_name, event_data = events_data[i]
            
            if self.should_use_single_column(event_name, len(event_data)):
                # Single column event
                groups.append([(event_name, event_data)])
                i += 1
            else:
                # Try to pair with next event
                if i + 1 < len(events_data):
                    next_event_name, next_event_data = events_data[i + 1]
                    if not self.should_use_single_column(next_event_name, len(next_event_data)):
                        groups.append([(event_name, event_data), (next_event_name, next_event_data)])
                        i += 2
                    else:
                        groups.append([(event_name, event_data)])
                        i += 1
                else:
                    groups.append([(event_name, event_data)])
                    i += 1
        
        return groups
    
    def generate_table_latex(self, event_name, data):
        """Generate LaTeX for a single table"""
        # Sort by time
        if 'TIME' in data.columns:
            data = data.copy()
            data['time_seconds'] = data['TIME'].apply(self.time_to_seconds)
            data = data.sort_values('time_seconds').drop('time_seconds', axis=1)
            data = data.reset_index(drop=True)
        
        # Apply max entries override
        max_entries = None
        for key, config in self.overrides.items():
            if key in event_name or key in data['SHEET'].iloc[0] if len(data) > 0 else False:
                if 'max_entries' in config:
                    max_entries = config['max_entries']
                    break
        
        if max_entries and len(data) > max_entries:
            data = data.head(max_entries)
        
        # Get available columns and macro
        available_cols, macro_name = self.get_available_columns(data)
        
        # Generate table rows
        rows = []
        for _, row in data.iterrows():
            row_data = []
            for col in available_cols:
                value = str(row[col]) if pd.notna(row[col]) else ""
                row_data.append(value)
            rows.append(" & ".join(row_data) + " \\\\")
        
        table_content = "\n".join(rows)
        return f"{macro_name}{{{event_name}}}{{\n{table_content}\n}}"
    
    def generate_section_latex(self, sheet_name, sex_name, events_data):
        """Generate complete LaTeX section with hierarchical structure"""
        latex_parts = []
        
        # Add subsection and subsubsection
        latex_parts.append(f"\\subsection{{{sheet_name}}}")
        latex_parts.append(f"\\subsubsection{{{sex_name}}}")
        latex_parts.append("")
        
        # Group events for layout
        event_groups = self.group_events_for_layout(events_data)
        
        for group in event_groups:
            if len(group) == 2:
                # Side-by-side layout
                event1_name, event1_data = group[0]
                event2_name, event2_data = group[1]
                
                table1 = self.generate_table_latex(event1_name, event1_data)
                table2 = self.generate_table_latex(event2_name, event2_data)
                
                latex_parts.append("\\begin{minipage}[t]{0.44\\textwidth}")
                latex_parts.append("\\centering")
                latex_parts.append(table1)
                latex_parts.append("\\end{minipage}\\hfill")
                latex_parts.append("\\begin{minipage}[t]{0.44\\textwidth}")
                latex_parts.append("\\centering")
                latex_parts.append(table2)
                latex_parts.append("\\end{minipage}")
                latex_parts.append("")
                latex_parts.append("\\vspace{0.3cm}")
                latex_parts.append("")
            else:
                # Single column layout
                event_name, event_data = group[0]
                table = self.generate_table_latex(event_name, event_data)
                
                latex_parts.append("\\begin{minipage}[t]{0.44\\textwidth}")
                latex_parts.append("\\centering")
                latex_parts.append(table)
                latex_parts.append("\\end{minipage}\\hfill")
                latex_parts.append("\\begin{minipage}[t]{0.44\\textwidth}")
                latex_parts.append("\\centering")
                latex_parts.append("")  # Empty minipage for spacing
                latex_parts.append("\\end{minipage}")
                latex_parts.append("")
                latex_parts.append("\\vspace{0.3cm}")
                latex_parts.append("")
        
        return "\n".join(latex_parts)

# Create the generator
generator = EnhancedSwimTableGenerator(df, sheet_order, sex_order, event_order)
print("Enhanced generator created!")


Enhanced generator created!


In [13]:
# Generate complete LaTeX for all sections
def generate_complete_latex():
    """Generate LaTeX for all sheet/sex combinations"""
    all_latex = []
    
    for sheet in sheet_order:
        sheet_data = df[df['SHEET'] == sheet]
        if len(sheet_data) == 0:
            continue
            
        # Get available sexes for this sheet
        available_sexes = sheet_data['SEX'].unique()
        
        for sex in sex_order:
            if sex not in available_sexes:
                continue
                
            sex_data = sheet_data[sheet_data['SEX'] == sex]
            
            # Group by event in order
            events_data = []
            for event in event_order:
                event_data = sex_data[sex_data['EVENT'] == event]
                if len(event_data) > 0:
                    events_data.append((event, event_data))
            
            if events_data:  # Only generate if there's data
                section_latex = generator.generate_section_latex(sheet, sex, events_data)
                all_latex.append(section_latex)
                
                # Add page break after each section
                all_latex.append("\\newpage")
                all_latex.append("")
    
    return "\n".join(all_latex)

# Generate the complete LaTeX
print("Generating complete LaTeX...")
complete_latex = generate_complete_latex()

# Save to file
with open('/home/ben/Desktop/Projects/media_guide/latex/sections/generated_latex.tex', 'w') as f:
    f.write(complete_latex)

print(f"Generated LaTeX saved to 'generated_latex.tex'")
print(f"Total length: {len(complete_latex)} characters")
print(f"Number of lines: {complete_latex.count(chr(10))}")

# Show preview
print("\nPreview (first 15 lines):")
lines = complete_latex.split('\n')
for i, line in enumerate(lines[:15]):
    print(f"{i+1:2d}: {line}")
if len(lines) > 15:
    print(f"... and {len(lines) - 15} more lines")


Generating complete LaTeX...
Generated LaTeX saved to 'generated_latex.tex'
Total length: 208488 characters
Number of lines: 8396

Preview (first 15 lines):
 1: \subsection{CMS All Time Top 10}
 2: \subsubsection{Athena}
 3: 
 4: \begin{minipage}[t]{0.44\textwidth}
 5: \centering
 6: eventtableNAMEYEARMini{50 FREE}{
 7: Jocelyn Crawford & 2019 \\
 8: Ava Sealander & 2022 \\
 9: Madeleine Kan & 2025 \\
10: Kelly Ngo & 2016 \\
11: Helen Liu & 2014 \\
12: Annika Sharma & 2024 \\
13: Michele Kee & 2014 \\
14: Natalia Orbach-M & 2020 \\
15: Suzia Starzyk & 2020 \\
... and 8382 more lines
