In [2]:
import pandas as pd
import json
import re
from unidecode import unidecode
from ast import literal_eval
from pathlib import Path

In [3]:
# Read the venues table from the EDA data directory
venues_df = pd.read_csv(Path('../eda/data/venues.csv'))

# Report the table dimensions, then preview the first two rows
print(f"Venues data shape: {venues_df.shape}")
venues_df.head(n=2)

Venues data shape: (35, 6)


Unnamed: 0,venue,sports,date_start,date_end,tag,url
0,Aquatics Centre,"['Artistic Swimming', 'Diving', 'Water Polo']",2024-07-27T09:00:00Z,2024-08-10T20:00:00Z,aquatics-centre,https://olympics.com/en/paris-2024/venues/aqua...
1,Bercy Arena,"['Artistic Gymnastics', 'Basketball', 'Trampol...",2024-07-27T09:00:00Z,2024-08-11T16:00:00Z,bercy-arena,https://olympics.com/en/paris-2024/venues/berc...


In [4]:
# Function to safely convert string representation of lists to actual lists
def safe_eval(value):
    """Convert a stringified Python list (e.g. ``"['a', 'b']"``) to a real list.

    Args:
        value: Cell value to convert. Typically a string such as
            ``"['Diving', 'Water Polo']"``, but may also be a missing value
            or an object that is already a list.

    Returns:
        - ``[]`` for missing scalars (``None``, ``NaN``, ...) and for empty or
          whitespace-only strings.
        - The parsed literal for strings wrapped in ``[...]``.
        - ``value`` unchanged in every other case, including bracketed strings
          that cannot be parsed.
    """
    # Already a list: nothing to parse. Checked first because pd.isna() on a
    # list returns an array, whose truth value is ambiguous.
    if isinstance(value, list):
        return value

    if not isinstance(value, str):
        # Only scalars can be tested for missingness with a plain boolean.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return []
        return value

    text = value.strip()
    if text == '':
        return []

    if text.startswith('[') and text.endswith(']'):
        try:
            return literal_eval(text)
        except (ValueError, SyntaxError, TypeError):
            # Malformed literal: hand the original value back untouched.
            return value
    return value

# Function to generate slug
def generate_slug(name):
    """Build a lowercase, hyphen-separated ASCII slug from a display name.

    Args:
        name: Display name to convert. Missing values (as detected by
            ``pd.isna``) and the empty string produce ``''``. Non-string
            values are converted with ``str`` first.

    Returns:
        str: The slug, containing only ``a-z``, ``0-9`` and ``-``.
    """
    if pd.isna(name) or name == '':
        return ''

    # Transliterate BEFORE lowercasing: unidecode can emit capital letters and
    # spaces (e.g. for CJK input), which the final filter would otherwise delete.
    # Strip so surrounding whitespace does not turn into leading/trailing hyphens.
    ascii_name = unidecode(str(name)).strip()
    # Lowercase and replace spaces with hyphens
    slug = ascii_name.lower().replace(' ', '-')
    # Remove any remaining special characters
    return re.sub(r'[^a-z0-9-]', '', slug)

In [5]:
# Process venues data: build a dict of venue records keyed by venue slug.
# Note: rows that share a slug overwrite each other; compare the printed
# count with the row count of venues_df to spot collisions.
venues_json = {}

for _, venue in venues_df.iterrows():
    # Prefer the tag from the CSV; generate a slug from the name if it is missing
    venue_slug = venue['tag'] if pd.notna(venue['tag']) else generate_slug(venue['venue'])

    # Convert sports list. safe_eval hands back the raw value when it cannot
    # parse it as a list; wrap a bare string so the loop below does not
    # iterate over it character by character.
    sports_list = safe_eval(venue['sports'])
    if isinstance(sports_list, str):
        sports_list = [sports_list]

    sports = []
    for sport in sports_list:
        sport_slug = generate_slug(sport)
        sports.append({
            'slug': sport_slug,
            'name': sport,
            'icon': f"/img/sports/SVG/{sport_slug}.svg"
        })

    # Avoid a dangling "hosting " when the venue has no sports listed
    sport_names = ', '.join(s['name'] for s in sports)
    description = f"Olympic venue hosting {sport_names}" if sport_names else "Olympic venue"

    # Create venue object
    venue_data = {
        'name': venue['venue'],
        'slug': venue_slug,
        'url': venue['url'] if pd.notna(venue['url']) else '',
        'sports': sports,
        'date_start': venue['date_start'] if pd.notna(venue['date_start']) else '',
        'date_end': venue['date_end'] if pd.notna(venue['date_end']) else '',
        'description': description,
        'location': {
            'latitude': None,  # Will be filled from GeoJSON
            'longitude': None  # Will be filled from GeoJSON
        }
    }

    venues_json[venue_slug] = venue_data

print(f"Processed {len(venues_json)} venues")

Processed 35 venues
