In [1]:
import pandas as pd
import re
from datetime import datetime, timedelta

# Location of the food-truck schedule CSV; replace with the actual file path.
csv_path = "/Users/user/Desktop/the-stack/python/posts/food-trucks/ucla_food_trucks_hours2324.csv"
df = pd.read_csv(csv_path)

# Columns of the CSV that are scanned for time ranges and truck names.
meal_columns = [
    'Breakfast',
    'Lunch/Brunch',
    'Dinner',
    'Extended Dinner',
]

# Running total of hours per truck name, filled in by the parsing loop.
truck_hours = dict()

def parse_time(t_str):
    """Convert a 12-hour clock string into a ``datetime.time``.

    Accepts hour-only and hour:minute forms, with or without periods in the
    meridiem and with or without whitespace in front of it, for example
    ``"11 a.m."``, ``"11:30 a.m."``, ``"11a.m."`` or ``"2:15PM"``.

    Args:
        t_str: The clock-time text to parse.

    Returns:
        The parsed ``datetime.time``.

    Raises:
        ValueError: If the text matches none of the supported formats.
    """
    stripped = t_str.replace('.', '').strip()
    # strptime requires at least one whitespace character wherever the format
    # contains a space, so "11am" would be rejected by "%I %p". Force exactly
    # one space between the digits and the trailing am/pm marker.
    normalized = re.sub(r"(?<=\d)\s*(?=[ap]m$)", " ", stripped, flags=re.IGNORECASE)
    for fmt in ("%I %p", "%I:%M %p"):
        try:
            return datetime.strptime(normalized, fmt).time()
        except ValueError:
            continue
    raise ValueError(f"Unrecognized time format: {stripped}")

def time_diff(start, end):
    """Return the number of hours from ``start`` to ``end``.

    Both arguments are ``datetime.time`` values. When ``end`` is earlier
    than ``start`` the range is treated as crossing midnight, so the end is
    moved to the following day.

    Args:
        start: Clock time at which the range begins.
        end: Clock time at which the range ends.

    Returns:
        The length of the range in hours, as a float.
    """
    # Anchor both times to one fixed date. Calling datetime.today() once per
    # endpoint could put them on different dates if midnight passes between
    # the two calls, which would skew the result by 24 hours.
    anchor = datetime(2000, 1, 1)
    start_dt = datetime.combine(anchor, start)
    end_dt = datetime.combine(anchor, end)
    if end_dt < start_dt:
        end_dt += timedelta(days=1)
    return (end_dt - start_dt).total_seconds() / 3600

# A block must start with "<start> - <end>" followed by whitespace and the
# truck names. Compiled once here because the pattern never changes between
# iterations; named groups avoid throwaway placeholders when unpacking.
TIME_BLOCK_RE = re.compile(
    r"\s*(?P<start>\d{1,2}(?::\d{2})?\s*[ap]\.?m\.?)"
    r"\s*-\s*"
    r"(?P<end>\d{1,2}(?::\d{2})?\s*[ap]\.?m\.?)"
    r"\s+(?P<trucks>.*)",
    re.IGNORECASE,
)
# Truck names within a block are separated by 2+ whitespace characters.
TRUCK_SEPARATOR_RE = re.compile(r"\s{2,}")

# Accumulate, per truck, the total hours across every row and meal column.
for _, row in df.iterrows():
    for meal in meal_columns:
        entry = str(row[meal])
        if "CLOSED" in entry or not entry.strip():
            continue

        # A cell may hold several comma-separated time blocks.
        for block in entry.split(','):
            match = TIME_BLOCK_RE.match(block.strip())
            if not match:
                # Blocks that do not begin with a time range are ignored.
                continue

            # Only parse_time is expected to fail, and it signals bad input
            # with ValueError; anything else is a real bug and should surface.
            try:
                start = parse_time(match.group("start"))
                end = parse_time(match.group("end"))
            except ValueError as e:
                print(f"Time parse error in block: '{block}': {e}")
                continue
            duration = time_diff(start, end)

            # Every truck listed in the block is credited with the full duration.
            for truck in TRUCK_SEPARATOR_RE.split(match.group("trucks").strip()):
                if truck:
                    truck_hours[truck] = truck_hours.get(truck, 0) + duration

# Build the summary table, ordered from most to fewest total hours.
ranked_trucks = sorted(
    truck_hours.items(),
    key=lambda item: item[1],
    reverse=True,
)

summary_rows = []
for name, total in ranked_trucks:
    summary_rows.append({"Food Truck": name, "Total Hours": round(total, 2)})

result_df = pd.DataFrame(summary_rows)

print(result_df)


                    Food Truck  Total Hours
0             Perro 1-10 Tacos        480.5
1         8E8 Thai Street Food        424.0
2                 Smile Hotdog        350.0
3                Aloha Fridays        329.5
4              Pinch of Flavor        326.5
5   Kalamaki Greek Street Food        315.5
6           Flamin Hot Chicken        272.5
7                 StopBye Cafe        272.0
8      BittieBitez Mini-Donuts        270.5
9              The Taco Cartel        259.5
10                    Salpicon        258.0
11                 Creamy Boys        249.0
12                Habibi Shack        243.5
13            Cerda Vega Tacos        243.5
14                        Wafl        227.0
15            Heritage Kitchen        220.0
16         Uncle Al's Barbeque        192.5
17      DD's Chick & Cat Shack        178.0
18                  Yuna's Bob        162.5
19             Dina's Dumpling        150.5
20                       Yalla        148.5
21          Rice Balls of Fire  