# Load and Combine All Logs

In [1]:
import os
import pandas as pd

# Folder holding the raw detection logs, relative to this notebook.
log_dir = '../data/raw'  # Adjust if needed

# os.listdir returns names in arbitrary order; sorting keeps the concatenated
# row order (and therefore the index labels shown below) stable between runs.
if os.path.isdir(log_dir):
    log_files = sorted(f for f in os.listdir(log_dir) if f.endswith('.csv'))
else:
    print(f"Log directory not found: {log_dir}")
    log_files = []

all_dfs = []
for f in log_files:
    file_path = os.path.join(log_dir, f)
    try:
        df = pd.read_csv(file_path)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        all_dfs.append(df)
    except Exception as e:
        # Best effort: report the file that failed and keep loading the rest.
        print(f"Error loading {f}: {e}")

if all_dfs:
    combined_df = pd.concat(all_dfs, ignore_index=True)
    print(f"Combined DataFrame shape: {combined_df.shape}")
else:
    # Always bind combined_df so later cells never hit a NameError or silently
    # reuse a stale frame left in the kernel by an earlier run.
    combined_df = pd.DataFrame()
    print("No valid log files found.")

Combined DataFrame shape: (2706, 10)


# Search for "school bus" in Any Label Column

In [2]:
# Flag every row in which any label column names a school bus.
# Only the label columns that are actually present are checked, so a frame
# that lacks one of them (or has no columns at all) does not raise a KeyError.
label_columns = [c for c in ('label_1', 'label_2', 'label_3') if c in combined_df.columns]
mask = pd.Series(False, index=combined_df.index, dtype=bool)
for column in label_columns:
    # astype(str) turns missing values into text so the .str methods apply to every row;
    # lower()/strip() make the comparison ignore case and surrounding whitespace.
    mask |= combined_df[column].astype(str).str.lower().str.strip() == 'school bus'

bus_df = combined_df[mask]
if not bus_df.empty:
    # Newest first, so the first row is the latest detection.
    most_recent = bus_df.sort_values('timestamp', ascending=False).iloc[0]
    print("Most recent school bus detection:")
    print(most_recent)
else:
    print("No school bus found in any log.")

Most recent school bus detection:
timestamp     2025-05-09 15:01:00
label_1                school bus
count_1                         2
avg_conf_1                   0.94
label_2                       NaN
count_2                       NaN
avg_conf_2                    NaN
label_3                       NaN
count_3                       NaN
avg_conf_3                    NaN
Name: 126, dtype: object


# (Optional) Display All School Bus Detections

In [3]:
if not bus_df.empty:
    # label_2 / label_3 may be absent from the logs, so only select the
    # columns that exist instead of failing with a KeyError.
    wanted_columns = ['timestamp', 'label_1', 'label_2', 'label_3']
    present_columns = [c for c in wanted_columns if c in bus_df.columns]
    # Newest detections first.
    display(bus_df[present_columns].sort_values('timestamp', ascending=False))

Unnamed: 0,timestamp,label_1,label_2,label_3
126,2025-05-09 15:01:00,school bus,,
125,2025-05-09 07:07:00,school bus,,
119,2025-05-08 15:10:00,school bus,,
118,2025-05-08 07:14:00,school bus,,
112,2025-05-07 15:28:00,school bus,,
...,...,...,...,...
487,2024-05-13 07:00:00,school bus,,
472,2024-05-10 15:26:00,school bus,,
471,2024-05-10 07:06:00,school bus,,
466,2024-05-09 15:12:00,school bus,,


# Prototype: Interactive Widget Assistant for School Bus Queries

In [4]:
import os
import pandas as pd
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display, clear_output

# Load and combine logs once
log_dir = '../data/raw'

# Sort the file names: os.listdir gives no ordering guarantee, and a stable
# order keeps the combined frame identical from one run to the next.
if os.path.isdir(log_dir):
    log_files = sorted(f for f in os.listdir(log_dir) if f.endswith('.csv'))
else:
    print(f"Log directory not found: {log_dir}")
    log_files = []

all_dfs = []
for f in log_files:
    file_path = os.path.join(log_dir, f)
    try:
        df = pd.read_csv(file_path)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        all_dfs.append(df)
    except Exception as e:
        # A single unreadable file is reported and skipped, not fatal.
        print(f"Error loading {f}: {e}")

# An empty frame stands in when nothing could be loaded, so combined_df is always defined.
combined_df = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()

def assistant_response(query, logs=None, now=None):
    """Answer a free-text question about school bus detections.

    Parameters
    ----------
    query : str
        The user's question. Matching is case-insensitive. The words
        "today" / "yesterday" restrict the search to that calendar day.
    logs : pandas.DataFrame, optional
        Detection log to search. Defaults to the notebook-level
        ``combined_df``. Needs a datetime ``timestamp`` column; any of
        ``label_1`` / ``label_2`` / ``label_3`` that are present are searched.
    now : timestamp-like, optional
        Reference moment used to resolve "today" and "yesterday". Defaults
        to the current local time. Mainly useful for reproducible checks.

    Returns
    -------
    str
        A sentence with the latest matching timestamp, a "not seen"
        sentence, or a notice that only school bus questions are handled.
    """
    query = query.lower()
    if "school bus" not in query:
        return "This prototype only answers questions about school buses."

    if logs is None:
        logs = combined_df

    not_seen = "No, I did not see a school bus."
    # The loader falls back to an empty, column-less frame when no log could
    # be read; answer "not seen" instead of failing with a KeyError.
    if logs.empty or 'timestamp' not in logs.columns:
        return not_seen

    # Time expression parsing
    reference = pd.Timestamp.today() if now is None else pd.Timestamp(now)
    if "today" in query:
        df = logs[logs['timestamp'].dt.date == reference.date()]
    elif "yesterday" in query:
        yesterday = (reference - pd.Timedelta(days=1)).normalize()
        df = logs[logs['timestamp'].dt.date == yesterday.date()]
    else:
        df = logs

    # Search for 'school bus' in every label column that exists.
    mask = pd.Series(False, index=df.index, dtype=bool)
    for column in ('label_1', 'label_2', 'label_3'):
        if column in df.columns:
            # astype(str) makes missing values safe for the .str methods.
            mask |= df[column].astype(str).str.lower().str.strip() == 'school bus'

    bus_df = df[mask]
    if bus_df.empty:
        return not_seen
    # The latest detection is simply the maximum timestamp; no sort is needed.
    return f"Yes, I last saw a school bus at {bus_df['timestamp'].max()}."

# Build the UI pieces: an area that shows the reply and a text box for the question.
output = widgets.Output()
input_box = widgets.Text(
    description='Query:',
    placeholder='Type your question here...',
    value='',
    disabled=False,
)

def on_submit(text):
    """Show the assistant's reply to the submitted query in the output area.

    ``text`` is the object handed to the callback; its ``value`` attribute
    is read as the query string.
    """
    with output:
        # Remove the previous reply so only the latest answer is visible.
        clear_output()
        print(assistant_response(text.value))


# Register the handler on the query box, then render the box and the reply area.
input_box.on_submit(on_submit)
display(input_box, output)

Text(value='', description='Query:', placeholder='Type your question here...')

Output()