# Default

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

EXP = "gpt-oss-120b_17"

# Load the travel-time records (JSON Lines: one JSON object per line).
# NOTE(review): both paths below point at experiment "gpt-oss-120b_06" while
# EXP is "gpt-oss-120b_17" and is not used in this cell -- confirm which
# experiment this notebook is meant to analyse.
file_path = "../../experiments/gpt-oss-120b_06/people_travel_data_travel_time.jsonl"

df = pd.read_json(file_path, lines=True)
# Calendar day of each record; `timestamp` is interpreted with unit='s'.
df['date'] = pd.to_datetime(df['timestamp'], unit='s').dt.date
# NOTE: this is not the last expression of the cell, so a default Jupyter
# configuration will not render it.
df.describe()

# Second file: used only to obtain the set of activity ids to keep.
filter_file_path = "../../experiments/gpt-oss-120b_06/people_travel_data.jsonl"

filter_df = pd.read_json(filter_file_path, lines=True)

all_activities = filter_df['activity_id'].unique()

# Keep only the rows whose activity also appears in the filter file.  The
# explicit .copy() makes `df` an independent frame, so later cells can add
# columns to it without triggering pandas' SettingWithCopyWarning.
df = df[df['activity_id'].isin(all_activities)].copy()
df.head()

# DEBUG

In [182]:
# Sanity check: within one activity, expected_arrive_at must always fall on
# the same second of the day (i.e. the expected arrival time does not change).
expected_arrive_at = df['expected_arrive_at']
if pd.api.types.is_datetime64_any_dtype(expected_arrive_at):
    # pd.read_json parses "*_at" columns as dates by default.  pd.to_numeric on
    # a datetime column returns its raw integer ticks (nanoseconds for
    # datetime64[ns]), for which `% 86400` is NOT the second of the day, so
    # derive it from the datetime components instead.
    df['expected_arrive_at_24h'] = (
        expected_arrive_at.dt.hour * 3600
        + expected_arrive_at.dt.minute * 60
        + expected_arrive_at.dt.second
    )
else:
    # Non-datetime column: assumed to hold epoch seconds -- TODO confirm.
    df['expected_arrive_at_24h'] = pd.to_numeric(expected_arrive_at) % 86400

# Per activity, the largest and smallest time of day must coincide.
expected_arrive_at_by_activity = df.groupby(['activity_id']).agg({
    'expected_arrive_at_24h': ['max', 'min']
})
expected_arrive_at_by_activity.columns = ['max_expected_arrive_at', 'min_expected_arrive_at']
assert expected_arrive_at_by_activity['max_expected_arrive_at'].equals(
    expected_arrive_at_by_activity['min_expected_arrive_at']
), "Expected arrival times do not match"

# Analyze

In [None]:
# Daily lateness per person: mean, minimum and maximum of `late` for every
# (person_id, date) pair, flattened into ordinary columns.
late_time_changes = (
    df.groupby(['person_id', 'date'])['late']
    .agg(mean_late='mean', min_late='min', max_late='max')
    .reset_index()
)
late_time_changes.head()

# Plot the daily mean lateness, one line per person.
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
for pid, per_person in late_time_changes.groupby('person_id'):
    plt.plot(per_person['date'], per_person['mean_late'], label=f'Person {pid}')
# The min/max columns are computed above but no min/max band is drawn here.
plt.xlabel('Date')
plt.ylabel('Mean Late Time')
plt.title('Mean Late Time by Person')
plt.legend()
plt.show()

In [None]:
# Daily lateness over all persons: mean, minimum and maximum of `late` per date.
late_time_changes = df.groupby(['date']).agg({
    'late': ['mean', 'min', 'max']
})
late_time_changes.columns = ['mean_late', 'min_late', 'max_late']
late_time_changes.reset_index(inplace=True)
late_time_changes.head()

# Plot a single line: the mean lateness per date, aggregated over all persons.
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
# The label gives plt.legend() an entry to show; without any labelled artist
# the legend is empty and matplotlib emits a warning.
plt.plot(
    late_time_changes['date'],
    late_time_changes['mean_late'],
    label='All persons',
)

plt.xlabel('Date')
plt.ylabel('Mean Late Time')
plt.title('Mean Late Time by Date')
plt.legend()
plt.show()

In [None]:
# For person 1031925 only: the first `started_at` value of every
# (date, activity_id) pair.
first_start = (
    df.loc[df['person_id'].isin([1031925])]
    .groupby(['date', 'activity_id'])['started_at']
    .first()
)
# Convert each start time to seconds since midnight and flatten the group
# keys back into ordinary columns.
start_at_changes = (
    first_start.dt.hour.mul(3600)
    .add(first_start.dt.minute.mul(60))
    .add(first_start.dt.second)
    .rename('started_at')
    .reset_index()
)
start_at_changes.head()

# Plot the start time over the dates, one line per activity.
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
for act_id, per_activity in start_at_changes.groupby('activity_id'):
    plt.plot(per_activity['date'], per_activity['started_at'], label=f'Activity {act_id}')
plt.xlabel('Date')
plt.ylabel('Started At Time')
plt.title('Started At Time by Activity')
plt.legend()
plt.show()