In [None]:
import pyodbc
import pandas as pd
import os

# Define database path
db_path = r"C:\\Users\\ohdaw\\OneDrive - Harvard University\\tu_data\\raw_data\\TU0624v1.accdb"
# Set up connection string (Ensure you have the appropriate ODBC driver installed)
conn_str = (
    r"DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};"
    fr"DBQ={db_path};"
)

# Connect to the database
conn = pyodbc.connect(conn_str)

# Get all user tables
cursor = conn.cursor()
tables = [row.table_name for row in cursor.tables(tableType='TABLE')]

In [None]:
# Export tables to CSV
output_folder = r"C:\Users\ohdaw\OneDrive - Harvard University\tu_data\exported_csv"
os.makedirs(output_folder, exist_ok=True)

for table in tables:
    df = pd.read_sql(f"SELECT * FROM [{table}]", conn)
    output_path = os.path.join(output_folder, f"{table}.csv")
    df.to_csv(output_path, index=False, encoding="utf-8")
    print(f"Exported: {table} -> {output_path}")

# Close connection
conn.close()
print("Export completed.")

In [2]:
import pyodbc
import pandas as pd
import os

  from pandas.core import (


In [34]:
# Define the folder path
folder_path = r"C:\Users\ohdaw\OneDrive - Harvard University\tu_data"

# Function to read CSV with encoding handling
def read_csv_with_encoding(file_path):
    try:
        return pd.read_csv(file_path, encoding="utf-8")  # Try UTF-8 first
    except UnicodeDecodeError:
        return pd.read_csv(file_path, encoding="ISO-8859-1")  # Fallback to ISO-8859-1

# Load CSV files with error handling
deltur = read_csv_with_encoding(os.path.join(folder_path, "deltur.csv"))
tur = read_csv_with_encoding(os.path.join(folder_path, "tur.csv"))
journey = read_csv_with_encoding(os.path.join(folder_path, "journey.csv"))
session = read_csv_with_encoding(os.path.join(folder_path, "session.csv"))

# Step 1: Join deltur.csv with tur.csv on "TurId"
df_1 = deltur.merge(tur, on="TurId", how="left")

# Save the merged dataset
output_path = os.path.join(folder_path, "deltur_tur.csv")
df_1.to_csv(output_path, index=False, encoding="utf-8-sig")  # Save with safe encoding

print(f"Merged dataset saved at: {output_path}")

Merged dataset saved at: C:\Users\ohdaw\OneDrive - Harvard University\tu_data\deltur_tur.csv


In [35]:
# Step 2: Join the result with journey.csv on "SessionId"
df_2 = df_1.merge(journey, on="JourneyId", how="left")

# Save the merged dataset
output_path = os.path.join(folder_path, "deltur_tur_journey.csv")
df_2.to_csv(output_path, index=False, encoding="utf-8-sig")  # Save with safe encoding

print(f"Merged dataset saved at: {output_path}")

Merged dataset saved at: C:\Users\ohdaw\OneDrive - Harvard University\tu_data\deltur_tur_journey.csv


In [36]:
# Remove columns ending with "_x" and rename "_y" columns
df_2 = df_2[[col for col in df_2.columns if not col.endswith("_x")]]
df_2.columns = [col[:-2] if col.endswith("_y") else col for col in df_2.columns]

# Step 3: Join the result with session.csv on "SessionId"
df_3 = df_2.merge(session, on="SessionId", how="left")

# Save the final merged dataset
output_path = os.path.join(folder_path, "deltur_tur_journey_session.csv")
df_3.to_csv(output_path, index=False, encoding="utf-8-sig")  # Save with safe encoding

print(f"Merged dataset saved at: {output_path}")

Merged dataset saved at: C:\Users\ohdaw\OneDrive - Harvard University\tu_data\deltur_tur_journey_session.csv


In [56]:
example = df_3[["TurId", "JourneyId", "SessionId"]]
example.head(20)

Unnamed: 0,TurId,JourneyId,SessionId
0,47,48,50026
1,48,48,50026
2,49,56,50027
3,50,56,50027
4,51,56,50027
5,52,56,50027
6,53,56,50027
7,54,56,50027
8,55,56,50027
9,56,56,50027


In [5]:
# Generate descriptive statistics
desc_stats = df_3.describe(include="all")  # Includes numerical & categorical summary

# Save the summary statistics
output_stats_path = os.path.join(folder_path, "descriptive_statistics.csv")
desc_stats.to_csv(output_stats_path)

# Display summary
print(desc_stats)

               TurId       Delturnr   ModeDwelTime      StageMode  \
count   7.706970e+05  770697.000000  489078.000000  770697.000000   
unique           NaN            NaN            NaN            NaN   
top              NaN            NaN            NaN            NaN   
freq             NaN            NaN            NaN            NaN   
mean    1.809049e+06       1.220914     158.448614       8.637538   
std     7.237045e+05       0.645825     184.533496       8.164699   
min     4.700000e+01       1.000000       0.000000       1.000000   
25%     1.164991e+06       1.000000      15.000000       1.000000   
50%     2.060200e+06       1.000000      70.000000      11.000000   
75%     2.487963e+06       1.000000     264.000000      11.000000   
max     2.733609e+06      15.000000    1305.000000      51.000000   

            ModeGroup  StageDrivPass    StageLength  StageWaitMin  \
count   770697.000000  534747.000000  770697.000000  53954.000000   
unique            NaN            

In [37]:
# Filter households where (FamNumPers - FamNumAdults) ≠ 0 (meaning children are present)
final_df_filtered = df_3[df_3["FamNumPers"] - df_3["FamNumAdults"] != 0]

# Save the filtered dataset
output_filtered_path = os.path.join(folder_path, "merged_filtered.csv")
final_df_filtered.to_csv(output_filtered_path, index=False, encoding="utf-8-sig")

# Display the first few rows of the filtered DataFrame
print(final_df_filtered.head(10))

    TurId  Delturnr  ModeDwelTime  StageMode  ModeGroup  StageDrivPass  \
10     57         1           NaN         11         11            1.0   
11     58         1         130.0         11         11            1.0   
12     59         1         175.0         11         11            1.0   
13     60         1         120.0         11         11            1.0   
14     61         1         105.0         11         11            1.0   
15     62         1           0.0         11         11            1.0   
16     63         1           NaN         11         21            2.0   
17     64         1         285.0         11         21            2.0   
23     70         1           NaN          2          2            1.0   
24     71         1           0.0          2          2            1.0   

    StageLength  StageWaitMin  StageStartMsm  StageDurationMin  ...  \
10         15.0           NaN          450.0              20.0  ...   
11         15.0           NaN          600.

In [7]:
# Select relevant columns
columns_to_analyze = ["RespAgeSimple", "TotalNumTrips", "TotalLen", "TotalMin", "DayNumJourneys"]

# Compute descriptive statistics
desc_stats = final_df_filtered[columns_to_analyze].describe().T  # Transpose for readability
desc_stats["mode"] = final_df_filtered[columns_to_analyze].mode().iloc[0]  # Compute mode

# Count SessionId and RespSex
session_count = final_df_filtered["SessionId"].nunique()  # Unique count of SessionId
resp_sex_count = final_df_filtered["RespSex"].value_counts()  # Count of gender

# Format table
desc_stats = desc_stats.rename(columns={
    "count": "N", "mean": "Mean", "std": "Std Dev", "min": "Min", "25%": "Q1", 
    "50%": "Median", "75%": "Q3", "max": "Max"
})
desc_stats = desc_stats[["N", "Min", "Max", "Mean", "mode", "Median", "Std Dev"]]  # Keep only required stats

# Convert counts to DataFrame and merge
session_df = pd.DataFrame({"Metric": ["SessionId Count"], "Value": [session_count]})
resp_sex_df = pd.DataFrame({"Metric": [f"RespSex {k}" for k in resp_sex_count.index], "Value": resp_sex_count.values})
count_df = pd.concat([session_df, resp_sex_df], ignore_index=True)

# Merge descriptive stats with counts
desc_stats.reset_index(inplace=True)
desc_stats.rename(columns={"index": "Metric"}, inplace=True)
final_table = pd.concat([count_df, desc_stats], ignore_index=True)
final_table

Unnamed: 0,Metric,Value,N,Min,Max,Mean,mode,Median,Std Dev
0,SessionId Count,82079.0,,,,,,,
1,RespSex 2,183784.0,,,,,,,
2,RespSex 1,157563.0,,,,,,,
3,RespAgeSimple,,341347.0,6.0,88.0,31.396843,17.0,35.0,14.514206
4,TotalNumTrips,,341347.0,1.0,40.0,4.631246,4.0,4.0,2.49654
5,TotalLen,,341347.0,0.02,1076.1,51.233963,4.0,28.0,71.980049
6,TotalMin,,341347.0,1.0,975.0,80.215414,40.0,65.0,64.331457
7,DayNumJourneys,,341347.0,0.0,20.0,1.799874,1.0,2.0,0.965414


In [None]:
# Define folder path
folder_path = r"C:\Users\ohdaw\OneDrive - Harvard University\tu_data"
filtered_df_path = os.path.join(folder_path, "merged_filtered.csv")

# Load dataset
final_df_filtered = pd.read_csv(filtered_df_path, encoding="utf-8-sig")

# Create a new column 'JourneyWithChildren' based on 'PartyNumu10'
def classify_journey(df):
    journey_grouped = df.groupby("JourneyId")["PartyNumu10"]

    # Create a new column for journey classification
    df["JourneyWithChildren"] = journey_grouped.transform(lambda x: 
        1 if (x >= 1).all() else  # Case 1: Every trip in the journey has children
        2 if (x >= 1).any() else  # Case 2: Some trips in the journey have children
        3                        # Case 3: No trips in the journey have children
    )
    return df

# Apply the function to classify journeys
final_df_filtered_0to9 = classify_journey(final_df_filtered)

# Save the updated dataset
#output_classified_path = os.path.join(folder_path, "translated_filtered_classified.csv")
#final_df_filtered_0to9.to_csv(output_classified_path, index=False, encoding="utf-8-sig")

# Display the first few rows of the updated DataFrame
print(final_df_filtered_0to9[["JourneyId", "PartyNumu10", "JourneyWithChildren"]].head(20))

    JourneyId  PartyNumu10  JourneyWithChildren
10         58          NaN                    3
11         58          NaN                    3
12         60          NaN                    3
13         60          NaN                    3
14         62          NaN                    3
15         62          NaN                    3
16         64          NaN                    3
17         64          NaN                    3
23         71          NaN                    3
24         71          NaN                    3
25         73          NaN                    3
26         73          NaN                    3
34         87          NaN                    3
35         87          NaN                    3
36         89          NaN                    3
37         89          NaN                    3
38         94          NaN                    3
40         94          NaN                    3
42         94          NaN                    3
43    1163803          NaN              

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["JourneyWithChildren"] = journey_grouped.transform(lambda x:


In [100]:
# Count unique journeys for each category
journey_counts_0to9 = final_df_filtered_0to9.groupby("JourneyId")["JourneyWithChildren"].first().value_counts().sort_index()

# Convert to DataFrame for better presentation
journey_summary_0to9 = pd.DataFrame({
    "Journey Category": ["Every trip had children", "Some trips had children", "No trips had children"],
    "Count": journey_counts.values
})
journey_summary_0to9

Unnamed: 0,Journey Category,Count
0,Every trip had children,15964
1,Some trips had children,15169
2,No trips had children,95915


In [106]:
# Define Mode Mapping for PrimMode
mode_mapping = {1: "Walk", 2: "Bicycle", 11: "Driving Car"}

# Filter dataset for selected PrimMode values
filtered_modes = final_df_filtered[final_df_filtered["PrimMode"].isin(mode_mapping.keys())].copy()

# Assign readable mode names
filtered_modes["PrimMode"] = filtered_modes["PrimMode"].map(mode_mapping)

# Remove NaN values in JourneyWithChildren
filtered_modes_0to9 = filtered_modes.dropna(subset=["JourneyWithChildren"])

# Group by JourneyId to avoid double-counting and get the primary mode
journey_modes_0to9 = (
    filtered_modes_0to9.groupby(["JourneyId", "JourneyWithChildren"])["PrimMode"]
    .agg(lambda x: x.mode()[0] if not x.mode().empty else x.iloc[0])  # Select most common mode
    .reset_index()
)

# Count occurrences of each mode within JourneyWithChildren categories
mode_counts_0to9 = journey_modes_0to9.pivot_table(index="JourneyWithChildren", columns="PrimMode", aggfunc="size", fill_value=0)

# Rename JourneyWithChildren categories
category_labels = {1: "Every trip had children", 2: "Some trips had children", 3: "No trips had children"}
mode_counts_0to9.index = mode_counts_0to9.index.map(category_labels)

# Add a "Total Count" column
mode_counts_0to9["Total"] = mode_counts_0to9.sum(axis=1)

# Reset index without "PrimMode" as a column
mode_counts_0to9_df = mode_counts_0to9.reset_index()

# Ensure correct column renaming
mode_counts_0to9_df.columns.name = None  # Remove any column group names

# Rename the first column properly
mode_counts_0to9_df = mode_counts_0to9_df.rename(columns={"JourneyWithChildren": "Category"})
mode_counts_0to9_df = mode_counts_0to9_df.reset_index(drop=True)

# Ensure **PrimMode is completely removed**
if "PrimMode" in mode_counts_0to9_df.columns:
    mode_counts_0to9_df = mode_counts_0to9_df.drop(columns=["PrimMode"])

# Display the cleaned mode count table
mode_counts_0to9_df

Unnamed: 0,Category,Bicycle,Driving Car,Walk,Total
0,Every trip had children,1673,9146,2284,13103
1,Some trips had children,2490,7779,1247,11516
2,No trips had children,17557,54625,17721,89903


In [None]:
# Define folder path
folder_path = r"C:\Users\ohdaw\OneDrive - Harvard University\tu_data"
filtered_df_path = os.path.join(folder_path, "merged_filtered.csv")

# Load dataset
final_df_filtered = pd.read_csv(filtered_df_path, encoding="utf-8-sig")

# Create a new column 'JourneyWithChildren' based on 'PartyNum1017'
def classify_journey(df):
    journey_grouped = df.groupby("JourneyId")["PartyNum1017"]

    # Create a new column for journey classification
    df["JourneyWithChildren"] = journey_grouped.transform(lambda x: 
        1 if (x >= 1).all() else  # Case 1: Every trip in the journey has children
        2 if (x >= 1).any() else  # Case 2: Some trips in the journey have children
        3                        # Case 3: No trips in the journey have children
    )
    return df

# Apply the function to classify journeys
final_df_filtered_10to17 = classify_journey(final_df_filtered)

# Save the updated dataset
#output_classified_path = os.path.join(folder_path, "translated_filtered_classified.csv")
#final_df_filtered_0to9.to_csv(output_classified_path, index=False, encoding="utf-8-sig")

# Display the first few rows of the updated DataFrame
print(final_df_filtered_10to17[["JourneyId", "PartyNumu10", "JourneyWithChildren"]].head(20))

    JourneyId  PartyNumu10  JourneyWithChildren
10         58          NaN                    3
11         58          NaN                    3
12         60          NaN                    3
13         60          NaN                    3
14         62          NaN                    3
15         62          NaN                    3
16         64          NaN                    3
17         64          NaN                    3
23         71          NaN                    3
24         71          NaN                    3
25         73          NaN                    3
26         73          NaN                    3
34         87          NaN                    3
35         87          NaN                    3
36         89          NaN                    3
37         89          NaN                    3
38         94          NaN                    3
40         94          NaN                    3
42         94          NaN                    3
43    1163803          NaN              

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["JourneyWithChildren"] = journey_grouped.transform(lambda x:


In [110]:
# Count unique journeys for each category
journey_counts_10to17 = final_df_filtered_10to17.groupby("JourneyId")["JourneyWithChildren"].first().value_counts().sort_index()

# Convert to DataFrame for better presentation
journey_summary_10to17 = pd.DataFrame({
    "Journey Category": ["Every trip had children", "Some trips had children", "No trips had children"],
    "Count": journey_counts_10to17.values
})

journey_summary_10to17

Unnamed: 0,Journey Category,Count
0,Every trip had children,14313
1,Some trips had children,12869
2,No trips had children,99866


In [None]:
# Define Mode Mapping for PrimMode
mode_mapping = {1: "Walk", 2: "Bicycle", 11: "Driving Car"}

# Filter dataset for selected PrimMode values
filtered_modes = final_df_filtered[final_df_filtered["PrimMode"].isin(mode_mapping.keys())].copy()

# Assign readable mode names
filtered_modes["PrimMode"] = filtered_modes["PrimMode"].map(mode_mapping)

# Remove NaN values in JourneyWithChildren
filtered_modes_10to17 = filtered_modes.dropna(subset=["JourneyWithChildren"])

# Group by JourneyId to avoid double-counting and get the primary mode
journey_modes_10to17 = (
    filtered_modes_10to17.groupby(["JourneyId", "JourneyWithChildren"])["PrimMode"]
    .agg(lambda x: x.mode()[0] if not x.mode().empty else x.iloc[0])  # Select most common mode
    .reset_index()
)

# Count occurrences of each mode within JourneyWithChildren categories
mode_counts_10to17 = journey_modes_10to17.pivot_table(index="JourneyWithChildren", columns="PrimMode", aggfunc="size", fill_value=0)

# Rename JourneyWithChildren categories
category_labels = {1: "Every trip had children", 2: "Some trips had children", 3: "No trips had children"}
mode_counts_10to17.index = mode_counts_10to17.index.map(category_labels)

# Add a "Total Count" column
mode_counts_10to17["Total"] = mode_counts_10to17.sum(axis=1)

# Reset index without "PrimMode" as a column
mode_counts_10to17_df = mode_counts_10to17.reset_index()

# Ensure correct column renaming
mode_counts_10to17_df.columns.name = None  # Remove any column group names

# Rename the first column properly
mode_counts_10to17_df = mode_counts_10to17_df.rename(columns={"JourneyWithChildren": "Category"})
mode_counts_10to17_df = mode_counts_10to17_df.reset_index(drop=True)

# Ensure **PrimMode is completely removed**
if "PrimMode" in mode_counts_10to17_df.columns:
    mode_counts_10to17_df = mode_counts_10to17_df.drop(columns=["PrimMode"])

# Display the cleaned mode count table
mode_counts_10to17_df


Unnamed: 0,Category,Bicycle,Driving Car,Walk,Total
0,Every trip had children,1673,9146,2284,13103
1,Some trips had children,2490,7779,1247,11516
2,No trips had children,17557,54625,17721,89903


In [155]:
# Define folder path
folder_path = r"C:\Users\ohdaw\OneDrive - Harvard University\tu_data"
filtered_df_path = os.path.join(folder_path, "merged_filtered.csv")

# Load dataset
final_df_filtered = pd.read_csv(filtered_df_path, encoding="utf-8-sig")

# Standardize column names (remove spaces)
final_df_filtered.columns = final_df_filtered.columns.str.strip()

# Ensure required columns exist
required_columns = ["PartyNum1017", "PartyNumu10", "JourneyId"]
missing_columns = [col for col in required_columns if col not in final_df_filtered.columns]

if missing_columns:
    raise ValueError(f"❌ ERROR: Missing columns: {missing_columns}")

# Replace NaN values with 0
final_df_filtered[["PartyNum1017", "PartyNumu10"]] = final_df_filtered[["PartyNum1017", "PartyNumu10"]].fillna(0)

# Convert to integer types
final_df_filtered["PartyNum1017"] = final_df_filtered["PartyNum1017"].astype(int)
final_df_filtered["PartyNumu10"] = final_df_filtered["PartyNumu10"].astype(int)

# Function to classify journeys
def classify_journey(df):
    # Identify journeys with any children present
    journey_with_children = df.groupby("JourneyId")[["PartyNum1017", "PartyNumu10"]].apply(
        lambda x: 1 if (x["PartyNum1017"].gt(0).all() or x["PartyNumu10"].gt(0).all()) else  # Every trip in the journey has at least one "PartyNum1017" or "PartyNumu10"
                  2 if (x["PartyNum1017"].gt(0).any() or x["PartyNumu10"].gt(0).any()) else  # Some trips in the journey have at least one "PartyNum1017" or "PartyNumu10"
                  3  # No trips in the journey have children
    )
    
    # Merge back to the main dataframe
    df = df.merge(journey_with_children.rename("JourneyWithChildren"), on="JourneyId", how="left")
    return df

# Apply classification
final_df_filtered_0to17 = classify_journey(final_df_filtered)

# Save the updated dataset
#output_classified_path = os.path.join(folder_path, "translated_filtered_classified.csv")
#final_df_filtered_0to9.to_csv(output_classified_path, index=False, encoding="utf-8-sig")

# Display the first few rows of the updated DataFrame
print(final_df_filtered_10to17[["JourneyId", "PartyNumu10","PartyNum1017", "JourneyWithChildren"]].head(20))

  final_df_filtered = pd.read_csv(filtered_df_path, encoding="utf-8-sig")


    JourneyId  PartyNumu10  PartyNum1017  JourneyWithChildren
10         58          NaN           NaN                    3
11         58          NaN           NaN                    3
12         60          NaN           NaN                    3
13         60          NaN           NaN                    3
14         62          NaN           NaN                    3
15         62          NaN           NaN                    3
16         64          NaN           NaN                    3
17         64          NaN           NaN                    3
23         71          NaN           NaN                    3
24         71          NaN           NaN                    3
25         73          NaN           NaN                    3
26         73          NaN           NaN                    3
34         87          NaN           NaN                    3
35         87          NaN           NaN                    3
36         89          NaN           NaN                    3
37      

In [156]:
# Count unique journeys for each category
journey_counts_0to17 = final_df_filtered_0to17.groupby("JourneyId")["JourneyWithChildren"].first().value_counts().sort_index()

# Convert to DataFrame for better presentation
journey_summary_0to17 = pd.DataFrame({
    "Journey Category": ["Every trip had children", "Some trips had children", "No trips had children"],
    "Count": journey_counts_0to17.values
})

journey_summary_0to17

Unnamed: 0,Journey Category,Count
0,Every trip had children,27514
1,Some trips had children,25158
2,No trips had children,74376


In [None]:
# Define Mode Mapping for PrimMode
mode_mapping = {1: "Walk", 2: "Bicycle", 11: "Driving Car"}

# Ensure journey classification is applied first
if "JourneyWithChildren" not in final_df_filtered.columns:
    final_df_filtered = classify_journey(final_df_filtered)  # Apply function if missing

# Filter dataset for selected PrimMode values
filtered_modes = final_df_filtered[final_df_filtered["PrimMode"].isin(mode_mapping.keys())].copy()

# Assign readable mode names
filtered_modes["PrimMode"] = filtered_modes["PrimMode"].map(mode_mapping)

# Ensure JourneyWithChildren column is not missing
if "JourneyWithChildren" in filtered_modes.columns:
    # Remove NaN values in JourneyWithChildren
    filtered_modes_0to17 = filtered_modes.dropna(subset=["JourneyWithChildren"])

    # Group by JourneyId to avoid double-counting and get the primary mode
    journey_modes_0to17 = (
        filtered_modes_0to17.groupby(["JourneyId", "JourneyWithChildren"])["PrimMode"]
        .agg(lambda x: x.mode()[0] if not x.mode().empty else x.iloc[0])  # Select most common mode
        .reset_index()
    )

    # Count occurrences of each mode within JourneyWithChildren categories
    mode_counts_0to17 = journey_modes_0to17.pivot_table(index="JourneyWithChildren", columns="PrimMode", aggfunc="size", fill_value=0)

    # Rename JourneyWithChildren categories
    category_labels = {1: "Every trip had children", 2: "Some trips had children", 3: "No trips had children"}
    mode_counts_0to17.index = mode_counts_0to17.index.map(category_labels)

    # Add a "Total Count" column
    mode_counts_0to17["Total"] = mode_counts_0to17.sum(axis=1)

    # Reset index without "PrimMode" as a column
    mode_counts_0to17_df = mode_counts_0to17.reset_index()

    # Ensure correct column renaming
    mode_counts_0to17_df.columns.name = None  # Remove any column group names

    # Rename the first column properly
    mode_counts_0to17_df = mode_counts_0to17_df.rename(columns={"JourneyWithChildren": "Category"})
    mode_counts_0to17_df = mode_counts_0to17_df.reset_index(drop=True)

    # Ensure **PrimMode is completely removed**
    if "PrimMode" in mode_counts_0to17_df.columns:
        mode_counts_0to17_df = mode_counts_0to17_df.drop(columns=["PrimMode"])

    # Display the cleaned mode count table
    display(mode_counts_0to17_df)

else:
    print("🚨 ERROR: 'JourneyWithChildren' column is missing after filtering. Check if classify_journey() was applied correctly.")

Unnamed: 0,Category,Bicycle,Driving Car,Walk,Total
0,Every trip had children,2612,17662,5553,25827
1,Some trips had children,4798,15618,2812,23228
2,No trips had children,14310,38270,12887,65467
