In [1]:
import pandas as pd
import geopandas as gpd
import os
import numpy as np
import glob

In [2]:
# Specifify Data Path
data_dir = '../../ProjectA_TokyoBay/Data/2025 Studio Data from Yoshida/PeopleFlow'

In [9]:
# Function to Process Data

def travel_type(x, y):

    if x == 0.0: # origin in Toyosu
        if y == 0.0:  # destination in Toyosu
            return 'Trip within Toyosu'
        else:
            return 'Origin in Toyosu'
    elif y == 0.0:
        return 'Destination in Toyosu'
    else:
        return 'N/A'

def prep_od_data(fname, gdf_toyosu):

    df = pd.read_csv(fname, encoding="utf-8")

    # Drop tripid which has only one data point
    df_cnt = df.groupby("tripid")["tripid"].count()

    df = df[df["tripid"].isin(df_cnt[df_cnt > 1].index)]

    df["datetime"] = pd.to_datetime(df["recordedat"])

    # get first and last points from each tripid
    df.sort_values(by=["tripid", "recordedat"], inplace=True)

    df["asc_rnk"] = df.groupby("tripid")["recordedat"].rank(ascending=True)
    df["dsc_rnk"] = df.groupby("tripid")["recordedat"].rank(ascending=False)

    df_origin = df[df["asc_rnk"] == 1]
    df_destination = df[df["dsc_rnk"] == 1]

    df_origin.rename(columns={"lon": "origin_lon",
                    "lat": "origin_lat",
                    "datetime": "start_datetime"},
                    inplace=True)

    df_destination.rename(columns={"lon": "destination_lon",
                    "lat": "destination_lat",
                    "datetime": "end_datetime"},
                    inplace=True)

    gdf_origin = gpd.GeoDataFrame(df_origin, geometry=gpd.points_from_xy(df_origin["origin_lon"], df_origin["origin_lat"]), 
                              crs="EPSG:4326").to_crs("EPSG:6677")
    gdf_destination = gpd.GeoDataFrame(df_destination, geometry=gpd.points_from_xy(df_destination["destination_lon"], df_destination["destination_lat"]), 
                                    crs="EPSG:4326").to_crs("EPSG:6677")
    
    gdf_origin_within = gpd.sjoin(gdf_origin, gdf_toyosu, how='left', predicate='within')
    gdf_destination_within = gpd.sjoin(gdf_destination, gdf_toyosu, how='left', predicate='within')

    gdf_origin_within.rename(columns={"index_right": "Id"}, inplace=True)
    gdf_destination_within.rename(columns={"index_right": "Id"}, inplace=True)

    gdf_results = gdf_origin_within[["tripid", "transportmode", "p_ageclass", "p_gender", "origin_lon", "origin_lat", "start_datetime", "Id"]]\
                        .merge(gdf_destination_within[["tripid", "destination_lon", "destination_lat", "end_datetime", "Id"]],
                            on="tripid")
    
    gdf_results["trip_type"] = gdf_results.apply(lambda x: travel_type(x["Id_x"], x["Id_y"]), axis=1)
    
    return gdf_results

In [10]:
# Get List of CSV files containing trip data
file_list = glob.glob(os.path.join(data_dir, "*.csv"))

print(f"Found {len(file_list)} files in {data_dir}")

Found 7 files in ../../ProjectA_TokyoBay/Data/2025 Studio Data from Yoshida/PeopleFlow


In [11]:
# Load Study Area Polygon Boundary
gdf_toyosu = gpd.read_file("../../ProjectA_TokyoBay/Data/2025_Studio_ Project_Data/Boundary/Toyosu_Ariake_Shinonome/StudySite_EPSG6677.shp")

In [13]:
# Process each file
for fname in file_list:

    out_fname = fname[-21:]

    print(out_fname)

    df_results = prep_od_data(fname, gdf_toyosu)

    # Save results to CSV
    df_results.to_csv(f"../output/OD_Data_Prep/{out_fname}", 
                      encoding="utf-8", index=False)

2023-05-25_Toyosu.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_origin.rename(columns={"lon": "origin_lon",
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_destination.rename(columns={"lon": "destination_lon",


2023-05-22_Toyosu.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_origin.rename(columns={"lon": "origin_lon",
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_destination.rename(columns={"lon": "destination_lon",


2023-05-27_Toyosu.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_origin.rename(columns={"lon": "origin_lon",
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_destination.rename(columns={"lon": "destination_lon",


2023-05-24_Toyosu.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_origin.rename(columns={"lon": "origin_lon",
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_destination.rename(columns={"lon": "destination_lon",


2023-05-21_Toyosu.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_origin.rename(columns={"lon": "origin_lon",
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_destination.rename(columns={"lon": "destination_lon",


2023-05-26_Toyosu.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_origin.rename(columns={"lon": "origin_lon",
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_destination.rename(columns={"lon": "destination_lon",


2023-05-23_Toyosu.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_origin.rename(columns={"lon": "origin_lon",
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_destination.rename(columns={"lon": "destination_lon",
