In [7]:
import pandas as pd
import pickle as pk
import numpy as np
import torch

import os

from config import didi_data_path, didi_traj_dict_path, didi_interp_traj_path, didi_hash_traj_path, hash_dict_path, didi_hidx_traj_path

# Process Raw Data into a Trajectory Dictionary

In [None]:
# Convert every daily raw archive into a pickled dictionary
# {order id (column 1): [[seconds since midnight, column 4, column 3], ...]}.

# Create the output folder up front so this cell also works on a fresh run;
# the folder-creation cell further down the notebook executes after this one.
os.makedirs(didi_traj_dict_path, exist_ok=True)

for month_name, month in [('oct', 10), ('nov', 11)]:
    for d in range(1, 32):
        raw_file = os.path.join(didi_data_path, f'didi_data_2016_{month_name}_{d:02d}.tar.gz')
        # Days without an archive (e.g. Nov 31, or files that are not present) are skipped.
        if not os.path.isfile(raw_file):
            continue
        print(raw_file)

        # `on_bad_lines='skip'` is the replacement for `error_bad_lines=False`,
        # which was deprecated in pandas 1.3 and removed in pandas 2.0.
        df = pd.read_csv(raw_file, compression='gzip', on_bad_lines='skip', skiprows=1, header=None).dropna()

        # Column 2 is parsed as a Unix timestamp in seconds. It is shifted by +8 h and
        # converted to the number of seconds elapsed since 00:00 of the file's calendar day.
        day_start = pd.to_datetime(f'2016-{month:02d}-{d:02d}')
        df[5] = (pd.to_datetime(df[2], unit='s') + pd.Timedelta('8h') - day_start) / pd.Timedelta('1s')

        # One entry per value of column 1, each a list of [time, column 4, column 3] rows.
        # NOTE(review): columns 4 / 3 are presumably latitude / longitude - confirm against the raw schema.
        traj_dict = {oid: df_trip[[5, 4, 3]].values.tolist() for oid, df_trip in df.groupby(1)}

        out_file = os.path.join(didi_traj_dict_path, f'traj_dict_2016{month:02d}{d:02d}.pk')
        with open(out_file, 'wb') as f:
            pk.dump(traj_dict, f)

# Crop Target Time Range from Trajectory Dictionary

In [7]:
# Make sure the trajectory-dictionary folder exists before anything is written into it.
# os.makedirs also creates missing parent folders, which os.mkdir would refuse to do.
if not os.path.isdir(didi_traj_dict_path):
    os.makedirs(didi_traj_dict_path, exist_ok=True)
    print('Create Folder ', didi_traj_dict_path)

In [None]:
# Keep only the samples whose first field (time in seconds) falls in the
# half-open window (07:00, 11:00] and write one cropped pickle per day.
window_start = 7 * 3600
window_end = 11 * 3600

for m in [10, 11]:
    for d in range(1, 32):
        filename = os.path.join(didi_traj_dict_path, f'traj_dict_2016{m:02d}{d:02d}.pk')
        # Days without a trajectory dictionary are silently skipped.
        if not os.path.isfile(filename):
            continue

        print(filename)
        with open(filename, 'rb') as f:
            data = pk.load(f)

        data_cropped = {}
        for uid, samples in data.items():
            traj = [sample for sample in samples if window_start < sample[0] <= window_end]
            # Entries with no sample inside the window are dropped entirely.
            if traj:
                data_cropped[uid] = traj

        filename = os.path.join(didi_traj_dict_path, f'traj_dict_0700-1100_2016{m:02d}{d:02d}.pk')
        with open(filename, 'wb') as f:
            pk.dump(data_cropped, f)

# Interpolate the Trajectory Dictionary into Constant-Sampling Trajectories

In [8]:
# Make sure the folder for interpolated trajectories exists before it is written to.
# os.makedirs also creates missing parent folders, which os.mkdir would refuse to do.
if not os.path.isdir(didi_interp_traj_path):
    os.makedirs(didi_interp_traj_path, exist_ok=True)
    print('Create Folder ', didi_interp_traj_path)

In [9]:
dT = 60   # sampling period of the output grid, in seconds
T = 240   # number of grid steps; 240 steps * 60 s spans the four-hour 07:00-11:00 window


def _forward_fill_traj(samples, n_steps, step_sec, start_sec):
    """Resample a list of ``[time, a, b]`` samples onto a fixed grid by forward fill.

    Args:
        samples: non-empty list of ``[time_in_seconds, a, b]`` entries.
        n_steps: number of rows in the output grid.
        step_sec: grid spacing in seconds.
        start_sec: time (in seconds) that corresponds to grid row 0.

    Returns:
        ``torch.FloatTensor`` of shape ``(n_steps, 2)``. Rows before the first
        sample hold the first sample's ``[a, b]``; every later row holds the
        ``[a, b]`` of the most recent sample at or before that step.
    """
    # The fill below is only a forward fill when samples are visited in time order.
    # Nothing upstream in this notebook sorts them, so sort here. The sort is stable,
    # so input that is already ordered produces exactly the same result as before.
    ordered = sorted(samples, key=lambda sample: sample[0])

    # Grid row of every sample, clamped into [0, n_steps - 1].
    steps = [max(0, min(n_steps - 1, int(sample[0] - start_sec) // step_sec)) for sample in ordered]

    grid = np.zeros((n_steps, 2))
    # Back-fill the rows that precede the first observation.
    grid[:steps[0]] = [ordered[0][1], ordered[0][2]]
    # Each sample overwrites its own row and everything after it; later samples then
    # overwrite their own tail, which leaves a forward-filled trajectory.
    for step, sample in zip(steps, ordered):
        grid[step:] = [sample[1], sample[2]]
    return torch.FloatTensor(grid)


for m in [10, 11]:
    for d in range(1, 32):

        filename = os.path.join(didi_traj_dict_path, f'traj_dict_0700-1100_2016{m:02d}{d:02d}.pk')

        # Days without a cropped trajectory dictionary are skipped.
        if not os.path.isfile(filename):
            continue
        print(filename)

        with open(filename, 'rb') as f:
            user_traj = pk.load(f)

        interp_user_traj = {}
        for uid, samples in user_traj.items():
            # An empty trajectory has no position to fill from; skip it instead of raising.
            if not samples:
                continue
            interp_user_traj[uid] = _forward_fill_traj(samples, T, dT, 7 * 3600)

        # NOTE(review): the output name says 2012 although the inputs are 2016 data.
        # It is kept unchanged because later cells read the files under this exact name.
        filename = os.path.join(didi_interp_traj_path, f'interp_traj_2012{m:02d}{d:02d}.pk')
        with open(filename, 'wb') as f:
            pk.dump(interp_user_traj, f)

/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161001.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161002.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161003.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161004.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161005.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161006.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161007.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161008.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161009.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161010.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161011.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161012.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161013.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_20161014.pk
/data/fan/didi/processed/traj_dict/traj_dict_0700-1100_2016101

# Transform the Constant-Sampling Trajectories into H3-Hash Trajectories

In [10]:
from h3 import h3

In [11]:
# Make sure the folder for hashed trajectories exists before it is written to.
# os.makedirs also creates missing parent folders, which os.mkdir would refuse to do.
if not os.path.isdir(didi_hash_traj_path):
    os.makedirs(didi_hash_traj_path, exist_ok=True)
    print('Create Folder ', didi_hash_traj_path)

In [12]:
# Replace every point of every interpolated trajectory by its H3 cell id at
# resolution 9 (stored as an integer), and collect all ids seen across all days.
hash_set = set()

for m in [10, 11]:
    for d in range(1, 32):

        filename = os.path.join(didi_interp_traj_path, f'interp_traj_2012{m:02d}{d:02d}.pk')

        # Days without an interpolated trajectory file are skipped.
        if not os.path.isfile(filename):
            continue
        print(filename)

        with open(filename, 'rb') as f:
            user_traj = pk.load(f)

        user_hash_traj = {}

        for uid, traj in user_traj.items():
            cell_ids = []
            for point in traj.numpy().tolist():
                # geo_to_h3 returns a hexadecimal string; keep it as a plain integer.
                cell_ids.append(int(h3.geo_to_h3(point[0], point[1], 9), 16))
            user_hash_traj[uid] = cell_ids
            hash_set.update(cell_ids)

        filename = os.path.join(didi_hash_traj_path, f'hash_traj_2012{m:02d}{d:02d}.pk')
        with open(filename, 'wb') as f:
            pk.dump(user_hash_traj, f)

/data/fan/didi/processed/interp_traj/interp_traj_20121001.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121002.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121003.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121004.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121005.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121006.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121007.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121008.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121009.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121010.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121011.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121012.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121013.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121014.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121015.pk
/data/fan/didi/processed/interp_traj/interp_traj_20121016.pk
/data/fan/didi/processed

In [13]:
# Give every distinct H3 cell id a dense integer index (its position while iterating hash_set).
hash_dict = {v: k for k, v in enumerate(hash_set)}

# NOTE(review): `hash_dict_path` is also imported from `config` in the first cell;
# this assignment overrides that value - confirm which location is intended.
# os.path.join yields the same path whether or not didi_data_path ends with a separator,
# whereas plain string concatenation silently produced a wrong path without one.
hash_dict_path = os.path.join(didi_data_path, 'processed', 'hash_dict.pk')

with open(hash_dict_path, 'wb') as f:
    pk.dump(hash_dict, f)

# Translate the H3-Hash Trajectories into Cluster-ID Trajectories (for DiDi, the cluster ID is simply the index of the hash)

In [14]:
# NOTE(review): `didi_hidx_traj_path` is also imported from `config` in the first cell;
# this assignment overrides that value - confirm which location is intended.
# os.path.join yields the same path whether or not didi_data_path ends with a separator;
# the trailing '' keeps the trailing separator the concatenated string used to have.
didi_hidx_traj_path = os.path.join(didi_data_path, 'processed', 'hidx_traj', '')

# os.makedirs also creates missing parent folders, which os.mkdir would refuse to do.
if not os.path.isdir(didi_hidx_traj_path):
    os.makedirs(didi_hidx_traj_path, exist_ok=True)
    print('Create Folder ', didi_hidx_traj_path)

In [15]:
# Reload the hash -> index table from disk, then rewrite every hashed trajectory
# as a LongTensor of table indices, one output pickle per day.
with open(hash_dict_path, 'rb') as f:
    hash_dict = pk.load(f)

for m in [10, 11]:
    for d in range(1, 32):

        filename = os.path.join(didi_hash_traj_path, f'hash_traj_2012{m:02d}{d:02d}.pk')

        # Days without a hashed trajectory file are skipped.
        if not os.path.isfile(filename):
            continue
        print(filename)

        with open(filename, 'rb') as f:
            user_traj = pk.load(f)

        user_hidx_traj = {}

        for uid, hash_seq in user_traj.items():
            # A hash missing from hash_dict raises KeyError here.
            index_seq = [hash_dict[cell_id] for cell_id in hash_seq]
            user_hidx_traj[uid] = torch.LongTensor(index_seq)

        filename = os.path.join(didi_hidx_traj_path, f'hidx_traj_2012{m:02d}{d:02d}.pk')
        with open(filename, 'wb') as f:
            pk.dump(user_hidx_traj, f)

/data/fan/didi/processed/hash_traj/hash_traj_20121001.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121002.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121003.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121004.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121005.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121006.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121007.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121008.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121009.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121010.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121011.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121012.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121013.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121014.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121015.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121016.pk
/data/fan/didi/processed/hash_traj/hash_traj_20121017.pk
/data/fan/didi/processed/hash_t