In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import glob, os
# from s2sphere import CellId, LatLng
import datetime
import pytz

In [3]:
def local_datetime_from_timestamp(timestamp, tz_info):
    """Convert a Unix timestamp into a timezone-aware datetime in ``tz_info``.

    Args:
        timestamp: Seconds since the Unix epoch.
        tz_info: ``tzinfo`` object of the target zone (the notebook passes
            pytz timezones, but any ``tzinfo`` implementation works).

    Returns:
        An aware ``datetime.datetime`` expressed in ``tz_info``.
    """
    # Build the aware UTC value in one step. ``utcfromtimestamp`` returns a
    # naive datetime and is deprecated since Python 3.12.
    utc_datetime = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
    return utc_datetime.astimezone(tz_info)

In [4]:
def file_slicing(file_path):
    """Reduce one cab trace file to the rows where a trip starts or ends.

    The file is read as header-less, space-separated text with four columns:
    latitude, longitude, occupancy flag and Unix timestamp. Rows are ordered
    by timestamp, then each row is flagged as a pickup and/or a dropoff:

    * pickup  -- ``occupied`` is 1 and the previous row's was 0, or it is the
      first row and ``occupied`` is 1;
    * dropoff -- ``occupied`` is 1 and the next row's is 0, or it is the last
      row and ``occupied`` is 1.

    Rows that are both (a trip made of a single sample) and rows that are
    neither are discarded.

    Args:
        file_path: Path of the trace file to read.

    Returns:
        DataFrame with a fresh 0..n-1 index and the columns ``lat``, ``long``,
        ``unixtime``, ``dt_ts`` (``YYYY-MM-DD HH:MM:SS`` wall-clock string in
        America/Los_Angeles), ``is_pickup`` and ``is_dropoff``.
    """
    trace = pd.read_csv(file_path, sep=" ", header=None)
    trace.columns = ["lat", "long", "occupied", "unixtime"]

    # A stable sort keeps rows that share a timestamp in file order.
    trace = trace.sort_values(by="unixtime", kind="stable").reset_index(drop=True)

    # Vectorised conversion: epoch seconds -> UTC -> local wall-clock string.
    local_time = pd.to_datetime(trace["unixtime"], unit="s", utc=True).dt.tz_convert("America/Los_Angeles")
    trace["dt_ts"] = local_time.dt.strftime("%Y-%m-%d %H:%M:%S")

    occupied = trace["occupied"]
    # Comparisons against the NaN produced by shift() at either end are False.
    is_pickup = (occupied - occupied.shift(1)) == 1
    is_dropoff = (occupied - occupied.shift(-1)) == 1

    # A trace that begins (ends) while occupied has no neighbour to compare
    # with, so treat the boundary row itself as the pickup (dropoff).
    if occupied.iloc[0] == 1:
        is_pickup.iloc[0] = True
    if occupied.iloc[-1] == 1:
        is_dropoff.iloc[-1] = True

    trace["is_pickup"] = is_pickup
    trace["is_dropoff"] = is_dropoff

    # Keep rows that are exactly one of pickup / dropoff.
    exactly_one = is_pickup ^ is_dropoff
    return trace.loc[exactly_one].drop(columns="occupied").reset_index(drop=True)

In [5]:
def collapse_pick_drop(data_sliced, file_name):
    """Pair alternating pickup/dropoff rows into one row per trip.

    ``data_sliced`` is expected to hold pickup rows at even positions and
    dropoff rows at odd positions. When that holds, the two halves are joined
    side by side on their position.

    Args:
        data_sliced: DataFrame with at least the columns ``lat``, ``long``,
            ``dt_ts``, ``is_pickup`` and ``is_dropoff``.
        file_name: Label used only in the diagnostic messages.

    Returns:
        DataFrame with the columns ``lat_y``, ``long_y``, ``lat_x``,
        ``long_x``, ``dt_ts_y``, ``dt_ts_x`` (``_x`` comes from the pickup
        row, ``_y`` from the dropoff row), or ``None`` when the input is
        empty or its rows cannot be paired. A message is printed in every
        ``None`` case.
    """
    # Nothing to pair; positional access below would fail on an empty frame.
    if data_sliced.empty:
        print(file_name, ": no pickup/dropoff rows")
        return None

    pickup_flags = data_sliced['is_pickup']
    dropoff_flags = data_sliced['is_dropoff']

    if dropoff_flags.iloc[0]:  # first line is dropoff
        print(file_name, ": first line dropoff")
        return None
    if pickup_flags.iloc[-1]:  # last line is pickup
        print(file_name, ": last line pickup")
        return None
    if pickup_flags.sum() != dropoff_flags.sum():
        print(file_name, ": pickup dropoff # not match")
        return None

    pick = data_sliced.iloc[::2].reset_index(drop=True)
    dropoff = data_sliced.iloc[1::2].reset_index(drop=True)

    # Equal counts do not guarantee strict alternation, so verify it.
    if not (pick['is_pickup'].all() and dropoff['is_dropoff'].all()):
        print(file_name, ": pickup dropoff rows not alternating")
        return None

    data_joined = pick.merge(dropoff, left_index=True, right_index=True, how='inner')
    return data_joined[['lat_y', 'long_y', 'lat_x', 'long_x', 'dt_ts_y', 'dt_ts_x']]

In [6]:
# Collapse every raw trace under ./data/sf into one pickup/dropoff CSV per
# cab, written to an "output" folder next to the input files.
files = glob.glob('./data/sf/*.txt')

for file_path in files:

    file_name = os.path.basename(file_path).split('.')[0]
    print(file_name)
    dirname = os.path.dirname(file_path)

    # Derive the output folder from the input location and make sure it
    # exists, so a fresh checkout does not fail on the first write.
    output_dir = os.path.join(dirname, "output")
    os.makedirs(output_dir, exist_ok=True)
    new_file_path = os.path.join(output_dir, "{}_collapsed.csv".format(file_name))

    data_sliced = file_slicing(file_path)
    data_start_end = collapse_pick_drop(data_sliced, file_name)
    if data_start_end is not None:
        # to_csv opens and closes the path itself; no extra handle is needed.
        data_start_end.to_csv(new_file_path, sep=',', encoding='utf-8', index=False)

print("done")

new_adkavy
new_ackgrica
new_iafstnue
new_ogijtri
new_umhenwed
new_ovkojy
new_obheujvo
new_oygvar
new_ekfrab
new_ugatna
new_eoivqued
new_aldhidd
new_ajthof
new_iorjtwav
new_ikujfurk
new_upthin
new_ancedvab
new_eytups
new_amnurgji
new_ansyut
new_ujtosh
new_ainplin
new_iagods
new_isvayd
new_aupclik
new_askmecle
new_epkiapme
new_inlica
new_ogdygdyd
new_ubzachy
new_igvidth
new_ecdiwovu
new_ilkedve
new_ujhuki
new_epemvagu
new_imhacy
new_ichikiga
new_ojbaso
new_eggfrij
new_oiphye
new_uvreoipy
new_enjubpl
new_ankped
new_avpavi
new_ecgojtyt
new_ibflsruc
new_ugifmav
new_ioajdig
new_ikkimm
new_elbnaxa
new_owgves
new_ocjeng
new_avglybic
new_epabcadu
new_idvowwed
new_agcowktu
new_ibgryk
new_iatmeuns
new_oadwowd
new_eydadgio
new_oilrag
new_ucdewy
new_idtwal
new_odoywug
new_inshfola
new_eufdod
new_ictmuog
new_efgoaku
new_edodblea
new_ebstic
new_owufrey
new_ayshowg
new_acgerl
new_aggjuo
new_omdrid
new_afsfat
new_ellimtbu
new_ojumna
new_atidfi
new_ifragcic
new_odlorhem
new_aviltly
new_okblahed
new_eshr