In [1]:
import random
import urllib.parse
import urllib.request
from datetime import datetime, timedelta

import geopandas as gpd
import pandas as pd
from shapely import Point

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Read in days from GTFS

# Column 1 of calendar_dates.txt is parsed as datetimes; it is accessed below as 'date'.
gtfs_dates = pd.read_csv('data/tfwm_gtfs/calendar_dates.txt', parse_dates=[1])
# Number of calendar_dates rows per date, ordered chronologically.
date_counts = gtfs_dates['date'].value_counts().sort_index()

# Filter 15th March to 15th April 2024 (label slicing on the sorted date index
# includes both end points).
# The start year was previously 2023, which produced a 13-month window that
# contradicted this comment and the 2024-only bank-holiday list below.
start_date = '2024-03-15'
end_date = '2024-04-15'

date_counts = pd.DataFrame(date_counts.loc[start_date:end_date])

# True for Monday-Friday (DatetimeIndex.weekday is 0 for Monday ... 6 for Sunday).
date_counts['weekday'] = date_counts.index.weekday < 5

# Dates inside the window that must not be treated as normal working days.
bank_holidays = ['2024-03-29', '2024-04-01']
date_counts['bank_holiday'] = date_counts.index.isin(pd.to_datetime(bank_holidays))

In [3]:
# Get random date

# Draw one row at random from the days flagged as weekdays and not flagged as
# bank holidays; only its index (the date) is kept.
rand_date = date_counts.loc[date_counts['weekday'] & ~date_counts['bank_holiday']].sample(n=1).index

In [10]:
# Deterministic pick: the first weekday in date_counts that is not a bank holiday.
rand_date = date_counts.loc[date_counts['weekday'] & ~date_counts['bank_holiday']].head(1).index

In [4]:
# Get OAs

# Load the OA shapefile and keep only rows whose LAD11CD equals 'E08000026'.
wm_oas = gpd.read_file('data/west_midlands_OAs/west_midlands_OAs.shp').loc[
    lambda oas: oas['LAD11CD'] == 'E08000026'
]

# Restrict the OA attribute table to the OAs retained above (inner join on the OA code).
oa_info = pd.read_csv('data/oa_info.csv').merge(
    wm_oas[['OA11CD']],
    how='inner',
    left_on='oa_id',
    right_on='OA11CD',
)

# Identifier plus coordinates for each retained OA.
oaLatLon = oa_info.loc[:, ['oa_id', 'oa_lon', 'oa_lat']]

In [5]:
# Get POIs

pois = pd.read_csv('data/POIs/pois.csv', index_col=0)

# Select local POIs: a POI is kept when its (lon, lat) point intersects at least
# one OA polygon in wm_oas.
# The intersection test is vectorised over all OA geometries per POI instead of
# a nested Python loop over every (POI, OA) pair; each poi_id is recorded once
# even if the point touches several OAs.
oa_geometries = wm_oas['geometry']
poisInRegion = [
    poi_id
    for poi_id, poi_lon, poi_lat in zip(pois['poi_id'], pois['poi_lon'], pois['poi_lat'])
    if oa_geometries.intersects(Point(poi_lon, poi_lat)).any()
]

pois = pois[pois['poi_id'].isin(poisInRegion)]

In [6]:
# Get Time Stamps / Time Interval

# One entry per time stratum: the window's start and end clock time (hour and
# minute) and a day label.
stratumDict = {
    'amPeak': dict(startHour=6, startMinute=30, endHour=8, endMinute=30, day='tues'),
    'interPeak': dict(startHour=11, startMinute=0, endHour=16, endMinute=0, day='tues'),
    'pmPeak': dict(startHour=16, startMinute=30, endHour=18, endMinute=30, day='tues'),
    'Saturday': dict(startHour=10, startMinute=0, endHour=18, endMinute=0, day='sat'),
}

In [8]:
stratum = 'amPeak'

# Create Time Domain
window = stratumDict[stratum]
startHour, startMinute = window['startHour'], window['startMinute']
endHour, endMinute = window['endHour'], window['endMinute']

# The calendar date is a fixed placeholder; only the clock time is used below.
start = datetime(2012, 2, 25, startHour, startMinute)
end = datetime(2012, 2, 25, endHour, endMinute)
diff = end - start
minutesInInterval = diff.total_seconds() / 60
hoursInInterval = minutesInInterval / 60

# 300 random 'HH:MM' departure times, each 1..minutesInInterval whole minutes
# after the window start (so the start itself is never drawn, the end can be).
timeDomain = [
    (start + timedelta(minutes=random.randint(1, int(minutesInInterval)))).strftime('%H:%M')
    for _ in range(300)
]

In [10]:
import csv
import os, urllib, json, csv, zipfile, math

In [13]:
# Query a local OpenTripPlanner instance for every sampled (OA, POI, time)
# combination and write one CSV row per trip for which an itinerary is found.
# Combinations without an itinerary are collected in failed_routes instead.

oa_sample = random.sample(list(oaLatLon.index), 2)
poi_sample = pois[pois['type']=='Vaccination Centre'].index
time_sample = random.sample(range(len(timeDomain)), 3)

print('Num Rows : {}'.format(len(oa_sample) * len(poi_sample) * len(time_sample)))

otp_url = 'http://localhost:8080/otp/routers/default/plan?'

# Fare model used below: 2.4 per boarding, with (transfers + 1) boardings on any
# itinerary that includes transit. NOTE(review): currency/unit not shown here.
FARE_PER_BOARDING = 2.4

trip_id = 0
itin_id = 0
trip_date = rand_date[0].strftime('%m/%d/%Y')

failed_routes_list = []

# The `with` block closes the file even if a request fails part-way through;
# newline='' is what the csv module expects for files handed to csv.writer.
with open('tempdata/tripscosts_otp2.csv', 'w', newline='') as output_file:
    writer = csv.writer(output_file)
    writer.writerow(['trip_id','date','time','oa_id','poi_id','itiniery_id','duration','walk_time','wait_time','transit_time','initial_wait_time','transfers','fare'])

    for oa_ind in oa_sample:
        next_oa = oaLatLon.loc[oa_ind]
        for poi_ind in poi_sample:
            next_poi = pois.loc[poi_ind]
            for t_ind in time_sample:
                trip_time = timeDomain[t_ind]

                params = {
                    'date': trip_date,
                    'time': trip_time,
                    'fromPlace': '%s,%s' % (next_oa['oa_lat'], next_oa['oa_lon']),
                    'toPlace': '%s,%s' % (next_poi['poi_lat'], next_poi['poi_lon']),
                    'mode': 'WALK,TRANSIT',
                    'arriveBy': 'false',
                    'numItineraries': '1',
                    'searchWindow': 300,
                    'maxWalkDistance': 2000,
                }

                req = urllib.request.Request(otp_url + urllib.parse.urlencode(params))
                req.add_header('Accept', 'application/json')

                with urllib.request.urlopen(req) as response:
                    objs = json.loads(response.read())

                # A response with no 'plan' or with an empty itinerary list is
                # treated as a failed route.
                itineraries = (objs.get('plan') or {}).get('itineraries') or []
                if not itineraries:
                    failed_routes_list.append({
                        'oa': next_oa['oa_id'],
                        'poi': next_poi['poi_id'],
                        'time': trip_time,
                    })
                    # Skip the row: previously execution fell through and reused
                    # the itinerary of an earlier request (or crashed).
                    continue

                itinerary = itineraries[0]

                if itinerary['transitTime'] == 0:
                    fare = 0
                else:
                    fare = (itinerary['transfers'] + 1) * FARE_PER_BOARDING

                # Seconds between the requested departure time and the
                # itinerary's actual start time.
                # 'startTime' is divided by 1000 before fromtimestamp, i.e. it is
                # handled as epoch milliseconds.
                # NOTE(review): fromtimestamp uses this machine's local timezone;
                # confirm it matches the timezone of the OTP graph.
                query_time = datetime.strptime(trip_time, '%H:%M').time()
                departure_time = datetime.fromtimestamp(float(itinerary['startTime']) / 1000).time()
                ref_day = datetime.today().date()
                initial_wait_time = (
                    datetime.combine(ref_day, departure_time) - datetime.combine(ref_day, query_time)
                ).total_seconds()

                # Progress marker every 500 written trips.
                if trip_id % 500 == 0:
                    print(trip_id)

                writer.writerow([
                    trip_id, trip_date, trip_time,
                    next_oa['oa_id'], next_poi['poi_id'], itin_id,
                    itinerary['duration'], itinerary['walkTime'], itinerary['waitingTime'],
                    itinerary['transitTime'], initial_wait_time, itinerary['transfers'], fare,
                ])
                trip_id += 1

# Explicit columns keep the frame's shape stable even when no route failed.
failed_routes = pd.DataFrame(failed_routes_list, columns=['oa', 'poi', 'time'])

Num Rows : 66
0


In [12]:
# Display the (oa, poi, time) combinations recorded as failed by the routing loop.
failed_routes

In [136]:
#Gen access cost
#(( 1.5 * (labelledTrips['total_time'])) - (0.5 * labelledTrips['transit_time']) + ((labelledTrips['fare'] * 3600) / 6.7) + (10 * labelledTrips['num_transfers'])) / 60