In [1]:
%matplotlib notebook
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import xml.etree.ElementTree as ET
from IPython.display import Image
import random

In [6]:
# trip distribution by numbers
# merged_data.groupby(by=['COUNTY_NM', 'dest_TAZ'])['demand'].agg('sum')

In [7]:
# trip distribution by percentages
# merged_data.groupby(by=['COUNTY_NM', 'dest_TAZ'])['demand'].agg('sum')/merged_data.demand.sum() *100

COUNTY_NM  dest_TAZ 
King       King         50.854349
           Kitsap        0.046566
           Pierce        1.924806
           Snohomish     2.719326
Kitsap     King          0.044790
           Kitsap        5.885178
           Pierce        0.269283
           Snohomish     0.004914
Pierce     King          1.920831
           Kitsap        0.269929
           Pierce       17.268226
           Snohomish     0.010494
Snohomish  King          2.715734
           Kitsap        0.005005
           Pierce        0.010817
           Snohomish    16.049753
Name: demand, dtype: float64

1- kitsap-kitsap/pierce trips are not relevant to king county, and percent of trips between king/snohomish and kitsap is 0.046566+ 0.044790+0.004914+0.005005 = 0.01 % 
- therefore we fully eliminate trips to/from kitsap 

2- snohomish-pierce and pierce-snohomish that pass trough king county are negligible (0.010817+0.010494 = 0.02%)
- we eliminate trips between Snohomish and Pierce county

3- trips within pierce and snohomish are not relevant to king county
- we eliminate trips within Snohomish and Pierce county

### therefore, we only keep trips 1- within king county, 2- between king and pierce, and 3- between king and Snohomish



## For Pierce and Snohomish, 
it seems that majority of trips take place either between pierce and south of downtown OR snohomish and north of downtown, so what we do is keeping only the trips between snohomish/pierce and sumo TAZs

### final stats before further filterations



In [101]:
# filtered_data.groupby(by=['COUNTY_NM', 'dest_TAZ'])['demand'].agg('sum')

COUNTY_NM  dest_TAZ 
King       King         5040054
           Pierce        190763
           Snohomish     269506
Pierce     King            7352
Snohomish  King           18304
Name: demand, dtype: int64

In [10]:
# filtered_data.groupby(by=['COUNTY_NM', 'dest_TAZ'])['demand'].agg('sum')/filtered_data.demand.sum() *100

COUNTY_NM  dest_TAZ 
King       King         84.566906
           Pierce        3.200806
           Snohomish     4.522033
Pierce     King          3.194195
Snohomish  King          4.516059
Name: demand, dtype: float64

### Selecting TAZs and working with them in this notebook:
we run the code below in qgis python plugin:

<pre>
dir = '/Users/stlp/Library/CloudStorage/GoogleDrive-soheil99@uw.edu/My Drive/reasearch codes/SUMO/MatSumo-main/SUMO/demand generation/regions'
os.chdir(dir)
def get_selected_TAZs(address):
    layer = QgsProject.instance().mapLayersByName("taz2010")[0]
    selected_features = layer.selectedFeatures()
    attribute_list = [feature['TAZ'] for feature in selected_features]
    with open(address, 'w') as f:
        f.write(str(attribute_list))
        f.close()
</pre>
we select some regions in taz2010.shp and then use the command above like: `get_selected_TAZ('/1.txt')`
and read selected regions in qgis here using the function below:

In [2]:
def _read_region(address):
    with open(address, 'r') as f:
        content = f.read()
        data_list = eval(content)
    return data_list

def read_regions(address):
    if type(address)==list:
        outputs = []
        for adrs in address:
            outputs.append(_read_region(adrs))
        return outputs
    else:
        return _read_region(address)
            
#     with open(address, 'r') as f:
#         content = f.read()
#         data_list = eval(content)
#     return data_list

# Conversion algorithm
So far, I was trying to eliminate useless trips. There are some trips that we ARE going to use: trips with sumo_TAZs in one end (without over-3700 TAZs or kitsap TAZs)

I think the best way to deal with these trips is by starting with trips involving sumo_TAZs and then gradually add different scenarios in which vehicles will pass through sumo_TAZs.


In [3]:
# Load shapefiles
taz_data = gpd.read_file('taz2010.shp')

# Load CSV file
trip_data = pd.read_csv("psrc_vehicle_trips.csv")

# load sumo tazs
tree = ET.parse('modified_new_Taz_with_pseudo.add.xml')
root = tree.getroot()
sumo_taz = []
for taz in root.findall('taz'):
    tazid = taz.get('id')
    if tazid != 'taz_13':
        sumo_taz.append(int(tazid))

In [4]:
# we obtain list of TAZs for each of counties so that we can use them during filtering single trips dataframe
king = list(taz_data[taz_data['COUNTY_NM']=='King'].TAZ.values)
pierce = list(taz_data[taz_data['COUNTY_NM']=='Pierce'].TAZ.values)
kitsap = list(taz_data[taz_data['COUNTY_NM']=='Kitsap'].TAZ.values)
snohomish = list(taz_data[taz_data['COUNTY_NM']=='Snohomish'].TAZ.values)

In [5]:
len(trip_data)

9985425

Eliminate over-3700s, Kitsap, and port trips

In [6]:
trip_data = trip_data[~(trip_data.otaz.isin(kitsap)|trip_data.dtaz.isin(kitsap))]
trip_data = trip_data[~((trip_data.otaz>3700)|(trip_data.dtaz>3700))]
ports = [444, 445, 449, 478]
trip_data = trip_data[~(trip_data.otaz.isin(ports)|trip_data.dtaz.isin(ports))]
len(trip_data)

9250192

## TAZ conversion for sumo_TAZs included trips

In [7]:
trip_data['original_o'] = trip_data.otaz
trip_data['original_d'] = trip_data.dtaz
trip_data['original_deptm'] = trip_data.deptm
# adding more columns will increase computation time in next steps

In [8]:
converted_data = trip_data[trip_data.otaz.isin(sumo_taz) | trip_data.dtaz.isin(sumo_taz)]
len(converted_data)

608908

In [9]:
duplicate_rows = trip_data[trip_data.duplicated(subset=['otaz', 'dtaz', 'deptm'], keep=False)]
print(duplicate_rows)

         otaz  dtaz  deptm  travtime  original_o  original_d  original_deptm
126         1     5    482         5           1           5             482
127         1     5    482         6           1           5             482
1244        2     1   1107         2           2           1            1107
1245        2     1   1107         5           2           1            1107
1290        2     2   1018         4           2           2            1018
...       ...   ...    ...       ...         ...         ...             ...
9331718  3466  3466   1013         3        3466        3466            1013
9331724  3466  3466   1040        22        3466        3466            1040
9331725  3466  3466   1040         1        3466        3466            1040
9331739  3466  3466   1123        52        3466        3466            1123
9331740  3466  3466   1123        16        3466        3466            1123

[567030 rows x 7 columns]


### for Pierce and Snohomish trips
Based on map of highways, we can assign pierce to I5-south (5002, 5003) and snohomish to I5-north (5000, 5001).  It is not exact and there could be many many trips passing through bellevue instead of I-5.

In [10]:
def random_split_dataframe(df, percentage):
    if percentage < 0 or percentage > 100:
        raise ValueError("Percentage should be between 0 and 100")

    # Shuffle the DataFrame to randomize the order
    df = df.sample(frac=1, random_state=1)

    # Calculate the number of rows for the first part (X%)
    num_rows = len(df)
    split_index = int(num_rows * percentage / 100)

    # Split the DataFrame into two parts
    part1 = df.iloc[:split_index, :]
    part2 = df.iloc[split_index:, :]

    return part1, part2

In [11]:
# interval = 15
def get_intra_TAZ_traveltime(o_list, d_list, interval=15, exact_time = None):
    if exact_time is None:
        trips = trip_data[trip_data.otaz.isin(o_list) & trip_data.dtaz.isin(d_list)]
        average = round(trips.travtime.mean())
        time_offsets = list()
        for start in range(0,1440, interval):
            end = start+interval
            interval_trips = trips[trips.deptm.isin(range(start, end))]
            average_time = interval_trips.travtime.mean()
            if pd.isna(average_time):
                time_offsets.append(average)
            else:
                time_offsets.append(round(average_time))
        return time_offsets
    else:
        trips = trip_data[trip_data.otaz.isin(o_list) & 
                          trip_data.dtaz.isin(d_list) &
                          trip_data.deptm.isin(range(int(exact_time-interval/2), int(exact_time+interval/2)+1))]

        average = trips.travtime.mean()
        return average

In [12]:
def get_random_TAZ(taz_list):
    if type(taz_list)==list:
        return random.choice(taz_list)
    else:
        return taz_list

In [13]:
def convert1(invalid_TAZs, near_pseudo_TAZs, pseudo_taz_in, pseudo_taz_out, df=None):
    if df is None:
            df = converted_data[converted_data.otaz.isin(invalid_TAZs)|converted_data.dtaz.isin(invalid_TAZs)]
    invalid_to_near_tt = get_intra_TAZ_traveltime(invalid_TAZs, near_pseudo_TAZs)
#     print(f"it'll take {len(df)} iterations.")
    for index, row in tqdm(df.iterrows()):
        interval = int(row.deptm//15)
        if row.otaz in invalid_TAZs:
            row.otaz = get_random_TAZ(pseudo_taz_in)
            row.deptm += invalid_to_near_tt[interval]
        elif row.dtaz in invalid_TAZs:
            row.dtaz = get_random_TAZ(pseudo_taz_out)
        converted_data.loc[index, :] = row 

In [14]:
sumo_s = read_regions('regions/sumo_south.txt')
sumo_n = [taz for taz in sumo_taz if taz not in sumo_s]
sumo_e = read_regions('regions/sumo_east.txt') 
sumo_w = [taz for taz in sumo_taz if taz not in sumo_e]
near_i90 = read_regions('regions/near_i90.txt')

In [15]:
Image(url="regions/i5.png", width=300, height=300)

In [16]:
near_i5n=[381, 382, 406, 407, 428, 430]
near_i5s=[649, 650, 657, 669, 670] 
nearSR99n = read_regions('regions/near_SR99N.txt')
near6001 = read_regions('regions/near6001.txt')

In [17]:
convert1(pierce, near_i5s, 5002, 5003)

14553it [00:20, 713.92it/s]


##### new stuff
--------

In [18]:
# convert1(snohomish, near_i5n, 5000, 5001)
# it'll take 36268 iterations

In [18]:
def north_trips_handler(invalid_TAZs):
    df = converted_data[converted_data.otaz.isin(invalid_TAZs)|converted_data.dtaz.isin(invalid_TAZs)]
    print(f'total trips: {len(df)}')
    # if trips go to north1
    north1=[426, 427, 428, 429]
    df1 = df[df.otaz.isin(north1)|df.dtaz.isin(north1)]
    convert1(invalid_TAZs, near_i5n, [5000, 7001], 5001, df=df1)

    # if trips go to north2
    north21=[430, 432, 434]
    df1 = df[df.otaz.isin(north21)|df.dtaz.isin(north21)]
    convert1(invalid_TAZs, near_i5n, 7002, 7002, df=df1)

    north22 = [431, 433, 435]
    df1 = df[df.otaz.isin(north22)|df.dtaz.isin(north22)]
    convert1(invalid_TAZs, near_i5n, [5000, 7002], 7002, df=df1)


    # if trips go to north3
    north3=[436, 437, 438, 439, 440, 441, 442, 443]
    df1 = df[df.otaz.isin(north3)|df.dtaz.isin(north3)]
    convert1(invalid_TAZs, nearSR99n, [5010, 7003], [5011, 7003], df=df1)

    # if trips go to downtown north west
    df = converted_data[converted_data.otaz.isin(invalid_TAZs)|converted_data.dtaz.isin(invalid_TAZs)]
    df1 = df[df.otaz.isin(sumo_w)|df.dtaz.isin(sumo_w)]
    df1 = df1[df1.otaz.isin(sumo_n)|df1.dtaz.isin(sumo_n)]
    
    df25, df75 = random_split_dataframe(df1, 25)
    convert1(invalid_TAZs, nearSR99n, 5010, 5011, df=df25)
    convert1(invalid_TAZs, near_i5n, 5000, 5001, df=df75)
    

    # other trips 
    convert1(invalid_TAZs, near_i5n, 5000, 5001)

In [19]:
# invalid_TAZs = [337, 338, 356, 358,359]
invalid_TAZs = [358,359]

converted_data[converted_data.otaz.isin(invalid_TAZs)|converted_data.dtaz.isin(invalid_TAZs)]

Unnamed: 0,otaz,dtaz,deptm,travtime,original_o,original_d,original_deptm
753020,358,426,563,13,358,426,563
753021,358,426,577,13,358,426,577
753022,358,426,940,12,358,426,940
753023,358,426,1069,13,358,426,1069
753024,358,427,568,13,358,427,568
...,...,...,...,...,...,...,...
1361144,653,358,886,13,653,358,886
1361145,653,358,1013,16,653,358,1013
1361146,653,358,1103,14,653,358,1103
1361147,653,358,1115,14,653,358,1115


In [20]:
north_trips_handler(snohomish)

total trips: 36268


795it [00:01, 712.72it/s]
3634it [00:04, 726.92it/s]
2694it [00:03, 760.67it/s]
2127it [00:02, 763.64it/s]
2969it [00:03, 760.94it/s]
8910it [00:11, 767.85it/s]
15139it [00:19, 769.51it/s]


In [21]:
def northeast_trips_handler(invalid_TAZs):
    df = converted_data[converted_data.otaz.isin(invalid_TAZs)|converted_data.dtaz.isin(invalid_TAZs)]
    print(f'total trips: {len(df)}')
    # if trips go to north1
    north1=[426, 427, 428, 429]
    df1 = df[df.otaz.isin(north1)|df.dtaz.isin(north1)]
    convert1(invalid_TAZs, near_i5n, [5000, 7001], 5001, df=df1)

    # if trips go to north2
    north21=[430, 432, 434]
    df1 = df[df.otaz.isin(north21)|df.dtaz.isin(north21)]
    convert1(invalid_TAZs, near_i5n, 7002, 7002, df=df1)

    north22 = [431, 433, 435]
    df1 = df[df.otaz.isin(north22)|df.dtaz.isin(north22)]
    convert1(invalid_TAZs, near_i5n, [5000, 7002], 7002, df=df1)


    # if trips go to north3
    north3=[436, 437, 438, 439, 440, 441, 442, 443]
    df1 = df[df.otaz.isin(north3)|df.dtaz.isin(north3)]
    convert1(invalid_TAZs, nearSR99n, 7003, 7003, df=df1)

    # other trips 
    convert1(invalid_TAZs, near_i5n, 5000, 5001)

In [22]:
def northwest_trips_handler(invalid_TAZs):
    df = converted_data[converted_data.otaz.isin(invalid_TAZs)|converted_data.dtaz.isin(invalid_TAZs)]
    print(f'total trips: {len(df)}')
    # if trips go to north1
    north1=[426, 427, 428, 429]
    df1 = df[df.otaz.isin(north1)|df.dtaz.isin(north1)]
    convert1(invalid_TAZs, near_i5n+nearSR99n, [5010, 7001], [5001, 5011], df=df1)

    # if trips go to north21
    north21=[430, 432, 434]
    df1 = df[df.otaz.isin(north21)|df.dtaz.isin(north21)]
    convert1(invalid_TAZs, nearSR99n, 5010, [5011, 7002], df=df1)

    # if trips go to north32
    north3=[440, 441, 442, 443]
    df1 = df[df.otaz.isin(north3)|df.dtaz.isin(north3)]
    convert1(invalid_TAZs, nearSR99n+near6001, [5010, 6001], [5011, 6001], df=df1)

    # if trips go to downtown east
    df = converted_data[converted_data.otaz.isin(invalid_TAZs)|converted_data.dtaz.isin(invalid_TAZs)]
    df1 = df[df.otaz.isin(sumo_e)|df.dtaz.isin(sumo_e)]
    convert1(invalid_TAZs, nearSR99n, 5010, 5001, df=df1)

    # other trips 
    convert1(invalid_TAZs, nearSR99n, 5010, 5011)

### for trips within king county

In [23]:
Image(url="regions/version2/north1.png", width=300, height=300)

In [24]:
north_trips_handler(read_regions('regions/version2/north1.txt'))

total trips: 11749


301it [00:00, 609.78it/s]
1131it [00:01, 641.38it/s]
922it [00:01, 693.95it/s]
745it [00:01, 604.52it/s]
1019it [00:01, 611.96it/s]
3060it [00:04, 761.59it/s]
4571it [00:06, 758.80it/s]


In [25]:
Image(url="regions/version2/north2.png", width=300, height=300)

In [26]:
north_trips_handler(read_regions('regions/version2/north2.txt'))

total trips: 3878


88it [00:00, 746.26it/s]
453it [00:00, 718.26it/s]
363it [00:00, 761.54it/s]
183it [00:00, 769.77it/s]
342it [00:00, 759.64it/s]
1028it [00:01, 751.57it/s]
1421it [00:01, 767.19it/s]


In [27]:
Image(url="regions/version2/north3.png", width=300, height=300)

In [28]:
north_trips_handler(read_regions('regions/version2/north3.txt'))

total trips: 15273


399it [00:00, 726.00it/s]
1499it [00:01, 765.23it/s]
1269it [00:01, 765.69it/s]
913it [00:01, 781.25it/s]
1324it [00:01, 771.93it/s]
3974it [00:05, 774.15it/s]
5895it [00:07, 775.52it/s]


In [29]:
Image(url="regions/version2/north4.png", width=300, height=300)

In [30]:
north_trips_handler(read_regions('regions/version2/north4.txt'))

total trips: 11794


330it [00:00, 757.75it/s]
1367it [00:01, 762.17it/s]
1097it [00:01, 764.16it/s]
634it [00:00, 749.11it/s]
1011it [00:01, 756.97it/s]
3034it [00:04, 738.76it/s]
4321it [00:05, 761.25it/s]


In [31]:
Image(url="regions/version2/north5.png", width=300, height=300)

In [32]:
northeast_trips_handler(read_regions('regions/version2/north5.txt'))

total trips: 12334


332it [00:00, 750.98it/s]
1515it [00:01, 771.42it/s]
1227it [00:01, 777.95it/s]
631it [00:00, 769.30it/s]
8629it [00:11, 759.02it/s]


In [33]:
Image(url="regions/version2/north6.png", width=300, height=300)

In [34]:
north_trips_handler(read_regions('regions/version2/north6.txt'))

total trips: 21517


1115it [00:01, 721.67it/s]
2650it [00:03, 735.43it/s]
2097it [00:02, 771.43it/s]
1173it [00:01, 763.14it/s]
1732it [00:02, 764.72it/s]
5199it [00:06, 766.84it/s]
7551it [00:09, 766.57it/s]


In [35]:
Image(url="regions/version2/north7.png", width=300, height=300)

In [36]:
northwest_trips_handler(read_regions('regions/version2/north7.txt')+[337, 338, 356])

total trips: 19859


490it [00:00, 751.20it/s]
2875it [00:03, 764.02it/s]
838it [00:01, 748.19it/s]
2508it [00:03, 764.45it/s]
13148it [00:17, 771.55it/s]


In [37]:
Image(url="regions/version2/north8.png", width=300, height=300)

In [38]:
northwest_trips_handler(read_regions('regions/version2/north8.txt'))

total trips: 9358


152it [00:00, 726.99it/s]
1074it [00:01, 758.86it/s]
449it [00:00, 757.74it/s]
1241it [00:01, 692.71it/s]
6442it [00:08, 739.40it/s]


In [39]:
Image(url="regions/version2/north9.png", width=300, height=300)

In [40]:
invalid_TAZs = read_regions('regions/version2/north9.txt')
df = converted_data[converted_data.otaz.isin(invalid_TAZs)|converted_data.dtaz.isin(invalid_TAZs)]
print(f'total trips: {len(df)}')

# if trips go to north32
north32=[440, 441, 442, 443]
df1 = df[df.otaz.isin(north32)|df.dtaz.isin(north32)]
convert1(invalid_TAZs, near6001, 6001, 6001, df=df1)

# if trips go to downtown south east
df1 = df[df.otaz.isin(sumo_s)|df.dtaz.isin(sumo_s)]
df1 = df1[df1.otaz.isin(sumo_e)|df1.dtaz.isin(sumo_e)]
convert1(invalid_TAZs, near_i5n, 5000, 5001, df=df1)

# other trips 
df = converted_data[converted_data.otaz.isin(invalid_TAZs)|converted_data.dtaz.isin(invalid_TAZs)]
df20, df80 = random_split_dataframe(df, 20)
convert1(invalid_TAZs, nearSR99n, 5010, 5011, df20)
convert1(invalid_TAZs, near6001, 6001,  6001, df80)

total trips: 12236


867it [00:01, 734.95it/s]
787it [00:01, 756.13it/s]
2116it [00:02, 764.09it/s]
8466it [00:11, 766.29it/s]


##### Old stuff
---------

In [41]:
Image(url="regions/north2.png", width=300, height=300)

In [42]:
north2 = read_regions('regions/north2.txt')+[358,359]
boundary = read_regions('regions/north2_boundary.txt')
# convert1(north2, boundary, boundary, boundary)
convert1(north2, boundary, 7002, 7002)

17971it [00:23, 758.48it/s]


In [43]:
Image(url="regions/SR99N.png", width=300, height=300)

In [44]:
SR99n = read_regions('regions/SR99N.txt')
nearSR99n = read_regions('regions/near_SR99N.txt')

In [45]:
convert1(SR99n, nearSR99n, 5010, 5011)

0it [00:00, ?it/s]


 --------

In [46]:
Image(url="regions/i5n1.png", width=300, height=300)

In [47]:
i5n1 = read_regions('regions/i5n1.txt')

In [48]:
convert1(i5n1, near_i5n, 5000, 5001)

0it [00:00, ?it/s]


___

In [49]:
Image(url="regions/i5n2.png", width=300, height=300)

In [50]:
i5n2 = read_regions('regions/i5n2.txt')
convert1(i5n2, near_i5n, 5000, 5001)

0it [00:00, ?it/s]


--------

In [51]:
Image(url="regions/west_king1.png", width=600, height=300)

In [52]:
west_king1 = read_regions('regions/west_king1.txt')
df = converted_data[converted_data.otaz.isin(west_king1)|converted_data.dtaz.isin(west_king1)]

In [53]:
# if trips go to upper downtown:
df1 = df[df.otaz.isin(sumo_n)|df.dtaz.isin(sumo_n)]
convert1(west_king1, near_i5n, 5000, 5001, df=df1)

7327it [00:09, 744.02it/s]


In [54]:
# if trips go to lower downtown:
df2 = df[df.otaz.isin(sumo_s)|df.dtaz.isin(sumo_s)]
convert1(west_king1, near_i90, 5004, 5005, df=df2)

3881it [00:05, 748.46it/s]


---------

In [55]:
Image(url="regions/west_king2.png", width=600, height=300)

In [56]:
west_king2 = read_regions('regions/west_king2.txt')
df = converted_data[converted_data.otaz.isin(west_king2)|converted_data.dtaz.isin(west_king2)]

In [57]:
# if trips go to upper downtown:
df1 = df[df.otaz.isin(sumo_n)|df.dtaz.isin(sumo_n)]
convert1(west_king2, near_i5n, 5000, 5001, df=df1)

11223it [00:14, 767.20it/s]


In [58]:
# if trips go to lower downtown:
df2 = df[df.otaz.isin(sumo_s)|df.dtaz.isin(sumo_s)]
convert1(west_king2, near_i90, 5004, 5005, df=df2)

6167it [00:08, 765.85it/s]


-----

In [59]:
Image(url="regions/west_king3.png", width=500, height=300)

In [60]:
west_king3 = read_regions('regions/west_king3.txt')
convert1(west_king3, near_i90, 5004, 5005)

33163it [00:43, 760.69it/s]


-----

In [61]:
Image(url="regions/west_king4.png", width=600, height=300)

In [62]:
west_king4 = read_regions('regions/west_king4.txt')
convert1(west_king4, near_i90, 5004, 5005)

11397it [00:15, 759.10it/s]


-----

In [63]:
Image(url="regions/west_king5.png", width=600, height=300)

In [64]:
west_king5 = read_regions('regions/west_king5.txt')
convert1(west_king5, near_i5s, 5002, 5003)

4920it [00:06, 758.93it/s]


-----

In [65]:
Image(url="regions/west_king6.png", width=300, height=300)

In [66]:
west_king6 = read_regions('regions/west_king6.txt')
convert1(west_king6, near_i5s, 5002, 5003)

17509it [00:23, 757.47it/s]


--------

In [67]:
Image(url="regions/vashon.png", width=300, height=300)

In [68]:
vashon = read_regions('regions/vashon.txt')
nearsr99s = read_regions('regions/near_SR99S.txt')
convert1(vashon, nearsr99s, 5008, 5009)

1206it [00:01, 744.54it/s]


------

In [69]:
Image(url="regions/southwest1.png", width=300, height=300)

In [70]:
sw1 = read_regions('regions/southwest1.txt')
df = converted_data[converted_data.otaz.isin(sw1)|converted_data.dtaz.isin(sw1)]

In [71]:
# if trips go to west downtown:
df1 = df[df.otaz.isin(sumo_w)|df.dtaz.isin(sumo_w)]
convert1(sw1, nearsr99s, 5008, 5009, df=df1)

19529it [00:25, 763.83it/s]


In [72]:
# if trips go to east downtown:
df2 = df[df.otaz.isin(sumo_e)|df.dtaz.isin(sumo_e)]
convert1(sw1, near_i5s, 5002, 5003, df=df2)

4512it [00:05, 764.57it/s]


In [73]:
Image(url="regions/southwest2.png", width=300, height=300)


In [74]:
sw2 = read_regions('regions/southwest2.txt')
df = converted_data[converted_data.otaz.isin(sw2)|converted_data.dtaz.isin(sw2)]

In [75]:
# if trips go to west downtown:
df1 = df[df.otaz.isin(sumo_w)|df.dtaz.isin(sumo_w)]
convert1(sw2, nearsr99s, 5008, 5009, df=df1)

30346it [00:39, 762.66it/s]


In [76]:
# if trips go to east downtown:
df2 = df[df.otaz.isin(sumo_e)|df.dtaz.isin(sumo_e)]
convert1(sw2, near_i5s, 5002, 5003, df=df2)

6100it [00:08, 753.21it/s]


----

In [77]:
Image(url="regions/i5s11.png", width=300, height=1000)

In [78]:
i5s1 = read_regions('regions/i5s1.txt')
convert1(i5s1, near_i5s, 5002, 5003)

26281it [00:34, 764.28it/s]


In [79]:
Image(url="regions/i5s2.png", width=300, height=300)

In [80]:
i5s2 = read_regions('regions/i5s2.txt')
convert1(i5s2, near_i5s, 5002, 5003)

14231it [00:19, 717.92it/s]


-----

In [81]:
Image(url="regions/southeast.png", width=400, height=300)

In [82]:
se = read_regions('regions/southeast.txt')

In [83]:
# if trips go to east downtown:
df = converted_data[converted_data.otaz.isin(se)|converted_data.dtaz.isin(se)]
df = df[df.otaz.isin(sumo_e)|df.dtaz.isin(sumo_e)]
convert1(se, [636], 636, 636, df=df)

4056it [00:05, 719.91it/s]


In [84]:
# if trips go to south downtown:
df = converted_data[converted_data.otaz.isin(se)|converted_data.dtaz.isin(se)]
df = df[df.otaz.isin(sumo_s)|df.dtaz.isin(sumo_s)]
convert1(se, [650], 650, 650, df=df)

5070it [00:06, 737.73it/s]


In [85]:
# if trips go to north downtown:
df = converted_data[converted_data.otaz.isin(se)|converted_data.dtaz.isin(se)]
df = df[df.otaz.isin(sumo_n)|df.dtaz.isin(sumo_n)]
convert1(se, near_i5s, 5002, 5003, df=df)

6196it [00:08, 761.48it/s]


----

In [86]:
Image(url="regions/east1.png", width=400, height=300)

In [87]:
east1 = read_regions('regions/east1.txt')
boundary = read_regions('regions/east1_boundary.txt')
# convert1(east1, boundary, boundary, boundary)
convert1(east1, boundary, 7004, 7004)

33580it [00:43, 763.49it/s]


----

In [88]:
Image(url="regions/east2.png", width=400, height=300)

In [89]:
east2 = read_regions('regions/east2.txt')
boundary = read_regions('regions/east2_boundary.txt')
# convert1(east2, boundary, boundary, boundary)
convert1(east2, boundary, 7005, 7005)

18070it [00:23, 768.58it/s]


----

In [90]:
Image(url="regions/north1.png", width=400, height=300)

In [91]:
north1 = read_regions('regions/north1.txt')
boundary = read_regions('regions/north1_boundary.txt')
# convert1(north1, boundary, boundary, boundary)
convert1(north1, boundary, 7001, 7001)

8357it [00:10, 764.78it/s]


----

----

In [92]:
Image(url="regions/north_west.png", width=300, height=300)

In [128]:
NW = read_regions('regions/north_west.txt')
boundary = read_regions('regions/north3_boundary.txt')
df = converted_data[converted_data.otaz.isin(NW)|converted_data.dtaz.isin(NW)]

df1 = df[df.otaz.isin(sumo_n)|df.dtaz.isin(sumo_n)]
# convert1(NW, boundary, boundary, boundary, df=df1)
convert1(NW, boundary, 7003, 7003, df=df1)

df2 = df[df.otaz.isin(sumo_s)|df.dtaz.isin(sumo_s)]
convert1(NW, nearSR99n, 5010, 5011, df=df2)

0it [00:00, ?it/s]
0it [00:00, ?it/s]


In [95]:
convert1(NW, nearSR99n, 5010, 5011)

0it [00:00, ?it/s]


In [96]:
converted_data[converted_data.otaz.isin(NW)|converted_data.dtaz.isin(NW)]


Unnamed: 0,otaz,dtaz,deptm,travtime,original_o,original_d,original_deptm


In [129]:
converted_data[converted_data.otaz.isin(NW)|converted_data.dtaz.isin(NW)]


Unnamed: 0,otaz,dtaz,deptm,travtime,original_o,original_d,original_deptm


-------

In [99]:
Image(url="regions/6001.png", width=300, height=300)

In [100]:
to6001 = read_regions('regions/6001.txt')
near6001 = read_regions('regions/near6001.txt')

In [101]:
# df = converted_data[converted_data.otaz.isin(to6001)|converted_data.dtaz.isin(to6001)]

# df1 = df[df.otaz.isin(sumo_n)|df.dtaz.isin(sumo_n)]
# convert1(to6001, near6001, 6001, 6001, df=df1)

# df2 = df[df.otaz.isin(sumo_s)|df.dtaz.isin(sumo_s)]
# convert1(to6001, nearSR99n, 5010, 5011, df=df2)

industrial=read_regions('regions/version2/industrial.txt')
df = converted_data[converted_data.otaz.isin(to6001)|converted_data.dtaz.isin(to6001)]
df1 = df[df.otaz.isin(industrial)|df.dtaz.isin(industrial)]
convert1(to6001, nearSR99n, 5010, 5011, df=df2)

convert1(to6001, near6001, 6001, 6001)

0it [00:00, ?it/s]
15862it [00:21, 724.07it/s]


----

In [102]:
Image(url="regions/south.png", width=300, height=300)

In [103]:
south = read_regions('regions/south.txt')
boundary = read_regions('regions/south_boundary.txt')
# convert1(south, boundary, boundary, boundary)
convert1(south, boundary, 7000, 7000)

11355it [00:15, 734.22it/s]


-------

## TAZ conversion for trips passing through sumo_TAZs

For this stage, we assume some scenarios, find related trips in out main trip dataframe, process them, and then add them to our existing trips

In [104]:
def get_trips_in_between(src_df, taz_list1, taz_list2):
# make sure taz_list1 and taz_list2 have no TAZ within sumo_TAZs. 
# In that case, some of the trips that were already processed in converted_data will be included
    df = src_df[(src_df.otaz.isin(taz_list1) & src_df.dtaz.isin(taz_list2)) | 
                (src_df.otaz.isin(taz_list2) & src_df.dtaz.isin(taz_list1))]
    return df

In [105]:
def convert2(df, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2):
    zone1offsets = get_intra_TAZ_traveltime(zone1, near_new_z1)
    zone2offsets = get_intra_TAZ_traveltime(zone2, near_new_z2)
    new_z1_in, new_z1_out = new_z1[0], new_z1[1]
    new_z2_in, new_z2_out = new_z2[0], new_z2[1]
    
    print(f"it'll take {len(df)} iterations.")
    for index, row in tqdm(df.iterrows()):
        interval = int(row.deptm//15)
        
        if row.otaz in zone1 and row.dtaz in zone2:
            row.otaz = get_random_TAZ(new_z1_in)
            row.dtaz = get_random_TAZ(new_z2_out)
            row.deptm += zone1offsets[interval]
            
        elif row.otaz in zone2 and row.dtaz in zone1:
            row.otaz = get_random_TAZ(new_z2_in)
            row.dtaz = get_random_TAZ(new_z1_out)
            row.deptm += zone2offsets[interval]
#         converted_data.loc[index, :] = row 
        df.loc[index, :] = row

-----

In [106]:
Image(url="regions/scenarios.png", width=900, height=300)

In [107]:
#scenario 1:

# out_of_sumo_data = get_trips_in_between(trip_data, zone1, zone2) 
out_of_sumo_data = get_trips_in_between(trip_data, i5n2, i5s1) 
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
convert2(out_of_sumo_data, i5n2, [5000, 5001], near_i5n, i5s1, [5002, 5003], near_i5s)

it'll take 1406 iterations.


1406it [00:00, 7048.82it/s]


In [108]:
def make_pseudo_trips(zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2):
        df = get_trips_in_between(trip_data, zone1, zone2) 
        convert2(df, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
        return df

In [109]:
# scenario 2
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
df = make_pseudo_trips(i5n2, [5000, 5001], near_i5n, sw2, [5002, 5003], near_i5s)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])


it'll take 2462 iterations.


2462it [00:00, 6678.09it/s]


In [110]:
# scenario 3
east2_b = read_regions('regions/east2_boundary.txt')

# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(i5n2, [5000, 5001], near_i5n, east2, [east2_b, east2_b], east2_b)
df = make_pseudo_trips(i5n2, [5000, 5001], near_i5n, east2, [7005, 7005], east2_b)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 1301 iterations.


1301it [00:00, 6053.08it/s]


In [111]:
# scenario 4
east1_b = read_regions('regions/east1_boundary.txt')
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(i5n2, [5000, 5001], near_i5n, east1, [east1_b, east1_b], east1_b)
df = make_pseudo_trips(i5n2, [5000, 5001], near_i5n, east1, [7004, 7004], east1_b)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 2657 iterations.


2657it [00:00, 6773.72it/s]


In [112]:
# scenario 5
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(i5s2, [5002, 5003], near_i5s, east1, [east1_b, east1_b], east1_b)
df = make_pseudo_trips(i5s2, [5002, 5003], near_i5s, east1, [7004, 7004], east1_b)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 2196 iterations.


2196it [00:00, 6656.79it/s]


In [113]:
# scenario 6
north3_b = read_regions('regions/north3_boundary.txt')
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(NW, [north3_b, north3_b], north3_b, east1, [east1_b, east1_b], east1_b)
df = make_pseudo_trips(NW, [7003, 7003], north3_b, east1, [7004, 7004], east1_b)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 1588 iterations.


1588it [00:00, 6493.03it/s]


In [114]:
# scenario 7
north2_b = read_regions('regions/north2_boundary.txt')
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(north2, [north2_b, north2_b], north2_b, east2, [east2_b, east2_b], east2_b)
df = make_pseudo_trips(north2, [7002, 7002], north2_b, east2, [7005, 7005], east2_b)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 944 iterations.


944it [00:00, 5837.49it/s]


In [115]:
# scenario 8
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(NW, [north3_b, north3_b], north3_b, east2, [east2_b, east2_b], east2_b)
df = make_pseudo_trips(NW, [7003, 7003], north3_b, east2, [7005, 7005], east2_b)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 851 iterations.


851it [00:00, 5785.42it/s]


In [116]:
# scenario 9
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(i5s2, [5002, 5003], near_i5s, east2, [east2_b, east2_b], east2_b)
df = make_pseudo_trips(i5s2, [5002, 5003], near_i5s, east2, [7005, 7005], east2_b)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 1846 iterations.


1846it [00:00, 6378.37it/s]


In [117]:
# scenario 10
south_b = read_regions('regions/south_boundary.txt')
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(NW, [north3_b, north3_b], north3_b, south, [south_b, south_b], south_b)
df = make_pseudo_trips(NW, [7003, 7003], north3_b, south, [7000, 7000], south_b)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 1172 iterations.


1172it [00:00, 6671.97it/s]


In [118]:
# scenario 11
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
df = make_pseudo_trips(NW, [436, 436], north3_b, se, [[636, 650], [636, 650]], [636, 647, 648, 649, 650 ])
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 624 iterations.


624it [00:00, 5792.90it/s]


In [119]:
# scenario 12
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(NW, [north3_b, north3_b], north3_b, sw1, [5008, 5009], nearsr99s)
df = make_pseudo_trips(NW, [7003, 7003], north3_b, sw1, [5008, 5009], nearsr99s)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 2238 iterations.


2238it [00:00, 6756.43it/s]


In [120]:
# scenario 13
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
df = make_pseudo_trips((SR99n+to6001), [5010, 5011], nearSR99n,
                       sw1, [5008, 5009], nearsr99s)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 9355 iterations.


9355it [00:01, 5696.04it/s]


In [121]:
# scenario 14
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
df = make_pseudo_trips(i5n1, [5000, 5001], near_i5n,
                       i5s1, [5002, 5003], near_i5s)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 7321 iterations.


7321it [00:01, 5973.34it/s]


In [122]:
# scenario 15
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(i5n1, [5000, 5001], near_i5n, south, [south_b, south_b], south_b)
df = make_pseudo_trips(i5n1, [5000, 5001], near_i5n, 
                       south, [7000, 7000], south_b)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 4856 iterations.


4856it [00:00, 6365.12it/s]


In [123]:
# scenario 18
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
df = make_pseudo_trips(sw2, [5008, 5009], nearsr99s,
                       (SR99n+to6001), [5010, 5011], nearSR99n)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 14977 iterations.


14977it [00:02, 5856.74it/s]


In [124]:
# scenario 19
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(south, [south_b, south_b], south_b,
#                        (SR99n+to6001), [5010, 5011], nearSR99n)
df = make_pseudo_trips(south, [7000, 7000], south_b,
                       (SR99n+to6001), [5010, 5011], nearSR99n)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 7006 iterations.


7006it [00:01, 6018.20it/s]


In [125]:
# scenario 20
north1_b = read_regions('regions/north1_boundary.txt')
# convert2(out_of_sumo_data, zone1, new_z1, near_new_z1, zone2, new_z2, near_new_z2)
# df = make_pseudo_trips(south, [south_b, south_b], south_b,
#                        north1, [5000, 5001], near_i5n)
df = make_pseudo_trips(south, [7000, 7000], south_b,
                       north1, [5000, 5001], near_i5n)
out_of_sumo_data = pd.concat([out_of_sumo_data, df])

it'll take 388 iterations.


388it [00:00, 5919.53it/s]


In [126]:
out_of_sumo_data

Unnamed: 0,otaz,dtaz,deptm,travtime,original_o,original_d,original_deptm
24739,5000,5003,490,28,8,708,464
24740,5000,5003,506,30,8,709,478
24741,5000,5003,529,30,8,709,503
24742,5000,5003,887,24,8,711,869
24745,5000,5003,1001,36,8,718,977
...,...,...,...,...,...,...,...
1539539,7000,5001,626,18,726,408,618
1540589,7000,5001,1118,22,727,365,1109
1540590,7000,5001,910,22,727,367,902
1541385,7000,5001,991,24,728,365,983


In [130]:
len(converted_data)

608908

In [131]:
df = pd.concat([converted_data, out_of_sumo_data])

In [132]:
len(df)

672096

In [135]:
df[df.otaz == 402]

Unnamed: 0,otaz,dtaz,deptm,travtime,original_o,original_d,original_deptm


# making CSV file

In [136]:
df.to_csv('psrc_vehicle_trips_converted_taz.csv', index=False)