# Demo

This notebook converts transit data in GTFS format to .csv files in the standard GMNS format for visualization, traffic simulation, and planning purposes.
It uses Phoenix as an example.

In [1]:
import os
import math
import datetime
import numpy as np
import pandas as pd
from python import readtxt, LLs2Dist, convert_time_sequence, time_convert, time_calculate, time

### Read GTFS data
stops.txt, routes.txt, trips.txt, and stop_times.txt are required.

In [2]:
# Switch to the data folder before loading; readtxt is called with bare table
# names (presumably it resolves them against the working directory — confirm).
os.chdir('../data')

# Load the four GTFS tables used by the rest of the notebook.
df_stops, df_routes, df_trips, df_stoptimes = [
    readtxt(table_name)
    for table_name in ('stops', 'routes', 'trips', 'stop_times')
]

### build node.csv

In [3]:
# Map the GTFS stop table onto the node columns; rows keep the index of df_stops.
node_csv = (
    df_stops[['stop_id', 'stop_lon', 'stop_lat']]
    .rename(columns={'stop_id': 'name', 'stop_lon': 'x_coord', 'stop_lat': 'y_coord'})
    .assign(
        # Attributes that the stop table does not provide are left empty.
        node_type=None,
        ctrl_type=None,
        zone_id=None,
        # WKT point built by string concatenation of the longitude/latitude columns.
        geometry="POINT (" + df_stops['stop_lon'] + " " + df_stops['stop_lat'] + ")",
    )
)

# Node ids are consecutive integers starting at 100001, one per stop row.
node_csv['node_id'] = range(100001, 100001 + len(node_csv), 1)

print(node_csv.iloc[0:5])

    name       x_coord      y_coord node_type ctrl_type zone_id  \
0     10   -112.117589    33.377274      None      None    None   
1    100  -111.9860836  33.64048337      None      None    None   
2   1002  -112.2009471  33.56353253      None      None    None   
3  10021  -112.0700826  33.45312081      None      None    None   
4   1003  -112.0858039   33.4000364      None      None    None   

                           geometry  node_id  
0     POINT (-112.117589 33.377274)   100001  
1  POINT (-111.9860836 33.64048337)   100002  
2  POINT (-112.2009471 33.56353253)   100003  
3  POINT (-112.0700826 33.45312081)   100004  
4   POINT (-112.0858039 33.4000364)   100005  


### build link.csv

In [4]:
# Expose the stop identifier under the 'stop_id' name so node_csv can be merged on it.
node_csv = node_csv.rename({'name': 'stop_id'}, axis='columns')

In [5]:
# Trips enriched with the attributes of their route (left join keeps every trip).
combined_route = pd.merge(df_trips, df_routes, how='left', on='route_id')
# Stop-time rows enriched with node columns, then with the attributes of their trip.
combined_stop = pd.merge(df_stoptimes, node_csv, how='left', on='stop_id')
combined_trip = pd.merge(combined_stop, df_trips, how='left', on='trip_id')

In [6]:
# Per-trip route lookup keyed by trip_id; values are taken from the first row of each group.
# NOTE(review): 'route_id_short_name' is filled from 'route_long_name' — confirm this is intended.
gp = combined_route.groupby('trip_id')

dataList_route = {
    trip_id: {
        'route_id': trip_rows['route_id'].values[0],
        'route_id_short_name': trip_rows['route_long_name'].values[0],
    }
    for trip_id, trip_rows in gp
}

In [7]:
# Build one record per trip: its route, first/last node, and the node and time
# sequences in visiting order.
dataList_trip = {}

# The first/last node and both sequences below depend on row order, but the GTFS
# file order of stop_times rows is not guaranteed to follow the trip. Order the
# rows by numeric stop_sequence first. The sort is stable and groupby keeps the
# row order inside each group, so a feed that is already ordered is unaffected;
# values that cannot be parsed as numbers become NaN and sort last.
trip_stop_rows = combined_trip
if 'stop_sequence' in trip_stop_rows.columns:
    stop_order = pd.to_numeric(trip_stop_rows['stop_sequence'], errors='coerce')
    trip_stop_rows = trip_stop_rows.iloc[np.argsort(stop_order.values, kind='mergesort')]

gp = trip_stop_rows.groupby('trip_id')

for key, form in gp:
    # Arrival times of the trip, converted by the project helper.
    temp = form['arrival_time']
    temp = convert_time_sequence(temp)
    dataList_trip[key] = {
        'route_id': form['route_id'].values[0],
        'from_node_id': form['node_id'].values[0],
        'to_node_id': form['node_id'].values[-1],
        'node_sequence': form['node_id'].tolist(),
        'time_sequence': temp
        }

In [8]:
# Collect one [route, from_node, to_node, length, geometry] record for every pair
# of consecutive stops of every trip (duplicates are removed in a later step).
link_list = []
link_csv = pd.DataFrame()

node_x = node_csv['x_coord'].tolist()
node_y = node_csv['y_coord'].tolist()
node_id_list = node_csv['node_id'].tolist()

# node_id -> row position, built once so each lookup is O(1) instead of a list
# scan per link. setdefault keeps the first occurrence, like list.index did.
node_position = {}
for position, node_id in enumerate(node_id_list):
    node_position.setdefault(node_id, position)

for key in dataList_trip.keys():
    route_index = dataList_trip[key]['route_id']
    node_sequence = dataList_trip[key]['node_sequence']

    # Walk the trip as (current stop, next stop) pairs.
    for active_from_node_id, active_to_node_id in zip(node_sequence, node_sequence[1:]):
        # A node id that is not in node_csv raises KeyError here.
        active_from_node_idx = node_position[active_from_node_id]
        active_to_node_idx = node_position[active_to_node_id]

        from_node_id_x = node_x[active_from_node_idx]
        from_node_id_y = node_y[active_from_node_idx]
        to_node_id_x = node_x[active_to_node_idx]
        to_node_id_y = node_y[active_to_node_idx]

        # Link length from the project helper, and the link as a WKT line string.
        active_distance = LLs2Dist(float(from_node_id_x),float(from_node_id_y),float(to_node_id_x),float(to_node_id_y))
        active_geometry = 'LINESTRING (' + str(from_node_id_x)+' '+str(from_node_id_y)+', '+str(to_node_id_x)+' '+str(to_node_id_y)+')'

        link_list.append([route_index,active_from_node_id,active_to_node_id,active_distance,active_geometry])

In [9]:
# Collapse the per-trip link records into unique links. drop_duplicates keeps the
# row labels of the surviving rows, which leaves gaps in the index; renumber it so
# the index is consecutive and starts at 0.
link_csv = (
    pd.DataFrame(link_list, columns=['name','from_node_id','to_node_id','length','geometry'])
    .drop_duplicates()
    .reset_index(drop=True)
)

In [10]:
# Constant attributes applied to every link, added in this column order.
link_defaults = {
    'facility_type': None,
    'link_type': 1,
    'dir_flag': 1,
    'lanes': 1,
    'free_speed': 65,
    'capacity': 1900,
    'main_node_id': None,
    'movement_str': None,
    'NEMA_phase_number': None,
}
for attribute_name, attribute_value in link_defaults.items():
    link_csv[attribute_name] = attribute_value

# The index serves as the link identifier; the zero offset leaves the ids unchanged.
link_csv.index = (link_csv.index + 0).rename('link_id')

print(link_csv.iloc[0:5])

         name  from_node_id  to_node_id    length  \
link_id                                             
0        FLSH        102222      102688  0.587413   
1        FLSH        102688      107880  0.304955   
2        FLSH        107880      106237  0.190518   
3        FLSH        106237      104445  0.054524   
4        FLSH        104445      104446  0.113762   

                                                  geometry facility_type  \
link_id                                                                    
0        LINESTRING (-111.936317 33.425719, -111.926204...          None   
1        LINESTRING (-111.9262049 33.42469311, -111.926...          None   
2        LINESTRING (-111.9269728 33.4290600, -111.9293...          None   
3        LINESTRING (-111.9293591 33.42715292, -111.929...          None   
4        LINESTRING (-111.929337 33.426364, -111.929318...          None   

         link_type  dir_flag  lanes  free_speed  capacity main_node_id  \
link_id              

### build agent.csv

In [11]:
# Start from an empty agent table; it is filled while iterating over the trips.
agent_csv = pd.DataFrame()

# Copy the link columns into NumPy arrays for the per-trip link lookups.
length_temp, from_node_temp, to_node_temp = (
    np.array(link_csv[column_name])
    for column_name in ('length', 'from_node_id', 'to_node_id')
)

In [12]:
# Build one agent record per trip whose consecutive stops are all connected by a
# known link; trips with a missing link are skipped.

# (from_node_id, to_node_id) -> length of the first matching link. Matching on the
# full pair matters: matching the to-node alone can pick up the length of a link
# that starts at a different node.
link_length = {}
for link_from, link_to, link_len in zip(from_node_temp.tolist(), to_node_temp.tolist(), length_temp):
    link_length.setdefault((link_from, link_to), link_len)

agent_columns = ['agent_type', 'trip_id', 'route_id', 'route_id_short_name',
                 'from_node_id', 'to_node_id', 'travel_time', 'distance',
                 'node_sequence', 'time_sequence']
agent_records = []

for key in dataList_trip.keys():
    active_length_list = []
    flag = 1

    node_sequence = dataList_trip[key]['node_sequence']
    for active_from_node_id, active_to_node_id in zip(node_sequence, node_sequence[1:]):
        active_length = link_length.get((active_from_node_id, active_to_node_id))
        if active_length is None:
            # No link joins this pair of stops: drop the whole trip.
            flag = 0
            break
        active_length_list.append(active_length)

    if flag == 1:
        # Trip distance is the sum of its link lengths.
        active_length = sum(active_length_list)
        active_time_sequence = dataList_trip[key]['time_sequence']
        active_time_first_temp = active_time_sequence[0]
        active_time_last_temp = active_time_sequence[-1]
        # Travel time between the first and last entry, computed by the project helper.
        active_time=time(active_time_first_temp, active_time_last_temp)

        # Both sequences are serialized as ';'-separated strings with a trailing ';'.
        node_sequence_str = list(map(str, node_sequence))
        node_sequence_temp = ';'.join(node_sequence_str)+';'

        time_sequence_temp = ';'.join(active_time_sequence)+';'

        agent_records.append({'agent_type':'transit', 'trip_id':key, 'route_id':dataList_trip[key]['route_id'],
                              'route_id_short_name':dataList_route[key]['route_id_short_name'],
                              'from_node_id':dataList_trip[key]['from_node_id'], 'to_node_id':dataList_trip[key]['to_node_id'],
                              'travel_time':active_time, 'distance':active_length,
                              'node_sequence':node_sequence_temp,
                              'time_sequence':time_sequence_temp})

# Build the table once from the collected records: growing a DataFrame row by row
# is quadratic, and DataFrame.append is no longer available in pandas 2.x.
agent_csv = pd.DataFrame(agent_records, columns=agent_columns)

In [13]:
# The index serves as the agent identifier; the zero offset leaves the ids unchanged.
agent_csv.index = (agent_csv.index + 0).rename('agent_id')

print(agent_csv.iloc[0:5])

         agent_type   trip_id route_id   route_id_short_name  from_node_id  \
agent_id                                                                     
0           transit  14453278     FLSH                 FLASH        102222   
1           transit  14453279     FLSH                 FLASH        102222   
2           transit  14453280     FLSH                 FLASH        102222   
3           transit  14453281      531  Mesa/Gilbert Express        100450   
4           transit  14453282      531  Mesa/Gilbert Express        100450   

          to_node_id  travel_time   distance  \
agent_id                                       
0             102278         16.0   2.298391   
1             102278         16.0   2.298391   
2             102278         16.0   2.298391   
3             106551         73.0  21.590803   
4             106551         73.0  21.590803   

                                              node_sequence  \
agent_id                                             