#### GTFS Helper File
##### Author: Eric Englin


This code inputs a kml file with routes and outputs the shapes tab of the RTAP GTFS builder Excel document. Kml files are generally created as a simple Google map document and can be directly input into this python script. 

In [1]:
from pykml import parser
import re
import networkx as nx
import osmnx as ox
from geopy import distance
import pandas as pd


In [2]:
def make_shape_table(kml_name):
    # function to make the table on shapes tab of RTAP GTFS Builder
    # input: kml file location
    # output: dataframe that can be directly pasted into shapes tab
    
    # first section reads in the kml file and parses to grab the coordinates of each point on a line
    root = parser.parse(kml_name).getroot()
    coord_ex = '(-?\d+\.\d+),'
    heig_ex = '(\d+)'
    regex = coord_ex + coord_ex + heig_ex
    mycoor = (root.Document.Placemark.LineString.coordinates).text.strip()
    coord = re.findall(regex, mycoor)


    # next section calculates the distance between each point
    # assumes that the distances will be short enough to use the geometric distance
    # another approach could use the actual driving/road distance
    from geopy import distance
    shape_id_list = []
    lat_list= []
    lon_list = []
    shape_pt_sequence_list = []
    sequence_num = 1
    shape_dist_traveled_list = []

    last_lon = float(coord[0][0])
    last_lat = float(coord[0][1])

    for x in coord:
        shape_id_list.append(root.Document.name.text)
        lon = float(x[0])
        lat = float(x[1])
        lon_list.append(lon)
        lat_list.append(lat)
        shape_pt_sequence_list.append(sequence_num)
        if sequence_num == 1:
            shape_dist_traveled_list.append(0)

        else:
            from geopy import distance
          #  print((float(x[1]), float(x[0])), (float(last_lat), float(last_lon)))
            distance = distance.distance((x[1],x[0]), 
                                          (last_lat,last_lon))
            shape_dist_traveled_list.append(distance.km)

        last_lat = lat
        last_lon = lon

        sequence_num +=1
        
    df = pd.DataFrame({
        "shape_id":shape_id_list,
        "shape_pt_lat":lat_list,
        "shape_pt_lon":lon_list,
        "shape_pt_sequence":shape_pt_sequence_list,
        "shape_dist_traveled":shape_dist_traveled_list
    })
    
    return df
    

In [3]:
kml_list = []


### Zion National Park

In [22]:
canyon_route_df = make_shape_table(r"C:\Users\Eric.Englin\Downloads\canyon.kml")
town_route_df = make_shape_table(r"C:\Users\Eric.Englin\Downloads\town.kml")

df = town_route_df.append(canyon_route_df)

df.to_excel(r"C:\Users\Eric.Englin\Downloads\Zion.xlsx")

  df = town_route_df.append(canyon_route_df)


In [20]:
df.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
0,TownRoute,37.16741,-113.01362,1,0.0
1,TownRoute,37.16731,-113.01363,2,0.011134
2,TownRoute,37.16713,-113.01368,3,0.020464
3,TownRoute,37.1671,-113.01347,4,0.018946
4,TownRoute,37.16726,-113.01344,5,0.017956


### Harpers Ferry

In [18]:
hafe_df = make_shape_table(r"C:\Users\Eric.Englin\Downloads\lower town.kml")

hafe_df.to_excel(r"C:\Users\Eric.Englin\Downloads\HAFE.xlsx")

In [21]:
hafe_df.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
0,Harpers Ferry National Historical Park Visitor...,39.31644,-77.75726,1,0.0
1,Harpers Ferry National Historical Park Visitor...,39.31646,-77.75729,2,0.003409
2,Harpers Ferry National Historical Park Visitor...,39.31647,-77.75732,3,0.002815
3,Harpers Ferry National Historical Park Visitor...,39.31647,-77.75736,4,0.00345
4,Harpers Ferry National Historical Park Visitor...,39.31647,-77.7574,5,0.00345
