#### GTFS Helper File
##### Author: Eric Englin


This code inputs a kml file with routes and outputs the shapes tab of the RTAP GTFS builder Excel document. Kml files are generally created as a simple Google map document and can be directly input into this python script. 

In [5]:
from pykml import parser
import re
import networkx as nx
import osmnx as ox
from geopy import distance
import pandas as pd


In [6]:
def make_shape_table_from_kml(kml_name):
    # function to make the table on shapes tab of RTAP GTFS Builder
    # input: kml file location
    # output: dataframe that can be directly pasted into shapes tab
    
    # first section reads in the kml file and parses to grab the coordinates of each point on a line
    root = parser.parse(kml_name).getroot()
    coord_ex = '(-?\d+\.\d+),'
    heig_ex = '(\d+)'
    regex = coord_ex + coord_ex + heig_ex
    mycoor = (root.Document.Placemark.LineString.coordinates).text.strip()
    coord = re.findall(regex, mycoor)


    # next section calculates the distance between each point
    # assumes that the distances will be short enough to use the geometric distance
    # another approach could use the actual driving/road distance
    from geopy import distance
    shape_id_list = []
    lat_list= []
    lon_list = []
    shape_pt_sequence_list = []
    sequence_num = 1
    shape_dist_traveled_list = []
    dist_traveled = 0

    last_lon = float(coord[0][0])
    last_lat = float(coord[0][1])

    for x in coord:
        shape_id_list.append(root.Document.name.text)
        lon = float(x[0])
        lat = float(x[1])
        lon_list.append(lon)
        lat_list.append(lat)
        shape_pt_sequence_list.append(sequence_num)
        if sequence_num == 1:
            shape_dist_traveled_list.append(0)

        else:
            from geopy import distance
          #  print((float(x[1]), float(x[0])), (float(last_lat), float(last_lon)))
            distance = distance.distance((x[1],x[0]), 
                                          (last_lat,last_lon))
            
            dist_traveled = dist_traveled + distance.km
            shape_dist_traveled_list.append(dist_traveled)

        last_lat = lat
        last_lon = lon

        sequence_num +=1
        
    df = pd.DataFrame({
        "shape_id":shape_id_list,
        "shape_pt_lat":lat_list,
        "shape_pt_lon":lon_list,
        "shape_pt_sequence":shape_pt_sequence_list,
        "shape_dist_traveled":shape_dist_traveled_list
    })
    
    return df
    

In [30]:
boha_df.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
0,LW_GI,42.360797,-71.050571,1,0.0
1,LW_GI,42.360804,-71.050528,2,0.003637
2,LW_GI,42.360811,-71.050484,3,0.007274
3,LW_GI,42.360817,-71.050441,4,0.010909
4,LW_GI,42.360823,-71.050398,5,0.014545


In [38]:
def make_shape_table_from_shapes_sheet(shapes_sheet):
    # function to make the table on shapes tab of RTAP GTFS Builder
    # input: shapes sheet in RTAP GTFS Builder file location
    # output: dataframe that can be directly pasted into shapes tab
    
    df = pd.read_excel(shapes_sheet, 
                       sheet_name = "shapes")
    
    # first section reads in the kml file and parses to grab the coordinates of each point on a line
    # next section calculates the distance between each point
    # assumes that the distances will be short enough to use the geometric distance
    # another approach could use the actual driving/road distance
    from geopy import distance
    shape_dist_traveled_list = []
    dist_traveled = 0
    last_lon = float(df['shape_pt_lon'][0])
    last_lat = float(df['shape_pt_lat'][0])

    for x in range(len(df)):
            
        
        lon = float(df['shape_pt_lon'][x])
        lat = float(df['shape_pt_lat'][x])

        if df['shape_pt_sequence'][x] == 1:
            shape_dist_traveled_list.append(0)
            dist_traveled = 0

        else:
            from geopy import distance
          #  print((float(x[1]), float(x[0])), (float(last_lat), float(last_lon)))
            distance = distance.distance((lat, lon), 
                                          (last_lat,last_lon))
            
            dist_traveled = dist_traveled + distance.km
            shape_dist_traveled_list.append(dist_traveled)

        last_lat = lat
        last_lon = lon

        
    df["shape_dist_traveled"] = shape_dist_traveled_list
    
    return df
    

In [7]:
kml_list = []


### Zion National Park

In [4]:
canyon_route_df = make_shape_table_from_kml(r"C:\Users\Eric.Englin\Downloads\canyon.kml")
town_route_df = make_shape_table_from_kml(r"C:\Users\Eric.Englin\Downloads\town.kml")

df = town_route_df.append(canyon_route_df)

df.to_excel(r"C:\Users\Eric.Englin\Downloads\Zion.xlsx")

  df = town_route_df.append(canyon_route_df)


In [5]:
df.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
0,town,37.16657,-113.01363,1,0.0
1,town,37.16677,-113.01356,2,0.02305
2,town,37.16698,-113.0135,3,0.046958
3,town,37.1671,-113.01347,4,0.06054
4,town,37.16726,-113.01344,5,0.078495


### Harpers Ferry

In [7]:
hafe_df = make_shape_table_from_kml(r"C:\Users\Eric.Englin\Downloads\Harpers Ferry National Historical Park Visitor Center to Harpers Ferry National Historical Park Visitor Center.kml")

hafe_df.to_excel(r"C:\Users\Eric.Englin\Downloads\HAFE.xlsx")

In [8]:
hafe_df.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
0,Harpers Ferry National Historical Park Visitor...,39.31644,-77.75726,1,0.0
1,Harpers Ferry National Historical Park Visitor...,39.31646,-77.75729,2,0.003409
2,Harpers Ferry National Historical Park Visitor...,39.31647,-77.75732,3,0.006225
3,Harpers Ferry National Historical Park Visitor...,39.31647,-77.75736,4,0.009674
4,Harpers Ferry National Historical Park Visitor...,39.31647,-77.7574,5,0.013124


### BOHA

In [39]:
boha_df = make_shape_table_from_shapes_sheet(r"C:\Users\Eric.Englin\DOT OST\volpe-proj-VU16A100 - Emerging Mobility\Subgroup Support\Traveler Information Technologies\GTFS\Park GTFS + Presentations\BOHA\BOHA GTFS Builder_2.xlsb")

boha_df.to_excel(r"C:\Users\Eric.Englin\DOT OST\volpe-proj-VU16A100 - Emerging Mobility\Subgroup Support\Traveler Information Technologies\GTFS\Park GTFS + Presentations\BOHA\BOHA shapes.xlsx",
                sheet_name = "shapes")

In [40]:
boha_df.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
0,LW_GI,42.360797,-71.050571,1,0.0
1,LW_GI,42.360804,-71.050528,2,0.003637
2,LW_GI,42.360811,-71.050484,3,0.007274
3,LW_GI,42.360817,-71.050441,4,0.010909
4,LW_GI,42.360823,-71.050398,5,0.014545


## BOHA Thompson Island

In [41]:
boha_df_thompson = make_shape_table_from_shapes_sheet(r"C:\Users\Eric.Englin\DOT OST\volpe-proj-VU16A100 - Emerging Mobility\Subgroup Support\Traveler Information Technologies\GTFS\Park GTFS + Presentations\BOHA\BOHA GTFS Builder_Thompson Island.xlsb")

boha_df_thompson.to_excel(r"C:\Users\Eric.Englin\DOT OST\volpe-proj-VU16A100 - Emerging Mobility\Subgroup Support\Traveler Information Technologies\GTFS\Park GTFS + Presentations\BOHA\BOHA Thompson shapes.xlsx",
                sheet_name = "shapes")

In [42]:
boha_df_thompson.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
0,EDIC_TI,42.343253,-71.036621,1,0.0
1,EDIC_TI,42.342535,-71.035214,2,0.140732
2,EDIC_TI,42.342963,-71.015449,3,1.770153
3,EDIC_TI,42.342813,-71.01412,4,1.880889
4,EDIC_TI,42.342418,-71.012902,5,1.990439
