In [24]:
import csv
import os

import geopandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from shapely.geometry import LineString

import utilities

In [2]:
def process_coord_string(coord_string):
    """
    Parse a KML-style coordinate string into a list of (lon, lat) float tuples.

    The string is a whitespace-separated sequence of ``lon,lat[,alt]`` tuples.
    Any run of whitespace (spaces, tabs, newlines) separates tuples, and
    leading/trailing whitespace is ignored. Components after the latitude
    (e.g. altitude) are discarded.

    Parameters
    ----------
    coord_string : str
        Text of the form ``"lon,lat lon,lat ..."``.

    Returns
    -------
    list of tuple(float, float)
        One ``(lon, lat)`` pair per tuple, in input order.

    Raises
    ------
    ValueError
        If the string contains no tuples, or a component is not a number.
    IndexError
        If a tuple has fewer than two comma-separated components.
    """
    # split() with no argument collapses whitespace runs and never yields empty
    # tokens, unlike split(' ') which chokes on newlines and repeated spaces.
    tokens = coord_string.split()
    if not tokens:
        # Keep "nothing to parse" an error so callers fall back to their
        # missing-geometry handling instead of building an empty geometry.
        raise ValueError('coordinate string contains no coordinate tuples')

    path_points = []
    for token in tokens:
        parts = token.split(',')
        lon = float(parts[0])
        lat = float(parts[1])
        path_points.append((lon, lat))
    return path_points

def process_place(placemark):
    """Flatten one KML Placemark element into a dict of attributes and geometry.

    Every ``SimpleData`` child contributes one ``name -> text`` entry. Four
    geometry entries are then added, derived from the first ``coordinates``
    element found under the placemark:

    * ``lat_lon_list`` -- list of ``(lon, lat)`` tuples (note the order is
      lon, lat despite the key name),
    * ``line``         -- a shapely ``LineString`` through those points,
    * ``mid_point``    -- the point halfway along the line,
    * ``length``       -- the line length in the units of the coordinates.

    Parsing is best-effort: if the coordinates are missing or cannot be turned
    into a line, all four geometry entries are set to ``np.nan`` so the row can
    be filtered out later with ``dropna()``.

    Parameters
    ----------
    placemark
        A parsed element exposing ``find_all`` / ``find`` (a BeautifulSoup tag).

    Returns
    -------
    dict
        The attribute entries followed by the four geometry entries.
    """
    place_dict = {
        field['name']: field.text
        for field in placemark.find_all('SimpleData')
    }

    try:
        # find() returns None when there is no <coordinates> element; the
        # resulting AttributeError is handled by the fallback below.
        coord_text = placemark.find('coordinates').string
        points = process_coord_string(coord_text)
        line = LineString(points)
        geometry = {
            'lat_lon_list': points,
            'line': line,
            'mid_point': line.interpolate(0.5, normalized=True),
            'length': line.length,
        }
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still stop a long-running parse.
        geometry = dict.fromkeys(
            ('lat_lon_list', 'line', 'mid_point', 'length'), np.nan
        )

    # Assign all four fields together so a failure never leaves a partial mix
    # of real and NaN geometry values.
    place_dict.update(geometry)
    return place_dict

# PA Traffic

In [3]:
# Parse the traffic-volume KML export as XML so its Placemark elements can be walked.
kml_path = 'RMSTRAFFIC_Traffic_Volumes.kml'
with open(kml_path, encoding='utf8') as f:
    centerline_soup = BeautifulSoup(f, 'xml')

In [4]:
# Turn every Placemark into one record, then build the frame in a single call.
placemarks = centerline_soup.find_all('Placemark')
place_list = list(map(process_place, placemarks))
df_traffic = pd.DataFrame(place_list)

In [9]:
# Release the parsed KML tree and the intermediate lists; the cells below
# work from df_traffic only.
del centerline_soup
del place_list
del placemarks

In [6]:
# Sanity check: one row per Placemark, one column per attribute plus the
# four derived geometry columns.
df_traffic.shape

(50443, 52)

In [8]:
# Preview the first records, including the derived geometry columns.
df_traffic.head()

Unnamed: 0,RMSTRAFFIC_LRS_KEY,ST_RT_NO,CTY_CODE,DISTRICT_NO,JURIS,SEG_BGN,OFFSET_BGN,SEG_END,OFFSET_END,SEG_PT_BGN,...,NLF_CNTL_BGN,NLF_CNTL_END,CUM_OFFSET_BGN_T1,CUM_OFFSET_END_T1,RECORD_UPDATE,GEOMETRYLEN,lat_lon_list,line,mid_point,length
0,01004800100000003004225,0048,1,8,5,10,0,30,422,100000,...,0,2797,0,2797,20190217,0,"[(-77.23110324422287, 39.82972373398385), (-77...",LINESTRING (-77.23110324422287 39.829723733983...,POINT (-77.2299963918253 39.82972222817695),0.002214
1,02028H00100000004006865,028H,2,11,5,10,0,40,686,100000,...,0,4751,0,4751,20190217,0,"[(-79.98862809120706, 40.43003878956188), (-79...",LINESTRING (-79.98862809120706 40.430038789561...,POINT (-79.98526276314594 40.43010513612506),0.006732
2,02064400100000001076565,0644,2,11,5,10,0,10,7656,100000,...,0,7656,0,7656,20190908,0,"[(-79.8815921856011, 40.630976124424535), (-79...",LINESTRING (-79.88159218560109 40.630976124424...,POINT (-79.86976417452328 40.63349257754408),0.024917
3,02065100100000002000005,0651,2,11,5,10,0,20,0,100000,...,0,4065,0,4065,20180211,0,"[(-79.90853573333078, 40.21860659169417), (-79...",LINESTRING (-79.90853573333078 40.218606591694...,POINT (-79.90442238098508 40.22356933075688),0.013079
4,02065100200000002065475,0651,2,11,5,20,0,20,6547,200000,...,4065,10612,4065,10612,20190825,0,"[(-79.89861846554105, 40.2253894122988), (-79....",LINESTRING (-79.89861846554105 40.225389412298...,POINT (-79.89789183920182 40.234340476428),0.018416


In [11]:
# List every column name, since head() elides the middle of this wide frame.
df_traffic.columns

Index(['RMSTRAFFIC_LRS_KEY', 'ST_RT_NO', 'CTY_CODE', 'DISTRICT_NO', 'JURIS',
       'SEG_BGN', 'OFFSET_BGN', 'SEG_END', 'OFFSET_END', 'SEG_PT_BGN',
       'SEG_PT_END', 'SEG_LNGTH_FEET', 'SEQ_NO', 'CUR_AADT', 'ADTT_CUR',
       'TRK_PCT', 'WKDY_TRK_CUR', 'ADLR_TRK_CUR', 'ADLF_TRK_CUR',
       'BASE_YR_CLS_CNT', 'BASE_ADT', 'ADTT_BASE', 'WKDY_TRK_BASE',
       'ADLR_TRK_BASE', 'ADLF_TRK_BASE', 'BASE_ADT_YR', 'DLY_VMT',
       'DLY_TRK_VMT', 'K_FACTOR', 'D_FACTOR', 'T_FACTOR', 'VOL_CNT_KEY',
       'VOL_CNT_DATE', 'RAW_CNT_HIST_DATE', 'TRAFF_PATT_GRP', 'DUR_CLS_CNT',
       'TYPE_OF_CNT', 'DIR_IND', 'MSLINK', 'MAPID', 'NLF_ID', 'SIDE_IND',
       'NLF_CNTL_BGN', 'NLF_CNTL_END', 'CUM_OFFSET_BGN_T1',
       'CUM_OFFSET_END_T1', 'RECORD_UPDATE', 'GEOMETRYLEN', 'lat_lon_list',
       'line', 'mid_point', 'length'],
      dtype='object')

In [12]:
# Show the first record as a vertical Series so every field value is visible.
df_traffic.iloc[0]

RMSTRAFFIC_LRS_KEY                              01004800100000003004225
ST_RT_NO                                                           0048
CTY_CODE                                                             01
DISTRICT_NO                                                          08
JURIS                                                                 5
SEG_BGN                                                            0010
OFFSET_BGN                                                            0
SEG_END                                                            0030
OFFSET_END                                                          422
SEG_PT_BGN                                                     00100000
SEG_PT_END                                                     00300422
SEG_LNGTH_FEET                                                     2797
SEQ_NO                                                                1
CUR_AADT                                                        

In [10]:
# NOTE(review): presumably a progress marker left over from interactive runs;
# it has no effect on the analysis.
print('done')

done


In [13]:
# Preview the geometry and traffic-count columns on their own.
df_traffic[['line', 'CUR_AADT']].head()

Unnamed: 0,line,CUR_AADT
0,LINESTRING (-77.23110324422287 39.829723733983...,4625
1,LINESTRING (-79.98862809120706 40.430038789561...,5195
2,LINESTRING (-79.88159218560109 40.630976124424...,4984
3,LINESTRING (-79.90853573333078 40.218606591694...,694
4,LINESTRING (-79.89861846554105 40.225389412298...,637


In [15]:
# Export the line geometry and its AADT value as a shapefile.
# dropna() removes rows whose geometry could not be parsed (NaN 'line') or
# that have no CUR_AADT, so the file can hold fewer rows than df_traffic.
# KML coordinates are WGS84 lon/lat, so the CRS is declared explicitly;
# without it the shapefile is written with no projection information.
traffic_geoms = geopandas.GeoDataFrame(
    df_traffic[['line', 'CUR_AADT']].dropna(),
    geometry='line',
    crs='EPSG:4326',
)
traffic_geoms.to_file('traffic_long_raw_AADT_geometries.shp', driver='ESRI Shapefile')

In [19]:
# Row count of the full frame, for comparison with the exported file.
df_traffic.shape

(50443, 52)

In [17]:
# Read the exported shapefile back in to check what was actually written.
test = geopandas.read_file('traffic_long_raw_AADT_geometries.shp')

In [18]:
# Fewer rows than df_traffic are expected: the export dropped every row with
# a missing 'line' or 'CUR_AADT'.
test.shape

(43226, 2)

In [20]:
# Confirm the round-tripped geometries and AADT values match the originals.
test.head()

Unnamed: 0,CUR_AADT,geometry
0,4625,LINESTRING (-77.23110324422287 39.829723733983...
1,5195,LINESTRING (-79.98862809120706 40.430038789561...
2,4984,LINESTRING (-79.88159218560109 40.630976124424...
3,694,LINESTRING (-79.90853573333078 40.218606591694...
4,637,LINESTRING (-79.89861846554105 40.225389412298...
