# Scraping LADOT Volume Data from PDFs, Part 2

##### Where I Left Off
In the first Python notebook, I described how I extracted volume data from the PDFs. At this point, the extracted data has been written to .csv files formatted for data analysis. 


In [7]:
### Setup
import csv
import glob
from datetime import datetime, date, time
import pandas as pd
import numpy as np
import folium
from folium.plugins import MarkerCluster
import os
import geopandas as gp

ImportError: DLL load failed: The specified module could not be found.

I'm going to start by loading and cleaning the tables provided by BOE, beginning with the files table.

In [None]:
# Load the BOE "traffic data files" table and clean it in one pass:
#   1. read the CSV (UploadDT parsed as a datetime, ISO-8859-1 encoded),
#   2. discard rows that have no TrafficID,
#   3. cast TrafficID to int,
#   4. keep only the 'manual_count' rows (every other TrafficType is dropped).
traffic_data_files_path = 'boe_tables/dbo_dot_traffic_data_files.csv'
dbo_dot_traffic_data_files = (
    pd.read_csv(traffic_data_files_path,
                parse_dates=['UploadDT'],
                encoding="ISO-8859-1")
    .dropna(axis=0, how='any', subset=['TrafficID'])
    .astype({'TrafficID': int})
    .loc[lambda files: files['TrafficType'] == 'manual_count']
)

# Report how many manual-count records remain
print(f"There are {len(dbo_dot_traffic_data_files)} records in the table.")

# Spot-check a single record (TrafficID 1435) rather than showing the head
dbo_dot_traffic_data_files.loc[dbo_dot_traffic_data_files['TrafficID'].eq(1435)]

In [None]:
# Read the BOE traffic data table and discard the "ext" and "Shape" columns.
# IntersectionID is left with whatever dtype read_csv inferred (no int cast is applied).
traffic_data_path = 'boe_tables/dot_traffic_data.csv'
dot_traffic_data = (
    pd.read_csv(traffic_data_path)
    .drop(['ext', 'Shape'], axis='columns')
)

# Preview the first rows
dot_traffic_data.head()

In [None]:
# Attach the traffic data columns to every file record.
# Left join on the shared TrafficID key, so each row of the files table is kept
# even when no matching traffic data row exists.
file_table = pd.merge(dbo_dot_traffic_data_files,
                      dot_traffic_data,
                      how='left',
                      on='TrafficID')

# Preview the combined table
file_table.head()

In [None]:
# Load the five result tables written to TrafficCountData/Results.
# The paths are listed in the same order as the names they are unpacked into.
manualcount_df, pedestrian_df, peakvol_df, specveh_df, info_df = (
    pd.read_csv(results_csv)
    for results_csv in (
        'TrafficCountData/Results/manualcount.csv',
        'TrafficCountData/Results/pedestrian.csv',
        'TrafficCountData/Results/peakvol.csv',
        'TrafficCountData/Results/SpecialVehicle.csv',
        'TrafficCountData/Results/info.csv',
    )
)

# Preview the count info table
info_df.head()

In [None]:
# Left join the combined BOE file table onto the count info table.
# Rows of info_df are matched by count_id against the file table's ID column;
# info rows with no match are kept (their file-table columns are NaN).
info_df_merge = info_df.merge(file_table,
                              how='left',
                              left_on='count_id',
                              right_on='ID')

# Print the row count of the joined table, then preview only the first rows
# instead of rendering the whole frame.
print(len(info_df_merge))
info_df_merge.head()

In [None]:
# Read the BOE intersection shapefile into a GeoDataFrame
boe_int = gp.read_file('shp/Intersections.shp')

# Preview the first rows of the intersection table
boe_int.head()

In [None]:
# Left join the BOE intersection table onto the info table, matching the info
# table's IntersectionID against the intersections' CL_NODE_ID.
# NOTE: this cell overwrites info_df_merge with its own merged result, so
# running it a second time without re-running the previous cell merges the
# intersection columns again (pandas then suffixes any column names shared by
# both frames).
info_df_merge = info_df_merge.merge(boe_int,
                                    how='left',
                                    left_on='IntersectionID',
                                    right_on='CL_NODE_ID')

# Print the row count after the join, then preview only the first rows
# instead of rendering the whole frame.
print(len(info_df_merge))
info_df_merge.head()

In [None]:
# Create a new LA basemap: fixed map center, zoom level 11, Stamen Toner tiles.
# NOTE(review): newer folium releases may no longer bundle the 'Stamen Toner'
# tile set -- confirm against the installed folium version.
count_map = folium.Map([34.109279, -118.266087],
                       tiles='Stamen Toner',
                       zoom_start=11)

# Keep only rows that have both coordinates; a marker cannot be placed without them.
info_df_merge = info_df_merge.dropna(subset=['LAT', 'LON'])

# Parallel lists: marker positions ([LAT, LON] pairs) and popup labels (DocName)
locationlist = info_df_merge[['LAT','LON']].values.tolist()
labels = info_df_merge["DocName"].values.tolist()

print(len(labels))
print(len(locationlist))

# Plot at most this many markers. Slicing (rather than a fixed range) keeps the
# loop from raising IndexError when fewer geocoded rows are available.
max_markers = 400

# Markers are added directly to the map (no MarkerCluster layer).
# Known popup issues: https://github.com/python-visualization/folium/issues/726
# (presumably the reason the popups are built with parse_html=True).
for marker_location, marker_label in zip(locationlist[:max_markers],
                                         labels[:max_markers]):
    popup = folium.Popup(marker_label, parse_html=True)
    folium.Marker(marker_location, popup=popup).add_to(count_map)

# Show the map
count_map


In [None]:
# Environment check: show which folium installation the kernel loaded
# (module path on the first line, version on the second).
import folium
from folium.plugins import MarkerCluster

print(folium.__file__, folium.__version__, sep='\n')

In [None]:
#map2 = folium.Map(location=[38.9, -77.05], tiles='CartoDB dark_matter', zoom_start=11)

#marker_cluster = MarkerCluster().add_to(map2)

#for point in range(0, len(locationlist)):
#    folium.Marker(locationlist[point], popup=df_counters['Name'][point]).add_to(marker_cluster)
#map2

In [None]:
from io import StringIO

import folium
import pandas as pd

# Small sample table of cafes (id, Latitude, Longitude, Cafe Name, Street) as JSON.
# The literal must stay on a single line: it is a single-quoted string, so a
# line break inside it is a syntax error.
data = '{"id":{"0":411083201,"1":418513660,"2":528057543,"3":586713622,"4":647656728,"5":647656785,"6":1493456455,"7":1493456487,"8":2005894602,"9":2095344770,"10":2187987262,"11":2411692096,"12":2411698457,"13":2474058013,"14":2474058022,"15":2881830804,"16":2895323815,"17":2895323816,"18":2983919102,"19":3321734312,"20":3641568148,"21":4010355532,"22":4030622426,"23":4037746568,"24":4055719117,"25":4259001279,"26":4340535594,"27":4625994189,"28":4666687025},"Latitude":{"0":37.7673162,"1":37.7645003,"2":37.7682118,"3":37.7648492,"4":37.771672,"5":37.7721693,"6":37.7763591,"7":37.777046,"8":37.7690716,"9":37.766319,"10":37.7664253,"11":37.7770377,"12":37.7763106,"13":37.7739857,"14":37.774748,"15":37.775488,"16":37.7752166,"17":37.7759142,"18":37.7744588,"19":37.7760172,"20":37.7762395,"21":37.765011,"22":37.769195,"23":37.7750452,"24":37.7726713,"25":37.7717782,"26":37.7745253,"27":37.7768943,"28":37.7752822},"Longitude":{"0":-122.4219479,"1":-122.4216812,"2":-122.4223857,"3":-122.4320119,"4":-122.4331366,"5":-122.4307254,"6":-122.4180877,"7":-122.4172737,"8":-122.4277243,"9":-122.417422,"10":-122.4290387,"11":-122.4175698,"12":-122.4232558,"13":-122.424226,"14":-122.4226877,"15":-122.4159,"16":-122.4195185,"17":-122.4191912,"18":-122.4205881,"19":-122.4314951,"20":-122.4168763,"21":-122.4226685,"22":-122.4315398,"23":-122.4210561,"24":-122.4220741,"25":-122.4167145,"26":-122.4306476,"27":-122.4245402,"28":-122.4161381},"Cafe Name":{"0":"Four Barrel Coffee","1":"Muddy Waters","2":"Carlin\'s Cafe","3":"Peet\'s Coffee & Tea","4":"Nectar","5":"Cafe International","6":"Ma\'velous","7":"Starbucks","8":"Starbucks","9":"Flying Pig Bistro","10":"Church Street Cafe","11":"Anderson Bakery","12":"Blue Bottle Coffee","13":"mercury cafe","14":"gourmet and more","15":"Cumaica","16":"All Star Cafe","17":"Boston Cafe","18":"Javalencia Cafe","19":"Alamo Square Cafe","20":"Blue Bottle Coffee","21":"Stanza","22":"Duboce Park Cafe","23":"Eden Cafe","24":"Delessio Market & Bakery","25":"Gaslamp Cafe","26":"The Center SF Tea House","27":"Cafe la Vie","28":"Peet\'s Coffee"},"Street":{"0":"Valencia Street","1":"Valencia Street","2":"Valencia Street","3":"Market Street","4":"Haight Street","5":"Haight Street","6":"Market Street","7":"Market Street","8":"Market Street","9":"South Van Ness Avenue","10":null,"11":null,"12":null,"13":"Octavia Street","14":null,"15":"Mission Street","16":"Market Street","17":"Van Ness Avenue","18":"Market Street","19":null,"20":"Market Street","21":null,"22":"Sanchez Street","23":"Franklin Street","24":"Market Street","25":"Howard Street","26":"Fillmore Street","27":"Octavia Street","28":null}}'

# Wrap the literal in StringIO: passing a raw JSON string straight to
# pd.read_json is deprecated in recent pandas releases.
df = pd.read_json(StringIO(data))

# Mean coordinate of the sample cafes; kept as the fallback map center.
location = df['Latitude'].mean(), df['Longitude'].mean()

# The markers below come from the count data, not from the sample cafes.
# To plot the sample cafes instead, build the lists from
# df[["Latitude","Longitude"]] and df["Cafe Name"].
locationlist = info_df_merge[['LAT','LON']].values.tolist()
labels = info_df_merge["DocName"].values.tolist()

# Center the map on the points that are actually plotted; a map centered on
# the sample cafes would not show markers taken from info_df_merge.
if locationlist:
    map_center = [info_df_merge['LAT'].mean(), info_df_merge['LON'].mean()]
else:
    map_center = location

m = folium.Map(location=map_center, zoom_start=14)

# Plot at most this many markers; bounding by the list length avoids an
# IndexError when fewer rows are available.
max_markers = 20
for point in range(min(max_markers, len(locationlist))):
    popup = folium.Popup(labels[point], parse_html=True)
    folium.Marker(locationlist[point], popup=popup).add_to(m)

m

