### Data Collection Project
    by Elex Hill
    
This IPython Notebook describes how I scraped a website, built a GeoDataFrame from the scraped data, and created a visual representation of that data. 

In [1]:
import json
import os
import time
import urllib2

import lxml
from lxml import html
import requests
import cssselect
import pandas
import geopandas
import shapely
import shapely.geometry
import fiona
import fiona.crs
import folium

# Download the City of Olympia parks listing and parse it into an lxml tree.
url = "http://olympiawa.gov/city-services/parks/parks-and-trails.aspx"
# NOTE(review): a custom User-Agent is sent, presumably because the site
# rejects urllib2's default one -- confirm before removing.
req = urllib2.Request(url, headers={'User-Agent' : "Magic Browser"})
# The timeout keeps the notebook from hanging forever on an unresponsive server
con = urllib2.urlopen(req, timeout=30)
try:
    doc_text = con.read()
finally:
    # Always release the connection, even when the read fails
    con.close()
doc = html.fromstring(doc_text)
# Resolve relative links in the page against the page's own URL
doc.make_links_absolute(url)


### Scraping

This section creates a list of lists describing the parks, with one inner list per park holding its name, address, and amenities. The structure looks like this:

parks = [[name, addr, ammen], ...]


In [28]:
# Looping through the table rows of the webpage to populate the parks list.
# Each entry of parks is [name, addr, ammen].

# Whitespace run that separates the fields inside a row's text content
FIELD_SEP = "\r\n            "
# Position in parks of the row whose tr holds two entries (see the fix below)
MERGED_ROW_INDEX = 11

parks = [] #Initializing parks list
for row in doc.cssselect("div tr"):
    curr = row.text_content()
    if "(Closed)" in curr: #Excludes parks that are closed
        continue
    temp = curr.strip().split(FIELD_SEP, 1)
    if len(temp) < 2: #Excludes the first tr that is the title of the table
        continue
    # The amenities are read from the alt text of the images in the row.
    # Images with a missing or empty alt are skipped so they cannot break
    # the string concatenation.
    alts = [img.get("alt") for img in row.cssselect("img")]
    ammen = ", ".join([alt for alt in alts if alt]) or "None"
    parks.append([temp[0], temp[1], ammen])

# Must fix weird section where two entries are in same tr: the address field
# of that row still contains a second name and address, which replace the
# row's own name and address. The fix is only applied when the row exists and
# really has the extra separators, so a changed page layout does not raise.
if len(parks) > MERGED_ROW_INDEX:
    merged = parks[MERGED_ROW_INDEX]
    pieces = merged[1].split(FIELD_SEP, 1)
    if len(pieces) > 1:
        tail = pieces[1].strip().split(FIELD_SEP, 1)
        if len(tail) > 1:
            merged[0] = tail[0]
            merged[1] = tail[1]

# Basic print of the information currently gathered
# (the parenthesised single-argument form prints the same text as the
# Python 2 print statement)
for info in parks:
    print(info[0] + ": " + info[1])
    print("   " + info[2])


ARTESIAN COMMONS: 415 4th Avenue E
   None
BIGELOW: 1220 Bigelow St NE
   Picnic Tables, Open Field, Restrooms, Playground, Swings, Picnic Shelter, Basketball, Public Art
BIGELOW SPRINGS: 930 Bigelow Ave NE
   None
BURRI: 2415 Burbank Ave NW
   Picnic Tables, Open Field, Swings, Basketball
COOPER CREST: 3600 20th Ave NW
   Hiking
DECATUR WOODS: 1015 Decatur Ave SE
   Picnic Tables, Open Field, Restrooms, Playground, Picnic Shelter, Public Art
EAST BAY WATERFRONT: 313 East Bay Dr NE
   Picnic Tables
EVERGREEN: 1445 Evergreen Pk Dr SW
   Picnic Tables, Open Field, Swings, Bocce Court
FRIENDLY GROVE: 2316 Friendly Grove Rd NE
   Picnic Tables, Open Field, Restrooms, Playground, Swings, Picnic Shelter, Basketball, Public Art, Tennis Court, Skate Park
GARFIELD NATURE TRAIL: 600 Rogers St NW
   Hiking
GRASS LAKE: 814 Kaiser Rd NW
   Hiking, Natural Area
HARRY FAIN'S LEGION: 1115 20th Ave SE
   Picnic Tables, Playground, Swings, Picnic Shelter
HERITAGE PARK FOUNTAIN: 330 5th Ave SW
   Public 

### Geocoding

This section uses the OpenCage API to geocode each park's address. The latitude and longitude it returns are added to each entry of the parks list, which is then turned into a GeoDataFrame and exported as a shapefile. 

In [29]:
# Endpoint of the OpenCage geocoding API (HTTPS, so the key is not sent in
# clear text).
api_url = "https://api.opencagedata.com/geocode/v1/json"
# The API key is a credential, so it is read from the environment instead of
# being stored in the notebook.
api_key = os.environ.get("OPENCAGE_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENCAGE_API_KEY environment variable to your OpenCage API key")

# Cycling through the parks list, calling the api once per park, and adding
# the latitude and longitude of the first result to that park's entry.
# Parks the api cannot locate are reported and left out of the dataframe.
geocoded = []
for info in parks:
    # Drop coordinates left behind by an earlier run so the cell can be re-run
    del info[3:]
    search = info[1] + ", Olympia, Washington" #Know info[1] is the address, and all of these parks in Olympia
    # requests builds and url-encodes the query string from params
    response = requests.get(api_url, params={'q': search, 'key': api_key}, timeout=30)
    response.raise_for_status() # Fail loudly on HTTP errors (bad key, quota, ...)
    results = response.json()['results']
    if results:
        coord = results[0]['geometry']
        info.append(coord['lat'])
        info.append(coord['lng'])
        geocoded.append(info)
    else:
        print("No geocoding result for " + info[0] + " (" + info[1] + "); skipping")
    # NOTE(review): one-second pause between calls, presumably to stay within
    # the API's rate limit -- confirm against the plan in use.
    time.sleep(1)

# Creating dataframe of just the parks that were geocoded
parks_df = pandas.DataFrame(geocoded, columns=['name', 'addr', 'ammen', 'lat', 'long'])

# Creating geometry column for GeoDataFrame; points are (x, y) = (long, lat)
coord_list = zip(parks_df['long'], parks_df['lat'])
geom_list = [shapely.geometry.Point(CoordinateTuple) for CoordinateTuple in coord_list] #Taken from Geopandas/Folium class example
geom_gs = geopandas.GeoSeries(geom_list)

# Creating GeoDataFrame; EPSG:4326 is the lat/long coordinate reference system
parks_gdf = geopandas.GeoDataFrame(parks_df, geometry=geom_gs)
parks_gdf.crs = fiona.crs.from_epsg(4326)

# Saving the file as a shapefile
parks_gdf.to_file('olympiaParks.shp')

parks_gdf.head()



Unnamed: 0,name,addr,ammen,lat,long,geometry
0,ARTESIAN COMMONS,415 4th Avenue E,,47.045099,-122.897637,POINT (-122.8976373 47.0450994)
1,BIGELOW,1220 Bigelow St NE,"Picnic Tables, Open Field, Restrooms, Playgrou...",47.03787,-122.9007,POINT (-122.9007 47.03787)
2,BIGELOW SPRINGS,930 Bigelow Ave NE,,47.050232,-122.890045,POINT (-122.8900453 47.0502321)
3,BURRI,2415 Burbank Ave NW,"Picnic Tables, Open Field, Swings, Basketball",47.065253,-122.930605,POINT (-122.9306053 47.0652526)
4,COOPER CREST,3600 20th Ave NW,Hiking,47.063176,-122.941918,POINT (-122.941918 47.0631764)


### Present

This section takes the GeoDataFrame built above and uses folium to create a visual representation of the data. 

In [40]:
# Creating the base of the map
center_lat = 47.0424
center_long = -122.8932
zoom = 13
parks_map = folium.Map(location=[center_lat, center_long], zoom_start=zoom, tiles="Stamen Terrain")

# Color ramp from yellow to dark green, indexed by the number of amenities a
# park offers (index 0 is used for parks with none)
colors = ['#fcfc1b', '#c7d110', '#b2f727','#5fff02', '#56e802', '#4bcc02', '#3ead00', '#359101', '#2a7201', '#1f5400', '#163a00', '#1f3014']

# Adding the points to the map
for row in parks_gdf.itertuples():
    # itertuples puts the index at position 0, so the columns start at 1
    name, ammen, lat, lng = row[1], row[3], row[4], row[5]
    # The amenities string is comma separated; 'None' marks a park without any
    number = 0 if ammen == 'None' else ammen.count(',') + 1
    # Clamp to the last color so a park with more amenities than there are
    # colors does not raise an IndexError
    shade = colors[min(number, len(colors) - 1)]
    icon = folium.Icon(color='white', icon_color=shade, icon='tree', prefix='fa')
    popup = folium.Popup(html=name + ': Ammenities offered: ' + ammen) #Giving each point a popup
    folium.features.Marker(location=[lat, lng], popup=popup, icon=icon).add_to(parks_map)

parks_map