### Mapping Washington Wildlife and Recreation Program (WWRP) Funding
### Using Beautiful Soup and Folium

##### Corrine Armistead    -    Feb. 22nd 2018

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
# projects to scrape, pulled from project search, based on all WWRP funded projects from PRISM

filename = 'C:\\Users\\CorrineArmistead\\Desktop\\Armistead_WebScraping\\WWRPfunded.csv'
projectsdf = pd.read_csv(filename)

# Creating project list
proj_list = projectsdf['Project_ID'].values.tolist()

# New shortlist to test projects - 1349 total, testing with first 50
list50 = proj_list[:50]

list50[:5]

['17-1206', '17-1144', '16-2084', '16-2074', '16-2072']

In [3]:
# Scraping data

# Lists for each project attribute
proj_name = []
funding = []
description = []
location = []

# list1 data
for num in list50:
    url = 'https://secure.rco.wa.gov/PRISM/search/ProjectSnapshot.aspx?ProjectNumber='+ num
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html5lib")
    soup.prettify()
    
    # list for each attribute
    name = []
    fund = []
    desc = []
    loc = []
    
    # Extracting project attributes
    a = soup.find(class_="PRISMPageBanner")
    a2 = a.span.text
    b = soup.find(id="gdvProjectSnapshotFunding_lblProgramFundingAmt_0").get_text()
    c = soup.find(id="lblProjectDescription").get_text()
    
    # Adding to list
    name.append(a2)
    fund.append(b)
    desc.append(c)
    
    # Looking for the text "Coordinates" and find the text of the next tag
    for tr in soup.find_all(text="Coordinates: "):
        h = tr.parent
        gp = h.parent
        w = gp.find_next_sibling()
        w2 = w.find_next_sibling()
        l = w2.span.text
        loc.append(l)
    
    # Adding attributes to larger list
    proj_name = proj_name + name
    funding = funding + fund
    description = description + desc
    location = location + loc

In [4]:
# Creating a dataframe from lists of attributes
df = pd.DataFrame({'ProjectName':proj_name, 'WWRPfunding':funding, 'Description':description, 'Website':url, 'Coordinates':location})

df.head(5)

Unnamed: 0,Coordinates,Description,ProjectName,WWRPfunding,Website
0,47.13918637-123.05408818,Forterra proposes to acquire a conservation ea...,Little Skookum Inlet Forest,"$350,000",https://secure.rco.wa.gov/PRISM/search/Project...
1,45.81183445-122.36250787,This project will conserve approximately 360 a...,Rock Creek Forest,"$350,000",https://secure.rco.wa.gov/PRISM/search/Project...
2,48.35010170-120.09793250,The Town of Twisp in Okanogan County will use ...,Twisp Sports Complex Renovation - Ph 1,"$250,000",https://secure.rco.wa.gov/PRISM/search/Project...
3,47.81133944-122.38565635,The City of Edmonds will use this grant to rem...,Edmonds Waterfront Development,"$500,000",https://secure.rco.wa.gov/PRISM/search/Project...
4,47.60215090-118.38695031,The Department of Fish and Wildlife will use t...,Phantom Butte Grassland Restoration,"$65,000",https://secure.rco.wa.gov/PRISM/search/Project...


In [5]:
# viewing coordinates column
df[['Coordinates']].head(5)

Unnamed: 0,Coordinates
0,47.13918637-123.05408818
1,45.81183445-122.36250787
2,48.35010170-120.09793250
3,47.81133944-122.38565635
4,47.60215090-118.38695031


In [6]:
# adding a space in order to split coordinates
df.Coordinates = df.Coordinates.str.replace("-", ' -')

# Split delimited values in a DataFrame column into two new columns
df['lat'], df['long'] = zip(*df['Coordinates'].apply(lambda x: x.split(' ', 1)))


# Converting data types
df['lat'] = pd.to_numeric(df['lat'], errors='coerce')
df['long'] = pd.to_numeric(df['long'], errors='coerce')

# Testing
df[["lat", "long"]].head(5)

Unnamed: 0,lat,long
0,47.139186,-123.054088
1,45.811834,-122.362508
2,48.350102,-120.097932
3,47.811339,-122.385656
4,47.602151,-118.38695


In [7]:
# Importing additional packages for mapping

import shapely, shapely.geometry, fiona, fiona.crs, pandas, folium, geopandas

# Creating geometry from coordinates

coords = zip(df['long'], df['lat'])

geometry = [shapely.geometry.Point(c) for c in coords]

geolist = geopandas.GeoSeries(geometry)

geoPoints = geopandas.GeoDataFrame(
        df,
        geometry=geolist)

In [8]:
# Merging columns to create new popup column - need to revise to add html tags

df['popup'] = 'Project Name: ' + df['ProjectName'] + '   WWRP Funding: ' + df['WWRPfunding'] + '    Desc.: ' + df['Description']

In [9]:
# Point locations for adding to map
locations = df[['lat', 'long']]
locationlist = locations.values.tolist()

In [10]:
# Testing display of all points

map1 = folium.Map(location=[47, -120], zoom_start=6, tiles='Mapbox Bright')

labels = df["popup"].values.tolist()

for point in range(0, len(locationlist)):
    popup = folium.Popup(labels[point], parse_html=True)
    folium.Marker(locationlist[point], popup=popup).add_to(map1)

map1

In [11]:
# Map with clustering

from folium.plugins import MarkerCluster

map2 = folium.Map(location=[47, -120], zoom_start=6)

marker_cluster = MarkerCluster().add_to(map2)
labels = df["popup"].values.tolist()

for point in range(0, len(locationlist)):
    popup = folium.Popup(labels[point], parse_html=True)
    folium.Marker(locationlist[point], popup=popup).add_to(marker_cluster)
map2

In [14]:
# NOT USED
# Not needed but another way to create a list of points

df['lat'] = df['lat'].astype(float)
df['long'] = df['long'].astype(float)

df = df[['lat', 'long']]

dfmap = [[row['lat'],row['long']] for index, row in df.iterrows()]

In [12]:
# Heatmap example

import geopandas
from folium import plugins
from folium.plugins import HeatMap

map3 = folium.Map(location=[47, -120], zoom_start=6, tiles='Mapbox Bright')

HeatMap(locationlist).add_to(map3)

map3

### Now working on cloropleth map based on number of points in counties

In [13]:
# Importing shp file

filename2 = "C:\\Users\\CorrineArmistead\\Desktop\\Armistead_WebScraping\\WACountyBoundary\\WACountyBoundary.shp"

counties = geopandas.read_file(filename2)

In [14]:
# Testing polys with folium

map = folium.Map(location=[40, -95], zoom_start=4, tiles='Mapbox Bright')

# Adding an overlay of the 'world' data from above by converting to json
Overlay = folium.features.GeoJson(counties.to_json()).add_to(map)

map

In [15]:
# Setting coordinate system for points

print(geoPoints.crs)

geoPoints.crs = {'init' :'epsg:4326'}

print(geoPoints.crs)

None
{'init': 'epsg:4326'}


In [16]:
# Joining points to counties

projects_county = geopandas.sjoin(geoPoints, counties, how="inner", op='within')

# Testing
projects_county.head(5)

Unnamed: 0,Coordinates,Description,ProjectName,WWRPfunding,Website,lat,long,geometry,popup,index_right,countyfp,name
0,47.13918637 -123.05408818,Forterra proposes to acquire a conservation ea...,Little Skookum Inlet Forest,"$350,000",https://secure.rco.wa.gov/PRISM/search/Project...,47.139186,-123.054088,POINT (-123.05408818 47.13918637),Project Name: Little Skookum Inlet Forest WW...,22,45,Mason
11,47.43193239 -122.87802726,The Washington State Parks and Recreation Comm...,Inholdings and Adjacent Properties 2016,"$1,000,000",https://secure.rco.wa.gov/PRISM/search/Project...,47.431932,-122.878027,POINT (-122.87802726 47.43193239),Project Name: Inholdings and Adjacent Properti...,22,45,Mason
1,45.81183445 -122.36250787,This project will conserve approximately 360 a...,Rock Creek Forest,"$350,000",https://secure.rco.wa.gov/PRISM/search/Project...,45.811834,-122.362508,POINT (-122.36250787 45.81183445),Project Name: Rock Creek Forest WWRP Funding...,5,11,Clark
2,48.35010170 -120.09793250,The Town of Twisp in Okanogan County will use ...,Twisp Sports Complex Renovation - Ph 1,"$250,000",https://secure.rco.wa.gov/PRISM/search/Project...,48.350102,-120.097932,POINT (-120.0979325 48.3501017),Project Name: Twisp Sports Complex Renovation ...,23,47,Okanogan
3,47.81133944 -122.38565635,The City of Edmonds will use this grant to rem...,Edmonds Waterfront Development,"$500,000",https://secure.rco.wa.gov/PRISM/search/Project...,47.811339,-122.385656,POINT (-122.38565635 47.81133944),Project Name: Edmonds Waterfront Development ...,30,61,Snohomish


In [17]:
# Calculating the frequency of projects per county
frequency = pd.DataFrame(projects_county.name.value_counts().reset_index())
frequency.columns = ['name', 'count']

# merging dataframes
merged = pd.merge(counties, frequency, on='name')

# Testing
merged.head(5)

Unnamed: 0,countyfp,name,geometry,count
0,9,Clallam,POLYGON ((-123.5206495706549 48.22911230114539...,1
1,11,Clark,POLYGON ((-122.7706463427466 45.78183455754655...,1
2,17,Douglas,"POLYGON ((-119.2121964395016 47.8358178399146,...",3
3,19,Ferry,POLYGON ((-118.8365524086637 48.82697837586864...,1
4,29,Island,POLYGON ((-122.4250858169752 48.27525832237854...,3


In [19]:
# Choropleth Map - # of projects funded per county

map4 = folium.Map(location=[47.5, -120], zoom_start=6, tiles='Mapbox Bright')

map4.choropleth(geo_data=merged, data=merged,
                columns=['name', 'count'],
                threshold_scale=[0, 2, 4, 6, 8, 10],
                key_on='feature.properties.name',
                fill_color='YlGn', fill_opacity=0.8, line_opacity=0.2,
                legend_name='# of projects funded')
map4


In [None]:
# Choropleth map - $ of funding by county

map = folium.Map(location=[40, -95], zoom_start=4, tiles='Mapbox Bright')
