# AmeniDC
## The cost of amenities in the District of Columbia

In [55]:
# Standard library
import pprint
import re
import sqlite3
from collections import namedtuple
from urlparse import urljoin

# Third-party
import geojson
import requests
import simplejson as json
from bs4 import BeautifulSoup
from geojson import Feature, Point, FeatureCollection
from requests import Request, Session

# Pretty-printer used further down to display nested dictionaries.
pp = pprint.PrettyPrinter(indent=0)

#### Read in all API keys

In [42]:
# Read the Google API credentials from a JSON file kept outside version
# control and keep the server key for the API requests made later on.
with open("../secrets/google_secrets.json.nogit") as fh:
    secrets = json.load(fh)
key = secrets['server_api_key']

# OpenData.DC HTML scraping for metadata

In [65]:
details_url = 'https://www.arcgis.com/sharing/rest/content/items/2acc75ccdd954267acecb8713b2b800a/info/metadata/metadata.xml?format=default&output=html'

resp = requests.get(details_url)
print resp.status_code
print resp.url
soup = BeautifulSoup(resp.text)


200
https://www.arcgis.com/sharing/rest/content/items/2acc75ccdd954267acecb8713b2b800a/info/metadata/metadata.xml?format=default&output=html


[]

In [132]:
con = sqlite3.connect('./property_sale_points.db')
con.execute("drop table if exists attributes")
con.execute("create table attributes (code text, codedesc text)")

tags = soup.find_all("em",text = re.compile('Attribute Label:'))
code_desc = []
for t in tags:
  dt_tag = t.find_parent("dl")
  if dt_tag:
    text_list = dt_tag.find_all(text=True)
    tmp_cd = [i for i in text_list if i!='\n' 
            and not re.search('Attribute Label:',i)
            and not re.search('Attribute Definition:',i) ]
    if len(tmp_cd) == 2:
      code_desc.append((tmp_cd[0],tmp_cd[1]))
    elif len(tmp_cd) == 1:
      pass
      code_desc.append((tmp_cd[0],''))
    else:
      pass
    
# con.rollback() is called after the with block finishes with an exception, the
# exception is still raised and must be caught
try:
  with con:
    con.executemany("insert into attributes values (?,?)", list(set(code_desc)))
except sqlite3.IntegrityError:
    print "couldn't add values to the table"


Object `with` not found.


# OpenData.DC API

Get the property sale points from http://opendata.dc.gov/datasets/2acc75ccdd954267acecb8713b2b800a_28
and store them in a SQLite database.

In [5]:
# GeoJSON export of the dataset, restricted to a bounding box given in
# spatial reference 102100 / 3857 (see the geometry parameter).
search_url = (
    'http://opendata.dc.gov/datasets/2acc75ccdd954267acecb8713b2b800a_28.geojson'
    '?where=&geometry={"xmin":-8574493.39676032,"ymin":4708075.768737953,'
    '"xmax":-8571206.60454397,"ymax":4708792.365878146,'
    '"spatialReference":{"wkid":102100,"latestWkid":3857}}'
)

resp = requests.get(search_url)


In [6]:
# Show the HTTP status of the dataset request before the body is decoded.
resp.status_code

200

In [17]:
# Wrap the decoded GeoJSON response in a geojson FeatureCollection.
# NOTE(review): the whole decoded payload is passed as the `features`
# argument, which appears to be why the next cell has to index
# ['features']['features'] -- confirm whether this extra nesting is intended
# before changing it, since that cell depends on it.
featcoll_sales = FeatureCollection(resp.json())

In [28]:
# Inspect only the first sale feature for now; the complete list is
# available as featcoll_sales['features']['features'] for later iteration.
sale_features = featcoll_sales['features']['features']
feat = sale_features[0]

[-76.9993210638, 38.9073176001]


In [38]:
# Pretty-print the attribute dictionary of the selected sale feature.
pp.pprint(feat['properties'])

{u'ABTLOTCODE': u' ',
u'ACCEPTCODE': u'MARKET',
u'ADDRESS1': u'5009 BRAMPTON PKWY',
u'ADDRESS2': u' ',
u'AMTDUE1': 8277.06,
u'AMTDUE2': 9228.93,
u'AMTDUE3': 9353.08,
u'ANNUALTAX': 16554.12,
u'ARN': u'328',
u'ASRNAME': u'ROBERT GONZALES',
u'ASSESSMENT': 1003280,
u'BASEBUILD': 191480,
u'BASELAND': 52020,
u'CAPCURR': 1003280,
u'CAPPROP': 1004220,
u'CAREOFNAME': u' ',
u'CITYSTZIP': u'ELLICOTT CITY, MD 21043-7424',
u'CLASS3': u' ',
u'CLASS3EX': 0,
u'CLASSTYPE': u' ',
u'COOPUNITS': 0,
u'CY1BAL': 8277.06,
u'CY1COLL': 0,
u'CY1CR': 0.0,
u'CY1FEE': 0,
u'CY1INT': 0,
u'CY1PEN': 0,
u'CY1TAX': 8277.06,
u'CY1TOTDUE': 8277.06,
u'CY1TXSALE': u' ',
u'CY1YEAR': u'2014 FIRST HALF',
u'CY2BAL': 0.0,
u'CY2COLL': 0,
u'CY2CR': 0,
u'CY2FEE': 0,
u'CY2INT': 0,
u'CY2PEN': 0,
u'CY2TAX': 0.0,
u'CY2TOTDUE': 0.0,
u'CY2TXSALE': u' ',
u'CY2YEAR': u' ',
u'DEEDDATE': u'2012-11-08T00:00:00.000Z',
u'DEEDSTATUS': u' ',
u'DELCODE': u'N',
u'DUEDATE1': u'20140331',
u'DUEDATE2': u'20140430',
u'DUEDATE3': u'20140530',
u'EXTRACTDA

In [None]:
con.execute("drop table if exists attr_descs")
con.execute("create table attr_descs (code text, codedesc varchar unique)")

attr_codes_codedescs = [('PROPTYPE',('USECODE','Property Use Code')]

# Successful, con.commit() is called automatically afterwards
with con:
    con.execute("insert into person(firstname) values (?)", ("Joe",))

# con.rollback() is called after the with block finishes with an exception, the
# exception is still raised and must be caught
try:
    with con:
        con.execute("insert into person(firstname) values (?)", ("Sarah",))
except sqlite3.IntegrityError:
    print "couldn't add Joe twice"

# OpenStreetMap API

Generate geospatial coordinates to query on the unit disk. Use the OpenStreetMap (Nominatim) API to reverse-geocode the coordinates.

In [None]:
# Reference point whose address is looked up; the later Google queries are
# centred on the same coordinates.
base_lat = 38.904722
base_lng = -77.016389

# Ask the reverse-geocoding endpoint for a JSON description of that point.
rev_geocode_url = "http://nominatim.openstreetmap.org/reverse"
geocode_payload = dict(format='json', lat=base_lat, lon=base_lng)
resp_address = requests.get(rev_geocode_url, params=geocode_payload)
resp_address.json()

# Google Maps Places API

Here, places are queried for nearby subway stations

In [None]:


search_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
# Add this here because key must go last
search_url += '?location='+str(base_lat)+','+str(base_lng)
search_payload = {"key":key,
                 "radius":1000,"types":"subway_station"}
req = Request('GET', search_url,params=search_payload)
prepared = req.prepare()
print prepared.url

session = Session()
resp = session.send(prepared)
resp_json = resp.json()
print resp.status_code
print json.dumps(resp_json,indent=4, separators=(',', ': '))

# Google Distance Matrix API

Compute distances to subway stations

In [None]:
# store results as list of dicts
results = resp_json['results'] 
Place = namedtuple('Place', ['place_id', 'name', 'lat','lng'], verbose=False)
stations = [Place(r['place_id'],r['name'],
                  r['geometry']['location']['lat'],
                  r['geometry']['location']['lng']) 
            for r in results]
for s in stations:
    print s.place_id, s.name, s.lat, s.lng


In [None]:
search_url = "https://maps.googleapis.com/maps/api/distancematrix/json"
# Add this here because key must go last
search_url += '?origins='+str(base_lat)+','+str(base_lng)
search_url += '&destinations='+str(stations[0].lat)+','+str(stations[0].lng)

print search_url


In [None]:
search_payload = {"key":key}
req = Request('GET', search_url,params=search_payload)
prepared = req.prepare()
print prepared.url

session = Session()
resp = session.send(prepared)
resp_json = resp.json()
print resp.status_code
print json.dumps(resp_json,indent=4, separators=(',', ': '))