In [None]:
# Load the `sql` IPython extension, which provides the %sql magic used below.
%load_ext sql

# Open the database connection that the following %sql query runs against.
# NOTE(review): this connection string embeds a username, password and host
# directly in the notebook. Consider moving it out of the file (for example
# into an environment variable) and rotating the password -- confirm with
# whoever owns this database.
%sql mysql://prod:nerd@52.2.153.189/rental_nerd

# Fetch every property joined to its transaction log through the
# property_transactions link table, restricted to transaction_type = 'sales'.
# The two id columns are aliased as "property_id" and "transaction_log_id" in
# addition to the table.* expansions.
# NOTE(review): both tables are expanded with `*`, so any column name they
# share (at least `id`) presumably appears twice in the result -- verify how
# downstream column access behaves.
result = %sql (SELECT \
properties.id as "property_id", \
property_transaction_logs.id as "transaction_log_id", \
properties.*, \
property_transaction_logs.* \
FROM \
properties, \
property_transactions, \
property_transaction_logs \
WHERE \
properties.id = property_transactions.property_id AND \
property_transactions.property_transaction_log_id = property_transaction_logs.id AND \
property_transactions.transaction_type = 'sales') 
        
# Convert the result set into the DataFrame (`data`) used by the later cells.
data = result.DataFrame()

In [None]:
# imports
from time import gmtime, strftime
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
from mpl_toolkits.basemap import Basemap
import fiona
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import fiona
import shapely as shapely
from geopandas import GeoSeries, GeoDataFrame
from shapely.geometry import Point
from shapely.geometry import asShape
# this allows plots to appear directly in the notebook
%matplotlib inline

# Save the raw query result to a CSV in the working directory, named with
# today's local date as YYYYMMDD followed by " sales.csv", then preview the frame.
snapshot_name = strftime("%Y%m%d") + " sales.csv"
result.csv(filename=snapshot_name)
data.head()

In [None]:
# Represent every house as a Point built in (longitude, latitude) order, and
# store the resulting GeoSeries on the frame as the 'latlong' column.
house_points = [
    Point(lon, lat)
    for lon, lat in zip(data['longitude'], data['latitude'])
]
pts = GeoSeries(house_points)
data['latlong'] = pts

In [None]:
# Tag every listing with the name of the neighborhood polygon that contains it.
# Listings that fall inside no polygon keep the sentinel string 'None', which
# the outlier filter later in the notebook uses to drop them.
shaped_neighborhood = ['None'] * len(data)
latlong = data['latlong'].values

# Alternative boundary set; its name field is "neighborho" instead of "nbrhood":
#with fiona.open('data/Planning_Neighborhoods_4326/planning_hoods_4326.shp') as fiona_collection:
with fiona.open('data/Realtor_Neighborhoods_4326/hoods_4326.shp') as fiona_collection:
    for hood in fiona_collection:
        hood_name = hood["properties"]["nbrhood"]
        # Single-argument print(...) prints the same text on Python 2 and 3,
        # whereas the bare print statement is a SyntaxError on Python 3.
        print("checking for listings in: " + hood_name)
        # Use Shapely to create the polygon
        shape = asShape(hood['geometry'])

        for row, point in enumerate(latlong):  # point is (longitude, latitude)
            # First match wins: a listing that already has a neighborhood is
            # never tested against later polygons.
            if shaped_neighborhood[row] != 'None':
                continue

            if shape.contains(point):
                shaped_neighborhood[row] = hood_name

data['shaped_neighborhood'] = shaped_neighborhood
data.head()

In [None]:
# calculate distance to the nearest park
#
# For every listing, record the smallest shapely distance to any park polygon
# and that park's name. Distances are in the shapefile's coordinate units
# (presumably degrees, given the "4326" in the path -- TODO confirm).

# Parks whose polygon area (same coordinate units, squared) is below this
# threshold are skipped.
MIN_PARK_AREA = 0.0000005

dist_to_park = [999999] * len(data)  # fill with dummy values to be filtered out
closest_park = ['None'] * len(data)

latlong = data['latlong'].values

with fiona.open('data/RPD_Parks_4326/parks_4326.shp') as park_collection:

    for park in park_collection:
        park_name = park["properties"]["map_park_n"]

        # Use Shapely to create the polygon
        shape = asShape(park['geometry'])
        park_area = shape.area

        # print(...) with one argument and repr() replace the Python 2-only
        # print statement and backtick syntax; the printed text is unchanged.
        if park_area < MIN_PARK_AREA:
            print(park_name + ' is too small at ' + repr(park_area))
            continue

        print("checking for proximity to: " + park_name + ' with area: ' + repr(park_area))

        for row, point in enumerate(latlong):  # point is (longitude, latitude)
            dist = shape.distance(point)
            # Keep the running minimum and the park it belongs to.
            if dist < dist_to_park[row]:
                dist_to_park[row] = dist
                closest_park[row] = park_name


data['dist_to_park'] = dist_to_park
data['closest_park'] = closest_park
data.head()

In [None]:
# Filter out outliers and unusable rows. A listing is kept only if it:
#   - was matched to a neighborhood polygon (shaped_neighborhood != 'None'),
#   - has a non-zero sqft of at most 2,500,
#   - has a non-zero price of at most 2,000,000,
#   - has at most 4 bedrooms and at most 3 bathrooms,
#   - has a finite longitude.

# print(...) with one argument and repr() replace the Python 2-only print
# statement and backtick syntax; the printed text is unchanged.
print("Entries before filter: " + repr(len(data)))

keep_mask = (
    (data.shaped_neighborhood != 'None')
    & (data.sqft <= 2500)
    & (data.price <= 2000000)
    & (data.price != 0)
    & (data.bedrooms <= 4)
    & (data.bathrooms <= 3)
    & (data.sqft != 0)
    & (np.isfinite(data.longitude))
)
data = data[keep_mask]


print("Entries after filter: " + repr(len(data)))

In [None]:
# Draw the parks as filled green polygons and overlay every remaining listing
# as a translucent red dot.
fig, ax = plt.subplots(figsize=(12, 12))

# Create the Basemap
event_map = Basemap(
    projection='merc',
    resolution='h',
    epsg=2227,
    lat_0=37.7, lon_0=-122.4,            # Map center
    llcrnrlon=-122.55, llcrnrlat=37.7,   # Lower left corner
    urcrnrlon=-122.35, urcrnrlat=37.85,  # Upper right corner
)

# Optional background imagery:
#event_map.arcgisimage(service='World_Shaded_Relief', xpixels = 800, verbose= True)

# Optional neighborhood outlines:
#event_map.readshapefile(
#    'data/Realtor_Neighborhoods_4326/hoods_4326', 'SF', color='black', zorder=2)

# Load the park outlines without drawing them (color='none'); they become
# available as event_map.parks.
event_map.readshapefile(
    'data/RPD_Parks_4326/parks_4326', 'parks', color='none', zorder=2)

# Fill in the parks in green: one closed Polygon patch per outline.
patches = [
    Polygon(np.array(outline), closed=True)
    for outline in event_map.parks
]
ax.add_collection(PatchCollection(patches, facecolor= 'green', zorder=2))

# Draw the points on the map:
for house in data.latlong:
    # Project (longitude, latitude) into the map's x, y coordinates.
    x, y = event_map(house.x, house.y)
    # x2,y2 = event_map(park_pt.x, park_pt.y)
    event_map.plot(x, y, 'ro', alpha=0.3)
    #event_map.drawgreatcircle(park_pt_x, park_pt.y, house.x, house.y, del_s=10, color='black')

plt.show()



