In [33]:
import numpy as np
import math
import glob
import pandas as pd
import folium
from folium import plugins
import json
import datetime
import branca
# These lines set up the plotting functionality and formatting.
# import matplotlib
# matplotlib.use('Agg', warn=False)
# %matplotlib inline
# import matplotlib.pyplot as plots
# plots.style.use('fivethirtyeight')
# import warnings
# warnings.simplefilter(action="ignore", category=FutureWarning)


# 1. Preparing the Data

The repository https://opendata.charlottesville.org/ offers data on various utilities in the City of Charlottesville. The Property subsection itself has various datasets. Thus, it is important to first obtain only portions of each table that are relevant.

## 1.1 Reading/Modifying Sales Table

The Sales Table contains data about sales of particular parcels. It contains their addresses, date of sale, and amount of sale. Firstly, the Street Number and Street Address are combined for ease of use later on. 

In [2]:
sales = pd.read_csv('Data/Real_Estate__Sales_.csv')
sales['Combo'] = sales['StreetNumber'] + ' '+sales['StreetName'] 

A function is written to modify the SaleDate column of the dataframe. The time portion is removed since none of the dates appear to specify an actual time. The backslashes are replaced with dashes to better coincide with numpy's date objects.

In [3]:
def formatDate(row):  
    date = row['SaleDate']
    return date[:10].replace('/','-')

sales = sales[~pd.isna(sales.SaleDate)]
sales = sales.assign(SaleDate= sales.apply(formatDate,axis=1))

## 1.2 Merging Sales with Residential

The Residential datasheet contains a list of all the residential parcels of the area. This is important to filter out the non-residential parcels located in the Sales dataset. Merge the Sales table with the Residential datasheet. The join is done using the ParcelNumber from each dataframe.

In [4]:
# Filter out non-residential parcels
resid = pd.read_csv('Data/Real_Estate__Residential_Details_.csv',header=0)

salesResid = pd.merge(sales,resid.ParcelNumber,on="ParcelNumber")

## 1.3 Merging with Geocoded Addresses

The addresses in the table were then geocoded (found latitude/longitude coordinates using addresses) and located in a .csv file called 'coordinates.csv'. This was merged with the combined Residential Sales table. geoSalesResid is the table that will be used for subsequent sections as the proper changes have been made

In [5]:
geocoded = pd.read_csv('coordinates.csv')

geoSalesResid = pd.merge(salesResid,geocoded,left_on="Combo",right_on="ADDRESS")
geoSalesResid = geoSalesResid[geoSalesResid.SaleAmount > 100].drop(columns=["StreetName","StreetNumber","Combo"])
geoSalesResid

Unnamed: 0,RecordID_Int,ParcelNumber,SaleDate,SaleAmount,Unit,ADDRESS,LATITUDE,LONGITUDE
3,64,010008000,1984-08-31,175000,,0 BARRACKS RD,38.055077,-78.500401
5,227,020001000,1984-08-31,175000,,0 BARRACKS RD,38.055077,-78.500401
9,230,020002000,1984-08-31,175000,,0 BARRACKS RD,38.055077,-78.500401
10,230,020002000,1984-08-31,175000,,0 BARRACKS RD,38.055077,-78.500401
13,62,010006000,2003-01-31,545000,,2028 BARRACKS RD,38.050765,-78.498080
...,...,...,...,...,...,...,...,...
52770,56275,610318000,2001-07-20,89900,,110 MILFORD TER,38.018268,-78.470688
52771,56276,610318000,1999-02-18,93000,,110 MILFORD TER,38.018268,-78.470688
52773,56278,610318000,1999-10-21,81500,,110 MILFORD TER,38.018268,-78.470688
52774,56279,610318000,2017-03-07,148000,,110 MILFORD TER,38.018268,-78.470688


# 2. Finding Boundaries

The goal of this section is to recognize regions (neighborhoods) in Charlottesville that share similar housing prices. recentSales below represents non-zero sales that have occured after a certain date.

## 2.1 Introduction

In [6]:
def makeRecentSales(cutoffDate):
    return geoSalesResid[geoSalesResid.SaleDate > cutoffDate]

recentSales = makeRecentSales('2018-12-31')

The chooseColor function defines the sale cutoffs that will be used on the map. The makeMarker will take each row of recentSales and produce a marker based on the SaleAmount

In [7]:
def chooseColorSale(sale):
    if sale < 89000:
        return '#fef0d9'
    elif 89000 <= sale < 150000:
        return '#fdd49e'
    elif 150000 <= sale < 400000:
        return '#fdbb84'
    elif 400000 <= sale < 800000:
        return '#fc8d59'
    elif 800000 <= sale < 1000000:
        return '#e34a33'
    else:
        return '#b30000'

def addMarkersSale(row,currmap):
    line = row["SaleDate"][:4] + ": $" + str(row['SaleAmount'])
    folium.CircleMarker(location=[row['LATITUDE'],row['LONGITUDE']],
                  popup=line,radius=1.5,color=chooseColorSale(row['SaleAmount']),
                       fill_color=chooseColorSale(row['SaleAmount'])).add_to(currmap)
    return

A Folium map is created, centered on Charlottesville. Before regions can be created, the map should first be viewed with recent sales to determine any patterns. For speed, a map is created with the sales that have occured from 2019 onwards.

In [8]:
def generateSalesMap():
    m1 = folium.Map(location=[38.0293, -78.4767], zoom_start=13)
    recentSales.apply(addMarkersSale,axis=1,args=(m1,))
    return m1
generateSalesMap()

From the above map, it can be seen that there are exist regions that share SaleAmounts of similar magnitude. The webtool https://geojson.io/ offers a GUI-based method of producing JSON polygons for a map. The Folium package has a method of reading in these json files as a list of lists that represent the polygon by its vertices. The pandas read_json function gives a dataframe of this information, and is then removed of unnecessary information.

In [9]:
lay = json.load(open('regions.geojson'))
arr = []
for i in np.arange(len(lay['features'])):
    arr.append(lay['features'][i]['geometry']['coordinates'][0])
lay = pd.Series(arr)

## 2.2 Point in Polygon Algorithm

With the imported Json polygon layers, each point in the previous section should be grouped with each polygon. This is a point in polygon problem: determine whether or not a point is in a polygon defined by its vertices. 

A way to solve this problem is with ray-casting. Take a point and extend it "infinitely" along an arbitrary direction (it is now a ray). Count the number of times the point intersects a side of the polygon. If the number of ray intersections is even, then it is not in the polygon. If the number of ray intersections is odd, then it is in the polygon. 

![Solving PIP with Ray-casting](https://upload.wikimedia.org/wikipedia/commons/c/c9/RecursiveEvenPolygon.svg)


The first issue is, given two pairs of points (that define two line segments), determine if they intersect or not. The solution is to check the orientation of these points (intersecting line segments should have their respective points in between one point from the other line segment). A algorithm written by https://kite.com/python/answers/how-to-check-if-two-line-segments-intersect-in-python is used. The main function below is intersects(s1,s2) and it takes in two tuples.

In [10]:
def on_segment(p,q,r):
    if r[0] <= max(p[0], q[0]) and r[0] >= min(p[0], q[0]) and r[1] <= max(p[1], q[1]) and r[1] >= min(p[1], q[1]):
        return True
    else: 
        return False

def orientation(p,q,r):
    val = ((q[1] - p[1]) * (r[0] - q[0])) - ((q[0] - p[0]) * (r[1] - q[1]))
    if val == 0:
        return 0
    elif val > 0:
        return 1
    else:
        return -1

def intersects(s1,s2):
    p1,q1 =s1
    p2,q2 = s2
    
    o1 = orientation(p1,q1,p2)
    o2 = orientation(p1,q1,q2)
    o3 = orientation(p2,q2,p1)    
    o4 = orientation(p2,q2,q1)
    
    if o1 != o2 and o3 != o4:
        return True
    if o1 == 0 and on_segment(p1,q1,p2):
        return True
    if o2 == 0 and on_segment(p1, q1, q2):
        return True
    if o3 == 0 and on_segment(p2, q2, p1):
        return True
    if o4 == 0 and on_segment(p2, q2, q1):
        return True
    return False


Now the point in polygon algorithm can be applied for this scenario. The main function is pointinpolygons, which will be applied across the entire table with each row (point) as its argument. It will return a series of booleans showing whether or not the point is in each of the polygons in the Json file.

The makeray function receives a point and returns a sequence with two tuples inside that represent the ray. For this, each point is extended horizontally. Additionally, since all points reside in Charlottesville, the left/right longitude are the west-most/east-most points of the area, respectively. 

The pointinpolygon function is the individual function that is applied inside pointinpolygons. It creates a list of tuples that contain the line segments of the polygon. It is assumed that the Json file is configured so that the points can be connected from smallest to biggest (index 0 to index 1, index 1 to index 2, etc) until the very last which is connected back to index 0. It then counts the number of intersections as described above about the ray-casting. 

In [11]:
# defines a point, returns a series of whether or not point is in each JSON polygon
def pointinpolygons(row,layout):
    pt = (row['LATITUDE'],row['LONGITUDE'])
    rayseg = makeray(pt)
    
    foundin = layout.apply(pointinpoly,args=(rayseg,))
    if foundin[foundin].shape[0] == 0:
        return -1
    else:
        return foundin[foundin].index[0]
    

# produces a ray by extending a point horizontally depending on its longitude relative to Cville
def makeray(point):
    # Bounds for Longitude 
    leftb = -78.647930
    rightb = -78.411250
    midb = leftb + (rightb-leftb)/2

    if point[1] > midb:
        return (point,(point[0],leftb)) 
    else:
        return (point,(point[0],rightb))
    
# determines if the point is in or not in a single JSON polygon
def pointinpoly(polygons,raysegment):
    # Makes pairs of indices to form polygon edges (assumes that edges are defined as) from a polygon that has n sides
    # 0 to 1, 1 to 2, 2 to 3, etc... until the last index which would be n-1 to 0
    combo = []
    for i in np.arange(len(polygons)):
        if i < len(polygons) - 1:
            apair = (i,i+1)
        else:
            apair = (i,0)
        combo.append(apair)
    
    # Iterates through the list of pairs and indexes into the polygons variable which is a list of lists. It produces a 
    # a segment represented by two points and it is determined whether it intersects with the address.
    intersections = 0 
    for pair in combo:
        segment1 = ((polygons[pair[0]][1],polygons[pair[0]][0]), (polygons[pair[1]][1],polygons[pair[1]][0]))
        if intersects(segment1,raysegment):
            intersections +=1
            
    if intersections % 2 == 0:
        return False
    else:
        return True



The function is used to insert a column that assigns each point to its polygon region. If it is not in any of the polygons, then it is assigned -1. 

In [12]:
recentSales.insert(recentSales.shape[1],'Region',recentSales.apply(pointinpolygons,axis=1,args=(lay,)))

In [13]:
recentSales[recentSales.Region == -1]

Unnamed: 0,RecordID_Int,ParcelNumber,SaleDate,SaleAmount,Unit,ADDRESS,LATITUDE,LONGITUDE,Region
3863,4306,70026000,2019-12-20,766500,,0 IVY RD,38.045941,-78.516238,-1
6634,7377,170018400,2019-05-30,963418,,0 STADIUM RD,38.031103,-78.513688,-1
6640,7383,170018500,2019-05-30,963418,,0 STADIUM RD,38.031103,-78.513688,-1
6642,7385,170018600,2019-05-30,963418,,0 STADIUM RD,38.031103,-78.513688,-1
10438,11136,200259260,2019-02-06,600000,,101 KEENE CT,42.933692,-72.278141,-1
10447,11145,200259270,2019-02-06,600000,,103 KEENE CT,42.933692,-72.278141,-1
10452,11150,200259280,2019-02-06,600000,,105 KEENE CT,42.933692,-72.278141,-1
10456,11154,200259290,2019-02-06,600000,,107 KEENE CT,42.933692,-72.278141,-1
10462,11160,200259301,2019-02-06,600000,,109 KEENE CT,41.603221,-73.087749,-1
10469,11167,200259310,2019-02-06,600000,,108 KEENE CT,41.603221,-73.087749,-1


To verify the accuracy of the algorithm, the same table is used to create another map with the Json layers. Filtering out those without a region, there does not seem to be a point that lies outside the polygons. 

In [14]:
m2 = folium.Map(location=[38.0293, -78.4767], zoom_start=13)
testTab = recentSales[recentSales.Region>=-10].apply(addMarkersSale,axis=1,args=(m2,))
folium.GeoJson("regions.geojson", name='area').add_to(m2)
m2

## 2.3 Grouping the points

The pandas group function can be used to group all the points by region and determine the average sale amount within that region.

In [15]:
salesByRegion = recentSales.groupby(['Region']).mean().loc[:,"SaleAmount"][1:]
salesByRegion

Region
0     726663.500000
1     555646.567164
2     301864.511111
3     419690.450980
4     452944.389831
5     226035.066667
6     318778.543956
7     544918.114035
8     362383.852941
9     507147.279570
10    592000.000000
11    105000.000000
12    411716.850000
13    534903.600000
14    734040.090909
15    595328.205128
16    267457.722222
17    324666.666667
18    644990.759494
Name: SaleAmount, dtype: float64

A style function tells Folium how to format the JSON layers onto the map.

In [16]:
def stylefunction(x):  
    sale=x['properties']['SaleAverage']
    if sale < 89000:
        color = '#fef0d9'
    elif 89000<= sale < 150000:
        color = '#fdd49e'
    elif 150000 <= sale < 400000:
        color = '#fdbb84'
    elif 400000 <= sale < 800000:
        color = '#fc8d59'
    elif 800000 <= sale < 1000000:
        color = '#e34a33'
    else:
        color = '#b30000'
    return {'weight': 5, 'color': color,'fill': True, 'fillOpacity':0.5}

The generateMap function will do the following:

1. Use pointinpolygons function on entire salesTable, producing a series categorizing each parcel into a JSON region
2. Insert that series into the salesTable (points in no regions are given "-1")
3. Group the salesTable by the regions categorized and determine the mean
4. Load JSON file and update each layer with the calculated average
5. Create a Folium Map
6. Add the JSON regions and markers to map
7. Add the LayerControl to map

In [17]:
def generateMap(salesTable,lay):
    # insert a series, each parcel classified to a JSON region if possible
    salesTable.insert(salesTable.shape[1],'Region',salesTable.apply(pointinpolygons,axis=1,args=(lay,)))
    
    # group by json regions and find average sale amount in each region, ignoring the first since that is for "no regions"
    salesByRegion = salesTable.groupby(['Region']).mean().loc[:,"SaleAmount"]
    salesByRegion = pd.DataFrame({'Regions': pd.Series(data=np.arange(1,20)), 'SaleAmount': salesByRegion}).fillna(value=0)
    
    # load json file, and add the average as a property to each json region
    with open ("regions.geojson") as f:
        regionlayer = json.load(f)
    
    finalMap = folium.Map(location=[38.0293, -78.4767], zoom_start=13)
    folium.Choropleth(geo_data=regionlayer,name='Overlay',data=salesByRegion,columns=['Regions','SaleAmount'],
                    key_on='feature.properties.OBJECTID',fill_color='YlOrRd',legend_name='Sale Amount ($)').add_to(finalMap)
    
#     count = 0
#     for i in regionlayer['features']:
#         try:
#             i['properties']['SaleAverage'] = salesByRegion[count]
#         except:
#             i['properties']['SaleAverage'] = 0
#         count += 1

#     # produce a map
#     finalMap = folium.Map(location=[38.0293, -78.4767], zoom_start=13)
    
#     # add the json regions
#     folium.GeoJson(regionlayer, name='Regions',style_function=stylefunction).add_to(finalMap)
    
#     # add the markers
#     salesTable.apply(addMarkers,axis=1,args=(finalMap,))
    folium.LayerControl().add_to(finalMap)
#     finalMap.save("20years.html")
    return finalMap

generateMap(makeRecentSales('2020-01-01'),lay)

# 3. Sales History Aggregation

The section below observes which parcels have had multiple sales throughout the years.

In [18]:
# grpbysale = geoSalesResid.groupby(['ParcelNumber']).count().iloc[:,0]

# multisales = grpbysale[grpbysale > 1]
# totalParcelsNum = grpbysale.shape[0]
# multiParcelsNum = multisales.shape[0]
# multisales.sort_values(ascending=False)

A couple of functions are written to provide different ways of grouping the table and produce a sales history depending on conditions.
- "normal": used to clump up all listed sales under a possible parcel as a dictionary. 
- "byYear": adds a condition of only including sales after a specified year (set as a default parameter). 

In [19]:
def normal(parcel):
    temp = {}
    if parcel.shape[0] == 1:
        saledate = parcel.iloc[0,2]
        temp[saledate] = parcel.iloc[0,3]
    else:
        for i in np.arange(parcel.shape[0]):
            saledate = parcel.iloc[i,2]
            temp[saledate] = parcel.iloc[i,3]
    return temp

def byYear(parcel,year=2000):
    temp = {}
    if parcel.shape[0] == 1:
        saledate = parcel.iloc[0,2]
        if int(saledate[:4]) >= year:
            temp[saledate] = parcel.iloc[0,3]
    else:
        for i in np.arange(parcel.shape[0]):
            saledate = parcel.iloc[i,2]           
            if int(saledate[:4]) >= year:
                temp[saledate] = parcel.iloc[i,3]
    return temp

The "getSalesHistory" function will return a dictionary of the sales in the table depending on the desired form of filtering (see above) list of functions. Multi parameter means only parcels with multiple sale histories are returned.

In [20]:
def getSalesHistory(table, norm=True, multi=True):
    grouped = table.groupby('ParcelNumber')
    if norm:
        history = grouped.apply(normal).to_dict()
    else:
        temp = grouped.apply(byYear).to_dict()
        history = {}
        for parcel in temp:
            if len(temp[parcel]) > 1:
                history[parcel] = temp[parcel]
            elif not multi and len(temp[parcel]) == 1:
                history[parcel] = temp[parcel]
    return history

Below, a sale history dictionary is created. Norm is set to false so that only sales occuring after the year 1999 are included. Multi is set to True so that only parcels with more than one sale as well are included in addition to the previous condition. In short, these two conditions mean that parcels in the dictionary will:

- Each sale will be from Jan 1, 2000 and beyond
- Every parcel history will have two or more sales

In [21]:
saleHistory = getSalesHistory(geoSalesResid,norm=False)
saleHistory

{'010017000': {'2017-06-08': 636000, '2018-10-19': 1030000},
 '010017100': {'2000-09-14': 459500, '2005-11-10': 654750},
 '010019000': {'2002-05-23': 85000,
  '2006-07-07': 479500,
  '2003-03-03': 360000,
  '2001-11-01': 70000,
  '2008-08-07': 549000},
 '010020000': {'2010-08-04': 962500, '2001-01-24': 275000},
 '010024A00': {'2002-06-12': 125000,
  '2014-06-30': 940000,
  '2007-08-29': 895000},
 '010027000': {'2008-08-04': 483000, '2017-12-08': 695000},
 '010031000': {'2003-06-06': 425000, '2008-12-15': 545000},
 '010034000': {'2018-11-15': 430000, '2019-02-15': 470000},
 '010036000': {'2014-03-21': 695940, '2010-11-01': 389000},
 '010037000': {'2016-05-05': 740000, '2001-07-02': 345000},
 '010038000': {'2018-04-09': 685000, '2019-03-29': 1375000},
 '010039000': {'2007-07-13': 520000, '2007-05-25': 520000},
 '010041000': {'2004-03-03': 530000, '2019-06-06': 1350000},
 '010043000': {'2020-01-07': 680000,
  '2018-07-17': 660000,
  '2011-05-25': 315000,
  '2015-05-06': 590000},
 '0100450

Below are a collection of functions to determine properties of sale histories. The first, strictincr, determines if a given parcel's sale history has been strictly increasing. This means that, sorting sale amount from earliest to latest, the sale amounts will be increasing. 

The second, finalincr, determines if a given parcel's sale history has gone up via the first and last sales. This will return true as long as the latest sale is larger than the earliest.

In [22]:
def strictincr(saleHistory):
    strictinc = {}
    for parcel in saleHistory:
        temp = pd.DataFrame.from_dict(saleHistory[parcel],orient='index')
        temp = temp.sort_index(ascending=True)
        curr = 0
        datesOrd = temp.index.tolist()
        strictinc[parcel] = True
        for saleday in datesOrd:
            if curr < saleHistory[parcel][saleday]:
                curr =  saleHistory[parcel][saleday]
            else:
                strictinc[parcel] = False
                break
    return strictinc

def finalincr(saleHistory):
    finalinc = {}
    for parcel in saleHistory:
        temp = pd.DataFrame.from_dict(saleHistory[parcel],orient='index')
        temp = temp.sort_index(ascending=True)
        
        datesOrd = temp.index.tolist()
        finalinc[parcel]= True
        firstsale = saleHistory[parcel][datesOrd[0]]
        lastsale = saleHistory[parcel][datesOrd[len(datesOrd)-1]]
        if firstsale > lastsale:
            finalinc[parcel] = False
    return finalinc

In [23]:
strictTab = pd.DataFrame.from_dict(strictincr(saleHistory),orient='index',columns=['strictincr'])
finalTab = pd.DataFrame.from_dict(finalincr(saleHistory),orient='index',columns=['finalincr'])

In [24]:
print("Proportion of Parcels that increased in price every subsequent sale: "+ str(round(strictTab.sum()[0]/len(saleHistory),5)))
print("Proportion of Parcels that increased in price from first to last sale: "+ str(round(finalTab.sum()[0]/len(saleHistory),5)))

Proportion of Parcels that increased in price every subsequent sale: 0.62527
Proportion of Parcels that increased in price from first to last sale: 0.78365


In [25]:
def incrPercent(row,saleHistory):
    temp = pd.DataFrame.from_dict(saleHistory[row.name],orient='index')
    temp = temp.sort_index(ascending=True)
    datesOrd = temp.index.tolist()
    firstsale = saleHistory[row.name][datesOrd[0]]
    lastsale = saleHistory[row.name][datesOrd[len(datesOrd)-1]]
    val = (lastsale-firstsale)/firstsale * 100
    return val

def incrPercentDay(row,saleHistory):
    temp = pd.DataFrame.from_dict(saleHistory[row.name],orient='index')
    temp = temp.sort_index(ascending=True)
    datesOrd = temp.index.tolist()
    
    firstdate = datetime.datetime.strptime(datesOrd[0], "%Y-%m-%d").date()
    lastdate = datetime.datetime.strptime(datesOrd[len(datesOrd)-1], "%Y-%m-%d").date()
    
    diff = (lastdate-firstdate).days
    firstsale = saleHistory[row.name][datesOrd[0]]
    lastsale = saleHistory[row.name][datesOrd[len(datesOrd)-1]]      
    return (lastsale-firstsale)/firstsale * 100/diff

In [26]:
percentTab = pd.DataFrame(finalTab[finalTab.finalincr])
percentTab.insert(percentTab.shape[1],'perc',finalTab[finalTab.finalincr].apply(incrPercent,axis=1,args=(saleHistory,)))
percentTab.insert(percentTab.shape[1],'percNorm',finalTab[finalTab.finalincr].apply(incrPercentDay,axis=1,args=(saleHistory,)))

In [27]:
geoSalesResidPercent = pd.merge(geoSalesResid,percentTab,left_on='ParcelNumber', right_index=True).drop_duplicates('ParcelNumber')
geoSalesResidPercent

Unnamed: 0,RecordID_Int,ParcelNumber,SaleDate,SaleAmount,Unit,ADDRESS,LATITUDE,LONGITUDE,finalincr,perc,percNorm
35,86,010017000,2017-06-08,636000,,1893 WESTVIEW RD,38.050093,-78.499944,True,61.949686,0.124397
42,93,010017100,2000-09-14,459500,,1895 WESTVIEW RD,38.050473,-78.499333,True,42.491839,0.022566
47,99,010019000,2002-05-23,85000,,1890 WESTVIEW RD,38.050086,-78.500280,True,684.285714,0.276927
54,106,010020000,2010-08-04,962500,,1888 WESTVIEW RD,38.049628,-78.500349,True,250.000000,0.071860
73,125,010024A00,2002-06-12,125000,,1876 WESTVIEW RD,38.048490,-78.499412,True,652.000000,0.148148
...,...,...,...,...,...,...,...,...,...,...,...
52746,56251,610313000,2001-03-23,83000,,100 MILFORD TER,38.018602,-78.470619,True,116.746988,0.074790
52751,56256,610314000,2002-07-31,117900,,102 MILFORD TER,38.018528,-78.470644,True,6.022053,0.001819
52756,56261,610315000,2005-11-01,174900,,104 MILFORD TER,38.018473,-78.470647,True,6.346484,0.007005
52764,56269,610317000,2009-12-14,102128,,108 MILFORD TER,38.018362,-78.470659,True,19.947517,0.189976


In [54]:
def chooseColorPercNorm(sale):
    if sale < 0.01:
        return '#fef0d9'
    elif 0.01 <= sale < 0.05:
        return '#fdd49e'
    elif 0.05 <= sale < 0.10:
        return '#fdbb84'
    elif 0.10 <= sale < 0.25:
        return '#fc8d59'
    elif 0.25 <= sale < 0.5:
        return '#e34a33'
    else:
        return '#b30000'
    
def chooseColorPerc(sale):
    if sale < 5:
        return '#fef0d9'
    elif 5 <= sale < 10:
        return '#fdd49e'
    elif 20 <= sale < 50:
        return '#fdbb84'
    elif 50 <= sale < 100:
        return '#fc8d59'
    elif 100 <= sale < 200:
        return '#e34a33'
    else:
        return '#b30000'

def addMarkersPerc(row,currmap,norm,group):

    # not normalized to day (perc)
    if not norm:
        line = str(row['perc'])
        marker = folium.CircleMarker(location=[row['LATITUDE'],row['LONGITUDE']],
                      popup=line,radius=1.5,color=chooseColorPerc(row['perc']),
                           fill_color=chooseColorPerc(row['perc']))
    # normalized to day (percNorm)
    else:
        line = str(row['percNorm'])
        marker = folium.CircleMarker(location=[row['LATITUDE'],row['LONGITUDE']],
              popup=line,radius=1.5,color=chooseColorPercNorm(row['percNorm']),
                   fill_color=chooseColorPercNorm(row['percNorm']))
    group.add_child(marker)
    return




In [58]:

def generateMapPercent(table,lay):
    # insert a series, each parcel classified to a JSON region if possible
    table.insert(table.shape[1],'Region',table.apply(pointinpolygons,axis=1,args=(lay,)))
    
    # load json file, and add the average as a property to each json region
    with open ("regions.geojson") as f:
        regionlayer = json.load(f)    
     
    finalMap = folium.Map(location=[38.0293, -78.4767], zoom_start=13)

    # group by json regions and find average sale amount in each region, ignoring the first since that is for "no regions"
    percByReg = table.groupby(['Region']).mean().loc[:,"perc"]
    percByReg = pd.DataFrame({'Regions': pd.Series(data=np.arange(1,lay.shape[0]+1)), 'perc': percByReg}).fillna(value=0)


    folium.Choropleth(geo_data=regionlayer,name='Percent Incr',data=percByReg,columns=['Regions','perc'],
                      key_on='feature.properties.OBJECTID',fill_color='YlOrRd',
                      legend_name='Percent Increase').add_to(finalMap)
    
    percNByReg = table.groupby(['Region']).mean().loc[:,"percNorm"]
    percNByReg = pd.DataFrame({'Regions': pd.Series(data=np.arange(1,lay.shape[0]+1)), 'percNorm': percNByReg}).fillna(value=0)
    folium.Choropleth(geo_data=regionlayer,name='Percent Incr/Day',data=percNByReg,columns=['Regions','percNorm'],
                      key_on='feature.properties.OBJECTID',fill_color='YlOrRd',
                      legend_name='Percent Increase/Days',show=False).add_to(finalMap)
  

    
    percMarkers = folium.FeatureGroup(name='Perc Incr Markers')
    table.apply(addMarkersPerc,axis=1,args=(finalMap,False,percMarkers,))
    finalMap.add_child(percMarkers)
    percMarkersNorm = folium.FeatureGroup(name='Perc Incr/Day Markers', show=False)
    table.apply(addMarkersPerc,axis=1,args=(finalMap,True,percMarkersNorm,))
    finalMap.add_child(percMarkersNorm)  
    
    folium.LayerControl().add_to(finalMap)

    
#     count = 0
#     for i in regionlayer['features']:
#         i['properties']['percNorm'] = percentByRegion[count]
#         count += 1

#     # produce a map
#     tempmap = folium.Map(location=[38.0293, -78.4767], zoom_start=13)
    
#     # add the json regions
#     folium.GeoJson(regionlayer, name='Regions',style_function=stylefunctionPercent).add_to(tempmap)
    
#     # add the markers
#     table.apply(addMarkersPerc,axis=1,args=(tempmap,))
#     folium.LayerControl().add_to(tempmap)
    finalMap.save("percent.html")
    return 

generateMapPercent(geoSalesResidPercent.copy(),lay)
#generateMapPercent(geoSalesResidPercent,lay)
#generateMap(makeRecentSales('2020-01-01'),lay)

In [29]:
# def makeMarker(row,salesDict):
#     parcelnumber = row['ParcelNumber']
#     sales = 'Sales: \n'
#     salesRecord = salesDict[parcelnumber]
#     for saledate in salesRecord:
#         sales += saledate + '  $' + str(salesRecord[saledate])
#         sales += '\n'
    
#     folium.Marker(location=[row['LATITUDE'],row['LONGITUDE']],popup=sales).add_to(m)
#     return

# m = folium.Map(location=[38.0293, -78.4767], zoom_start=13)
# parcels = pd.DataFrame(np.fromiter(saleHistory.keys(),dtype='<U9'))
# filtered = pd.merge(parcels,geoSalesResidFilterSale,left_on=0,right_on="ParcelNumber").drop(columns=[0]).drop_duplicates('ParcelNumber')
# filtered.iloc[:100].apply(makeMarker,axis=1,args=(saleHistory,))
# m
