In [1]:
import pandas as pd
import numpy as np
import shapefile as shp
import matplotlib.pyplot as plt
from shapely.geometry.polygon import LinearRing, Polygon

In [34]:
path_to_parcels = "./data/parcels/property.shp"
path_to_subdiv = "./data/subdivisions/SUBDIVISIONS.shp"#the thought is that subdivisions have neighborhood maps
path_to_roads = "./data/roads/ISRN_Roads.shp"
path_to_citybounds = "./data/citybounds/MunicipalBoundaries.shp"
path_to_countybounds = "./data/countybounds/COUNTYBOUNDARYSHORELINE.shp"
path_to_water = "./data/water2/wsr.shp"

In [3]:
# A function that plots a group of polygons based on their index
def plotPoly(index_list, file_object, color_choice):
    #index_list = DataFrame_of_desired_shapes.index
    for index in index_list:
        index_shape = file_object.shape(index)
        if len(index_shape.parts) > 1:
            for i in range(0,len(index_shape.parts)-1):
                part_points = index_shape.points[index_shape.parts[i]:index_shape.parts[i+1]]
                sub_plot.add_patch(Polygon(part_points, closed = True, color = color_choice))
                #plt.plot([x[0] for x in part_points], [x[1] for x in part_points],color = color_choice)
        else:
            points = index_shape.points
            #sub_plot.add_patch(Polygon(points, closed = True, color = color_choice))
            plt.plot([x[0] for x in index_shape.points], [x[1] for x in index_shape.points], color = color_choice)


In [4]:
# A function that creates an list of indeces that meet a certain criteria
def createDataFrameIndexList(pandas_DataFrame, list_of_criteria_tuples):
    x = pandas_DataFrame
    try:
        for criteria_tuple in list_of_criteria_tuples:
            if len(criteria_tuple) < 3:
                x = x[x[criteria_tuple[0]] == criteria_tuple[1]]
            else:
                x = x[(x[criteria_tuple[0]] == criteria_tuple[1]) | (x[criteria_tuple[0]] == criteria_tuple[2])]
    except KeyError:
        print("KeyError: " + criteria_tuple[0] + " is not a key")
    if len(x) == 0:
        print("No records match these criteria")
        return
    else:
        return x.index


In [15]:
#this uses the shape library to create a properly labeled DataFrame from a shape file's records
def createDataFrameFromShpFile(shape_file):
    field_names = [x[0] for x in shape_file.fields]
    records = pd.DataFrame(shape_file.records())
    records.columns = field_names[1:len(field_names)]
    return records
    

In [13]:
subdiv_file = shp.Reader(path_to_subdiv)
subdiv_shape_rec = subdiv_file.records()

In [16]:
subdiv_fields = [x[0] for x in subdiv_file.fields]
subdiv_fields = subdiv_fields[1:len(subdiv_fields)] #removing deletion flag
subdiv_records = pd.DataFrame(subdiv_shape_rec)
subdiv_records.columns = subdiv_fields

In [17]:
#County and City Lines
countybounds_file = shp.Reader(path_to_countybounds)
citybounds_file = shp.Reader(path_to_citybounds)
countybounds_record = createDataFrameFromShpFile(countybounds_file)
citybounds_record = createDataFrameFromShpFile(citybounds_file)

bunc_county_idx = int(countybounds_record[countybounds_record.NAME == "BUNCOMBE"].index[0])
bunc_cities_idx = citybounds_record[citybounds_record.CNTY_1 == "BUNCOMBE"].index

In [39]:
#Roads 
road_file = shp.Reader(path_to_roads)
road_records = createDataFrameFromShpFile(road_file)

# the roads of Class 1, 2, or 3 are the big ones
bunc_big_roads = road_records[(road_records.County == "BUNCOMBE") & road_records.Ser_Class.isin(1,2,3)]
bunc_roads_idx = bunc_big_roads.index

TypeError: isin() takes 2 positional arguments but 4 were given

In [38]:
road_records.Ser_Class.unique()

array([4, 5, 3, 8, 2, 1, 7])

In [35]:
# # Water Records
# water_file = shp.Reader(path_to_water)
# water_records = createDataFrameFromShpFile(water_file)

In [None]:
#Parcels / Property
prop_file = shp.Reader(path_to_parcels)
prop_record = createDataFrameFromShpFile(prop_file)

In [274]:
#prop_shps = prop_file.shapes() #TAkes a long time


In [111]:
# Property class list : http://gis.buncombecounty.org/classcode.htm
residential_records = prop_records[prop_records.CLASS == '100'] # 100 is for residential
#residential_records['land_acre_value'] = residential_records['LANDVALUE'] / residential_records['ACREAGE']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [299]:
residential_records_nonan = residential_records[np.isnan(residential_records.land_acre_value) == False]
residential_records_nonan = residential_records_nonan[residential_records_nonan.CITYNAME == "ASHEVILLE"]
res_index = [int(i) for i in residential_records_nonan.index]

In [334]:
# 931 Parks
# 100 Residential
# 822 Water Storage
# 320 Undeveloped Farmland
# 180 Multiple Residences
# 311 Residential Lot
# 170 Manufactured Home site
# 405 Leasehold/Commercial
# 312 Non-Dwelling Improve
# 650 Government
# 365 Government/Exempt/Vacant

water_idx = createDataFrameIndexList(water_records, [("BASIN","French Broad-Holston" )])
farmland_idx = createDataFrameIndexList(prop_records, [('CITYNAME',"ASHEVILLE"),("CLASS","320")])
commercial_idx = createDataFrameIndexList(prop_records, [('CITYNAME',"ASHEVILLE"),("CLASS","405")])
multires_idx = createDataFrameIndexList(prop_records, [('CITYNAME',"ASHEVILLE"),("CLASS","180")])
gov_idx = createDataFrameIndexList(prop_records, [('CITYNAME',"ASHEVILLE"),("CLASS","650", "365")])
park_idx = createDataFrameIndexList(prop_records, [('CITYNAME',"ASHEVILLE"),("CLASS","931")])


In [335]:
asheville_records = prop_records[prop_records.CITYNAME == "ASHEVILLE"]

In [74]:
#Histogram of acre values
logrange = range(6,16,1)
regrange = range(0,100000,1000)
plt.hist((residential_records_nonan.land_acre_value), bins = regrange, color = 'r')
plt.title("Histogram of Land Value per Acre in Buncombe County")
plt.ylabel("Count")
plt.show()

In [113]:
################################################################################################################
################################################################################################################
from matplotlib.collections import PolyCollection
from matplotlib.patches import Ellipse, Polygon
import matplotlib.colors as colors
import matplotlib.cm as cmx # colormap
import matplotlib as mpl



In [115]:
minPPA = min(residential_records_nonan.land_acre_value)
maxPPA = max(residential_records_nonan.land_acre_value)


In [344]:
fig = plt.figure()
sub_plot = fig.add_subplot(111)
lonmin, latmin, lonmax, latmax = water_file.bbox # For whole county


plotPoly(water_idx, water_file, "blue")
plt.axis((lonmin, lonmax, latmin, latmax))
#plt.title("Asheville Residential Land Value Heat Map")
plt.show()

In [336]:
############# PLOTTING ###############

cNorm  = colors.Normalize(vmin=0, vmax= 300000)
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap='YlOrRd')
bunc_shape = countybounds_file.shape(bunc_county_idx)
ashe_shape = citybounds_file.shape(3)
#lonmin, latmin, lonmax, latmax = bunc_shape.bbox # For whole county
#lonmin, latmin, lonmax, latmax = ashe_shape.bbox # For Asheville
lonmin, latmin, lonmax, latmax = 915000, 660000 , 975000, 715000 # just Asheville

fig = plt.figure()
sub_plot = fig.add_subplot(111)

# Adding county lines
sub_plot.add_patch(Polygon(bunc_shape.points, closed = True, ec = "black", fill = False))

# Adding Parks
plotPoly(multires_idx, prop_file, "red")
plotPoly(gov_idx, prop_file, "gray")
plotPoly(farmland_idx, prop_file, "#A3FFC2")
plotPoly(water_idx, water_file, "blue")


## Adding city limits
for city_index in bunc_cities_idx: # the Asheville city lines have some problems
    city_shape = citybounds_file.shape(city_index)
    if len(city_shape.parts) > 1:
        for i in range(0,len(city_shape.parts)-1):
            part_points = city_shape.points[city_shape.parts[i]:city_shape.parts[i+1]]
            plt.plot([x[0] for x in part_points], [x[1] for x in part_points],color = 'green', alpha = .533)
    else:
        plt.plot([x[0] for x in city_shape.points], [x[1] for x in city_shape.points], color = 'green', alpha = .533)

## Adding roads
for road_index in bunc_roads_idx:
    road_shape = road_file.shape(road_index)
    #sub_plot.add_patch(Polygon(road_shape.points, ec = "blue", fill = False, closed = False))
    plt.plot([x[0] for x in road_shape.points], [x[1] for x in road_shape.points], color = '#bebebe', linewidth = 1.2)

# Adding properties 
for index in res_index[0:1000]:
    shape = prop_shps[index]
    points = shape.points

    #record = prop_records[index,index +1]
    ppa = prop_records.LANDVALUE[index] / prop_records.ACREAGE[index]
    colorVal = scalarMap.to_rgba(ppa)
    sub_plot.add_patch(Polygon(points, closed = True, color = colorVal))

plt.figtext((lonmax - (lonmax - lonmin)/ 2), (latmax - (latmax-latmin)/2), "ASheville", color ='black',size = 1000)
    
    
sub_plot.xaxis.set_visible(False)
sub_plot.yaxis.set_visible(False)
plt.axis((lonmin, lonmax, latmin, latmax))
plt.title("Asheville Residential Land Value Heat Map")
plt.show()
   

In [246]:
(lonmax - (lonmax - lonmin)/ 2)

945000.0

In [298]:
biggie = residential_records_nonan[residential_records_nonan.land_acre_value ==residential_records_nonan.land_acre_value.max()]

In [302]:
biggie.ADDRESS +" " + biggie.CITYNAME

72179    85 WALKER TOWN RD BLACK MOUNTAIN
dtype: object

In [320]:
residential_records.loc[:,'land_acre_value'] = residential_records['LANDVALUE'] / residential_records['ACREAGE']

#residential_records.loc[:,'PIN']
#res_rec_nonan = residential_records[-np.isnan(residential_records.land_acrevalue)] # these don't have correct entries to calculate acre value

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [97]:
katy_PIN = '9638915756'
katy_house = records_df[records_df.PIN == katy_PIN]

In [136]:
katy_house.NEIGHBORHO

24031    R4ED
Name: NEIGHBORHO, dtype: object

In [14]:
#residential_records.NEIGHBORHO.value_counts()



In [39]:
idx_priced = fields.index("PRICED")
idx_tmark = fields.index("TOTALMARKE")
idx_appval = fields.index("APPRAISEDV")
idx_taxval = fields.index("TAXVALUE")

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55],
           dtype='int64')

In [74]:
fields.pop(0)

'PIN'

In [75]:
fields

['PINEXT',
 'OWNER',
 'NMPTYPE',
 'TAXYEAR',
 'CONDOUNIT',
 'CONDOBUILD',
 'DEEDBOOK',
 'DEEDPAGE',
 'PLATBOOK',
 'PLATPAGE',
 'SUBNAME',
 'SUBLOT',
 'SUBBLOCK',
 'SUBSECT',
 'UPDATEDATE',
 'HOUSENUMBE',
 'NUMBERSUFF',
 'DIRECTION',
 'STREETNAME',
 'STREETTYPE',
 'TOWNSHIP',
 'ACREAGE',
 'ACCOUNTNUM',
 'DEEDDATE',
 'STAMPS',
 'INSTRUMENT',
 'REASON',
 'COUNTY',
 'CITY',
 'FIREDISTRI',
 'SCHOOLDIST',
 'CAREOF',
 'ADDRESS',
 'CITYNAME',
 'STATE',
 'ZIPCODE',
 'CLASS',
 'IMPROVED',
 'EXEMPT',
 'PRICED',
 'TOTALMARKE',
 'APPRAISEDV',
 'TAXVALUE',
 'LANDUSE',
 'NEIGHBORHO',
 'LANDVALUE',
 'BUILDINGVA',
 'IMPROVEMEN',
 'APPRAISALA',
 'STATE_ROUT',
 'STATE_RO_1',
 'PROPCARD',
 'Shape_area',
 'Shape_len']

In [41]:
shapeRec = prop_file.shapeRecord(500)


In [42]:
shapeRec.record[idx_priced]

1438900

In [43]:
shapeRec.record[idx_tmark]

1438900

In [38]:
shapeRec.record[idx_appval]

1438900

In [11]:
points = shapeRec.shape.points


In [28]:
poly = Polygon(points)

In [29]:
fields

[('DeletionFlag', 'C', 1, 0),
 ['PINNUM', 'C', 50, 0],
 ['PIN', 'C', 15, 0],
 ['PINEXT', 'C', 50, 0],
 ['OWNER', 'C', 50, 0],
 ['NMPTYPE', 'N', 10, 0],
 ['TAXYEAR', 'C', 50, 0],
 ['CONDOUNIT', 'C', 50, 0],
 ['CONDOBUILD', 'C', 50, 0],
 ['DEEDBOOK', 'C', 50, 0],
 ['DEEDPAGE', 'C', 50, 0],
 ['PLATBOOK', 'C', 50, 0],
 ['PLATPAGE', 'C', 50, 0],
 ['SUBNAME', 'C', 50, 0],
 ['SUBLOT', 'C', 50, 0],
 ['SUBBLOCK', 'C', 50, 0],
 ['SUBSECT', 'C', 50, 0],
 ['UPDATEDATE', 'C', 50, 0],
 ['HOUSENUMBE', 'C', 50, 0],
 ['NUMBERSUFF', 'C', 50, 0],
 ['DIRECTION', 'C', 50, 0],
 ['STREETNAME', 'C', 50, 0],
 ['STREETTYPE', 'C', 50, 0],
 ['TOWNSHIP', 'C', 50, 0],
 ['ACREAGE', 'N', 19, 8],
 ['ACCOUNTNUM', 'C', 50, 0],
 ['DEEDDATE', 'C', 50, 0],
 ['STAMPS', 'N', 19, 8],
 ['INSTRUMENT', 'C', 50, 0],
 ['REASON', 'C', 50, 0],
 ['COUNTY', 'C', 50, 0],
 ['CITY', 'C', 50, 0],
 ['FIREDISTRI', 'C', 50, 0],
 ['SCHOOLDIST', 'C', 50, 0],
 ['CAREOF', 'C', 50, 0],
 ['ADDRESS', 'C', 50, 0],
 ['CITYNAME', 'C', 50, 0],
 ['STATE

In [50]:
for idx, item in enumerate(fields):
    x = 0
    try:
        item.index['ACREAGE']
    except TypeError:
        x = 0
    if x == 1:
        print(idx)

hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello


ValueError: 'lkas' is not in list

In [15]:
fig = pyplot.figure()
ax = fig.add_subplot(111)


In [19]:
ax.plot()

[]