In [61]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import seaborn as sns
import zipfile
%matplotlib inline
plt.style.use('fivethirtyeight')
sns.set()

sns.set_context("talk")
import re

# Ensure that Pandas shows at least 100 characters in columns
pd.set_option('max_colwidth', 100)
pd.set_option('display.max_columns', None)

from pathlib import Path

# SQL
import sqlalchemy

In [62]:
# Connect to a local SQLite database. SQLite is a simple file oriented database management "library." 
# in the following we launch a SQLite database stored in the .sqlite file.
dbfile = Path("ZMainBldg.sqlite")
#if dbfile.exists():
    #dbfile.unlink()

sqlite_uri = "sqlite:///ZMainBldg.sqlite"
sqlite_engine = sqlalchemy.create_engine(sqlite_uri)

In [63]:
# Check tables
sqlite_engine.table_names()

['alameda', 'main_bldg', 'sample']

In [64]:
# Work on sample table for now. sample contains a Simple Random Sample of main_bldg of size 100,000
sql_expr = """
SELECT * FROM sample;
"""
sample = pd.read_sql(sql_expr, sqlite_engine)
sample.head()

Unnamed: 0,RowID,ImportParcelID,FIPS,State,County,ValueCertDate,ExtractDate,Edition,ZVendorStndCode,AssessorParcelNumber,DupAPN,ParcelSequenceNumber,ParcelNumberTypeStndCode,RecordSourceStndCode,RecordTypeStndCode,ConfidentialRecordFlag,PropertyAddressSourceStndCode,PropertyHouseNumber,PropertyHouseNumberExt,PropertyStreetPreDirectional,PropertyStreetName,PropertyStreetSuffix,PropertyStreetPostDirectional,PropertyFullStreetAddress,PropertyCity,PropertyState,PropertyZip,OriginalPropertyFullStreetAddress,OriginalPropertyAddressLastline,PropertyBuildingNumber,PropertyZoningDescription,PropertyZoningSourceCode,CensusTract,TaxIDNumber,TaxAmount,TaxYear,TaxDelinquencyFlag,TaxDelinquencyAmount,TaxDelinquencyYear,TaxRateCodeArea,LegalLot,LegalLotStndCode,LegalOtherLot,LegalBlock,LegalSubdivisionCode,LegalSubdivisionName,LegalCondoProjectPUDDevName,LegalBuildingNumber,LegalUnit,LegalSection,LegalPhase,LegalTract,LegalDistrict,LegalMunicipality,LegalCity,LegalTownship,LegalSTRSection,LegalSTRTownship,LegalSTRRange,LegalSTRMeridian,LegalSecTwnRngMer,LegalRecordersMapReference,LegalDescription,LegalNeighborhoodSourceCode,NoOfBuildings,LotSizeAcres,LotSizeSquareFeet,LotSizeFrontageFeet,LotSizeDepthFeet,LotSizeIRR,LotSiteTopographyStndCode,LoadID,PropertyAddressMatchcode,PropertyAddressUnitDesignator,PropertyAddressUnitNumber,PropertyAddressCarrierRoute,PropertyAddressGeoCodeMatchCode,PropertyAddressLatitude,PropertyAddressLongitude,PropertyAddressCensusTractAndBlock,PropertyAddressConfidenceScore,PropertyAddressCBSACode,PropertyAddressCBSADivisionCode,PropertyAddressMatchType,PropertyAddressDPV,PropertyGeocodeQualityCode,PropertyAddressQualityCode,SubEdition,BatchID,BKFSPID,RowID2,NoOfUnits,OccupancyStatusStndCode,PropertyCountyLandUseDescription,PropertyCountyLandUseCode,PropertyLandUseStndCode,PropertyStateLandUseDescription,PropertyStateLandUseCode,BuildingOrImprovementNumber,BuildingClassStndCode,BuildingQualityStndCode,BuildingQualityStndCodeOriginal,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,BathSourceStndCode,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FirePlaceTypeStndCode,FireplaceNumber,WaterStndCode,SewerStndCode,MortgageLenderName,TimeshareStndCode,Comments,StoryTypeStndCode
0,83062198-B6D5-E511-80C1-3863BB43AC67,14064107,6053,CA,MONTEREY,,92012,20,BKF,701-007-009,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,4-000,,,,,,,,,,,,,,,,,,,,,,,TR 933 THE PINE ACRES 006-372-01 TIME SHARE UNIT 7 WEEK 9,,,,,,,,,402108522,N,,,,N,,,,,,,,,,,0,334415,-1,83062198-B6D5-E511-80C1-3863BB43AC67,,,RESIDENTIAL TIME SHARES,2L,RR000,,,1,,,,,,,,,,,,,,,,,,S,,,,,,,,,,,,,,,,
1,6A3C4527-41D5-E511-80C1-3863BB43AC67,19594047,6083,CA,SANTA BARBARA,2015-07-01,72015,23,BKF,117-414-020,,1,,,,,,515.0,,,WINDSOR,ST,,515 WINDSOR ST,SANTA MARIA,CA,93458.0,,,,,,,,2281.6,2014.0,,,,3-000,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.15,6534.0,,,,,249155007,Y,,,C040,Y,34.9368,-120.442,60830024.021018,,,,,,,,0,316551,-1,6A3C4527-41D5-E511-80C1-3863BB43AC67,0.0,O,SINGLE FAMILY RESIDENTIAL,0100,RR101,,,1,,,,,,1962.0,,,,6.0,4.0,,2.0,,0.0,,,C,0.0,,,CE,,,,,,1.0,,,,,,
2,5ABD4B1D-41D5-E511-80C1-3863BB43AC67,10579851,6029,CA,KERN,,82009,17,BKF,004-201-06,,1,,,,,,2701.0,,,TRUXTUN,AVE,,2701 TRUXTUN AVE,BAKERSFIELD,CA,93301.0,,,,,,,4201067.0,1601.03,2009.0,,,,1-001,,,,,,,,,,,,,,,,,,,,,,,"CITY ABBR CTYBK, BLOCK 415, CITY LOT PTN",,,,,,,,,360377339,Y,,,C001,Y,35.3733,-119.033,60290017.004004,,,,,,,,0,328609,-1,5ABD4B1D-41D5-E511-80C1-3863BB43AC67,,,SINGLE FAMILY RESIDENCE,0101,RR101,,,1,,,,,,1945.0,1945.0,,1.0,7.0,3.0,,,,,,1.0,S,,,,FL,,,,,,,,,,,,
3,F0902D88-BCD5-E511-80C1-3863BB43AC67,17573577,6071,CA,SAN BERNARDINO,,72003,11,BKF,2328-172-05,,1,,,,,,115.0,,,OLYMPIC,,,115 OLYMPIC,BIG BEAR LAKE,CA,92315.0,,,,,,,,,,,,,17-001,,,,,,,,,,,,,15.0,,,BIG BEAR LAKE,,,,,,,PARCEL MAP NO 326 PAR NO 2,,,,42001.0,,,,,440251301,N,,,,N,,,,,,,,,,,0,336885,-1,F0902D88-BCD5-E511-80C1-3863BB43AC67,,O,SINGLE FAMILY RESIDENCE,0510,RR101,,,1,,,,,,1998.0,,,2.0,11.0,4.0,,,,,,2.5,S,,CS,,CE,CE,,,,,1.0,,,,,,
4,9FDC3A20-8ED5-E511-80C1-3863BB43AC67,14756165,6059,CA,ORANGE,,102002,10,BKF,644-302-24,,1,,,,,,787.0,,,DIAMOND,ST,,787 DIAMOND ST,LAGUNA BEACH,CA,92651.0,,,,,,,,,,,,,05-003,,,,,,,,,,,,,,,,,,,,,,,TR 764 LOT 170 SLY 20 FT WLY 20 FT,,,,400.0,,,,,405873153,Y,,,C004,Y,33.5326,-117.764,60590626.203015,,,,,,,,0,324049,-1,9FDC3A20-8ED5-E511-80C1-3863BB43AC67,,,SINGLE FAMILY RESIDENTIAL,122,RR101,,,1,,,,,,,,,,,,,,,,,,S,,,,,,,,,,,,,,,,


In [65]:
# 100000
len(sample)

100000

In [66]:
# Number of sample Alameda records
len(sample[sample['County'] == 'ALAMEDA'])

3854

In [32]:
# Check sample Alameda
sql_expr = """
SELECT * FROM alameda;
"""
alameda_s = pd.read_sql(sql_expr, sqlite_engine)
alameda_s.head()

Unnamed: 0,RowID,ImportParcelID,FIPS,State,County,ValueCertDate,ExtractDate,Edition,ZVendorStndCode,AssessorParcelNumber,DupAPN,ParcelSequenceNumber,ParcelNumberTypeStndCode,RecordSourceStndCode,RecordTypeStndCode,ConfidentialRecordFlag,PropertyAddressSourceStndCode,PropertyHouseNumber,PropertyHouseNumberExt,PropertyStreetPreDirectional,PropertyStreetName,PropertyStreetSuffix,PropertyStreetPostDirectional,PropertyFullStreetAddress,PropertyCity,PropertyState,PropertyZip,OriginalPropertyFullStreetAddress,OriginalPropertyAddressLastline,PropertyBuildingNumber,PropertyZoningDescription,PropertyZoningSourceCode,CensusTract,TaxIDNumber,TaxAmount,TaxYear,TaxDelinquencyFlag,TaxDelinquencyAmount,TaxDelinquencyYear,TaxRateCodeArea,LegalLot,LegalLotStndCode,LegalOtherLot,LegalBlock,LegalSubdivisionCode,LegalSubdivisionName,LegalCondoProjectPUDDevName,LegalBuildingNumber,LegalUnit,LegalSection,LegalPhase,LegalTract,LegalDistrict,LegalMunicipality,LegalCity,LegalTownship,LegalSTRSection,LegalSTRTownship,LegalSTRRange,LegalSTRMeridian,LegalSecTwnRngMer,LegalRecordersMapReference,LegalDescription,LegalNeighborhoodSourceCode,NoOfBuildings,LotSizeAcres,LotSizeSquareFeet,LotSizeFrontageFeet,LotSizeDepthFeet,LotSizeIRR,LotSiteTopographyStndCode,LoadID,PropertyAddressMatchcode,PropertyAddressUnitDesignator,PropertyAddressUnitNumber,PropertyAddressCarrierRoute,PropertyAddressGeoCodeMatchCode,PropertyAddressLatitude,PropertyAddressLongitude,PropertyAddressCensusTractAndBlock,PropertyAddressConfidenceScore,PropertyAddressCBSACode,PropertyAddressCBSADivisionCode,PropertyAddressMatchType,PropertyAddressDPV,PropertyGeocodeQualityCode,PropertyAddressQualityCode,SubEdition,BatchID,BKFSPID,RowID2,NoOfUnits,OccupancyStatusStndCode,PropertyCountyLandUseDescription,PropertyCountyLandUseCode,PropertyLandUseStndCode,PropertyStateLandUseDescription,PropertyStateLandUseCode,BuildingOrImprovementNumber,BuildingClassStndCode,BuildingQualityStndCode,BuildingQualityStndCodeOriginal,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,BathSourceStndCode,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FirePlaceTypeStndCode,FireplaceNumber,WaterStndCode,SewerStndCode,MortgageLenderName,TimeshareStndCode,Comments,StoryTypeStndCode
0,69FBEB4A-2AD5-E511-80C1-3863BB43AC67,9044934,6001,CA,ALAMEDA,,72012,20,BKF,475-147-128,,1,,,,,,33171,,,QUAIL,DR,,33171 QUAIL DR,UNION CITY,CA,94587,,,,,,,,,,,,,15-042,,,,,,,,,,,,,,,,,,,,,,,,,1,0.103,4470.0,,,,,339869211,Y,,,C034,Y,37.592,-122.042,60014403.073001,,,,,,,,0,332443,-1,69FBEB4A-2AD5-E511-80C1-3863BB43AC67,,,SINGLE FAMILY RESIDENCE,1100,RR101,,,1,,,,,,1979,1981.0,,1.0,7,4,,,,,,2.0,S,,,,,,,,,,,,,,,,
1,9D3017DF-7AD5-E511-80C1-3863BB43AC67,9072079,6001,CA,ALAMEDA,2014-07-01,72014,22,BKF,48H-7590-87,,1,A,,,,P,15,,,STARVIEW,DR,,15 STARVIEW DR,OAKLAND,CA,94618,,,,,,,,6043.92,2014.0,,,,17-003,,,,,,,,,,,,,,,,,,,,,,,,,1,0.039,1712.0,,,,ST,9493100,Y,,,C015,Y,37.8525,-122.227,60014001.00106,,,,,,,,0,601,-1,9D3017DF-7AD5-E511-80C1-3863BB43AC67,1.0,O,PLANNED DEVELOPMENT - TOWNHOUSE,1500,RR104,,,1,,B,,,,1994,1994.0,,,6,3,,2.0,,1.0,,,C,0.0,,,,,,,,,,,,,,,
2,E474D7B5-8FD5-E511-80C1-3863BB43AC67,9160063,6001,CA,ALAMEDA,,72005,13,BKF,055 -1913-035,,1,,,,,P,2303,,,SPAULDING,AVE,,2303 SPAULDING AVE,BERKELEY,CA,94703,,,,,,,,,,,,,13-000,,,,,,,,,,,,,,,,,,,,,,,,,1,,4158.0,,,,,336628977,Y,,,C009,Y,37.8658,-122.281,60014230.003005,,,,,,,,0,334117,-1,E474D7B5-8FD5-E511-80C1-3863BB43AC67,,O,SINGLE FAMILY RESIDENCE,1100,RR101,,,1,,,,,,1925,,,,7,3,,,,,,1.5,S,,,,,,,,,,,,,,,,
3,0EDE9B49-2AD5-E511-80C1-3863BB43AC67,8923761,6001,CA,ALAMEDA,,72012,20,BKF,15-1364-17,,1,,,,,,666,,,63RD,ST,,666 63RD ST,OAKLAND,CA,94609,,,,,,,,,,,,,17-003,,,,,,,,,,,,,,,,,,,,,,,,,1,0.14,6100.0,,,,,340076500,Y,,,C005,Y,37.8487,-122.268,60014005.003006,,,,,,,,0,332443,-1,0EDE9B49-2AD5-E511-80C1-3863BB43AC67,4.0,O,FOURPLEX OR FOUR UNITS (ANY COMBINATION),2400,RI103,,,1,,,,,,1908,1908.0,,2.0,14,6,,,,,,4.0,S,,,,,,,,,,,,,,,,
4,93F3BA49-2AD5-E511-80C1-3863BB43AC67,8942808,6001,CA,ALAMEDA,,72012,20,BKF,27-857-26,,1,,,,,P,3129,,,CUTHBERT,AVE,,3129 CUTHBERT AVE,OAKLAND,CA,94602,,,,,,,,,,,,,17-001,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,340089849,Y,,,C001,Y,37.7939,-122.215,60014066.022003,,,,,,,,0,332443,-1,93F3BA49-2AD5-E511-80C1-3863BB43AC67,,O,"MULTI, CONDOMINIUMS",7300,RR106,,,1,,,,,,1983,1983.0,,2.0,4,2,,,,,,1.5,S,,,,,,,,,,,,,,,,


In [59]:
# Get cities from JC RC Inventory
cities = list(pd.read_excel('jc_rc.xlsx')['Unnamed: 0'].values[1:len(counties)-1])

In [67]:
# CREATE TABLE alameda AS SELECT * FROM main_bldg WHERE County='ALAMEDA';
sqlite_uri = "sqlite:///ZCountyExtracts.sqlite"
zce_engine = sqlalchemy.create_engine(sqlite_uri)

In [70]:
zce_engine.table_names()

['alameda']

In [71]:
sql_expr = """
SELECT * FROM alameda;
"""
alameda = pd.read_sql(sql_expr, zce_engine)
alameda.head()

Unnamed: 0,RowID,ImportParcelID,FIPS,State,County,ValueCertDate,ExtractDate,Edition,ZVendorStndCode,AssessorParcelNumber,DupAPN,ParcelSequenceNumber,ParcelNumberTypeStndCode,RecordSourceStndCode,RecordTypeStndCode,ConfidentialRecordFlag,PropertyAddressSourceStndCode,PropertyHouseNumber,PropertyHouseNumberExt,PropertyStreetPreDirectional,PropertyStreetName,PropertyStreetSuffix,PropertyStreetPostDirectional,PropertyFullStreetAddress,PropertyCity,PropertyState,PropertyZip,OriginalPropertyFullStreetAddress,OriginalPropertyAddressLastline,PropertyBuildingNumber,PropertyZoningDescription,PropertyZoningSourceCode,CensusTract,TaxIDNumber,TaxAmount,TaxYear,TaxDelinquencyFlag,TaxDelinquencyAmount,TaxDelinquencyYear,TaxRateCodeArea,LegalLot,LegalLotStndCode,LegalOtherLot,LegalBlock,LegalSubdivisionCode,LegalSubdivisionName,LegalCondoProjectPUDDevName,LegalBuildingNumber,LegalUnit,LegalSection,LegalPhase,LegalTract,LegalDistrict,LegalMunicipality,LegalCity,LegalTownship,LegalSTRSection,LegalSTRTownship,LegalSTRRange,LegalSTRMeridian,LegalSecTwnRngMer,LegalRecordersMapReference,LegalDescription,LegalNeighborhoodSourceCode,NoOfBuildings,LotSizeAcres,LotSizeSquareFeet,LotSizeFrontageFeet,LotSizeDepthFeet,LotSizeIRR,LotSiteTopographyStndCode,LoadID,PropertyAddressMatchcode,PropertyAddressUnitDesignator,PropertyAddressUnitNumber,PropertyAddressCarrierRoute,PropertyAddressGeoCodeMatchCode,PropertyAddressLatitude,PropertyAddressLongitude,PropertyAddressCensusTractAndBlock,PropertyAddressConfidenceScore,PropertyAddressCBSACode,PropertyAddressCBSADivisionCode,PropertyAddressMatchType,PropertyAddressDPV,PropertyGeocodeQualityCode,PropertyAddressQualityCode,SubEdition,BatchID,BKFSPID,RowID2,NoOfUnits,OccupancyStatusStndCode,PropertyCountyLandUseDescription,PropertyCountyLandUseCode,PropertyLandUseStndCode,PropertyStateLandUseDescription,PropertyStateLandUseCode,BuildingOrImprovementNumber,BuildingClassStndCode,BuildingQualityStndCode,BuildingQualityStndCodeOriginal,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,BathSourceStndCode,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FirePlaceTypeStndCode,FireplaceNumber,WaterStndCode,SewerStndCode,MortgageLenderName,TimeshareStndCode,Comments,StoryTypeStndCode
0,083BFB02-2AD5-E511-80C1-3863BB43AC67,8904539,6001,CA,ALAMEDA,,82010,18,BKF,1-1111-10,,1,,,,,P,311,,,OAK,ST,,311 OAK ST,OAKLAND,CA,94607,,,,,,,,,,,,,17-022,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,339056319,Y,APT,105,C010,Y,37.7935,-122.268,60014033.001023,,,,,,,,0,330739,-1,083BFB02-2AD5-E511-80C1-3863BB43AC67,,O,CONDOMINIUM,7301,RR106,,,1,,,,,,2003,2003,,10,3,1,,,,,,2,S,,,,,,,Y,,,,,,,,,
1,093BFB02-2AD5-E511-80C1-3863BB43AC67,8904540,6001,CA,ALAMEDA,,82010,18,BKF,1-1111-11,,1,,,,,P,311,,,OAK,ST,,311 OAK ST,OAKLAND,CA,94607,,,,,,,,,,,,,17-022,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,339056320,Y,APT,106,C010,Y,37.7935,-122.268,60014033.001023,,,,,,,,0,330739,-1,093BFB02-2AD5-E511-80C1-3863BB43AC67,,O,CONDOMINIUM,7301,RR106,,,1,,,,,,2003,2003,,10,3,1,,,,,,2,S,,,,,,,Y,,,,,,,,,
2,0A3BFB02-2AD5-E511-80C1-3863BB43AC67,8904541,6001,CA,ALAMEDA,,82010,18,BKF,1-1111-12,,1,,,,,,311,,,OAK,ST,,311 OAK ST,OAKLAND,CA,94607,,,,,,,,,,,,,17-022,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,339056321,Y,APT,107,C010,Y,37.7935,-122.268,60014033.001023,,,,,,,,0,330739,-1,0A3BFB02-2AD5-E511-80C1-3863BB43AC67,,,CONDOMINIUM,7301,RR106,,,1,,,,,,2003,2003,,10,3,1,,,,,,2,S,,,,,,,Y,,,,,,,,,
3,0B3BFB02-2AD5-E511-80C1-3863BB43AC67,8904542,6001,CA,ALAMEDA,,82010,18,BKF,1-1111-13,,1,,,,,,300,,,OAK,ST,,300 OAK ST,OAKLAND,CA,94607,,,,,,,,,,,,,17-022,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,339056322,Y,#,108,,Y,37.7934,-122.267,60014033.001024,,,,,,,,0,330739,-1,0B3BFB02-2AD5-E511-80C1-3863BB43AC67,,,CONDOMINIUM,7301,RR106,,,1,,,,,,2003,2003,,10,3,1,,,,,,2,S,,,,,,,Y,,,,,,,,,
4,0C3BFB02-2AD5-E511-80C1-3863BB43AC67,8904543,6001,CA,ALAMEDA,,82010,18,BKF,1-1111-14,,1,,,,,,300,,,OAK,ST,,300 OAK ST,OAKLAND,CA,94607,,,,,,,,,,,,,17-022,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,339056323,Y,#,109,,Y,37.7934,-122.267,60014033.001024,,,,,,,,0,330739,-1,0C3BFB02-2AD5-E511-80C1-3863BB43AC67,,,CONDOMINIUM,7301,RR106,,,1,,,,,,2003,2003,,10,3,1,,,,,,2,S,,,,,,,Y,,,,,,,,,


In [73]:
# Number of Alameda records
len(alameda)

5286873

In [76]:
# All the cities
alameda['PropertyCity'].value_counts()

OAKLAND           1248993
FREMONT            801421
HAYWARD            540411
LIVERMORE          365555
SAN LEANDRO        359323
BERKELEY           354263
PLEASANTON         287567
ALAMEDA            256777
CASTRO VALLEY      246336
UNION CITY         241414
DUBLIN             162891
NEWARK             149421
SAN LORENZO        116299
ALBANY              63012
PIEDMONT            62079
EMERYVILLE          26645
SUNOL                4168
SUNOL GLEN            120
TRACY                  72
KENSINGTON             42
                       30
BYRON                  25
DELS                    4
MOUNTAIN HOUSE          2
DISCOVERY BAY           2
EL CERRITO              1
Name: PropertyCity, dtype: int64

In [87]:
# Get unique parcel across different years
alameda = alameda.groupby('ImportParcelID', as_index=False).first()

In [89]:
# New length
len(alameda)

411990

In [91]:
alameda['PropertyCity'].value_counts()

OAKLAND          98439
FREMONT          59594
HAYWARD          42238
LIVERMORE        28142
BERKELEY         26993
SAN LEANDRO      26215
PLEASANTON       22973
ALAMEDA          19464
CASTRO VALLEY    17966
UNION CITY       17533
DUBLIN           17494
NEWARK           11902
SAN LORENZO       8566
ALBANY            5268
PIEDMONT          4574
EMERYVILLE        4104
SUNOL              518
KENSINGTON           4
BYRON                3
Name: PropertyCity, dtype: int64

In [92]:
# Load CHPC which will help determine if property is subsidized
chpc = pd.read_excel('chpc.xls', sheet_name='Cleaned')

In [93]:
chpc.head()

Unnamed: 0,DataSet,Property Name,Address_Cleaned,Units with Assistance (Inclusive of All Programs),Units with Rental Assistance (HUD and/or mfhd),HUD Program?,Application Number (TCAC),Type of Tax Credit (TCAC),Application Stage (TCAC),Placed in Service (PIS) Date (TCAC),Construction Type (TCAC),Housing Type (TCAC),Low Income Units (TCAC),Number of SRO/Studio Units,Number of 1 Bedroom Units,Number of 2 Bedroom Units,Number of 3 Bedroom Units,Number of 4 Bedroom Units,Number of 5 Bedroom Units,Number of 6 Bedroom Units,Units at or below 30% AMI,Units at 35% AMI,Units at 40% AMI,Units at 45% AMI,Units at 50% AMI,Units at 55% AMI,Units at 60% AMI,General Partner (1),Management Company,Developer,Borrower+Project ID (mfhd),Tax_Credit_Indicator (mfhd),Date_Tax_Credit_Expires (mfhd),Date_Of_Operation (mfhd),Date_Restrictive_Clause_Expires (mfhd),Notes,Combined Multiple Sites?,LatLong_Google,Latitude,Longitude,Unique: Property Name_CleanedAddress-,GEOID (from Geocode of Lat/Long),TRACTCE (from Geocode of Lat/Long),Within Half Mile of HQT?
0,"HUD, TCAC",COMM22 Senior Housing,"690 Beardsley Street, San Diego, California, 92113",69.0,,,CA-2013-852,0.04,Preliminary Reservation,2015,New Construction,Seniors,69.0,15.0,51.0,4.0,0.0,0.0,0.0,0.0,15.0,0.0,12.0,0.0,42.0,0.0,0.0,"COMM22 Senior GP, LLC",BRIDGE Property Management Company,BRIDGE Housing Corporation,,,,,,HUD has 30 units,,"32.7045781,-117.1444356",32.704578,-117.144436,"COMM22 Senior Housing690 Beardsley Street, San Diego, California, 92113",6073005000.0,4900.0,1.0
1,"HUD, TCAC",Valencia 9 Apartments,"Valencia Avenue and E. 9th Street, San Bernardino, California, 92410",75.0,,,CA-2014-162,0.09,Preliminary Reservation,2015-05-15 00:00:00,New Construction,Large Family,75.0,0.0,10.0,40.0,22.0,4.0,0.0,0.0,8.0,0.0,0.0,34.0,15.0,0.0,18.0,Southern California Housing Development Corporation of the Inland Empire,National Community Renaissance,National Community Renaissance,,,,,,,,"34.1157958,-117.2717424",34.115796,-117.271742,"Valencia 9 ApartmentsValencia Avenue and E. 9th Street, San Bernardino, California, 92410",6071006000.0,6401.0,0.0
2,"HUD, TCAC",Mercy Arc Housing,"1500 Page Street, San Francisco, California, 94117-2018",16.0,15.0,,CA-2014-165,0.09,Preliminary Reservation,2017-03-23 00:00:00,New Construction,Special Needs,16.0,16.0,1.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,7.0,2.0,0.0,1.0,Mercy Housing California Special Needs,Mercy Housing Management Group,Mercy Housing California,,,,,,,,"37.7712566,-122.4458935",37.771257,-122.445893,"Mercy Arc Housing 1500 Page Street, San Francisco, California, 94117-2018",6075017000.0,16600.0,1.0
3,"HUD, TCAC",Windward Apartments (Site N),"7512 South San Pedro, Los Angeles, California, 90003",68.0,68.0,LMSA,CA-2006-825,,,,Acquisition/Rehab,Non Targeted,,,,,,,,,,,,,,,,"1514 Windward Partners, LLC",Alpha Property Management,1768 EFM 2 LLC and Hampstead Partners Development,,,,,,Site unit info came from permit application,,"33.9716945,-118.2693157",33.971694,-118.269316,"Windward Apartments (Site N)7512 South San Pedro, Los Angeles, California, 90003",6037240000.0,239501.0,0.0
4,"HUD, TCAC",Laurel Village,"9700 Laurel Canyon Boulevard, Los Angeles, California, 91331",79.0,80.0,Sec 8 NC,CA-2013-898,0.04,Preliminary Reservation,,Acquisition/Rehabilitation,At-Risk,79.0,0.0,0.0,64.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,55.0,Laurel Village GP LLC,Abode Communities,Richard Bradley,,,,,,,,"34.246445,-118.417999",34.246445,-118.417999,"Laurel Village9700 Laurel Canyon Boulevard, Los Angeles, California, 91331",6037105000.0,104821.0,0.0


In [128]:
# Merge based on euclidean distance
chpc = chpc.dropna(subset=['Longitude', 'Latitude'])
def merge_euclidean(table, eps=0.01):
    chpc_list = []
    table['merge_row'] = table.index.values
    for i, row in table.iterrows():
        chpc_subset = chpc.loc[(chpc.Longitude - row.PropertyAddressLongitude)**2 + (chpc.Latitude - row.PropertyAddressLatitude)**2 < eps]
        chpc_subset['merge_row'] = i
        chpc_list.append(chpc_subset)
    chpc_found = pd.concat(chpc_list)

    result = pd.merge(table, chpc_found, on='merge_row', how='left')
    return result

In [112]:
# Saving to csv to convert to natural numpy dtype
alameda.to_csv('alameda.csv', sep='|', index=False)
#chpc_alameda = merge_euclidean(alameda)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,ImportParcelID,RowID,FIPS,State,County,ValueCertDate,ExtractDate,Edition,ZVendorStndCode,AssessorParcelNumber,DupAPN,ParcelSequenceNumber,ParcelNumberTypeStndCode,RecordSourceStndCode,RecordTypeStndCode,ConfidentialRecordFlag,PropertyAddressSourceStndCode,PropertyHouseNumber,PropertyHouseNumberExt,PropertyStreetPreDirectional,PropertyStreetName,PropertyStreetSuffix,PropertyStreetPostDirectional,PropertyFullStreetAddress,PropertyCity,PropertyState,PropertyZip,OriginalPropertyFullStreetAddress,OriginalPropertyAddressLastline,PropertyBuildingNumber,PropertyZoningDescription,PropertyZoningSourceCode,CensusTract,TaxIDNumber,TaxAmount,TaxYear,TaxDelinquencyFlag,TaxDelinquencyAmount,TaxDelinquencyYear,TaxRateCodeArea,LegalLot,LegalLotStndCode,LegalOtherLot,LegalBlock,LegalSubdivisionCode,LegalSubdivisionName,LegalCondoProjectPUDDevName,LegalBuildingNumber,LegalUnit,LegalSection,LegalPhase,LegalTract,LegalDistrict,LegalMunicipality,LegalCity,LegalTownship,LegalSTRSection,LegalSTRTownship,LegalSTRRange,LegalSTRMeridian,LegalSecTwnRngMer,LegalRecordersMapReference,LegalDescription,LegalNeighborhoodSourceCode,NoOfBuildings,LotSizeAcres,LotSizeSquareFeet,LotSizeFrontageFeet,LotSizeDepthFeet,LotSizeIRR,LotSiteTopographyStndCode,LoadID,PropertyAddressMatchcode,PropertyAddressUnitDesignator,PropertyAddressUnitNumber,PropertyAddressCarrierRoute,PropertyAddressGeoCodeMatchCode,PropertyAddressLatitude,PropertyAddressLongitude,PropertyAddressCensusTractAndBlock,PropertyAddressConfidenceScore,PropertyAddressCBSACode,PropertyAddressCBSADivisionCode,PropertyAddressMatchType,PropertyAddressDPV,PropertyGeocodeQualityCode,PropertyAddressQualityCode,SubEdition,BatchID,BKFSPID,RowID2,NoOfUnits,OccupancyStatusStndCode,PropertyCountyLandUseDescription,PropertyCountyLandUseCode,PropertyLandUseStndCode,PropertyStateLandUseDescription,PropertyStateLandUseCode,BuildingOrImprovementNumber,BuildingClassStndCode,BuildingQualityStndCode,BuildingQualityStndCodeOriginal,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,BathSourceStndCode,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FirePlaceTypeStndCode,FireplaceNumber,WaterStndCode,SewerStndCode,MortgageLenderName,TimeshareStndCode,Comments,StoryTypeStndCode,merge_row
0,8904539,083BFB02-2AD5-E511-80C1-3863BB43AC67,6001,CA,ALAMEDA,,82010,18,BKF,1-1111-10,,1,,,,,P,311,,,OAK,ST,,311 OAK ST,OAKLAND,CA,94607.0,,,,,,,,,,,,,17-022,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,339056319,Y,APT,105,C010,Y,37.7935,-122.268,60014000.0,,,,,,,,0,330739,-1,083BFB02-2AD5-E511-80C1-3863BB43AC67,,O,CONDOMINIUM,7301,RR106,,,1,,,,,,2003.0,2003.0,,10.0,3.0,1.0,,,,,,2.0,S,,,,,,,Y,,,,,,,,,,0
1,8904540,093BFB02-2AD5-E511-80C1-3863BB43AC67,6001,CA,ALAMEDA,,82010,18,BKF,1-1111-11,,1,,,,,P,311,,,OAK,ST,,311 OAK ST,OAKLAND,CA,94607.0,,,,,,,,,,,,,17-022,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,339056320,Y,APT,106,C010,Y,37.7935,-122.268,60014000.0,,,,,,,,0,330739,-1,093BFB02-2AD5-E511-80C1-3863BB43AC67,,O,CONDOMINIUM,7301,RR106,,,1,,,,,,2003.0,2003.0,,10.0,3.0,1.0,,,,,,2.0,S,,,,,,,Y,,,,,,,,,,1


In [114]:
alameda = pd.read_csv('alameda.csv', sep='|')

  interactivity=interactivity, compiler=compiler, result=result)


In [116]:
len(chpc)

5986

In [None]:
chpc_alameda = merge_euclidean(alameda, eps=0.000001)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [None]:
pd.read_csv('chpc_alameda.csv', sep='|')