In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import seaborn as sns
import zipfile
%matplotlib inline
# Apply matplotlib's 'fivethirtyeight' style sheet.
plt.style.use('fivethirtyeight')
# Then apply seaborn's default theme.
# NOTE(review): sns.set() runs after the style sheet, so it presumably overrides
# part of it -- confirm which look is actually intended.
sns.set()

# Switch seaborn to its "talk" plotting context.
sns.set_context("talk")
import re

# Let pandas show up to 100 characters per cell before truncating, and never
# hide columns when a wide frame is displayed.
# The full option names are spelled out: pandas resolves short names by pattern
# matching, which can break if a later release adds a similarly named option.
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_columns', None)

from pathlib import Path

# SQL
import sqlalchemy

In [2]:
# Load the pipe-delimited Alameda extract.
# low_memory=False makes pandas parse the file in a single pass so that every
# column gets one inferred dtype; the default chunked inference can leave
# mixed-type columns (and emits a DtypeWarning), which makes the equality
# filters applied later unreliable.
alameda = pd.read_csv('alameda_v2.csv', sep='|', low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Start the JC inventory from the rows whose PropertyCity is exactly 'BERKELEY'.
berk_jc = alameda.loc[alameda['PropertyCity'].eq('BERKELEY')]
print('Current number:', len(berk_jc))

Current number: 26993


In [4]:
# Keep only the rows flagged as not subsidized (Subsidized == 'N').
berk_jc = berk_jc.loc[berk_jc['Subsidized'].eq('N')]

In [5]:
# Sanity check on the JC frame after the filters above.
# Displays (number of records, number of columns).
berk_jc.shape

(26987, 135)

In [6]:
# Read the 'LandUse' sheet of the assessment data dictionary.
# header=5 takes the column names from the sheet's row at index 5, and
# usecols restricts the read to the first four columns.
data_dict = pd.read_excel(
    'ZAsmt_DataDictionary_2016-01.xlsx',
    sheet_name='LandUse',
    header=5,
    usecols=np.arange(4),
)
# Preview the first two rows.
data_dict.head(2)

Unnamed: 0,StndCode,PropertyLandUse Description,Prefix Code Classification,Classification
0,AG000,AGRICULTURAL GENERAL,AG,Agricultural
1,AG101,FARM (IRRIGATED OR DRY),AG,Agricultural


In [7]:
# Dictionary rows whose prefix classification is 'RI' or 'RR'.
# NOTE(review): these are presumably the residential prefixes -- confirm
# against the data dictionary.
residential = data_dict.loc[
    data_dict['Prefix Code Classification'].isin(['RI', 'RR'])
]
# R holds the standard land-use codes of those rows; later cells filter on it.
R = residential['StndCode']

In [8]:
# Quoted comments refer to the lines in the JC and RC inventory, specifically
# each coverage/exemption.
#
# "Units owned by a government agency"
# "Nursing or hospital units or home for the aged and the like"
# Both are approximated by keeping only rows whose land-use code is one of the
# residential codes collected in R.
berk_jc = berk_jc.loc[berk_jc['PropertyLandUseStndCode'].isin(R)]
print('Current number:', len(berk_jc))

Current number: 26423


In [9]:
# "Two unit properties where one unit is owner-occupied"
# Drop rows whose occupancy code is 'O' or 'I' AND that are two-unit
# properties, i.e. NoOfUnits is exactly 2 or the land-use code is RI101.
# NOTE(review): 'O'/'I' presumably mark owner occupancy and RI101 a duplex --
# confirm against the data dictionary.
berk_jc = berk_jc.loc[
    ~(
        berk_jc['OccupancyStatusStndCode'].isin(['O', 'I'])
        & (
            berk_jc['NoOfUnits'].eq(2)
            | berk_jc['PropertyLandUseStndCode'].eq('RI101')
        )
    )
]
print('Current number:', len(berk_jc))

Current number: 24680


In [10]:
# "Units rented by certain institutions of higher learning to staff, students or faculty"
# Drop every row whose land-use code is RI113.
berk_jc = berk_jc.loc[berk_jc['PropertyLandUseStndCode'].ne('RI113')]
print('Last Current number:', len(berk_jc))

Last Current number: 24680


In [11]:
# Start the RC inventory from the same Berkeley subset of the county table.
berk_rc = alameda.loc[alameda['PropertyCity'].eq('BERKELEY')]
print('Current number:', len(berk_rc))

Current number: 26993


In [226]:
# Keep only the rows flagged as not subsidized (Subsidized == 'N').
berk_rc = berk_rc.loc[berk_rc['Subsidized'].eq('N')]
print('Current number:', len(berk_rc))

Current number: 26987


In [227]:
# Quoted comments refer to the lines in the JC and RC inventory, specifically
# each coverage/exemption.
#
# Keep only rows whose land-use code is one of the residential codes in R.
berk_rc = berk_rc.loc[berk_rc['PropertyLandUseStndCode'].isin(R)]
print('Current number:', len(berk_rc))

Current number: 26423


In [228]:
# "Units eligible for RC:"
# "Most multi-unit properties that were built before June 1980"
# "Units eligible for RC: Single family homes with tenants who moved in prior to 1996"
#
# "Units partially eligible for RC:"
# "Single-family homes first re-rented on or after 1/1/1996"
# "Most condominiums"
#
# All of the above are APPROXIMATED by a single cut on construction year:
# keep rows with YearBuilt < 1996. That covers everything built before 1980
# and stands in for tenancies that began before 1996.
# Rows with a missing YearBuilt fail the comparison and are dropped as well.
berk_rc = berk_rc.loc[berk_rc['YearBuilt'].lt(1996)]
print('Current number:', len(berk_rc))

Current number: 22436


In [229]:
# Keep a row when either:
#   * its land-use code contains 'RI' and it was built before 1981, or
#   * its land-use code is RR101, RR999 or RR106.
# NOTE(review): the RI codes are presumably the multi-unit categories and the
# three RR codes single-family/condo categories -- confirm against the data
# dictionary.
berk_rc = berk_rc.loc[
    (
        berk_rc['PropertyLandUseStndCode'].str.contains('RI')
        & berk_rc['YearBuilt'].lt(1981)
    )
    | berk_rc['PropertyLandUseStndCode'].isin(['RR101', 'RR999', 'RR106'])
]
print('Current number:', len(berk_rc))

Current number: 21963


In [230]:
# "Units fully exempt from RC:"
# The bare string below is a no-op; it only records the inventory wording.
"""
Any duplex that was owner-occupied
on December 31, 1979, and currently
has an owner living in one of the
units ("Golden Duplexes")

Rental units rented by a non-profit,
accredited institution of a higher
learning (i.e., dorms)

Non-profit cooperatives
"""
# Step 1: drop rows with land-use code RI113 or RR107.
# NOTE(review): presumably the institutional-housing and cooperative codes -- confirm.
berk_rc = berk_rc.loc[
    ~berk_rc['PropertyLandUseStndCode'].isin(['RI113', 'RR107'])
]
# Step 2: drop rows that have occupancy code 'O' or 'I', land-use code RI101
# and YearBuilt before 1980 (the approximation of "Golden Duplexes").
berk_rc = berk_rc.loc[
    ~(
        berk_rc['OccupancyStatusStndCode'].isin(['O', 'I'])
        & berk_rc['PropertyLandUseStndCode'].eq('RI101')
        & berk_rc['YearBuilt'].lt(1980)
    )
]

In [231]:
# Display the number of rows left in the RC frame. Multi-unit properties,
# single family homes and condos are counted together, one per row; this cell
# does not sum NoOfUnits.
# Disabled experiment, kept for reference: treat a missing NoOfUnits as 1.
#value = {'NoOfUnits': 1}
#berk_rc.fillna(value=value, inplace=True)
len(berk_rc)

20726

In [232]:
# Final row count of the JC frame after all filters above.
print('Last JC Current number:', len(berk_jc))
# Final row count of the RC frame; includes both full and partial coverage.
print('Last RC Current number:', len(berk_rc))

Last JC Current number: 24680
Last RC Current number: 20726
