In [1]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline

In [21]:
import numpy as np
import pandas as pd
import math
from scipy import stats
import pickle
from causality.analysis.dataframe import CausalDataFrame
from sklearn.linear_model import LinearRegression
import datetime

In [3]:
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Switch plotly into offline notebook mode before any figures are drawn.
# NOTE(review): connected=True presumably pulls plotly.js from the network instead of
# embedding it in the notebook — confirm this is wanted for offline viewing.
init_notebook_mode(connected=True)

Load the data saved by the earlier notebooks and trim each dataset to the years they have in common (after 1999; the endowment series is kept from fiscal year 1998 onward).

In [15]:
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # pickle.load can execute arbitrary code: only point this at files
    # produced by the earlier notebooks of this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# money_data.pickle holds a 3-item sequence, unpacked in this fixed order.
income_data, housing_data, rent_data = _read_pickle('VariableData/money_data.pickle')
demographic_data = _read_pickle('VariableData/demographic_data.pickle')
endowment = _read_pickle('VariableData/endowment.pickle')
expander = _read_pickle('VariableData/expander.pickle')

In [5]:
def _rows_after(frame, year_column, cutoff):
    """Return the rows of ``frame`` where ``year_column`` > ``cutoff``, re-indexed from 0.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table to filter; it is not modified.
    year_column : str
        Name of the column holding the year of each row.
    cutoff : int
        Rows are kept only when their year is strictly greater than this value.

    Returns
    -------
    pandas.DataFrame
        The filtered rows with a fresh 0..n-1 index.
    """
    # drop=True discards the old index outright, rather than materialising it as
    # an 'index' column and deleting it afterwards (which breaks when the index
    # is named or an 'index' column already exists).
    return frame[frame[year_column] > cutoff].reset_index(drop=True)


# NOTE(review): endowment is cut at FY > 1997 while every other frame uses
# year > 1999 — confirm the earlier cutoff is intentional.
endowment = _rows_after(endowment, 'FY', 1997)

demographic_data = _rows_after(demographic_data, 'year', 1999)
income_data = _rows_after(income_data, 'year', 1999)
housing_data = _rows_after(housing_data, 'year', 1999)
rent_data = _rows_after(rent_data, 'year', 1999)

Read in the data on Harvard-owned land and Cambridge's property records. Restrict the Harvard data to Cambridge, MA.

In [6]:
# Load Harvard's building reference list (column names come from row index 3 of
# the sheet) and keep only the buildings whose City is exactly 'Cambridge'.
harvard_land = pd.read_excel(
    "Spreadsheets/2018_building_reference_list.xlsx",
    header=3,
).loc[lambda buildings: buildings['City'] == 'Cambridge']

In [7]:
# Load the Cambridge property records from the project spreadsheet.
cambridge_property = pd.read_excel(io="Spreadsheets/cambridge_properties.xlsx")

Restrict the Cambridge data to Harvard properties, and only use relevant columns.

In [8]:
# Keep only the records whose owner string matches Harvard's legal name exactly.
is_harvard_owned = cambridge_property['Owner_Name'] == 'PRESIDENT & FELLOWS OF HARVARD COLLEGE'
cambridge_property = cambridge_property.loc[is_harvard_owned]

In [9]:
# Narrow the table to the columns used in the rest of the analysis.
# .copy() makes the result an independent frame, so assigning to its columns
# later does not raise SettingWithCopyWarning about writing into a slice.
cambridge_property = cambridge_property[[
    'Address',
    'PropertyClass',
    'LandArea',
    'BuildingValue',
    'LandValue',
    'AssessedValue',
    'SalePrice',
    'SaleDate',
    'Owner_Name',
]].copy()

Parse the sale dates into datetime values, then keep a single record per address.

In [10]:
# Convert SaleDate to pandas datetimes so sales can be compared against a cutoff date.
# `infer_datetime_format` is intentionally omitted: it is deprecated since pandas 2.0
# (it only triggers a warning there) and was purely a parsing speed-up before that.
cambridge_property['SaleDate'] = pd.to_datetime(cambridge_property['SaleDate'])

In [13]:
# Collapse repeated addresses to one record each; the first occurrence is the one kept.
clean_property = cambridge_property.drop_duplicates(subset='Address', keep='first')

Only look at properties purchased after January 1, 2000.

In [28]:
# Keep only properties sold after 1 January 2000 (rows with a missing SaleDate drop out).
# The cutoff is a datetime.datetime rather than a datetime.date: recent pandas
# versions refuse to order-compare a datetime64 column against a plain date and
# raise "TypeError: Invalid comparison between dtype=datetime64[ns] and date".
sale_cutoff = datetime.datetime(2000, 1, 1)
recent_property = clean_property[clean_property['SaleDate'] > sale_cutoff]

In [38]:
# Numeric columns to total across the recent purchases.
summed_columns = ['LandArea', 'AssessedValue', 'SalePrice']
property_numbers = recent_property[summed_columns]
sum_properties = property_numbers.sum()

# Number of recent purchases, counted as rows with a non-null Address.
num_recent = recent_property['Address'].count()

In [39]:
# Display the column totals (land area, assessed value, sale price) of the recent purchases.
sum_properties

LandArea            281219
AssessedValue    112671400
SalePrice         53436500
dtype: int64

In [29]:
# Display the Harvard-owned Cambridge properties with a sale date after 2000-01-01.
recent_property

Unnamed: 0,Address,PropertyClass,LandArea,BuildingValue,LandValue,AssessedValue,SalePrice,SaleDate,Owner_Name
585,100 Land Blvd,"Private College, University",65683,43883200,19941400,63824600,100,2002-07-01,PRESIDENT & FELLOWS OF HARVARD COLLEGE
10249,24 Blackstone St,"Private College, University",109943,3784300,8000000,11784300,14118000,2003-04-09,PRESIDENT & FELLOWS OF HARVARD COLLEGE
10254,45 Blackstone St,CONDO-BLDG,43988,0,0,0,14118000,2003-04-09,PRESIDENT & FELLOWS OF HARVARD COLLEGE
11663,20 Sumner Rd,"Private College, University",8184,607000,1152400,1759400,1288000,2010-10-05,PRESIDENT & FELLOWS OF HARVARD COLLEGE
11665,42 Kirkland St,"Private College, University",10518,1564400,925600,2490000,4112400,2010-10-05,PRESIDENT & FELLOWS OF HARVARD COLLEGE
12996,122 Mt Auburn St,MULTIUSE-RES,18749,9884800,6018000,15902800,11250000,2011-12-19,PRESIDENT & FELLOWS OF HARVARD COLLEGE
13355,153 Mt Auburn St,"Private College, University",10233,1228800,484700,1713500,0,2003-03-21,PRESIDENT & FELLOWS OF HARVARD COLLEGE
13377,9 Ash St,SNGL-FAM-RES,4800,303200,1426200,1729400,1250000,2010-07-07,PRESIDENT & FELLOWS OF HARVARD COLLEGE
18194,113 Walker St,SNGL-FAM-RES,9121,2973800,1740000,4713800,1600000,2000-10-31,PRESIDENT & FELLOWS OF HARVARD COLLEGE
24589,0 Arrow St,"Private College, University",0,8753600,0,8753600,5700000,2004-12-22,PRESIDENT & FELLOWS OF HARVARD COLLEGE
