# Data for Cambridge and Boston
This document analyzes data available about Cambridge and Boston from the 1990, 2000 and 2010 censuses. 

In [55]:
import cityscraper as cs
import numpy as np
import pandas as pd
import math

In [37]:
import matplotlib.pyplot as plt
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
plotly.tools.set_credentials_file(username='hangulu', api_key='78VR3oagCeoHkdYiKB4b')

## Analyzing Demographic Data
Populations for white, Black, Native American, Asian, Pacific Islander, and other races of people in Cambridge and Boston.

In [3]:
# Census API Key
key = "d923b916d08af136ade78b021cc31f162cbb2d2f"

The function to fetch the census data.

In [4]:
def fetch_census(year, census_codes, state_code, place):
    results = {}
    for category, variable in census_codes.items():
        out = pd.read_json('https://api.census.gov/data/{}/sf1?get={}&in=state:{}&for=place:{}&key={}'.format(year, variable, state_code, place, key))
        df = out.iloc[[1]][0]
        results[category] = int(df)
    return (results)

The dictionaries containing the codes for accessing the census data.

In [5]:
census_2010 = dict([("total population", "P0030001"), ("white population", "P0030002"), ("black population", "P0030003"), ("native population", "P0030004"), ("asian population", "P0030005"), ("pacific population", "P0030006"), ("other population", "P0030007")])

census_2000 = dict([("total population", "P003002"), ("white population", "P003003"), ("black population", "P003004"), ("native population", "P003005"), ("asian population", "P003006"), ("pacific population", "P003007"), ("other population", "P003008")])

census_1990 = dict([("total population", "P0010001"), ("white population", "P0060001"), ("black population", "P0060002"), ("native population", "P0060003"), ("asian population", "P0060004"), ("other population", "P0060005")])

Calculate and clean the data for the two cities.

In [6]:
b2010 = fetch_census(2010, census_2010, 25, '07000')
b2000 = fetch_census(2000, census_2000, 25, '07000')
b1990 = fetch_census(1990, census_1990, 25, '07000')

c2010 = fetch_census(2010, census_2010, 25, 11000)
c2000 = fetch_census(2000, census_2000, 25, 11000)
c1990 = fetch_census(1990, census_1990, 25, 11000)

In [46]:
boston = pd.DataFrame([b1990, b2000, b2010])
boston.insert(loc=0, column='year', value=['1990', '2000', '2010'])
boston = boston.fillna(0)
boston['asian and pacific islander population'] = boston['asian population'] + boston['pacific population']
boston['white and asian population'] = boston['asian and pacific islander population'] + boston['white population']
boston['poc population'] = boston['asian and pacific islander population'] + boston['black population'] + boston['native population'] + boston['other population']
boston.apply(pd.to_numeric)

Unnamed: 0,year,asian population,black population,native population,other population,pacific population,total population,white population,asian and pacific islander population,white and asian population,poc population
0,1990,30388,146945,1884,34191,0.0,574283,360875,30388.0,391263.0,213408.0
1,2000,44284,149202,2365,46102,366.0,563263,320944,44650.0,365594.0,242319.0
2,2010,55235,150437,2399,51893,265.0,617594,333033,55500.0,388533.0,260229.0


In [47]:
cambridge = pd.DataFrame([c1990, c2000, c2010])
cambridge.insert(loc=0, column='year', value=[1990, 2000, 2010])
cambridge = cambridge.fillna(0)
cambridge['asian and pacific islander population'] = cambridge['asian population'] + cambridge['pacific population']
cambridge['white and asian population'] = cambridge['asian and pacific islander population'] + cambridge['white population']
cambridge['poc population'] = cambridge['asian and pacific islander population'] + cambridge['black population'] + cambridge['native population'] + cambridge['other population']
cambridge.apply(pd.to_numeric)

Unnamed: 0,year,asian population,black population,native population,other population,pacific population,total population,white population,asian and pacific islander population,white and asian population,poc population
0,1990,8081,12930,288,2381,0.0,95802,72122,8081.0,80203.0,23680.0
1,2000,12036,12079,290,3230,77.0,96734,69022,12113.0,81135.0,27712.0
2,2010,15879,12253,244,2241,38.0,105162,70006,15917.0,85923.0,30655.0


Focus on the change in the populations over the years.

In [48]:
c_change = cambridge.drop('year', axis=1).pct_change()
b_change = boston.drop('year', axis=1).pct_change()
c_change

Unnamed: 0,asian population,black population,native population,other population,pacific population,total population,white population,asian and pacific islander population,white and asian population,poc population
0,,,,,,,,,,
1,0.48942,-0.065816,0.006944,0.356573,inf,0.009728,-0.042983,0.498948,0.011621,0.17027
2,0.319292,0.014405,-0.158621,-0.306192,-0.506494,0.087126,0.014256,0.314043,0.059013,0.106199


In [49]:
c_change2 = cambridge.drop('year', axis=1).pct_change(periods=2)
b_change2 = boston.drop('year', axis=1).pct_change(periods=2)
c_change2

Unnamed: 0,asian population,black population,native population,other population,pacific population,total population,white population,asian and pacific islander population,white and asian population,poc population
0,,,,,,,,,,
1,,,,,,,,,,
2,0.96498,-0.052359,-0.152778,-0.058799,inf,0.097702,-0.029339,0.969682,0.071319,0.294552


Now, the data for the change in these populations (Black, Asian, non-white, and white) will be visualized.

In [50]:
# Demographic changes (bars)
trace1 = go.Bar(
    x=['Black', 'AAPI', 'Non-White', 'White'],
    y=[b_change2.iloc[[2]]['black population'], b_change2.iloc[[2]]['asian and pacific islander population'], b_change2.iloc[[2]]['poc population'], b_change2.iloc[[2]]['white population'], b_change2.iloc[[2]]['white and asian population']],
    name='Boston'
)
trace2 = go.Bar(
    x=['Black', 'AAPI', 'Non-White', 'White'],
    y=[c_change2.iloc[[2]]['black population'], c_change2.iloc[[2]]['asian and pacific islander population'], c_change2.iloc[[2]]['poc population'], c_change2.iloc[[2]]['white population'], c_change2.iloc[[2]]['white and asian population']],
    name='Cambridge'
)

data = [trace1, trace2]
layout = go.Layout(
    title = "The Change In Population Demographics for Boston and Cambridge Since 1990",
    barmode='group'
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='demographic_changes')

In [52]:
# Demographic changes (lines)
trace0 = go.Scatter(
            x = boston['year'],
            y = boston['black population'],
            name='Black'
        )
trace1 = go.Scatter(
            x = boston['year'],
            y = boston['asian population'],
            name='Asian'
        )
trace2 = go.Scatter(
            x = boston['year'],
            y = boston['poc population'],
            name='Non-White'
        )
trace3 = go.Scatter(
            x = boston['year'],
            y = boston['white population'],
            name='White'
        )
trace4 = go.Scatter(
            x = boston['year'],
            y = boston['white and asian population'],
            name='White and Asian'
        )
data = [trace0, trace1, trace2, trace3, trace4]

layout = go.Layout(
            title = "The Change In Population Demographics for Boston Since 1990",
            yaxis=dict(
                title='Population Levels'
            ),
            xaxis=dict(
                title='Year')
        )

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='demographic_changes_line')

In [53]:
# Demographic changes (lines)
trace0 = go.Scatter(
            x = cambridge['year'],
            y = cambridge['black population'],
            name='Black'
        )
trace1 = go.Scatter(
            x = cambridge['year'],
            y = cambridge['asian population'],
            name='Asian'
        )
trace2 = go.Scatter(
            x = cambridge['year'],
            y = cambridge['poc population'],
            name='Non-White'
        )
trace3 = go.Scatter(
            x = cambridge['year'],
            y = cambridge['white population'],
            name='White'
        )
trace4 = go.Scatter(
            x = cambridge['year'],
            y = cambridge['white and asian population'],
            name='White and Asian'
        )
data = [trace0, trace1, trace2, trace3, trace4]

layout = go.Layout(
            title = "The Change In Population Demographics for Cambridge Since 1990",
            yaxis=dict(
                title='Population Levels'
            ),
            xaxis=dict(
                title='Year')
        )

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='cam_demographic_changes_line')

## Analyzing Monetary Data
Income levels, rent, housing prices.

The housing in Cambridge was primarily built before 1939 ( 50.99%), making the housing stock in Cambridge some of the oldest overall in America, although there is a range of ages of homes in Cambridge. The next most important housing age is between 1970-1999 ( 20.24%), followed by between 1940-1969 ( 18.47%). There's also some housing in Cambridge built between 2000 and later ( 10.31%).

In the last 10 years, Cambridge has experienced some of the highest home appreciation rates of any community in the nation. Cambridge real estate appreciated 62.57% over the last ten years, which is an average annual home appreciation rate of 4.98%, putting Cambridge in the top 10% nationally for real estate appreciation. If you are a home buyer or real estate investor, Cambridge definitely has a track record of being one of the best long term real estate investments in America through the last ten years.

Appreciation rates are so strong in Cambridge that despite a nationwide downturn in the housing market, Cambridge real estate has continued to appreciate in value faster than most communities. Looking at just the latest twelve months, Cambridge appreciation rates continue to be some of the highest in America, at 9.83%, which is higher than appreciation rates in 89.78% of the cities and towns in the nation. Based on the last twelve months, short-term real estate investors have found good fortune in Cambridge. Cambridge appreciation rates in the latest quarter were at 1.75%, which equates to an annual appreciation rate of 7.19%.

– https://www.neighborhoodscout.com/ma/cambridge/real-estate



In [59]:
current = cs.scraper('Cambridge', 'City', 'Cambridge-Massachusetts')
test

{'Area': 6.43,
 'City': 'Cambridge',
 'City Housing Price': 739800,
 'City Income': 86657,
 'City Rent': 1968,
 'Population Density': 17211}

http://www.cambridgema.gov/CDD/factsandmaps/demographicfaq

In [60]:
# http://www.cambridgema.gov/CDD/factsandmaps/demographicfaq
c_income2010 = 107897
c_income2000 = 85660
c_income1990 = 74764

# http://www.cambridgema.gov/CDD/factsandmaps/demographicfaq
c_house2010 = 739800
# https://www.trulia.com/real_estate/Cambridge-Massachusetts/market-trends/
c_house2000 = 297000
c_house1990 =

# http://www.cambridgema.gov/CDD/factsandmaps/demographicfaq
c_rent2010 = 2348
c_rent2000 =
c_rent1990 =

SyntaxError: invalid syntax (<ipython-input-60-8ab1d2c53e4e>, line 8)

In [None]:
# http://www.cambridgema.gov/CDD/factsandmaps/demographicfaq
c_income2010 = 107897
c_income2000 = 85660
c_income1990 = 74764

# http://www.cambridgema.gov/CDD/factsandmaps/demographicfaq
c_house2010 = 739800
# https://www.trulia.com/real_estate/Cambridge-Massachusetts/market-trends/
c_house2000 = 297000
c_house1990 =

# http://www.cambridgema.gov/CDD/factsandmaps/demographicfaq
c_rent2010 = 2348
c_rent2000 =
c_rent1990 =

In [61]:
def fetch_monetary(year, census_codes, state_code, place):
    results = {}
    for category, variable in census_codes.items():
        out = pd.read_json('https://api.census.gov/data/{}/acs5?get={}&in=state:{}&for=place:{}&key={}'.format(year, variable, state_code, place, key))
        df = out.iloc[[1]][0]
        results[category] = int(df)
    return (results)