## Update 2/10:
I used Census race absolute totals instead of the below.

## Summary
The appropriate data source is the ACS 5-year survey, which was last done in 2015. It provides population estimates by zip code, so I need to manually map these onto the 311 neighborhood definitions. Apparently the city doesn't have its own population estimates by neighborhood.

In [1]:
import os

import requests as r

In [2]:
# Census API key, read from the CENSUS_API_KEY environment variable so that no
# credential is stored in the notebook. Falls back to an empty string when the
# variable is unset.
# NOTE(review): a key was previously hard-coded on this line and should be
# treated as leaked -- revoke it and request a new one.
KEY = os.environ.get("CENSUS_API_KEY", "")

In [3]:
def get_pop(zip_code):
    """Return the total population of one zip code tabulation area.

    Asks the Census API's 2015 ACS 5-year dataset for variable
    ``B01001_001E`` (total population) and converts the answer to ``int``.

    Parameters
    ----------
    zip_code : str
        Zip code to look up, e.g. ``'02108'``. It is interpolated into the
        URL as-is, so pass it as a string to keep the leading zero.

    Returns
    -------
    int
        The value of ``B01001_001E`` in the first data row of the response.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status code.
    """
    # HTTPS so the API key is not sent in clear text.
    url = ("https://api.census.gov/data/2015/acs5"
           "?get=B01001_001E&for=zip+code+tabulation+area:{}").format(zip_code)
    if KEY:
        # Only send a key when one is configured; an empty ``key=`` is omitted.
        url += "&key={}".format(KEY)

    # A timeout keeps a stalled connection from hanging the notebook forever.
    reply = r.get(url, timeout=30)
    reply.raise_for_status()
    rows = reply.json()

    # rows[0] is the header row, rows[1] the first data row; pick the column
    # by name rather than by position.
    column = rows[0].index('B01001_001E')
    return int(rows[1][column])

### Alternative hacky way

In [4]:
def get_pop2(zip_code):
    """Scrape a zip code's population from its CensusReporter.org profile page.

    CensusReporter takes the figure from the 2015 ACS 5-year survey.

    Parameters
    ----------
    zip_code : str
        Zip code to look up, e.g. ``'02108'``; it is inserted twice into the
        profile URL.

    Returns
    -------
    int
        The scraped value with thousands separators removed.

    Raises
    ------
    requests.HTTPError
        If the profile page responds with an error status code.
    AttributeError
        If the expected elements are not found in the page (``find`` then
        returns ``None``).
    """
    # Imported locally: BeautifulSoup was used here without ever being imported
    # (NameError on first call), and only this fallback scraper needs bs4.
    from bs4 import BeautifulSoup

    url = "https://censusreporter.org/profiles/86000US{}-{}/".format(zip_code, zip_code)
    page = r.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # First <span class="value"> inside the first <div class="column-half">.
    # NOTE(review): presumably the headline population figure -- confirm
    # against the live page, since this depends on the site's layout.
    text_value = soup.find('div', {'class': 'column-half'}).find('span', {'class': 'value'}).text
    return int(text_value.replace(',', ''))

### OK, now to get the data and make the table

In [5]:
# Zip codes to look up, in ascending order. They are kept as strings so the
# leading zero is not lost.
zip_codes = (
    '02108', '02109', '02110', '02111', '02113', '02114', '02115', '02116',
    '02118', '02119', '02120', '02121', '02122', '02124', '02125', '02126',
    '02127', '02128', '02129', '02130', '02131', '02132', '02134', '02135',
    '02136', '02199', '02210', '02215', '02467',
)

In [6]:
import pandas as pd

In [7]:
# One row per zip code; further columns are attached to this frame in later cells.
df = pd.Series(zip_codes, name='zip_codes').to_frame()
df.head(n=2)

Unnamed: 0,zip_codes
0,2108
1,2109


In [9]:
# Fetch the population for every zip code; get_pop is called once per row.
df['population'] = df['zip_codes'].apply(get_pop)
df.head(n=2)

Unnamed: 0,zip_codes,population
0,2108,4183
1,2109,3947


In [10]:
# Display the zip code / population table.
df

Unnamed: 0,zip_codes,population
0,2108,4183
1,2109,3947
2,2110,2048
3,2111,7537
4,2113,7221
5,2114,12882
6,2115,28536
7,2116,22433
8,2118,27526
9,2119,26259


### Making population by zip code

In [12]:
# Lookup table: zip code -> population, taken from the frame.
# NOTE(review): the earlier note on this cell read "manually made by looking at
# http://www.cityofboston.gov/images_documents/ZipCodes_tcm3-47884.pdf"; this
# dict is computed, so that note presumably describes the hand-written
# zip -> neighborhood mapping defined further down -- confirm.
bb = df.set_index('zip_codes')['population'].to_dict()
bb

{'02108': 4183,
 '02109': 3947,
 '02110': 2048,
 '02111': 7537,
 '02113': 7221,
 '02114': 12882,
 '02115': 28536,
 '02116': 22433,
 '02118': 27526,
 '02119': 26259,
 '02120': 15500,
 '02121': 28051,
 '02122': 24880,
 '02124': 53227,
 '02125': 35379,
 '02126': 27835,
 '02127': 36494,
 '02128': 44989,
 '02129': 18058,
 '02130': 39257,
 '02131': 31890,
 '02132': 28506,
 '02134': 18238,
 '02135': 38682,
 '02136': 31052,
 '02199': 1173,
 '02210': 2179,
 '02215': 24521,
 '02467': 23520}

In [13]:
# Same zip-code keys as ``bb`` but every value blanked out.
# NOTE(review): presumably a template to be filled in by hand -- confirm.
aa = dict.fromkeys(bb, '')
aa

{'02108': '',
 '02109': '',
 '02110': '',
 '02111': '',
 '02113': '',
 '02114': '',
 '02115': '',
 '02116': '',
 '02118': '',
 '02119': '',
 '02120': '',
 '02121': '',
 '02122': '',
 '02124': '',
 '02125': '',
 '02126': '',
 '02127': '',
 '02128': '',
 '02129': '',
 '02130': '',
 '02131': '',
 '02132': '',
 '02134': '',
 '02135': '',
 '02136': '',
 '02199': '',
 '02210': '',
 '02215': '',
 '02467': ''}

In [23]:
# Hand-maintained mapping from zip code to neighborhood name.
# Several zip codes can share one neighborhood.
zips_to_neighborhoods = {
    '02108': 'Beacon Hill',  # north of the Common
    '02109': 'Boston',
    '02110': 'Downtown / Financial District',
    '02111': 'Downtown / Financial District',
    '02113': 'North End',
    '02114': 'Beacon Hill',  # MGH
    '02115': 'Fenway / Kenmore / Audubon Circle / Longwood',
    '02116': 'Back Bay',
    '02118': 'South End',
    '02119': 'Roxbury',
    '02120': 'Mission Hill',
    '02121': 'Dorchester',
    '02122': 'Dorchester',
    '02124': 'Dorchester',
    '02125': 'Dorchester',
    '02126': 'Mattapan',
    '02130': 'Jamaica Plain',
    '02131': 'Roslindale',  # TODO: confirm where this is
    '02134': 'Allston / Brighton',
    '02135': 'Allston / Brighton',
    '02136': 'Hyde Park',
    '02127': 'South Boston',
    '02128': 'East Boston',
    '02129': 'Charlestown',
    '02132': 'West Roxbury',
    '02163': 'Allston / Brighton',
    '02199': 'Back Bay',
    '02203': 'Boston',
    '02210': 'South Boston',
    '02215': 'Fenway / Kenmore / Audubon Circle / Longwood',
    '02467': 'Chestnut Hill',
}

# Mission Hill (and probably others) are messed up:
# it seems zip codes don't map neatly onto neighborhoods.

In [24]:
# Label each row with its neighborhood. The lookup is a plain dict index, so a
# zip code missing from the mapping raises KeyError rather than yielding NaN.
df['neighborhood'] = df['zip_codes'].apply(zips_to_neighborhoods.__getitem__)
df.head()

Unnamed: 0,zip_codes,population,neighborhood
0,2108,4183,Beacon Hill
1,2109,3947,Boston
2,2110,2048,Downtown / Financial District
3,2111,7537,Downtown / Financial District
4,2113,7221,North End


In [25]:
# Write the rows that have no missing values to zip_codes.csv, leaving out the
# index column.
complete_rows = df.dropna()
complete_rows.to_csv('zip_codes.csv', index=False)