## Los Angeles Times (Population / Geo Data)

In [24]:
import pandas as pd

pops = pd.read_csv('2010_Census_Populations_by_Zip_Code.csv')
pops = pops.iloc[1:]

In [25]:
pops.columns = list(map(str, pops.columns))

### CLEAN DATA

Zip Code is changed from `int` to `str` in order to match the zip codes in the geojson file.

To avoid black colored zones due to missing data, the geojson file is filtered so that only zip codes with population data are retained. This is reflect in empty zones in the resulting folium choropleth map.

In [26]:
pops['Zip Code'] = pops['Zip Code'].apply(str)

In [27]:
pops.shape

(318, 7)

In [28]:
import requests

def get_korean_pop(col, year):
    def helper(zipcode):
        API_KEY = 'bbbc7a422b8b29b0f26b19a60a8edbb96d086f40'

        col_zip = 'zip code tabulation area'

        url = 'https://api.census.gov/data/{}/acs/acs5/profile?key={}'.format(str(year),API_KEY)
        url += '&get={}&for={}:{}'.format(col,col_zip,zipcode)

        r = requests.get(url)
        return r.json()[1][0]
    return helper

In [35]:
korean_dict = {2017:('DP05_0049E','DP05_0049PE'),
               2016:('DP05_0044E','DP05_0044PE'),
               2015:('DP05_0044E','DP05_0044PE'),
               2014:('DP05_0044E','DP05_0044PE'),
               2013:('DP05_0044E','DP05_0044PE'),
               2012:('DP05_0044E','DP05_0044PE')}

for year in range(2013,2018):
    name = 'Korean' + str(year)
    print(name + '...')
    pops[name] = pops['Zip Code'].apply(get_korean_pop(korean_dict[year][0], year))
    name += 'Percentage'
    print(name + '...')
    pops[name] = pops['Zip Code'].apply(get_korean_pop(korean_dict[year][1], year))

Korean2013...
Korean2013Percentage...
Korean2014...
Korean2014Percentage...
Korean2015...
Korean2015Percentage...
Korean2016...
Korean2016Percentage...
Korean2017...
Korean2017Percentage...


In [36]:
pops.head()

Unnamed: 0,Zip Code,Total Population,Median Age,Total Males,Total Females,Total Households,Average Household Size,Korean2012,Korean2012Percentage,Korean2013,Korean2013Percentage,Korean2014,Korean2014Percentage,Korean2015,Korean2015Percentage,Korean2016,Korean2016Percentage,Korean2017,Korean2017Percentage
1,90001,57110,26.6,28468,28642,12971,4.4,17,0.0,20,0.0,25,0.0,14,0.0,6,0.0,7,0.0
2,90002,51223,25.5,24876,26347,11731,4.36,21,0.0,10,0.0,9,0.0,7,0.0,2,0.0,7,0.0
3,90003,66266,26.3,32631,33635,15642,4.22,0,0.0,0,0.0,0,0.0,0,0.0,7,0.0,7,0.0
4,90004,62180,34.8,31302,30878,22547,2.73,7605,12.1,6896,11.0,7102,11.2,7874,12.3,7148,11.3,6935,11.1
5,90005,37681,33.9,19299,18382,15044,2.5,9636,24.3,9740,24.6,9762,25.3,9855,25.3,10577,26.9,10590,26.8


In [37]:
pops.shape

(318, 19)

In [47]:
def positive(n):
    if float(n) < 0:
        return 0.0
    return float(n)

pops['Korean2012'] = pops['Korean2012'].apply(positive)
pops['Korean2013'] = pops['Korean2013'].apply(positive)
pops['Korean2014'] = pops['Korean2014'].apply(positive)
pops['Korean2015'] = pops['Korean2015'].apply(positive)
pops['Korean2016'] = pops['Korean2016'].apply(positive)
pops['Korean2017'] = pops['Korean2017'].apply(positive)

pops['Korean2012Percentage'] = pops['Korean2012Percentage'].apply(positive)
pops['Korean2013Percentage'] = pops['Korean2013Percentage'].apply(positive)
pops['Korean2014Percentage'] = pops['Korean2014Percentage'].apply(positive)
pops['Korean2015Percentage'] = pops['Korean2015Percentage'].apply(positive)
pops['Korean2016Percentage'] = pops['Korean2016Percentage'].apply(positive)
pops['Korean2017Percentage'] = pops['Korean2017Percentage'].apply(positive)

In [39]:
import json

with open('zip-code-tabulation-areas-2012.geojson','r') as jsonfile:
    data = json.load(jsonfile)

geozips = []
zips = pops['Zip Code'].values.tolist()

for i in range(len(data['features'])):
    if data['features'][i]['properties']['name'] in zips:
        geozips.append(data['features'][i])
        
new_json = dict.fromkeys(['type','features'])
new_json['type'] = 'FeatureCollection'
new_json['features'] = geozips

open('updated-file.json','w').write(
    json.dumps(new_json,sort_keys=True, indent=4, separators=(',',': '))
)

6578436

In [40]:
from shapely.geometry import Polygon

with open('updated-file.json') as json_data:
    centroid_data = json.load(json_data)
    
centroid_data = centroid_data['features']

centroid_dict = {}
for data in centroid_data:
    name = data['properties']['name']
    location = data['geometry']['coordinates']
    ref_polygon = Polygon(location[0][0])
    c = ref_polygon.centroid.wkt
    c = c[7:-1].split(' ')
    lon = float(c[0])
    lat = float(c[1])
    centroid_dict[name] = (lat,lon)
    
def get_latitude(z):
    return centroid_dict[z][0]
def get_longitude(z):
    return centroid_dict[z][1]

pops['Latitude'] = pops['Zip Code'].apply(get_latitude)
pops['Longitude'] = pops['Zip Code'].apply(get_longitude)

In [41]:
pops.shape

(318, 21)

### ZIP CODES by POPULATION

In [48]:
import folium

# Los Angeles Coordinates
latitude = 34.0522
longitude = -118.2437

map_la = folium.Map(location=[latitude, longitude], zoom_start=8)

LA_geo = r'updated-file.json'
LA_geo_json = json.load(open('updated-file.json'))

folium.Choropleth(geo_data=LA_geo,
                  data=pops,
                  data_out='dataout.json',
                  columns=['Zip Code', 'Total Population'],
                  key_on='feature.properties.name',
                  fill_color='YlOrRd',
                  fill_opacity=0.7,
                  line_opacity=0.2,
                  legend_name='Population').add_to(map_la)

map_la

### ZIP CODES by MEDIAN AGE

In [43]:
# Los Angeles Coordinates
latitude = 34.0522
longitude = -118.2437

map_la = folium.Map(location=[latitude, longitude], zoom_start=8)

LA_geo = r'updated-file.json'
LA_geo_json = json.load(open('updated-file.json'))

folium.Choropleth(geo_data=LA_geo,
                  data=pops,
                  data_out='dataout.json',
                  columns=['Zip Code', 'Median Age'],
                  key_on='feature.properties.name',
                  fill_color='PuBu',
                  fill_opacity=0.7,
                  line_opacity=0.2,
                  legend_name='Median Age').add_to(map_la)

map_la

### ZIP CODES by KOREAN POPULATION PERCENTAGE

In [49]:
# Los Angeles Coordinates
latitude = 34.0522
longitude = -118.2437

map_la = folium.Map(location=[latitude, longitude], zoom_start=8)

LA_geo = r'updated-file.json'
LA_geo_json = json.load(open('updated-file.json'))

folium.Choropleth(geo_data=LA_geo,
                  data=pops,
                  data_out='dataout.json',
                  columns=['Zip Code', 'Korean2017Percentage'],
                  key_on='feature.properties.name',
                  fill_color='YlOrRd',
                  fill_opacity=0.7,
                  line_opacity=0.2,
                  legend_name='Korean Population (%)').add_to(map_la)

map_la

### FOURSQUARE RESTAURANT DATA

In [50]:
CLIENT_ID = 'OMU1JFUH42SN2MDNS1KXMZLUUOFLXHMMFRGORYAUPNBZIHS3' # your Foursquare ID
CLIENT_SECRET = 'IDVCEKJ40323QZUTJ3GRALIHY5G44ARE2QJU3GKFVK4PWFOT' # your Foursquare Secret
VERSION = '20190624' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: OMU1JFUH42SN2MDNS1KXMZLUUOFLXHMMFRGORYAUPNBZIHS3
CLIENT_SECRET:IDVCEKJ40323QZUTJ3GRALIHY5G44ARE2QJU3GKFVK4PWFOT


In [51]:
pops.shape

(318, 21)

In [52]:
from pandas.io.json import json_normalize

latitude = 33.867885
longitude = -118.068612

def get_num_KR(row):
    z = row['Zip Code']
    latitude = row['Latitude']
    longitude = row['Longitude']
#     print(z,':',latitude,longitude)
    url = 'https://api.foursquare.com/v2/venues/search?radius=10000&intent=checkin&categoryId=4bf58dd8d48988d113941735&client_id={}&client_secret={}&v={}&ll={},{}&limit=100'.format(CLIENT_ID,CLIENT_SECRET,VERSION,latitude,longitude)
    results = requests.get(url).json()
    if results['response'] == {}:
        return 0
    return len(results['response']['venues'])

pops['numKR'] = pops.apply(get_num_KR, axis=1)

In [53]:
pops.shape

(318, 22)

In [54]:
pops.columns

Index(['Zip Code', 'Total Population', 'Median Age', 'Total Males',
       'Total Females', 'Total Households', 'Average Household Size',
       'Korean2012', 'Korean2012Percentage', 'Korean2013',
       'Korean2013Percentage', 'Korean2014', 'Korean2014Percentage',
       'Korean2015', 'Korean2015Percentage', 'Korean2016',
       'Korean2016Percentage', 'Korean2017', 'Korean2017Percentage',
       'Latitude', 'Longitude', 'numKR'],
      dtype='object')

### Predict Korean Population

Using census data from 2012 to 2017 we use linear regression to estimate what the Korean population will be in 2020.

In [80]:
filter_columns = ['Korean2012','Korean2013','Korean2014','Korean2015',
                  'Korean2016','Korean2017']

X = [[2012],[2013],[2014],[2015],[2016],[2017]]
Y = pops[filter_columns]

In [93]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline, make_pipeline

def predict_2020(X):
    def helper(y):
        y = list(y)
        model = make_pipeline(PolynomialFeatures(degree=1),LinearRegression())
        model.fit(X,y)
        p = model.predict([[2020]])
        return p
    return helper

pops['Korean2020'] = Y.apply(predict_2020(X),axis=1)

In [100]:
pops['Change2012-2020'] = pops['Korean2020'] - pops['Korean2012']

In [101]:
MaxChange = max(pops['Change2012-2020'])[0]
MaxChange

2411.619047619053

### KOREAN RESTAURANT INDEX

Various factors are important for restaurant success: population size, median age in the 30s and 40s, a robust Korean pouplation, the number of existing Korean restaurants in the area, and the greatest Korean population jump from 2012 to the population inferred for 2020.

In [107]:
def get_index(row):
    pop = row['Total Population']
    age = row['Median Age']
    kor = row['Korean2017']
    nKR = row['numKR']
    chg = row['Change2012-2020']
    
    return int((pop/105549 * 0.2 + age/39 * 0.2 + kor/30 * 0.2 + (50-nKR)/50 * 0.2 + chg/MaxChange * 0.2) * 100)

pops['KRindex'] = pops.apply(get_index, axis=1)

In [108]:
pops.shape

(318, 25)

In [111]:
import folium

# Los Angeles Coordinates
latitude = 34.0522
longitude = -118.2437

map_la = folium.Map(location=[latitude, longitude], zoom_start=8)

LA_geo = r'updated-file.json'

folium.Choropleth(geo_data=LA_geo,
                  data=pops,
                  columns=['Zip Code', 'KRindex'],
                  key_on='feature.properties.name',
                  fill_color='YlOrRd',
                  fill_opacity=0.7,
                  line_opacity=0.2,
                  legend_name='Korean Restaurant Index').add_to(map_la)

map_la