Reference tutorial: [Visualizing Data at the Zip Code Level with Folium](https://towardsdatascience.com/visualizing-data-at-the-zip-code-level-with-folium-d07ac983db20)

In [1]:
import json

import folium
import pandas as pd
from folium.plugins import MarkerCluster

# (latitude, longitude) pair passed as `location` to centre the folium maps
# created in this notebook.
KC_coord = (47.560180,-122.213948)

In [2]:
def _normalise_header(name):
    """Trim, lower-case and remove spaces and hyphens from one column name."""
    return name.strip().lower().replace(' ', '').replace('-', '')


# Load the house data and normalise the headers in one pass.
# The header clean-up is precautionary more than anything else.
df = pd.read_csv('data/kc_house_data.csv').rename(columns=_normalise_header)

df.head(3)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,NONE,...,7 Average,1180,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,NONE,...,7 Average,2170,400.0,1951,1991.0,98125,47.721,-122.319,1690,7639
2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,NO,NONE,...,6 Low Average,770,0.0,1933,,98028,47.7379,-122.233,2720,8062


In [3]:
# First pass at cleaning.
# Missing values are filled by plain assignment instead of
# `df[col].fillna(..., inplace=True)`: the in-place call on a selected column
# is chained assignment, which pandas 2.x warns about and which no longer
# updates `df` once Copy-on-Write is enabled.
df['waterfront'] = df['waterfront'].fillna('Unknown')
df['yr_renovated'] = df['yr_renovated'].fillna(0)

# Rows without a view rating are dropped.
df = df.dropna(subset=['view'])

# Removing outliers.
# .copy() makes the filtered frame independent, so the column assignment
# below does not raise SettingWithCopyWarning.
df = df[df['bedrooms'] <= 15].copy()  # Only 1 record removed. 33 Bed, 1.75 Bath?

# Change view rating to a numeric value. Probably don't need it.
# Labels that are not keys of the mapping become NaN.
# NOTE: re-running this cell on an already-mapped `df` turns every rating into
# NaN, because the numeric values are not keys of the mapping.
df['view'] = df['view'].map({'EXCELLENT': 5,
                             'GOOD': 4,
                             'FAIR': 3,
                             'AVERAGE': 2,
                             'NONE': 1})

In [4]:
# Load the raw zip-code boundary GeoJSON.
# The encoding is given explicitly so the read does not depend on the
# platform's locale default (JSON text is UTF-8).
with open('data/Zipcodes_for_King_County_and_Surrounding_Area___zipcode_area.geojson',
          'r', encoding='utf-8') as f:
    geoJSON_raw = json.load(f)

In [5]:
# first_feature = geoJSON_raw['features'][0]
# first_feature['geometry'] = 0 #just so we can see the feature. Too verbose
# first_feature

In [6]:
# Total number of distinct zip codes in the dataframe.
df['zipcode'].unique().size

70

In [7]:
# Steps to pare down our geoJSON data to zip codes in our dataframe.
# The dataframe's zip codes are collected once into a set, so each feature is
# checked with a constant-time lookup instead of recomputing and scanning
# df['zipcode'].unique() for every feature.
zipcodes_in_df = set(df['zipcode'].unique().tolist())

# Make a list of zip codes in the geoJSON data that are also in our dataframe.
# The ZIPCODE property is converted with int() once per feature.
zipcodes_geoJSON = [
    zipcode
    for zipcode in (int(feature['properties']['ZIPCODE'])
                    for feature in geoJSON_raw['features'])
    if zipcode in zipcodes_in_df
]

In [8]:
# Steps to pare down our geoJSON data to zip codes in our dataframe.
# The dataframe's zip codes are collected once into a set, so each feature is
# checked with a constant-time lookup instead of recomputing
# df['zipcode'].unique() for every feature.
zipcodes_in_df = set(df['zipcode'].unique().tolist())

# Make a list of features in the raw geoJSON data whose zip code is also in
# our dataframe.
geoJSON_data = [feature
                for feature in geoJSON_raw['features']
                if int(feature['properties']['ZIPCODE']) in zipcodes_in_df]

# Build the pared-down collection. The GeoJSON type name is
# 'FeatureCollection' (singular "Feature"); the previous 'FeaturesCollection'
# is not a valid GeoJSON type.
new_json = {'type': 'FeatureCollection', 'features': geoJSON_data}

# Write it out with a context manager so the file handle is always closed.
with open('data/update_GEOJSON.json', 'w') as f:
    chars_written = f.write(json.dumps(new_json,
                                       sort_keys=True,
                                       indent=4,
                                       separators=(',', ':')))

# Number of characters written, shown as the cell output.
chars_written

12459337

In [9]:
# Sanity check: the number of distinct zip codes matched in the geoJSON data
# equals the number of distinct zip codes in the dataframe.
df['zipcode'].unique().size == len(set(zipcodes_geoJSON))

True

In [13]:
# Map centred on KC_coord with one marker for each of the first
# `number_of_markers` records.
# The map is bound to `marker_map` rather than `map` so that the built-in
# map() is not shadowed for the rest of the kernel session.
number_of_markers = 100
marker_map = folium.Map(location=KC_coord, tiles="OpenStreetMap", zoom_start=9)

# head() limits the rows up front, replacing the manual counter and `break`.
# `radius` was dropped: it is not a folium.Marker parameter (it belongs to
# Circle / CircleMarker).
for lat, long in df[['lat', 'long']].head(number_of_markers).itertuples(index=False):
    folium.Marker(location=[lat, long]).add_to(marker_map)

marker_map

In [None]:
# Same view with the "Stamen Terrain" tile set.
# Bound to `terrain_map` rather than `map` so the built-in map() is not shadowed.
# NOTE(review): recent folium releases may no longer bundle the Stamen tile
# sets; confirm this tile name is accepted by the installed version.
terrain_map = folium.Map(location=KC_coord, tiles="Stamen Terrain", zoom_start=9)
terrain_map

In [None]:
# Same view with the "Stamen Toner" tile set.
# Bound to `toner_map` rather than `map` so the built-in map() is not shadowed.
# NOTE(review): recent folium releases may no longer bundle the Stamen tile
# sets; confirm this tile name is accepted by the installed version.
toner_map = folium.Map(location=KC_coord, tiles="Stamen Toner", zoom_start=9)
toner_map

In [None]:
# Plain OpenStreetMap view centred on KC_coord, without markers.
# Bound to `osm_map` rather than `map` so the built-in map() is not shadowed.
osm_map = folium.Map(location=KC_coord, tiles="OpenStreetMap", zoom_start=9)
osm_map

In [None]:
# Create an empty map centred on KC_coord.
_map = folium.Map(location=KC_coord, zoom_start=12)

# Add a marker for each of the first 100 records, using a clustered view.
# Map.simple_marker(..., clustered_marker=True) belongs to the legacy folium
# API; the MarkerCluster plugin provides the clustered view and matches the
# folium.Marker(...).add_to(...) style used earlier in this notebook.
marker_cluster = MarkerCluster().add_to(_map)
for lat, long in df[['lat', 'long']].head(100).itertuples(index=False):
    folium.Marker(location=[lat, long]).add_to(marker_cluster)

display(_map)

In [None]:
# Choropleth of one value per zip code.
# NOTE(review): the original call was unfinished (`geo_data = ,` is a
# SyntaxError and no `columns` / `key_on` were given). Mean `price` per zip
# code is an assumed metric; confirm what this map is meant to show.
price_by_zip = df.groupby('zipcode', as_index=False)['price'].mean()

# Earlier cells convert the ZIPCODE property with int(), so it is presumably
# stored as a string; the dataframe key is cast to match. TODO confirm.
price_by_zip['zipcode'] = price_by_zip['zipcode'].astype(str)

choropleth_map = folium.Map(location=KC_coord, zoom_start=9)
folium.Choropleth(
    # Only the features whose zip code appears in the dataframe.
    geo_data={'type': 'FeatureCollection', 'features': geoJSON_data},
    data=price_by_zip,
    columns=['zipcode', 'price'],
    key_on='feature.properties.ZIPCODE',
    legend_name='Mean price',
).add_to(choropleth_map)

choropleth_map