# Mapping charter schools

Authors: 
- Harshayu Girase, URAP research apprentice, Computer Science major, UC Berkeley
- Serena Jiang, URAP research apprentice, Computer Science major, UC Berkeley
- Jaren Haber, PhD Candidate, Sociology, UC Berkeley

In [6]:
!pip install pysal

Collecting pysal
  Downloading https://files.pythonhosted.org/packages/e6/e4/c4fb97674458dee71ce8dda517f6ad02c498518d26a0c4b8b692d741c3b9/PySAL-1.14.4.post2.tar.gz (17.9MB)
[K    100% |████████████████████████████████| 17.9MB 27kB/s  eta 0:00:01
Building wheels for collected packages: pysal
  Running setup.py bdist_wheel for pysal ... [?25ldone
[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/43/33/20/65692f75cb30834752a6cb566069d5e98ab209392f24bf5466
Successfully built pysal
Installing collected packages: pysal
Successfully installed pysal-1.14.4.post2
[33mYou are using pip version 9.0.3, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [7]:
# Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from shapely.geometry import Point
import geopandas as gpd
import csv
from geopandas.tools import sjoin
import folium
#from folium import plugins
from folium.plugins import MarkerCluster
import pyepsg
#from folium import IFrame
import shapely
from shapely.geometry import Point
import unicodedata
import fiona
import pysal as ps



In [8]:
import os
os.getcwd()

'/home/jovyan/work/geospatial/scripts'

In [9]:
# Define data file paths
dir_prefix = '/home/jovyan/work/'

charterfile = dir_prefix + 'nowdata/backups/charters_parsed_03-04_no-text_SMALL.csv'
districtfile = dir_prefix + 'data_management/data/ACS_2016_sd-merged_SMALL.csv'
publicfile = dir_prefix + 'pub_schools_merged_2015.csv'

shapefileuni = dir_prefix + 'geospatial/shapefiles/US_sd_uni_2016.shp'
shapefileelm = dir_prefix + 'geospatial/shapefiles/US_sd_elm_2016.shp'
shapefilesec = dir_prefix + 'geospatial/shapefiles/US_sd_sec_2016.shp'
#on top of shapefile, districtfile is the districts. publicfile school data. chartefile non used.

In [10]:
# Create merged geometric DF using district, elementary, secondary school district shapefiles
unishape = gpd.read_file(shapefileuni) # Create shapefile DF with only geometric data (for general use)
elmshape = gpd.read_file(shapefileelm)
secshape = gpd.read_file(shapefilesec)

#'sdshape' has polygons of the districts.
sdshape = pd.concat([unishape, elmshape, secshape], join='outer', ignore_index=True, sort=False) # Merge DFs
sdshape.head(10)

Unnamed: 0,STATEFP,UNSDLEA,GEOID,NAME,LSAD,LOGRADE,HIGRADE,MTFCC,SDTYP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,GISJOIN,Shape_Leng,Shape_Area,geometry,ELSDLEA,SCSDLEA
0,6,1,600001,Acton-Agua Dulce Unified School District,0,KG,12,G5420,,E,555175100.0,182565.0,34.4509057,-118.2276141,G06000001,110175.64712,555357800.0,"POLYGON ((-1992050.8324 -117279.0507999994, -1...",,
1,6,9,600009,Cuyama Joint Unified School District,0,KG,12,G5420,,E,1864349000.0,20054388.0,34.830101,-119.5209313,G06000009,308308.471429,1884403000.0,"POLYGON ((-2103814.7531 -21629.76970000006, -2...",,
2,6,11,600011,Fort Sage Unified School District,0,KG,12,G5420,,E,682689200.0,5404559.0,39.9744995,-120.090101,G06000011,160781.482128,688093700.0,POLYGON ((-2004728.815400001 552450.2798999995...,,
3,6,13,600013,Rocklin Unified School District,0,KG,12,G5420,,E,48713360.0,99188.0,38.8080407,-121.2503648,G06000013,33017.5203,48812550.0,POLYGON ((-2144286.406199999 436434.3016999997...,,
4,6,14,600014,Hesperia Unified School District,0,KG,12,G5420,,E,358275700.0,4180205.0,34.3616273,-117.33698,G06000014,78801.356019,362455900.0,POLYGON ((-1921037.820800001 -123437.125099999...,,
5,6,16,600016,Upland Unified School District,0,KG,12,G5420,,E,67888480.0,502750.0,34.1463165,-117.6585233,G06000016,42826.143777,68391240.0,POLYGON ((-1970528.456900001 -155252.044500000...,,
6,6,18,600018,Mountain Valley Unified School District,0,KG,12,G5420,,E,1630741000.0,99155.0,40.542254,-123.2406802,G06000018,211362.608546,1630841000.0,"POLYGON ((-2259981.4526 686641.2267000005, -22...",,
7,6,19,600019,Dublin Unified School District,0,KG,12,G5420,,E,35426070.0,0.0,37.715377,-121.9104967,G06000019,37799.106445,35426060.0,"(POLYGON ((-2235995.2443 329544.7997999992, -2...",,
8,6,20,600020,Pleasanton Unified School District,0,KG,12,G5420,,E,102460900.0,391116.0,37.6649739,-121.8867095,G06000020,58631.121814,102852000.0,"POLYGON ((-2229697.0096 327679.7281999998, -22...",,
9,6,21,600021,Sunol Glen Unified School District,0,KG,12,G5420,,E,244323000.0,2809642.0,37.5511338,-121.8054596,G06000021,76479.473027,247132600.0,POLYGON ((-2228584.443600001 317234.8045000006...,,


## Define Helper Functions


In [11]:
def refresh_data():
    '''Loads data from file, undoing any bounding boxes, etc. 
    Great for making a fresh start when plotting a new area.
    
    Also merges geometric data from shapefile DF into full school district DF.
    To do this, first creates a common identifier across these DFs by modifying district 'Qualifying Name' 
    so it lines up with the district shapefile.'''
    
    global charterDF, district_df, publicDF, sdshape, gjson
    
    # Import data:
    #gc.disable()  
    #Note for Jaren, gc is not defined, what is it supposed be. I believe it was about convert_df but it is not working
    charterDF = pd.read_csv(charterfile, low_memory=False) # Charter school data
    #publicDF = pd.read_csv(publicfile, low_memory=False) # Public school data
    #publicDF = convert_df(publicDF) # Make memory-efficient
    districtDF = pd.read_csv(districtfile, encoding='latin-1', low_memory=False) # School district data
    #gc.enable()
    
    # Merge sdshape with districtDF:
    districtDF['NAME'] = districtDF['Qualifying Name'].apply(lambda x: ",".join(x.split(",")[:-1]).strip())
    district_df = pd.merge(districtDF, sdshape, on="NAME") 
    district_df = gpd.GeoDataFrame(district_df)
    district_df = district_df.apply(pd.to_numeric, errors ='ignore')
    district_df.crs = sdshape.crs
    gjson = district_df.to_json()
    #list district_df gives you the column names. 
    
    '''NOTES:
    #district_df and charter_df are geodataframes, publicdf is neither it is some kind of data
    #district df both has the polygons and the total white percentage stuff
    #charterdf has points!
    #district_df.head()
    #whites = district_df[['% Total Population: White Alone']]
    #whites.dtypes
    #type(names)
    #names = names.apply(pd.to_numeric)
    #names.dtypes
    # districtchanged['style'] = [
    #     {'fillColor': '#ff0000', 'weight': 2, 'color': 'black'},
    #     {'fillColor': '#00ff00', 'weight': 2, 'color': 'black'},
    #     {'fillColor': '#0000ff', 'weight': 2, 'color': 'black'}
    # ]
    '''
    
    # Create geometry for charter schools:
    charterDF['geometry'] = charterDF[['LONGCODE', 'LATCODE']].apply(lambda row: Point(row["LONGCODE"], row["LATCODE"]), axis=1) # Use GeoPandas to create Point class for each school
    charterDF = gpd.GeoDataFrame(charterDF)
    charterDF.crs = {'init' :'epsg:4326'}
    charterDF = charterDF.to_crs(sdshape.crs)
    #charterdf has ideology information. 
    
    
    district_df.crs = sdshape.crs
    gjson = district_df.to_json()

In [12]:
def convert_df(df):
    """Makes a Pandas DataFrame more memory-efficient through intelligent use of Pandas data types: 
    specifically, by storing columns with repetitive Python strings not with the object dtype for unique values 
    (entirely stored in memory) but as categoricals, which are represented by repeated integer values. This is a 
    net gain in memory when the reduced memory size of the category type outweighs the added memory cost of storing 
    one more thing. As such, this function checks the degree of redundancy for a given column before converting it."""
    
    converted_df = pd.DataFrame() # Initialize DF for memory-efficient storage of strings (object types)
    # TO DO: Infer dtypes of df
    df_obj = df.select_dtypes(include=['object']).copy() # Filter to only those columns of object data type

    for col in df.columns: 
        if col in df_obj: 
            num_unique_values = len(df_obj[col].unique())
            num_total_values = len(df_obj[col])
            if (num_unique_values / num_total_values) < 0.5: # Only convert data types if at least half of values are duplicates
                converted_df.loc[:,col] = df[col].astype('category') # Store these columns as dtype "category"
            else: 
                converted_df.loc[:,col] = df[col]
        else:    
            converted_df.loc[:,col] = df[col]
                      
    converted_df.select_dtypes(include=['float']).apply(pd.to_numeric,downcast='float')
    converted_df.select_dtypes(include=['int']).apply(pd.to_numeric,downcast='signed')
    
    return converted_df

In [13]:
sdshape.crs

{'proj': 'aea',
 'lat_1': 29.5,
 'lat_2': 45.5,
 'lat_0': 37.5,
 'lon_0': -96,
 'x_0': 0,
 'y_0': 0,
 'datum': 'NAD83',
 'units': 'm',
 'no_defs': True}

In [14]:
refresh_data()
#this takes like a minute or two, be careful. what you do before might be lost.

In [15]:
# print (type(district_df))
#district_df and charter_df are geodataframes, publicdf is neither it is some kind of data
#district df both has the polygons and the total white percentage stuff
#charterdf has points!
#district_df.head()
#whites = district_df[['% Total Population: White Alone']]
#whites.dtypes

#creation of district data easy again
districtchanged = district_df.apply(pd.to_numeric, errors ='ignore')
#type(names)
#names = names.apply(pd.to_numeric)
#names.dtypes
#list(districtchanged)
#need to get only california(or even better sf data)
#districtchanged = district_df
#districtchanged = districtchanged[(districtchanged.iloc[: , 3] == 'ca')]    #İlk üc columna bakıo. 

districtchangedu = districtchanged[['FIPS','Name of Area','% Total Population: White Alone','Shape_Leng','Shape_Area','geometry']]
#districtchangedu = districtchangedu.loc[983: 1500, :]
districtchanged = districtchangedu
# districtchanged['style'] = [
#     {'fillColor': '#ff0000', 'weight': 2, 'color': 'black'},
#     {'fillColor': '#00ff00', 'weight': 2, 'color': 'black'},
#     {'fillColor': '#0000ff', 'weight': 2, 'color': 'black'}
# ]
districtchanged



#try FIPS = 6

Unnamed: 0,FIPS,Name of Area,% Total Population: White Alone,Shape_Leng,Shape_Area,geometry
0,100001.0,"Fort Rucker School District, Alabama",79.90,97610.669919,2.357952e+08,POLYGON ((964471.7861000001 -619835.7846999997...
1,100003.0,"Maxwell AFB School District, Alabama",64.04,14236.382240,9.043613e+06,POLYGON ((900511.9332999997 -524987.7790999999...
2,100005.0,"Albertville City School District, Alabama",91.85,121162.081906,6.899051e+07,(POLYGON ((889477.8247999996 -317969.414699999...
3,100006.0,"Marshall County School District, Alabama",94.18,301456.169769,1.371943e+09,POLYGON ((602420.1983999982 -276312.5311000003...
4,100006.0,"Marshall County School District, Alabama",94.18,629389.578406,1.370790e+09,"(POLYGON ((897668.364599999 -324489.5702, 8976..."
5,100006.0,"Marshall County School District, Alabama",94.18,119941.110936,8.810379e+08,POLYGON ((683498.7168000005 -30136.98530000076...
6,100006.0,"Marshall County School District, Alabama",94.18,125282.440985,8.084751e+08,"POLYGON ((1304935.894200001 382478.6041999999,..."
7,100006.0,"Marshall County School District, Alabama",94.18,187604.621103,9.742443e+08,POLYGON ((834317.7391999997 -160248.6121999994...
8,2103810.0,"Marshall County School District, Kentucky",98.00,301456.169769,1.371943e+09,POLYGON ((602420.1983999982 -276312.5311000003...
9,2103810.0,"Marshall County School District, Kentucky",98.00,629389.578406,1.370790e+09,"(POLYGON ((897668.364599999 -324489.5702, 8976..."


In [16]:
p2 = districtchanged.loc[0, 'geometry']

In [17]:
districtchanged.head()

Unnamed: 0,FIPS,Name of Area,% Total Population: White Alone,Shape_Leng,Shape_Area,geometry
0,100001.0,"Fort Rucker School District, Alabama",79.9,97610.669919,235795200.0,POLYGON ((964471.7861000001 -619835.7846999997...
1,100003.0,"Maxwell AFB School District, Alabama",64.04,14236.38224,9043613.0,POLYGON ((900511.9332999997 -524987.7790999999...
2,100005.0,"Albertville City School District, Alabama",91.85,121162.081906,68990510.0,(POLYGON ((889477.8247999996 -317969.414699999...
3,100006.0,"Marshall County School District, Alabama",94.18,301456.169769,1371943000.0,POLYGON ((602420.1983999982 -276312.5311000003...
4,100006.0,"Marshall County School District, Alabama",94.18,629389.578406,1370790000.0,"(POLYGON ((897668.364599999 -324489.5702, 8976..."


In [18]:
#list(districtcha)
#X X X map charterdf data on districtDF map 
#just charterdf colorfully on to a map. 

#the sooner you get to the tableou ohoto jaren sent, its better. work on getting that.
#for districts color with %total population, white alone. 

##Jaren asked, all 3 district files working and make the folium work. 

# CREATION OF CHARTERDF EASY DATAFRAME
charterDF
charterchanged = charterDF.apply(pd.to_numeric, errors ='ignore')
#comment the next line to draw for all states.
#charterchanged = charterchanged[(charterchanged.iloc[: , 0] == 'CALIFORNIA')]
charterchanged
charterchanged = charterchanged[['LEA_NAME','SCH_NAME','geometry']]
charterchanged
charterchanged5 = charterchanged.head(5)
charterchanged
charterchanged['latitude'] = (charterchanged.geometry.x)
charterchanged['longitude'] = (charterchanged.geometry.y)
charterchanged
charterdropped = charterchanged.dropna()
charterdropped

Unnamed: 0,LEA_NAME,SCH_NAME,geometry,latitude,longitude
0,Lower Kuskokwim School District,Ayaprun Elitnaurvik,POINT (-3698126.569546513 3859820.829485077),-3.698127e+06,3.859821e+06
1,Ketchikan Gateway Borough School District,Ketchikan Charter School,POINT (-2328109.419412659 2408321.007984187),-2.328109e+06,2.408321e+06
2,Ketchikan Gateway Borough School District,Tongass School of Arts and Sciences Charter Sc...,POINT (-2328109.419412659 2408321.007984187),-2.328109e+06,2.408321e+06
3,Anchorage School District,Aquarian Charter School,POINT (-3093389.285215473 3467742.156542414),-3.093389e+06,3.467742e+06
4,Anchorage School District,Family Partnership Charter School,POINT (-3090998.731611759 3466887.320557346),-3.090999e+06,3.466887e+06
5,Anchorage School District,Winterberry School,POINT (-3086881.254226208 3463840.630775834),-3.086881e+06,3.463841e+06
6,Anchorage School District,Eagle Academy Charter School,POINT (-3069435.014155697 3467383.870144491),-3.069435e+06,3.467384e+06
7,Anchorage School District,Frontier Charter School,POINT (-3091945.754119765 3467116.654106261),-3.091946e+06,3.467117e+06
8,Anchorage School District,Highland Tech High Charter School,POINT (-3086095.385439134 3463283.728068264),-3.086095e+06,3.463284e+06
9,Anchorage School District,Rilke Schule Charter School,POINT (-3093326.880683478 3465443.497560237),-3.093327e+06,3.465443e+06


# Convert Charter Charter Data --> GeoJSON File

In [12]:
charterdropped.crs = sdshape.crs
charterdroppedlol = charterdropped.to_crs(epsg='4326')
#cjson = charterdropped.to_json() #contains coordinates in correct format
charterdroppedlol['latitude'] = (charterdroppedlol.geometry.x)
charterdroppedlol['longitude'] = (charterdroppedlol.geometry.y)
charterdroppedlol.head()

Unnamed: 0,LEA_NAME,SCH_NAME,geometry,latitude,longitude
0,Lower Kuskokwim School District,Ayaprun Elitnaurvik,POINT (-161.765194 60.796131),-161.765194,60.796131
1,Ketchikan Gateway Borough School District,Ketchikan Charter School,POINT (-131.641191 55.34700099999997),-131.641191,55.347001
2,Ketchikan Gateway Borough School District,Tongass School of Arts and Sciences Charter Sc...,POINT (-131.641191 55.34700099999997),-131.641191,55.347001
3,Anchorage School District,Aquarian Charter School,POINT (-149.916872 61.19240699999999),-149.916872,61.192407
4,Anchorage School District,Family Partnership Charter School,POINT (-149.876 61.1981),-149.876,61.1981


In [20]:
districtchanged.crs = sdshape.crs
districtchangedlol = districtchanged.to_crs(epsg='4326')

In [21]:
type(districtchangedlol.loc[0, 'geometry'])

shapely.geometry.polygon.Polygon

In [27]:
districtchangedlol.to_pickle("../../jerryshi/districtchangedlol.pkl")

In [None]:
# chartmap = folium.Map([37.7556, -122.4399], zoom_start = 7)
# fg=folium.FeatureGroup(name="Charter schools")
# for index, row in charterchanged.iterrows():
#     fg.add_child(folium.Marker(location=[lat,lon],popup=(folium.Popup(name)),icon=folium.Icon(color=color(elev),icon_color='green')))
# charterchanged.crs = sdshape.crs
# latlong = [xy for xy in charterchanged['geometry']]
# latlong

#IGNORE THIS CODE
charterchanged5.crs = sdshape.crs
charterchanged5 = charterchanged5.to_crs(epsg='4326')
cjson = charterchanged5.to_json()
print(cjson)


#charterchanged5.plot(marker='o', color='blue', markersize=0.5)
# print(charterchanged.crs)
# charterjson = charterchanged.to_json()
# #folium.GeoJson(charterchanged).add_to(chartmap)
# points = folium.features.GeoJson(charterjson)
# # my_marker_cluster = folium.MarkerCluster().add_to(chartmap) 
# # for ix, row in deaths.iterrows(): 
# #     text = "Name: " + row['SCH_NAME'] + "<br>" + "Location: " + str(row['LEA_NAME']) 
# #     popup = folium.Popup(IFrame(text, width=300, height=100)) 
# #     folium.Marker(location = [row['Latitude'],row['Longitude']], popup=popup).add_to(my_marker_cluster)
# charterchanged5['latitude'] = (charterchanged5.geometry.x)
# charterchanged5['longitude'] = (charterchanged5.geometry.y)
# locations  = charterchanged5[['latitude', 'longitude']]
# locationlist = locations.values.tolist()
# print(locationlist)
# charterchanged5
# cmap = folium.Map(location=[37.7556, -122.4399], zoom_start = 7)
# for point in range(0, len(locationlist)):
#     folium.Marker(locationlist[point], popup='is working?')
# cmap
#     #chartmap.add
# chartmap.add_child(points)
# chartmap.save(os.path.join(dir_prefix, 'geospatial/shapefiles/charterschools.html'))
# chartmap

# Convert District Charter Data --> GeoJSON File

In [None]:
#see if we can make shapefiles smaller
s = (districtchanged.iloc[0].geometry).simplify(500, preserve_topology = True)
s
#s = x.simplify(0.05, preserve_topology=False)

In [None]:
first5 = (districtchanged.loc[:4]).copy()
first5.head(7)
print(type(first5.geometry))
geoser = first5.geometry
first5['reducedgeo'] = geoser.simplify(tolerance = 500)

In [16]:
geoser = districtchanged.geometry
districtchanged['reducedgeo'] = geoser.simplify(tolerance = 500)
districtchanged = districtchanged.drop('geometry', axis = 1)

In [17]:
districtchanged.head()

Unnamed: 0,FIPS,Name of Area,% Total Population: White Alone,Shape_Leng,Shape_Area,reducedgeo
0,100001.0,"Fort Rucker School District, Alabama",79.9,97610.669919,235795200.0,POLYGON ((964471.7861000001 -619835.7846999997...
1,100003.0,"Maxwell AFB School District, Alabama",64.04,14236.38224,9043613.0,POLYGON ((900511.9332999997 -524987.7790999999...
2,100005.0,"Albertville City School District, Alabama",91.85,121162.081906,68990510.0,(POLYGON ((889477.8247999996 -317969.414699999...
3,100006.0,"Marshall County School District, Alabama",94.18,301456.169769,1371943000.0,POLYGON ((602420.1983999982 -276312.5311000003...
4,100006.0,"Marshall County School District, Alabama",94.18,629389.578406,1370790000.0,"(POLYGON ((897668.364599999 -324489.5702, 8976..."


In [18]:
districtchanged.columns = ['FIPS','Name of Area', '% Total Population: White Alone', 'Shape_Leng', 'Shape_Area', 'geometry']
districtchanged.head()

Unnamed: 0,FIPS,Name of Area,% Total Population: White Alone,Shape_Leng,Shape_Area,geometry
0,100001.0,"Fort Rucker School District, Alabama",79.9,97610.669919,235795200.0,POLYGON ((964471.7861000001 -619835.7846999997...
1,100003.0,"Maxwell AFB School District, Alabama",64.04,14236.38224,9043613.0,POLYGON ((900511.9332999997 -524987.7790999999...
2,100005.0,"Albertville City School District, Alabama",91.85,121162.081906,68990510.0,(POLYGON ((889477.8247999996 -317969.414699999...
3,100006.0,"Marshall County School District, Alabama",94.18,301456.169769,1371943000.0,POLYGON ((602420.1983999982 -276312.5311000003...
4,100006.0,"Marshall County School District, Alabama",94.18,629389.578406,1370790000.0,"(POLYGON ((897668.364599999 -324489.5702, 8976..."


In [21]:
#districtchanged.crs = {'init': 'epsg:4326'}
districtchanged.crs = district_df.crs
print(districtchanged.crs)
type(districtchanged)
#strr = districtchanged.crs['init'].split(':')[1]
#pyepsg.get(strr)
#gjson = districtchanged.to_json()
# districtchanged.plot()
# f, ax = plt.subplots(1, figsize=(10, 6))
# ax.set_title('White percentage')
# districtchanged.plot(column='% Total Population: White Alone', scheme='fisher_jenks', k=7, 
#                          cmap=plt.cm.Blues, legend=True, ax=ax)
# ax.set_axis_off()
# plt.axis('equal');

{'proj': 'aea', 'lat_1': 29.5, 'lat_2': 45.5, 'lat_0': 37.5, 'lon_0': -96, 'x_0': 0, 'y_0': 0, 'datum': 'NAD83', 'units': 'm', 'no_defs': True}


geopandas.geodataframe.GeoDataFrame

# Choropleth Map 

In [31]:
districtchanged.crs = district_df.crs
charterdropped.crs = sdshape.crs
charterdropped = charterdropped.to_crs(epsg='4326')
cjson = charterdropped.to_json()
mappo = folium.Map([37.7556, -122.4399], zoom_start = 7)

mappo.choropleth(
    geo_data=districtchanged,
    data=districtchanged,
    columns=['FIPS', '% Total Population: White Alone'],
    key_on='feature.properties.FIPS',
    legend_name='White people', 
    fill_color='YlGn',
    fill_opacity=0.4,
    highlight=True) 
points = folium.features.GeoJson(cjson)
mappo.add_child(points)

mappo.save('districtsandchartersCA.html')


In [97]:
mappo = folium.Map([37.7556, -122.4399], zoom_start = 7)

mappo.choropleth(
    geo_data=districtchanged,
    data=districtchanged,
    columns=['FIPS', '% Total Population: White Alone'],
    key_on='feature.properties.FIPS',
    legend_name='White people', 
    fill_color='YlGn',
    fill_opacity=0.4,
    highlight=True) 

mappo.save('districtsUSA.html')


# Working Cluster Method
We changed ordering of coordinates from (x,y) --> (y,x) in locationlist and it helped the cluster method map. Since we were skeptical maybe NaN values in our huge dataset were preventing us from mapping. We took a small subset, copy pasted just 3 coordinate sets and assigned them to a variable loca.. We used loca to see if cluster method below will work. It seems the reordering of longitude and latitude helped. Maybe they were mislabeled in data handling? 

In [19]:
locations = charterdroppedlol[['longitude','latitude']]
locationlist = locations.values.tolist()
locationlist

[[60.796131, -161.76519399999998],
 [55.34700099999997, -131.64119100000002],
 [55.34700099999997, -131.64119100000002],
 [61.19240699999999, -149.91687199999996],
 [61.198100000000004, -149.876],
 [61.194449999999975, -149.791641],
 [61.319213000000005, -149.579442],
 [61.194914000000026, -149.891223],
 [61.19395799999999, -149.77575900000005],
 [61.173154000000004, -149.895616],
 [61.217660000000016, -149.810421],
 [58.30278000000004, -134.407219],
 [60.55409100000002, -151.25400599999998],
 [59.643600000000006, -151.56784],
 [60.49207599999997, -151.068568],
 [60.568277, -151.27914299999998],
 [61.60666900000002, -149.102464],
 [61.58262000000003, -149.634475],
 [61.58184000000001, -149.63480099999998],
 [61.614509999999996, -149.449061],
 [61.61234, -149.27624],
 [61.59876999999998, -149.27201100000002],
 [64.54245299999998, -165.40546999999998],
 [64.81964100000003, -147.752261],
 [64.76390999999995, -147.34792900000002],
 [64.84782699999997, -147.840318],
 [64.82704999999999, -14

In [22]:
mapclus = folium.Map(location=[37.7556, -122.4399], zoom_start=7)

marker_cluster = MarkerCluster().add_to(mapclus)

for point in range(0, len(locationlist)):
    folium.Marker(locationlist[point], popup="hi").add_to(marker_cluster)

mapclus.choropleth(
    geo_data=districtchanged,
    data=districtchanged,
    columns=['FIPS', '% Total Population: White Alone'],
    key_on='feature.properties.FIPS',
    legend_name='White people', 
    fill_color='YlGn',
    fill_opacity=0.4,
    highlight=True) 

mapclus.save('clusterusawithdistrictsfinal.html')


In [28]:
#Clean the data(remove Nans, worst case shorten,  )

https://python-graph-gallery.com/292-choropleth-map-with-folium/ <br>
https://github.com/skoeb/CO-EV-Rate-Map/blob/master/.ipynb_checkpoints/folium%20COEV%20Rates-checkpoint.ipynb <br>
http://nbviewer.jupyter.org/gist/BibMartin/affe53e6e577167d8de2 <br>