In [1]:
import json
import pandas as pd
import numpy as np
import folium
import geopy.distance
import matplotlib as mpl
import branca

In [2]:
# Load the three district inputs: the GeoJSON boundaries, the mapping used to
# label each district in tooltips, and the collection of London district names
# used for filtering.
with open('data/districts_geojson.json') as geojson_fh:
    districts = json.load(geojson_fh)

with open('data/district_name_references.json') as references_fh:
    district_name_references = json.load(references_fh)

with open('data/district_names.json') as names_fh:
    district_names = json.load(names_fh)

In [3]:
# Keep only the GeoJSON features whose name is listed in `district_names`
# (the London districts) and wrap them back into a FeatureCollection.
districts_dicts = districts['features']
london_districts = [
    feature
    for feature in districts_dicts
    if feature['properties']['name'] in district_names
]
london_districts_gj = dict(type='FeatureCollection', features=london_districts)

In [4]:
# Load the school table. The school id is built from the last two
# '/'-separated segments of each school's link, joined with an underscore.
school_data = pd.read_csv('data/all_schools.csv', parse_dates=['last_report_date'])
school_data['school_id'] = school_data['link'].apply(lambda link: '_'.join(link.split('/')[-2:]))
# Add school geo coordinates. The geo table is the left side of the join, so
# the result has one row per geo record.
school_geo_data = pd.read_csv('data/school_geo_data.csv')
school_data = pd.merge(school_geo_data, school_data, on=['school_id'], how='left')
# Infer district from address: the second-to-last whitespace-separated token.
school_data['district'] = school_data['address'].str.split().str[-2]
# We have schools from outside of London. Remove them.
# Take an explicit copy so the column assignments below write to an independent
# frame instead of a slice of the merged one (avoids SettingWithCopyWarning).
school_data = school_data[school_data['district'].isin(district_names)].copy()
rating_mapping = {'Outstanding': 4, 'Good': 3, 'Requires Improvement': 2, 'Inadequate': 1}
# Ratings that are missing or not in the mapping become NaN.
school_data['numerical_rating'] = school_data['rating'].map(rating_mapping)
school_data['is_outstanding'] = school_data['rating'] == 'Outstanding'
# Add weighted school rating based on all inspections.
# No `on=` is given, so pandas joins on every column name the two frames share.
weighted_scores = pd.read_csv('data/weighted_scores.csv')
school_data = pd.merge(school_data, weighted_scores, how='left')

## School Ratings by District
Most independent schools don't have a rating, but some do. I only include schools that have a rating.

In [5]:
def create_map_data(school_data, london_districts_gj):
    """Aggregate school ratings per district and attach them to the GeoJSON features.

    Parameters
    ----------
    school_data : pandas.DataFrame
        Must contain the columns ``district``, ``numerical_rating``,
        ``is_outstanding`` and ``weighted_score``.
    london_districts_gj : dict
        GeoJSON-like dict with a ``features`` list; each feature needs
        ``properties['name']``. The feature properties are updated in place.

    Returns
    -------
    tuple
        ``(district_rating_data, london_districts_gj)``: the per-district
        summary frame, sorted by ``mean_current_rating`` descending, and the
        same GeoJSON object that was passed in, now carrying the tooltip
        properties.

    Notes
    -----
    Reads the module-level ``district_name_references`` mapping to fill the
    ``'District Name'`` property; a feature name missing from it raises
    ``KeyError``. Statistics that are missing or NaN (for example a district
    without any rated school) are written as ``''``.
    """
    district_rating_data = school_data.groupby(['district'], as_index=False).agg(
        {
            'numerical_rating': ['mean', 'count'],
            'is_outstanding': 'sum',
            'weighted_score': 'mean',
        }
    )
    # Flatten the two-level column index produced by the mixed aggregation spec.
    district_rating_data.columns = [
        'district',
        'mean_current_rating',
        'n_rated_schools',
        'n_outstanding_schools',
        'mean_weighted_rating',
    ]
    n_rated = district_rating_data['n_rated_schools']
    district_rating_data['share_of_outstanding_schools'] = (
        district_rating_data['n_outstanding_schools'] / n_rated
    )
    # `np.nan` rather than the `np.NaN` alias, which NumPy 2.0 removed.
    district_rating_data.loc[n_rated == 0, 'share_of_outstanding_schools'] = np.nan
    # sort_values returns a new frame, so the result has to be kept.
    district_rating_data = district_rating_data.sort_values(by='mean_current_rating', ascending=False)

    def _rounded_or_blank(value, ndigits):
        # NaN statistics are shown as blanks, like districts without any data.
        return '' if pd.isna(value) else round(float(value), ndigits)

    # One row per district (groupby keys are unique), indexed for direct lookup.
    stats_by_district = {
        row.district: row for row in district_rating_data.itertuples(index=False)
    }

    # Update london_districts_gj with the properties shown in the map tooltips.
    for district in london_districts_gj['features']:
        properties = district['properties']
        properties['District Name'] = district_name_references[properties['name']]
        stats = stats_by_district.get(properties['name'])
        if stats is None:
            properties['Average Current School Score'] = ''
            properties['Number of Rated Schools'] = ''
            properties['Share of Outstanding Schools'] = ''
            properties['Average Weighted Historical School Score'] = ''
            continue
        share = stats.share_of_outstanding_schools
        properties['Average Current School Score'] = _rounded_or_blank(stats.mean_current_rating, 2)
        # Plain Python int so the property stays JSON-serialisable.
        properties['Number of Rated Schools'] = int(stats.n_rated_schools)
        properties['Share of Outstanding Schools'] = (
            '' if pd.isna(share) else f"{round(100 * share, 0):.0f}%"
        )
        properties['Average Weighted Historical School Score'] = _rounded_or_blank(stats.mean_weighted_rating, 2)
    return district_rating_data, london_districts_gj

In [6]:
def _rated_school_bins(max_n_rated_schools, thresholds=(0, 1, 3, 5, 7, 10, 15, 25)):
    """Return strictly increasing bin edges that end at ``max_n_rated_schools``."""
    # Keep only thresholds below the maximum so the edges never decrease or repeat
    # (a fixed edge list breaks as soon as the maximum is 15 or less).
    edges = [edge for edge in thresholds if edge < max_n_rated_schools]
    edges.append(max_n_rated_schools)
    if len(edges) < 4:
        # NOTE(review): the colour scale is believed to need at least three
        # classes (four edges) - confirm against the installed folium/branca.
        upper = max_n_rated_schools if max_n_rated_schools > 0 else 1
        edges = list(np.linspace(0, upper, 4))
    return tuple(edges)


def create_district_map(school_data, london_districts_gj, map_name, save_name=None):
    """Build a folium map with four district-level choropleth layers.

    Parameters
    ----------
    school_data : pandas.DataFrame
        School table handed to ``create_map_data``.
    london_districts_gj : dict
        GeoJSON of the districts; ``create_map_data`` updates its feature
        properties in place.
    map_name : str
        Title inserted, unescaped, into an HTML block added to the page.
    save_name : str, optional
        When given, the map is written to ``maps/district/{save_name}.html``.

    Returns
    -------
    folium.Map
        The map with a background tile layer, the four choropleth layers
        (only the first one shown initially), their tooltips and an expanded
        layer control.
    """
    rating_by_district, london_districts_gj = create_map_data(school_data, london_districts_gj)
    rated_school_bins = _rated_school_bins(rating_by_district['n_rated_schools'].max())

    m = folium.Map(tiles=None, location=[51.53, -0.1], zoom_start=10.5)
    folium.TileLayer(tiles='cartodbpositron', name='Background Map').add_to(m)

    features_info = [
        'District Name',
        'Average Current School Score',
        'Average Weighted Historical School Score',
        'Number of Rated Schools',
        'Share of Outstanding Schools',
    ]
    # (layer and legend name, data column, colour palette, layer-specific options),
    # listed in the order the layers appear in the layer control.
    layer_specs = [
        ('Average Current School Score', 'mean_current_rating', 'RdYlGn', {}),
        ('Average Weighted Historical School Score', 'mean_weighted_rating', 'RdYlGn', {'show': False}),
        ('Number of Rated Schools', 'n_rated_schools', 'Blues', {'bins': rated_school_bins, 'show': False}),
        ('Share of Outstanding Schools', 'share_of_outstanding_schools', 'Greens', {'show': False}),
    ]
    for layer_name, value_column, palette, extra_options in layer_specs:
        layer = folium.Choropleth(
            geo_data=london_districts_gj,
            name=layer_name,
            data=rating_by_district,
            columns=['district', value_column],
            key_on='feature.properties.name',
            fill_color=palette,
            fill_opacity=0.7,
            line_opacity=0.2,
            legend_name=layer_name,
            nan_fill_color="white",
            highlight=True,
            **extra_options,
        ).add_to(m)
        # Every layer shows the same set of district properties on hover.
        folium.GeoJsonTooltip(features_info).add_to(layer.geojson)

    folium.LayerControl(collapsed=False).add_to(m)
    legend_html = """
    <div style="
        display: flex;
        justify-content: center;
        align-items: center;
        text-align: center;     
        font-size:18px;
        font-weight: bold;
        ">{}</div> """.format(map_name)
    m.get_root().html.add_child(folium.Element(legend_html))
    if save_name:
        m.save(f'maps/district/{save_name}.html')
    return m

In [7]:
# District map built from every school kept in `school_data`.
create_district_map(
    school_data,
    london_districts_gj,
    map_name='London All Primary School Ratings by District',
    save_name='all_primary_schools',
)

In [8]:
# Same district map, restricted to schools not flagged as religious.
nr_data = school_data.loc[~school_data['is_religious']].copy()
create_district_map(
    nr_data,
    london_districts_gj,
    map_name='London Non-Religious Primary School Ratings by District',
    save_name='non_religious_primary_schools',
)

In [9]:
# Non-religious schools whose category is 'Primary' (the subset the map title
# calls state primary schools).
nr_pub_data = school_data.loc[
    (~school_data['is_religious']) & (school_data['category'] == 'Primary')
].copy()
create_district_map(
    nr_pub_data,
    london_districts_gj,
    map_name='London Non-Religious State Primary School Ratings by District',
    save_name='state_primary_schools',
)

## Currently outstanding school analysis
"Clustered" means that a school has at least one other school within twice the given radius, i.e. their circles on the map overlap.

In [10]:
# Percentage of all rows in `school_data` whose current rating is 'Outstanding'.
n_outstanding_schools = len(school_data[school_data['is_outstanding']])
outstanding_share_pct = n_outstanding_schools / len(school_data) * 100
print(f"Overall proportion of currently outstanding schools: {outstanding_share_pct:.1f}%")

Overall proportion of currently outstanding schools: 23.6%


In [11]:
# Fill colour of a school's circle, keyed by school category.
school_colors = {
    "Residential Special School": "yellow",
    "Boarding School": "grey",
    "Primary": "blue",
    "Pupil Referral Unit": "yellow",
    "Special School": "yellow",
    "Independent School": "green",
    "Other schools": "yellow",
}

# Outline colour of a school's circle, keyed by the `is_religious` flag.
religious_colors = {True: "red", False: "black"}

In [12]:
def create_neighborhood_school_map(school_data, radius, map_name, save_name=None):
    """Draw one circle of the given radius per school on a London map.

    The outline colour comes from the module-level ``religious_colors`` (keyed
    by ``is_religious``) and the fill colour from the module-level
    ``school_colors`` (keyed by ``category``). Each circle shows the school
    name as tooltip and the radius as popup.

    Parameters
    ----------
    school_data : pandas.DataFrame
        Needs the columns ``lattitude``, ``longitude``, ``category``,
        ``school_name`` and ``is_religious``.
    radius : number
        Circle radius passed to ``folium.Circle``; the popup labels it as meters.
    map_name : str
        Title inserted into an HTML block added to the page.
    save_name : str, optional
        When given, the map is written to ``maps/individual/{save_name}.html``.

    Returns
    -------
    folium.Map
    """
    m = folium.Map(tiles=None, location=[51.53, -0.1], zoom_start=10.5)
    folium.TileLayer(tiles='cartodbpositron', name='Background Map').add_to(m)

    # Walk the needed columns row by row instead of indexing parallel lists.
    school_rows = zip(
        school_data['lattitude'].to_list(),
        school_data['longitude'].to_list(),
        school_data['category'].to_list(),
        school_data['school_name'].to_list(),
        school_data['is_religious'].to_list(),
    )
    for lat, lon, category, school_name, religious in school_rows:
        circle = folium.Circle(
            (lat, lon),
            color=religious_colors[religious],
            weight=2,
            radius=radius,
            fill_opacity=0.2,
            fill_color=school_colors[category],
            popup="{} meters".format(radius),
            tooltip=f"{school_name}",
        )
        circle.add_to(m)

    title_html = """
    <div style="
        display: flex;
        justify-content: center;
        align-items: center;
        text-align: center;     
        font-size:18px;
        font-weight: bold;
        ">{}</div> """.format(map_name)
    m.get_root().html.add_child(folium.Element(title_html))
    if save_name:
        m.save(f'maps/individual/{save_name}.html')
    return m

In [13]:
def find_clustered_schools(school_data, radius):
    """Return the schools that have another school closer than ``2 * radius``.

    Parameters
    ----------
    school_data : pandas.DataFrame
        Needs the columns ``lattitude`` and ``longitude``.
    radius : number
        Radius in metres; two schools count as clustered when the geodesic
        distance between them is below twice this value.

    Returns
    -------
    pandas.DataFrame
        Copy of the rows of ``school_data`` whose nearest other school is
        closer than ``2 * radius``. Empty (same columns) when fewer than two
        schools are given.
    """
    coords = list(zip(school_data['lattitude'].to_list(), school_data['longitude'].to_list()))
    n_schools = len(coords)
    if n_schools < 2:
        # No pair exists, so nothing can be clustered; this also avoids taking
        # the minimum of an empty matrix.
        return school_data.iloc[:0].copy()

    # Infinity on the diagonal: a school is never its own neighbour, whatever
    # the radius (a finite placeholder would match once 2 * radius exceeded it).
    distances = np.full((n_schools, n_schools), np.inf)
    for i in range(n_schools):
        # The distance is symmetric, so each pair is measured once and mirrored.
        for j in range(i + 1, n_schools):
            pair_distance = geopy.distance.geodesic(coords[i], coords[j]).m
            distances[i, j] = pair_distance
            distances[j, i] = pair_distance
    min_distances = distances.min(axis=1)

    clustered_schools = school_data[min_distances < 2 * radius].copy()
    return clustered_schools

In [14]:
# Map every currently outstanding school. 'Online education provider' is
# excluded; `school_colors` has no entry for that category.
radius = 300
outstanding_schools = school_data.loc[
    school_data['is_outstanding'] & (school_data['category'] != 'Online education provider')
].copy()
create_neighborhood_school_map(
    outstanding_schools,
    radius,
    map_name=f'London Outstanding Primary Schools by Religious Status and by School Type with Radius = {radius}m',
    save_name=f'all_primary_schools_{radius}m_radius_current_rating',
)

In [15]:
# Keep only outstanding schools that have another outstanding school within
# twice the radius, and map them.
clustered_outstanding_schools = find_clustered_schools(outstanding_schools, radius)
create_neighborhood_school_map(
    clustered_outstanding_schools,
    radius,
    map_name=f'London Clustered Outstanding Primary Schools by Religious Status and by School Type with Radius = {radius}m',
    save_name=f'all_clustered_primary_schools_{radius}m_radius_current_rating',
)

In [16]:
# Clustered view for non-religious, outstanding schools of category 'Primary',
# using a larger radius.
radius = 500
nr_pub_outstanding_schools = school_data.loc[
    (~school_data['is_religious'])
    & school_data['is_outstanding']
    & (school_data['category'] == 'Primary')
].copy()
clustered_nr_pub_outstanding_schools = find_clustered_schools(nr_pub_outstanding_schools, radius)
create_neighborhood_school_map(
    clustered_nr_pub_outstanding_schools,
    radius,
    map_name=f'London Clustered Non-Religious State Outstanding Primary Schools with Radius = {radius}m',
    save_name=f'state_primary_clustered_schools_{radius}m_radius_current_rating',
)

In [17]:
def create_neighborhood_school_map_weighted_score(
    school_data,
    radius,
    map_name,
    color_maps,
    min_score,
    max_score=4,
    save_name=None,
):
    """Draw one circle per school, filled according to its weighted score.

    Parameters
    ----------
    school_data : pandas.DataFrame
        Needs the columns ``lattitude``, ``longitude``, ``category``,
        ``school_name``, ``is_religious`` and ``weighted_score``.
    radius : number
        Circle radius passed to ``folium.Circle``.
    map_name : str
        Title inserted into an HTML block added to the page.
    color_maps : dict
        Maps a school category to a callable colormap that turns a weighted
        score into a fill colour. The colormap of every category present in
        ``school_data`` gets its ``caption`` set and is added to the map as a
        legend. A category missing from the dict raises ``KeyError``.
    min_score, max_score : number
        Accepted for interface compatibility but not used here; the colour
        range is whatever the supplied colormaps were scaled to.
    save_name : str, optional
        When given, the map is written to ``maps/individual/{save_name}.html``.

    Returns
    -------
    folium.Map

    Notes
    -----
    The circle outline colour is read from the module-level
    ``religious_colors`` mapping.
    """
    m = folium.Map(tiles='cartodbpositron', location=[51.53, -0.1], zoom_start=11)

    school_rows = zip(
        school_data['lattitude'].to_list(),
        school_data['longitude'].to_list(),
        school_data['category'].to_list(),
        school_data['school_name'].to_list(),
        school_data['is_religious'].to_list(),
        school_data['weighted_score'].to_list(),
    )
    legend_categories = set()
    for lat, lon, category, school_name, religious, rating in school_rows:
        colormap = color_maps[category]
        if category not in legend_categories:
            # Register each category's legend once, not once per school.
            colormap.caption = f'Weighted Rating {category}'
            colormap.add_to(m)
            legend_categories.add(category)
        # Popup and tooltip content is rendered as HTML, where a bare newline
        # collapses to a space; <br> gives the intended line break.
        label = f"{school_name}<br>{rating:.2f}"
        folium.Circle(
            (lat, lon),
            color=religious_colors[religious],
            weight=2,
            radius=radius,
            fill_opacity=0.6,
            fill_color=colormap(rating),
            popup=label,
            tooltip=label,
        ).add_to(m)

    legend_html = """
    <div style="
        display: flex;
        justify-content: center;
        align-items: center;
        text-align: center;     
        font-size:18px;
        font-weight: bold;
        ">{}</div> """.format(map_name)
    m.get_root().html.add_child(folium.Element(legend_html))
    if save_name:
        m.save(f'maps/individual/{save_name}.html')
    return m

In [18]:
# Score range covered by the colour scales; also the filter threshold below.
MIN_SCORE = 3.5
MAX_SCORE = 4

# One independently scaled colormap per school category (each category gets
# its own object, even when the palette is shared).
school_cmaps = {
    category: palette.scale(MIN_SCORE, MAX_SCORE)
    for category, palette in {
        'Residential Special School': branca.colormap.linear.YlOrRd_03,
        'Boarding School': branca.colormap.linear.Greens_05,
        'Primary': branca.colormap.linear.Blues_09,
        'Pupil Referral Unit': branca.colormap.linear.YlOrRd_03,
        'Special School': branca.colormap.linear.YlOrRd_03,
        'Independent School': branca.colormap.linear.Greens_05,
        'Other schools': branca.colormap.linear.Purples_08,
    }.items()
}

# Map every school whose weighted score is above the threshold.
radius = 300
map_name = f'London All Primary Schools with Weighted Score > {MIN_SCORE:.1f} and Radius = {radius}m'
outstanding_schools = school_data.loc[school_data['weighted_score'] > MIN_SCORE].copy()
create_neighborhood_school_map_weighted_score(
    outstanding_schools,
    radius,
    map_name,
    school_cmaps,
    MIN_SCORE,
    save_name=f'all_primary_schools_{radius}m_radius_weighted_rating',
)

In [19]:
# Same weighted-score map, restricted to non-religious schools of category
# 'Primary'. `radius` keeps the value set in the previous cell.
map_name = f'London Non-Religious State Primary Schools with Weighted Score > {MIN_SCORE:.1f} and Radius = {radius}m'
nr_pub_outstanding_schools = school_data.loc[
    (school_data['weighted_score'] > MIN_SCORE)
    & (~school_data['is_religious'])
    & (school_data['category'] == 'Primary')
].copy()
create_neighborhood_school_map_weighted_score(
    nr_pub_outstanding_schools,
    radius,
    map_name,
    school_cmaps,
    MIN_SCORE,
    save_name=f'state_primary_schools_{radius}m_radius_weighted_rating',
)

In [20]:
# Clustered variant: non-religious 'Primary' schools above the score threshold
# that have another such school within twice the (larger) radius.
radius = 500
map_name = f'London Clustered Non-Religious State Primary Schools with Weighted Score > {MIN_SCORE:.1f} and Radius = {radius}m'
nr_pub_outstanding_schools = school_data.loc[
    (school_data['weighted_score'] > MIN_SCORE)
    & (~school_data['is_religious'])
    & (school_data['category'] == 'Primary')
].copy()
clustered_nr_pub_outstanding_schools = find_clustered_schools(nr_pub_outstanding_schools, radius)
create_neighborhood_school_map_weighted_score(
    clustered_nr_pub_outstanding_schools,
    radius,
    map_name,
    school_cmaps,
    MIN_SCORE,
    save_name=f'state_clustered_primary_schools_{radius}m_radius_weighted_rating',
)