**PRELIMINARY CLEANING AND PROCESSING OF DATA FROM OFSTED**

- Obtain the locations of all primary and secondary schools rated 'Outstanding' by Ofsted that have London postcodes.

- Create a 500m-radius buffer around each school to represent a realistic catchment area, based on 'distance travelled' data.

In [1]:
# import OFSTED data and select relevant columns

import pandas as pd
import geopandas as gpd

# Columns kept for the rest of the analysis.
cols = [
    'School name',
    'Region',
    'Ofsted region',
    'Postcode',
    'Overall effectiveness',
    'Ofsted phase',
]

# header=1: the column names are taken from the second line of the file.
# low_memory=False: parse the file in one pass instead of in chunks.
schools_all = pd.read_csv(
    '../../../big_files/state_schools_ofsted.csv',
    header=1,
    low_memory=False,
)

# Narrow the full extract down to the columns listed above.
schools = schools_all.loc[:, cols]

In [6]:
# Assumption: parents don't move home in order to get a place at a Pupil
# Referral Unit (PRU). The original note adds that "Special Measures are given
# priority" (presumably referring to the 'Special' phase - TODO confirm), so
# there is also no need to move for those.
# Show how many schools fall into each Ofsted phase.

schools.loc[:, 'Ofsted phase'].value_counts()

Primary      16787
Secondary     3399
Special       1050
Nursery        389
PRU            348
Name: Ofsted phase, dtype: int64

In [None]:
# Narrow the Ofsted table in three steps:
#   1. region is 'London' or 'South East'
#   2. Ofsted phase is 'Primary' or 'Secondary'
#   3. 'Overall effectiveness' equals 1 (treated as the outstanding rating here)
# 'Postcode' is then renamed to lowercase 'postcode', the key used for the
# merge with the postcode lookup.

in_target_region = schools['Region'].isin(['London', 'South East'])
london_southeast = schools[in_target_region]

is_primary_or_secondary = london_southeast['Ofsted phase'].isin(['Primary', 'Secondary'])
primary_secondary = london_southeast[is_primary_or_secondary]

is_outstanding = primary_secondary['Overall effectiveness'].eq(1)
outstanding = primary_secondary[is_outstanding].rename(columns={'Postcode': 'postcode'})

In [None]:
# Load the list of London postcodes with latitude/longitude coordinates and
# inner-join it to the outstanding schools on 'postcode', so only schools whose
# postcode appears in the lookup are kept.

london_postcodes = pd.read_csv('../../../big_files/london_postcodes.csv')
outstanding_postcodes = outstanding.merge(london_postcodes, how='inner', on='postcode')

# Turn each longitude/latitude pair into a point, declare the coordinates as
# EPSG:4326, then reproject to EPSG:27700 (British National Grid).
school_points = gpd.points_from_xy(
    outstanding_postcodes['longitude'],
    outstanding_postcodes['latitude'],
)
schools_geo = (
    gpd.GeoDataFrame(outstanding_postcodes, geometry=school_points)
    .set_crs(epsg=4326)
    .to_crs(epsg=27700)
)

# Only the postcode and the geometry are carried forward.
new_cols = ['postcode', 'geometry']
final_schools = schools_geo[new_cols]

In [17]:
# Build the catchment layer: a 500m buffer around each school, taken as a
# realistic distance for getting into an oversubscribed, outstanding school.
# rename() returns a new frame, so final_schools itself is left untouched.
# The buffer distance is expressed in the units of the layer's CRS
# (EPSG:27700 after the earlier reprojection).

schools_500m = final_schools.rename(columns={'postcode': 'school_postcode'})
schools_500m['geometry'] = schools_500m['geometry'].buffer(500)

In [19]:
# Persist the buffered catchment polygons as an ESRI Shapefile.
# NOTE(review): the Shapefile format limits field names to 10 characters, so the
# 'school_postcode' column is likely stored under a truncated name
# (e.g. 'school_pos') - confirm against whatever reads this file.
schools_500m.to_file(
    '../../../big_files/schools_500m.shp',
    driver='ESRI Shapefile',
)