### Import library

In [None]:
import pandas as pd

### Read Elmbridge Postcodes

In [None]:
# Load the Elmbridge postcodes CSV into a DataFrame
postcodes_path = 'Elmbridge Postcodes.csv'
postcodes_df = pd.read_csv(postcodes_path)

### Create dictionary to map Postcodes to Wards

In [None]:
# Build the Postcode -> Ward lookup used to place every facility in a ward.
# If a postcode occurs more than once, the last occurrence wins.
d = {
    postcode: ward
    for postcode, ward in zip(postcodes_df['Postcode'], postcodes_df['Ward'])
}

### Map all parks to wards

In [None]:
# Load the cleaned parks workbook
parks_df = pd.read_excel('parks clean.xlsx')

# Look up each park's ward from its postcode; postcodes absent from `d` give NaN
parks_df['Ward'] = parks_df['ZIP Code'].map(d)

# Keep only the parks that resolved to a ward, restricted to the columns of interest
park_columns = ['name', 'latitude', 'longitude', 'category', 'ZIP Code', 'Ward']
has_ward = parks_df['Ward'].notna()
parks_df = parks_df.loc[has_ward, park_columns]

# Save the ward-tagged parks, then display them as the cell output
parks_df.to_excel('C:/Users/Alexander.Lee/OneDrive - Community Solutions/Desktop/parks.xlsx')
parks_df

### Map all schools to wards

In [None]:
# Load the cleaned schools workbook
schools_df = pd.read_excel('schools clean.xlsx')

# Look up each school's ward from its postcode; postcodes absent from `d` give NaN
schools_df['Ward'] = schools_df['Postcode'].map(d)

# Keep the columns of interest. 'Postcode' is listed exactly once: selecting it
# twice creates two identically named columns, after which
# schools_df['Postcode'] returns a DataFrame instead of a Series.
school_columns = ['ID', 'Postcode', 'Primary', 'Secondary', 'Post 16', 'Gender', 'Rating', 'Ward']
schools_df = schools_df[school_columns].copy()

# Encode the rating labels as numbers (1 = 'Outstanding' ... 5 = 'Special Measures').
# Values that are not one of these labels are left unchanged by replace().
rating_codes = {
    'Outstanding': 1,
    'Good': 2,
    'Requires improvement': 3,
    'Serious Weaknesses': 4,
    'Special Measures': 5,
}
schools_df['Rating'] = schools_df['Rating'].replace(rating_codes)
schools_df

### Map all services to wards

In [None]:
# Load the services workbook
services_df = pd.read_excel('services.xlsx')

# Look up each service's ward from its postcode; postcodes absent from `d` give NaN
service_postcodes = services_df['Postcode']
services_df['Ward'] = service_postcodes.map(d)

# Reduce to the columns of interest (copy so later edits do not touch the loaded frame)
service_columns = ['Services', 'Specialisms/services', 'Latitude', 'Longitude', 'Postcode', 'Ward']
services_df = services_df.loc[:, service_columns].copy()

services_df

### Map crimes to wards

In [None]:
# Load the crimes workbook from its absolute Desktop location
crimes_path = 'C:/Users/Alexander.Lee/OneDrive - Community Solutions/Desktop/crimes.xlsx'
crimes_df = pd.read_excel(crimes_path)

### Classify crimes by type (high danger or low danger)

In [None]:
# Load the workbook that maps crime-type labels to a category
crime_map_path = 'C:/Users/Alexander.Lee/OneDrive - Community Solutions/Desktop/School/Capstone/crime map.xlsx'
crime_map = pd.read_excel(crime_map_path)

In [None]:
# Look up each crime's ward from its postcode; postcodes absent from `d` give NaN
crime_postcodes = crimes_df['ZIP Code']
crimes_df['Ward'] = crime_postcodes.map(d)

# Give crimes without a recorded type an explicit placeholder label
crimes_df['Crime type'] = crimes_df['Crime type'].fillna('Crime Type N/A')

# Pull the crime-type name out of each mapping label: take the piece after the
# first ':', cut it at the first apostrophe, and trim surrounding whitespace.
raw_labels = crime_map['Row Labels in our Dataset']
after_colon = raw_labels.str.split(':').str[1]
crime_names = after_colon.str.split("'").str[0].str.strip()

# Crime-type name -> category lookup
crime_type_map = dict(zip(crime_names, crime_map['Category']))

# Attach the category to every crime; types missing from the lookup give NaN
crimes_df['Severity'] = crimes_df['Crime type'].map(crime_type_map)

### Get number of parks, number of schools by type, number of social services for each ward

In [None]:
# Build one row of amenity features per ward: park count, school counts and
# mean ratings by phase, and counts of GP and dentist services.
feature_columns = ['Ward', 'Number of Parks', 'Number of Primary Schools', 'Primary School AVG Rating',
                   'Number of Secondary Schools', 'Secondary School AVG Rating', 'Number of Post 16 Schools',
                   'Post 16 School AVG Rating', 'Number of Healthcare Services', 'Number of Dentistries']

# Distinct wards in first-seen order, so the row order is the same on every run
# (iterating a set of strings is not). Missing wards are skipped: NaN never
# compares equal to a facility's ward, so such a row would carry no information.
wards = [ward for ward in dict.fromkeys(d.values()) if pd.notna(ward)]

ward_rows = []
for ward in wards:
    # Filter each source once per ward and reuse the subsets below
    ward_schools = schools_df.loc[schools_df['Ward'] == ward]
    ward_services = services_df.loc[services_df['Ward'] == ward]

    primary = ward_schools.loc[ward_schools['Primary'] == 1]
    secondary = ward_schools.loc[ward_schools['Secondary'] == 1]
    post_16 = ward_schools.loc[ward_schools['Post 16'] == 1]

    ward_rows.append({
        'Ward': ward,
        'Number of Parks': int((parks_df['Ward'] == ward).sum()),
        'Number of Primary Schools': len(primary),
        # Unrated schools are excluded from the mean; no rated school gives NaN
        'Primary School AVG Rating': primary['Rating'].dropna().mean(),
        'Number of Secondary Schools': len(secondary),
        'Secondary School AVG Rating': secondary['Rating'].dropna().mean(),
        'Number of Post 16 Schools': len(post_16),
        'Post 16 School AVG Rating': post_16['Rating'].dropna().mean(),
        'Number of Healthcare Services': int((ward_services['Services'] == 'Doctors/GPs').sum()),
        'Number of Dentistries': int((ward_services['Services'] == 'Dentist').sum()),
    })

# Create the frame in one step instead of growing it row by row, which keeps
# numeric dtypes and avoids repeated reallocation.
df = pd.DataFrame(ward_rows, columns=feature_columns)

# Keep the 1-based row labels the table has always had
df.index = range(1, len(df) + 1)

# -1 marks "no rated school of this phase in the ward"; applied once, after the loop
df = df.fillna(-1)

### Read in pricing data

In [None]:
# Load the three pricing feature workbooks (1-, 3- and 5-year files, in that order)
pricing_files = (
    'data_1year_features.xlsx',
    'data_3year_features.xlsx',
    'data_5year_features.xlsx',
)
one_year_df, three_year_df, five_year_df = map(pd.read_excel, pricing_files)

### Merge with ward features

In [None]:
# Left-join the per-ward feature table onto each pricing table by 'Ward',
# keeping every pricing row even when its ward has no feature row.
one_year_df = pd.merge(one_year_df, df, how='left', on='Ward')
three_year_df = pd.merge(three_year_df, df, how='left', on='Ward')
five_year_df = pd.merge(five_year_df, df, how='left', on='Ward')

### Setup crimes table by type

In [None]:
# Split the crimes table into its 'High' and 'Low' severity subsets
is_high_severity = crimes_df['Severity'] == 'High'
is_low_severity = crimes_df['Severity'] == 'Low'
high_crimes_df = crimes_df.loc[is_high_severity]
low_crimes_df = crimes_df.loc[is_low_severity]

### Count number of crimes in each ward by type past year

In [None]:
# Split the crimes table into its 'High' and 'Low' severity subsets
high_crimes_df = crimes_df.loc[crimes_df['Severity'] == 'High']
low_crimes_df = crimes_df.loc[crimes_df['Severity'] == 'Low']

# For every pricing row, count the crimes recorded in the same ward during the
# year immediately before the row's 'Year'.
#
# The counts are collected in lists and assigned as whole columns. Writing
# one_year_df[col][i] = value is chained assignment: it goes through a
# temporary Series, raises SettingWithCopyWarning, and does not update the
# frame when pandas Copy-on-Write is active. Iterating the values directly
# also removes the dependence on the index labels being 0..n-1.
high_counts = []
low_counts = []
for year, ward in zip(one_year_df['Year'], one_year_df['Ward']):
    previous_year = year - 1
    high_match = (high_crimes_df['Year'] == previous_year) & (high_crimes_df['Ward'] == ward)
    low_match = (low_crimes_df['Year'] == previous_year) & (low_crimes_df['Ward'] == ward)
    high_counts.append(int(high_match.sum()))
    low_counts.append(int(low_match.sum()))

# Explicit index and dtype keep the columns aligned and integer-typed even
# when the pricing table is empty.
one_year_df['Number of High Severity Crimes Previous Year'] = pd.Series(
    high_counts, index=one_year_df.index, dtype='int64')
one_year_df['Number of Low Severity Crimes Previous Year'] = pd.Series(
    low_counts, index=one_year_df.index, dtype='int64')

In [None]:
# Write the one-year feature table to an Excel workbook in the working directory
one_year_output = 'one_year_features.xlsx'
one_year_df.to_excel(one_year_output)

### Count number of crimes in each ward by type past 3 years

In [None]:
# For every pricing row, count the crimes recorded in the same ward during the
# three years before the row's 'Year' (year-1, year-2 and year-3).
#
# The counts are collected in lists and assigned as whole columns. Writing
# three_year_df[col][i] = value is chained assignment: it goes through a
# temporary Series, raises SettingWithCopyWarning, and does not update the
# frame when pandas Copy-on-Write is active. Iterating the values directly
# also removes the dependence on the index labels being 0..n-1.
years_back = [1, 2, 3]

high_counts = []
low_counts = []
for year, ward in zip(three_year_df['Year'], three_year_df['Ward']):
    # Age of each crime in this ward relative to the pricing row's year;
    # a crime is counted when that age is exactly 1, 2 or 3 years.
    high_age = year - high_crimes_df.loc[high_crimes_df['Ward'] == ward, 'Year']
    low_age = year - low_crimes_df.loc[low_crimes_df['Ward'] == ward, 'Year']
    high_counts.append(int(high_age.isin(years_back).sum()))
    low_counts.append(int(low_age.isin(years_back).sum()))

# Explicit index and dtype keep the columns aligned and integer-typed even
# when the pricing table is empty.
three_year_df['Number of High Severity Crimes Previous 3 Years'] = pd.Series(
    high_counts, index=three_year_df.index, dtype='int64')
three_year_df['Number of Low Severity Crimes Previous 3 Years'] = pd.Series(
    low_counts, index=three_year_df.index, dtype='int64')

In [None]:
# Write the three-year feature table to an Excel workbook in the working directory
three_year_output = 'three_year_features.xlsx'
three_year_df.to_excel(three_year_output)

### Count number of crimes in each ward by type past 5 years

In [None]:
# For every pricing row, count the crimes recorded in the same ward during the
# five years before the row's 'Year' (year-1 through year-5).
#
# The counts are collected in lists and assigned as whole columns. Writing
# five_year_df[col][i] = value is chained assignment: it goes through a
# temporary Series, raises SettingWithCopyWarning, and does not update the
# frame when pandas Copy-on-Write is active. Iterating the values directly
# also removes the dependence on the index labels being 0..n-1.
years_back = [1, 2, 3, 4, 5]

high_counts = []
low_counts = []
for year, ward in zip(five_year_df['Year'], five_year_df['Ward']):
    # Age of each crime in this ward relative to the pricing row's year;
    # a crime is counted when that age is exactly 1 to 5 years.
    high_age = year - high_crimes_df.loc[high_crimes_df['Ward'] == ward, 'Year']
    low_age = year - low_crimes_df.loc[low_crimes_df['Ward'] == ward, 'Year']
    high_counts.append(int(high_age.isin(years_back).sum()))
    low_counts.append(int(low_age.isin(years_back).sum()))

# Explicit index and dtype keep the columns aligned and integer-typed even
# when the pricing table is empty.
five_year_df['Number of High Severity Crimes Previous 5 Years'] = pd.Series(
    high_counts, index=five_year_df.index, dtype='int64')
five_year_df['Number of Low Severity Crimes Previous 5 Years'] = pd.Series(
    low_counts, index=five_year_df.index, dtype='int64')

In [None]:
# Write the five-year feature table to an Excel workbook in the working directory
five_year_output = 'five_year_features.xlsx'
five_year_df.to_excel(five_year_output)