# Classification of Flood&Crime Dataset  Using KNN
## Training dataset = School Ratings Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# read school data file and make into a DataFrame
schools_file = pd.read_csv('Resources/Schools_Ratings_Harris_Co.csv')
schools_harris = pd.DataFrame(schools_file)
schools_harris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 972 entries, 0 to 971
Data columns (total 52 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0                972 non-null    int64  
 1   School.ID                 972 non-null    int64  
 2   School.Name               972 non-null    object 
 3   Street.Address            972 non-null    object 
 4   CITY                      972 non-null    object 
 5   ZIP                       972 non-null    float64
 6   County                    972 non-null    object 
 7   District.Name             972 non-null    object 
 8   School.Type               972 non-null    object 
 9   Charter.Trad              972 non-null    object 
 10  Grade.Current             972 non-null    object 
 11  TEA Grade                 972 non-null    object 
 12  GoldRibbon                68 non-null     float64
 13  PEG                       972 non-null    object 
 14  County_Ran

In [3]:
# convert TEA grades to numbers, A=95, B=85, C=75, D=65, F=55
schools_harris["School Rating"] = ""
schools_harris.loc[schools_harris["TEA Grade"]=="A", "School Rating"]=95
schools_harris.loc[schools_harris["TEA Grade"]=="B", "School Rating"]=85
schools_harris.loc[schools_harris["TEA Grade"]=="C", "School Rating"]=75
schools_harris.loc[schools_harris["TEA Grade"]=="D", "School Rating"]=65
schools_harris.loc[schools_harris["TEA Grade"]=="F", "School Rating"]=55

In [4]:
# read file with residential addresses and make into a DataFrame
res_file = pd.read_csv('Resources/flood_crime_hcad_V2.csv')
test_df = pd.DataFrame(res_file)
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26406 entries, 0 to 26405
Data columns (total 29 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             26406 non-null  int64  
 1   Address                26406 non-null  object 
 2   Latitude               26406 non-null  float64
 3   Longitude              26406 non-null  float64
 4   Flood Description      21794 non-null  object 
 5   Flood Zone             26406 non-null  object 
 6   Flood Risk             26406 non-null  int64  
 7   SITE_ADDR_1            26406 non-null  object 
 8   ACCOUNT                26406 non-null  int64  
 9   TAX_YEAR               26406 non-null  int64  
 10  SITE_ADDR_2            26406 non-null  object 
 11  SITE_ADDR_3            26406 non-null  int64  
 12  STATE_CLASS            26406 non-null  object 
 13  SCHOOL_DIST            26406 non-null  int64  
 14  NEIGHBORHOOD_CODE      26406 non-null  float64
 15  NE

## Zoning for elementary public schools

In [5]:
# training dataset for elementary public schools
elem_pub = schools_harris.loc[
    (schools_harris['School.Type']=='Elementary')&(schools_harris['Charter.Trad']=='Traditional Public School'),:]

In [6]:
# elementary public schools classifier
elem_pub_classifier = KNeighborsClassifier(n_neighbors=1)
elem_pub_classifier.fit(elem_pub.loc[:,["latitude","longitude"]],elem_pub["School.ID"])

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [7]:
# elementary public schools classification results
elem_pub_pred = elem_pub_classifier.predict(test_df.loc[:,['Latitude','Longitude']])

## Zoning for elementary charter schools

In [8]:
# training dataset for elementary charter schools
elem_charter = schools_harris.loc[
    (schools_harris['School.Type']=='Elementary')&(schools_harris['Charter.Trad']=='Charter'),:]

In [9]:
# elementary charter schools classifier
elem_char_classifier = KNeighborsClassifier(n_neighbors=1)
elem_char_classifier.fit(elem_charter.loc[:,["latitude","longitude"]],elem_charter["School.ID"])

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [10]:
# elementary charter schools classification results
elem_char_pred = elem_char_classifier.predict(test_df.loc[:,['Latitude','Longitude']])

## Zoning for public middle schools

In [11]:
# training dataset for public middle schools
middle_pub = schools_harris.loc[
    (schools_harris['School.Type']=='Middle')&(schools_harris['Charter.Trad']=='Traditional Public School'),:]

In [12]:
# public middle schools classifier
middle_pub_classifier = KNeighborsClassifier(n_neighbors=1)
middle_pub_classifier.fit(middle_pub.loc[:,["latitude","longitude"]],middle_pub["School.ID"])

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [13]:
# public middle schools classification results
middle_pub_pred = middle_pub_classifier.predict(test_df.loc[:,['Latitude','Longitude']])

## Zoning for charter middle schools

In [14]:
# training dataset for charter middle schools
middle_charter = schools_harris.loc[
    (schools_harris['School.Type']=='Middle')&(schools_harris['Charter.Trad']=='Charter'),:]

In [15]:
# charter middle schools classifier
middle_char_classifier = KNeighborsClassifier(n_neighbors=1)
middle_char_classifier.fit(middle_charter.loc[:,["latitude","longitude"]],middle_charter["School.ID"])

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [16]:
# charter middle schools classification results
middle_char_pred = middle_char_classifier.predict(test_df.loc[:,['Latitude','Longitude']])

## Zoning for public high schools

In [17]:
# training dataset for public high schools
high_pub = schools_harris.loc[
    (schools_harris['School.Type']=='High')&(schools_harris['Charter.Trad']=='Traditional Public School'),:]

In [18]:
# public high schools classifier
high_pub_classifier = KNeighborsClassifier(n_neighbors=1)
high_pub_classifier.fit(high_pub.loc[:,["latitude","longitude"]],high_pub["School.ID"])

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [19]:
# public high schools classification results
high_pub_pred = high_pub_classifier.predict(test_df.loc[:,['Latitude','Longitude']])

## Zoning for charter high schools

In [20]:
# training dataset for charter high schools
high_charter = schools_harris.loc[
    (schools_harris['School.Type']=='High')&(schools_harris['Charter.Trad']=='Charter'),:]

In [21]:
# charter high schools classifier
high_char_classifier = KNeighborsClassifier(n_neighbors=1)
high_char_classifier.fit(high_charter.loc[:,["latitude","longitude"]],high_charter["School.ID"])

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [22]:
# charter high schools classification results
high_char_pred = high_char_classifier.predict(test_df.loc[:,['Latitude','Longitude']])

## Adding classification and school ratings to main dataframe

In [23]:
# add columns to the test dataset to host each classification, school name and rating
test_df["Elementary Public School ID"] = elem_pub_pred
test_df["Elementary Public School Name"] = ""
test_df["Elementary Public School Rating"] = ""
test_df["Elementary Charter School ID"] = elem_char_pred
test_df["Elementary Charter School Name"] = ""
test_df["Elementary Charter School Rating"] = ""
test_df["Public Middle School ID"] = middle_pub_pred
test_df["Public Middle School Name"] = ""
test_df["Public Middle School Rating"] = ""
test_df["Charter Middle School ID"] = middle_char_pred
test_df["Charter Middle School Name"] = ""
test_df["Charter Middle School Rating"] = ""
test_df["Public High School ID"] = high_pub_pred
test_df["Public High School Name"] = ""
test_df["Public High School Rating"] = ""
test_df["Charter High School ID"] = high_char_pred
test_df["Charter High School Name"] = ""
test_df["Charter High School Rating"] = ""

In [24]:
# create lists for school name and TEA grade
school_id = schools_harris["School.ID"].to_list()
school_names = schools_harris["School.Name"].to_list()
school_ratings = schools_harris["School Rating"].to_list()

In [25]:
# loop through lists above to insert data in the main DataFrame
for x in range(0,len(school_names)):
    test_df.loc[test_df[
    "Elementary Public School ID"]==school_id[x],"Elementary Public School Name"]=school_names[x]
    test_df.loc[test_df[
    "Elementary Public School ID"]==school_id[x],"Elementary Public School Rating"]=school_ratings[x]
    
    test_df.loc[test_df[
    "Elementary Charter School ID"]==school_id[x],"Elementary Charter School Name"]=school_names[x]
    test_df.loc[test_df[
    "Elementary Charter School ID"]==school_id[x],"Elementary Charter School Rating"]=school_ratings[x]
    
    test_df.loc[test_df[
    "Public Middle School ID"]==school_id[x],"Public Middle School Name"]=school_names[x]
    test_df.loc[test_df[
    "Public Middle School ID"]==school_id[x],"Public Middle School Rating"]=school_ratings[x]
    
    test_df.loc[test_df[
    "Charter Middle School ID"]==school_id[x],"Charter Middle School Name"]=school_names[x]
    test_df.loc[test_df[
    "Charter Middle School ID"]==school_id[x],"Charter Middle School Rating"]=school_ratings[x]
    
    test_df.loc[test_df[
    "Public High School ID"]==school_id[x],"Public High School Name"]=school_names[x]
    test_df.loc[test_df[
    "Public High School ID"]==school_id[x],"Public High School Rating"]=school_ratings[x]
    
    test_df.loc[test_df[
    "Charter High School ID"]==school_id[x],"Charter High School Name"]=school_names[x]
    test_df.loc[test_df[
    "Charter High School ID"]==school_id[x],"Charter High School Rating"]=school_ratings[x]

In [26]:
# calculate averate of all school ratings per residence
test_df["Average School Rating"] = test_df[["Elementary Public School Rating","Elementary Charter School Rating",
    "Public Middle School Rating","Charter Middle School Rating",
    "Public High School Rating","Charter High School Rating"]].mean(axis=1)

In [27]:
test_df.sample(4)

Unnamed: 0.1,Unnamed: 0,Address,Latitude,Longitude,Flood Description,Flood Zone,Flood Risk,SITE_ADDR_1,ACCOUNT,TAX_YEAR,...,Charter Middle School ID,Charter Middle School Name,Charter Middle School Rating,Public High School ID,Public High School Name,Public High School Rating,Charter High School ID,Charter High School Name,Charter High School Rating,Average School Rating
21750,21750,"2428 ADDISON RD HOUSTON, Texas 77030",29.71201,-95.41622,0.2 PCT ANNUAL CHANCE FLOOD HAZARD,X,2,2428 ADDISON RD,641690040028,2019,...,101871041,A+ UNLIMITED POTENTIAL MIDDLE,75,101912026,DEBAKEY HIGH FOR HEALTH PROF,95,101912323,CHALLENGE EARLY COLLEGE HIGH,95,86.666667
13891,13891,"1316 STANFORD ST HOUSTON, Texas 77019",29.75496,-95.38918,AREA OF MINIMAL FLOOD HAZARD,X,1,1316 STANFORD ST,1314990010002,2019,...,101912371,YOUNG SCHOLARS ACADEMY FOR EXCELLENCE MIDDLE,65,101912322,CARNEGIE VANGUARD HIGH,95,101912348,HOUSTON ACADEMY FOR INTERNATIONAL STUDIES HIGH,95,81.666667
7485,7485,"421 DENNIS ST HOUSTON, Texas 77006",29.74751,-95.37893,AREA OF MINIMAL FLOOD HAZARD,X,1,421 DENNIS ST,132320010001,2019,...,101912371,YOUNG SCHOLARS ACADEMY FOR EXCELLENCE MIDDLE,65,101912322,CARNEGIE VANGUARD HIGH,95,101912348,HOUSTON ACADEMY FOR INTERNATIONAL STUDIES HIGH,95,75.0
22849,22849,"1728 HAWTHORNE ST HOUSTON, Texas 77098",29.74219,-95.40325,AREA OF MINIMAL FLOOD HAZARD,X,1,1728 HAWTHORNE ST,542300000007,2019,...,101912057,LANIER MIDDLE,95,101912322,CARNEGIE VANGUARD HIGH,95,101912348,HOUSTON ACADEMY FOR INTERNATIONAL STUDIES HIGH,95,83.333333


In [28]:
test_df.to_csv("Output/combined_flood_crime_and_school_data.csv")

In [29]:
qc = test_df[["Address","Elementary Public School Name","Elementary Charter School Name","Public Middle School Name",
                       "Charter Middle School Name","Public High School Name","Charter High School Name"]]
qc.sample(10)

Unnamed: 0,Address,Elementary Public School Name,Elementary Charter School Name,Public Middle School Name,Charter Middle School Name,Public High School Name,Charter High School Name
10581,"2113 CROCKER ST HOUSTON, Texas 77006",WHARTON K-8 DUAL LANGUAGE ACADEMY MIDDLE,YOUNG SCHOLARS ACADEMY FOR EXCELLENCE MIDDLE,WHARTON K-8 DUAL LANGUAGE ACADEMY MIDDLE,LANIER MIDDLE,CARNEGIE VANGUARD HIGH,HOUSTON ACADEMY FOR INTERNATIONAL STUDIES HIGH
12331,"2520 PELHAM DR HOUSTON, Texas 77019",RIVER OAKS ELEMENTARY,CROCKETT ELEMENTARY,WILSON MONTESSORI MIDDLE,LANIER MIDDLE,LAMAR HIGH,CHALLENGE EARLY COLLEGE HIGH
21032,"2525 SOUTHGATE BLVD HOUSTON, Texas 77030",ROBERTS ELEMENTARY,HARMONY SCHOOL OF FINE ARTS AND TECHNOLOGY - H...,RICE SCHOOL /LA ESCUELA RICE MIDDLE,A+ UNLIMITED POTENTIAL MIDDLE,DEBAKEY HIGH FOR HEALTH PROF,CHALLENGE EARLY COLLEGE HIGH
1316,"2215 BOLSOVER ST HOUSTON, Texas 77005",ROBERTS ELEMENTARY,HARMONY SCHOOL OF FINE ARTS AND TECHNOLOGY - H...,RICE SCHOOL /LA ESCUELA RICE MIDDLE,A+ UNLIMITED POTENTIAL MIDDLE,DEBAKEY HIGH FOR HEALTH PROF,HOUSTON ACADEMY FOR INTERNATIONAL STUDIES HIGH
4889,"2905 PLUMB ST HOUSTON, Texas 77005",WEST UNIVERSITY ELEMENTARY,HARMONY SCHOOL OF FINE ARTS AND TECHNOLOGY - H...,RICE SCHOOL /LA ESCUELA RICE MIDDLE,LANIER MIDDLE,LAMAR HIGH,CHALLENGE EARLY COLLEGE HIGH
17863,"4022 GLENSHIRE ST HOUSTON, Texas 77025",SHEARN ELEMENTARY,HARMONY SCHOOL OF FINE ARTS AND TECHNOLOGY - H...,PERSHING MIDDLE,HARMONY SCHOOL OF INGENUITY - HOUSTON MIDDLE,BELLAIRE HIGH,HARMONY SCHOOL OF INGENUITY - HOUSTON MIDDLE
12806,"3665 WILLOWICK RD HOUSTON, Texas 77019",RIVER OAKS ELEMENTARY,AMIGOS POR VIDA-FRIENDS FOR LIFE CHARTER S MIDDLE,TANGLEWOOD MIDDLE,LANIER MIDDLE,LAMAR HIGH,CHALLENGE EARLY COLLEGE HIGH
20111,"4102 NORFOLK ST HOUSTON, Texas 77027",WEST UNIVERSITY ELEMENTARY,AMIGOS POR VIDA-FRIENDS FOR LIFE CHARTER S MIDDLE,PIN OAK MIDDLE,AMIGOS POR VIDA-FRIENDS FOR LIFE CHARTER S MIDDLE,LAMAR HIGH,CHALLENGE EARLY COLLEGE HIGH
13388,"1404 ONEIL ST HOUSTON, Texas 77019",GREGORY-LINCOLN EDUCATIONAL CENTER MIDDLE,YOUNG SCHOLARS ACADEMY FOR EXCELLENCE MIDDLE,GREGORY-LINCOLN EDUCATIONAL CENTER MIDDLE,YOUNG SCHOLARS ACADEMY FOR EXCELLENCE MIDDLE,CARNEGIE VANGUARD HIGH,HOUSTON ACADEMY FOR INTERNATIONAL STUDIES HIGH
11111,"2006 FAIRVIEW ST HOUSTON, Texas 77019",WILSON MONTESSORI MIDDLE,YOUNG SCHOLARS ACADEMY FOR EXCELLENCE MIDDLE,WILSON MONTESSORI MIDDLE,LANIER MIDDLE,LAMAR HIGH,HOUSTON ACADEMY FOR INTERNATIONAL STUDIES HIGH
