# Process Dataset With School Ratings
### Data Source: https://texasschoolguide.org/school-rankings/

In [1]:
import pandas as pd
import gmaps
import requests
import json
from config import gkey

In [2]:
# Register the Google API key (imported from the local config module) with the
# gmaps library. The same `gkey` is also sent with the geocoding requests
# made further down in this notebook.
gmaps.configure(api_key=gkey)

In [3]:
# Read the school ratings CSV. pd.read_csv already returns a DataFrame, so
# the result is bound to `schools` directly instead of being wrapped in a
# second, redundant pd.DataFrame(...) call.
schools = pd.read_csv("Resources/tsg_final_11.17_From_Children_at_Risk.csv")

In [4]:
# discard every row that lacks a school rating or a school name
required_columns = ['TEA Grade', 'School.Name']
schools = schools.dropna(subset=required_columns, how='any')

In [5]:
# keep only the schools whose County value is HARRIS (Harris County, TX)
in_harris_county = schools["County"] == "HARRIS"
schools_harris = schools.loc[in_harris_county, :]

In [6]:
# filter the data to remove school type listed as unavailable
# The mask is built from schools_harris itself (the frame being filtered),
# not from the full `schools` frame, so the selection does not depend on
# pandas aligning a longer boolean Series by index label.
# .copy() makes the result an independent DataFrame, so the coordinate
# assignments done later with .loc write to this frame unambiguously.
has_known_type = schools_harris['School.Type'] != "Unavailable"
schools_harris = schools_harris.loc[has_known_type].copy()
schools_harris.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 972 entries, 23 to 9643
Data columns (total 51 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   School.ID                 972 non-null    int64  
 1   School.Name               972 non-null    object 
 2   Street.Address            972 non-null    object 
 3   CITY                      972 non-null    object 
 4   ZIP                       972 non-null    float64
 5   County                    972 non-null    object 
 6   District.Name             972 non-null    object 
 7   School.Type               972 non-null    object 
 8   Charter.Trad              972 non-null    object 
 9   Grade.Current             972 non-null    object 
 10  TEA Grade                 972 non-null    object 
 11  GoldRibbon                68 non-null     float64
 12  PEG                       972 non-null    object 
 13  County_Ranking            972 non-null    object 
 14  Asian.pc

In [7]:
# locate rows with missing coordinates
# The null check is done on schools_harris (the frame being filtered) rather
# than on the full `schools` frame, so the mask and the frame share the
# same index and no implicit index alignment is needed.
missing_info = schools_harris.loc[schools_harris['latitude'].isna()]
missing_info

Unnamed: 0,School.ID,School.Name,Street.Address,CITY,ZIP,County,District.Name,School.Type,Charter.Trad,Grade.Current,...,Math.Current-2,Math.Change,Student.Growth.Reading,Student.Growth.Math,School.Program,Community.Resources,longitude,latitude,Student.Teacher.Ratio,Grade.Range
4848,227820213,KIPP PEACE ELEMENTARY,5400 MLK JR BLVD,HOUSTON,77021.0,HARRIS,KIPP TEXAS PUBLIC SCHOOLS,Elementary,Charter,C,...,,,HIGH,HIGH,,,,,,PK—04
4854,227820060,KIPP PRIME COLLEGE PREPARATORY MIDDLE,8805 FERNDALE,HOUSTON,77017.0,HARRIS,KIPP TEXAS PUBLIC SCHOOLS,Middle,Charter,C,...,,,LOW,LOW,,,,,,05—07


In [8]:
# geolocate schools without lat/lng information
# A bare street address is ambiguous, so the query sent to the Geocoding API
# also carries the city, the state (this notebook only handles Harris County,
# TX) and the ZIP code.
# `addresses` still holds the bare street addresses, and g_lat / g_lng always
# receive exactly one entry per address (NaN when no location is found), so
# the three lists stay aligned position by position.
base_url = "https://maps.googleapis.com/maps/api/geocode/json?"
addresses = missing_info["Street.Address"].to_list()
params = {"key": gkey}
google_info = []
g_lat = []
g_lng = []
for address, city, zip_code in zip(addresses, missing_info["CITY"], missing_info["ZIP"]):
    # ZIP is stored as a float (e.g. 77021.0): drop the decimals, omit it if missing
    zip_text = f" {int(zip_code)}" if pd.notna(zip_code) else ""
    full_address = f"{address}, {city}, TX{zip_text}"

    # timeout keeps a stalled connection from hanging the notebook;
    # raise_for_status surfaces HTTP-level failures immediately
    reply = requests.get(base_url, params={**params, "address": full_address}, timeout=10)
    reply.raise_for_status()
    response = reply.json()
    google_info.append(response)

    results = response.get("results") or []
    if response.get("status") == "OK" and results:
        location = results[0]["geometry"]["location"]
        lat = location["lat"]
        lng = location["lng"]
    else:
        # keep the lists aligned and make the failure visible instead of
        # raising IndexError on an empty result list
        print(f"Geocoding failed for '{full_address}': status={response.get('status')}")
        lat = float("nan")
        lng = float("nan")
    g_lat.append(lat)
    g_lng.append(lng)

In [13]:
# add the coordinates to the schools DataFrame
# g_lat / g_lng hold one value per row of `missing_info`, in the same order
# (the geocoded addresses were taken from missing_info["Street.Address"]).
# Rows are targeted by the index labels of `missing_info` instead of by street
# address: a street address is not a unique key, so matching on it could also
# overwrite the coordinates of another school located at the same address.
schools_harris.loc[missing_info.index, 'latitude'] = g_lat
schools_harris.loc[missing_info.index, 'longitude'] = g_lng

schools_harris.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 972 entries, 23 to 9643
Data columns (total 51 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   School.ID                 972 non-null    int64  
 1   School.Name               972 non-null    object 
 2   Street.Address            972 non-null    object 
 3   CITY                      972 non-null    object 
 4   ZIP                       972 non-null    float64
 5   County                    972 non-null    object 
 6   District.Name             972 non-null    object 
 7   School.Type               972 non-null    object 
 8   Charter.Trad              972 non-null    object 
 9   Grade.Current             972 non-null    object 
 10  TEA Grade                 972 non-null    object 
 11  GoldRibbon                68 non-null     float64
 12  PEG                       972 non-null    object 
 13  County_Ranking            972 non-null    object 
 14  Asian.pc

In [14]:
# write the cleaned Harris County school ratings to a csv file
output_csv = "Output/Schools_Ratings_Harris_Co.csv"
schools_harris.to_csv(output_csv)