In [3]:
# Dependencies
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import gmaps
import requests
import json
import time

# read in crime csv file
crime_df = pd.read_csv("Resources/2013-2014_MN_Crime_OUTPUT_FILE.csv")

# API key
gkey = ''
gmaps.configure(api_key=gkey)


In [4]:
# Google Maps API
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# set up place holders for lats and lngs, target_city
lats = []
lngs = []
target_city = ''

# loop through cities, get lats and longs
for city in crime_df.Agency:
    
    target_city = city
    
    params = {'address': f"{target_city}, MN", 'key': gkey}

    print(f"Looking up city: {city}")
    try:
        # lookup city
        response = requests.get(base_url, params=params)

        # Convert to JSON
        geodata = response.json()

        # Extract lat/lng
        lats.append(geodata["results"][0]["geometry"]["location"]["lat"])
        lngs.append(geodata["results"][0]["geometry"]["location"]["lng"])
        
        print("Data retrieved")
    except:

        print("------- DATA NOT FOUND -----------")

Looking up city: Albert Lea
Data retrieved
Looking up city: Alexandria
Data retrieved
Looking up city: Anoka
Data retrieved
Looking up city: Apple Valley
Data retrieved
Looking up city: Austin
Data retrieved
Looking up city: Bemidji
Data retrieved
Looking up city: Big Lake
Data retrieved
Looking up city: Blaine
Data retrieved
Looking up city: Bloomington
Data retrieved
Looking up city: Brainerd
Data retrieved
Looking up city: Brooklyn Center
Data retrieved
Looking up city: Brooklyn Park
Data retrieved
Looking up city: Buffalo
Data retrieved
Looking up city: Burnsville
Data retrieved
Looking up city: Circle Pines-Lexington
Data retrieved
Looking up city: Champlin
Data retrieved
Looking up city: Chaska
Data retrieved
Looking up city: Cloquet
Data retrieved
Looking up city: Columbia Heights
Data retrieved
Looking up city: Coon Rapids
Data retrieved
Looking up city: Cottage Grove
Data retrieved
Looking up city: Crystal
Data retrieved
Looking up city: Duluth
Data retrieved
Looking up city: 

In [117]:
# add geo-coords to dataframe
crime_df['Lat'] = lats
crime_df['Lng'] = lngs

crime_rates = crime_df[['Agency', 'Avg_VC', 'Avg_PC', 'Avg_Crime_Rate', 'Lat', 'Lng']]

crime_rates.rename(columns={'Agency': 'City',
                           'Avg_VC': 'Violent_Crime_Rate',
                           'Avg_PC': 'Property_Crime_Rate',
                           'Avg_Crime_Rate': 'Overall_Crime_Rate',
                           'Lat': 'Latitude',
                           'Lng': 'Longitude'},
                  inplace=True)

crime_rates.to_csv("Resources/CleanCrimeRatesWith_Lat_Lngs.csv", index=False)

ValueError: Length of values does not match length of index

In [159]:
crime = pd.read_csv("Resources/CleanCrimeRatesWith_Lat_Lngs.csv")

rates_locations = crime[['Latitude', 'Longitude']]
violent_crime = crime['Violent_Crime_Rate'].astype(float)

In [163]:
# Create a poverty Heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(rates_locations, weights= violent_crime, 
                                 dissipating=False, max_intensity=violent_crime.max()*.8,
                                 point_radius = 0.3)


fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [77]:
# Read in school data
school_data = pd.read_excel("Resources/CleanedGradFile2018.xlsx")
# Drop extra index row
school_data.drop(['Unnamed: 0'], axis=1, inplace=True)

school_data.drop_duplicates(inplace=True)
school_data.dropna(how='any', inplace=True)

In [78]:
school_data.count()

District Number            634
School Number              634
School Name                634
Demographic Description    634
Ending Status              634
Four Year Percent          634
Four Year Total            634
Physical Line 1            634
Physical City              634
Physical State             634
Physical Zip               634
dtype: int64

In [79]:
# Google Maps API
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# set up place holders for lats and lngs, target_city
lats = []
lngs = []
address = ''
city = ''
zipcode = ''

print("Beginning data retrieval....\n" \
     ".................................")

# loop through cities, get lats and longs
for index, row in school_data.iterrows():
    
    city = row['Physical City']
    address = row['Physical Line 1']
    zipcode = row['Physical Zip']
    
    params = {'address': f"{address}, {city}, MN {zipcode}", 'key': gkey}

    print(f"Looking up school #{index + 1} of {school_data['School Name'].count()}: {row['School Name']}")
    try:
        # lookup city
        response = requests.get(base_url, params=params)

        # Convert to JSON
        geodata = response.json()

        # Extract lat/lng
        lats.append(geodata["results"][0]["geometry"]["location"]["lat"])
        lngs.append(geodata["results"][0]["geometry"]["location"]["lng"])
        
        print("Data retrieved")
    except:
        # append NaN to lists
        lats.append('NaN')
        lngs.append('NaN')
          
        print("------- DATA NOT FOUND -----------")
        
    time.sleep(.33)
          
print("--- Data Retrieval Complete! ---")

Beginning data retrieval....
.................................
Looking up school #1 of 634: AITKIN SECONDARY SCHOOL
Data retrieved
Looking up school #2 of 634: BASIC SKILLS-4
Data retrieved
Looking up school #3 of 634: 800 W BROADWAY
Data retrieved
Looking up school #4 of 634: FAIR SENIOR HIGH
Data retrieved
Looking up school #5 of 634: MERC
Data retrieved
Looking up school #6 of 634: LORING-NICOLLET HIGH
Data retrieved
Looking up school #7 of 634: EDISON SENIOR HIGH
Data retrieved
Looking up school #10 of 634: LONGFELLOW ALTERNATIVE
Data retrieved
Looking up school #11 of 634: HENRY SENIOR HIGH
Data retrieved
Looking up school #14 of 634: PLYMOUTH YOUTH CENTER
Data retrieved
Looking up school #15 of 634: ROOSEVELT SENIOR HIGH
Data retrieved
Looking up school #19 of 634: SOUTH SENIOR HIGH
Data retrieved
Looking up school #23 of 634: WELLSTONE INTERNATIONAL HIGH
Data retrieved
Looking up school #24 of 634: SOUTHWEST SENIOR HIGH
Data retrieved
Looking up school #28 of 634: WASHBURN SENIO

In [81]:
school_data['Latitude'] = lats
school_data['Longitude'] = lngs

school_data.head()

Unnamed: 0,District Number,School Number,School Name,Demographic Description,Ending Status,Four Year Percent,Four Year Total,Physical Line 1,Physical City,Physical State,Physical Zip,Latitude,Longitude
0,1,1,AITKIN SECONDARY SCHOOL,All Students,Graduate,90.8,87.0,306 2nd St NW,Aitkin,MN,56431,46.533666,-93.711217
1,1,307,BASIC SKILLS-4,All Students,Graduate,77.36,106.0,1250 W Broadway Ave,Minneapolis,MN,55411,44.999864,-93.296567
2,1,346,800 W BROADWAY,All Students,Graduate,3.57,28.0,1250 West Broadway,Minneapolis,MN,55411,44.999864,-93.296567
3,1,347,FAIR SENIOR HIGH,All Students,Graduate,79.37,63.0,10 South 10th Street,Minneapolis,MN,55403,44.972454,-93.271116
4,1,348,MERC,All Students,Graduate,3.7,27.0,2539 Pleasant Ave,Minneapolis,MN,55404-4213,44.95597,-93.282631


In [100]:
# remove schools within schools and ALCs

clean_school = school_data[(school_data['School Name'].str.contains(' - SWS') == False) &
                          (school_data['School Name'].str.contains(' ALC') == False)]
clean_school.count()

District Number            614
School Number              614
School Name                614
Demographic Description    614
Ending Status              614
Four Year Percent          614
Four Year Total            614
Physical Line 1            614
Physical City              614
Physical State             614
Physical Zip               614
Latitude                   614
Longitude                  614
dtype: int64

In [134]:
school_by_city = clean_school.groupby('Physical City').median()
school_by_city.head()

Unnamed: 0_level_0,District Number,School Number,Four Year Percent,Four Year Total,Latitude,Longitude
Physical City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ada,2854.0,30.0,95.12,41.0,47.29878,-96.527396
Adams,500.0,40.0,97.92,48.0,43.567879,-92.71604
Adrian,511.0,20.0,94.87,39.0,43.63254,-95.936358
Aitkin,1.0,1.0,90.8,87.0,46.533666,-93.711217
Albany,745.0,60.0,96.95,131.0,45.631843,-94.563729


In [168]:
locations = school_by_city[['Latitude', 'Longitude']]
grad_percent = school_by_city['Four Year Percent']

latitudes = school_by_city['Latitude']
longitudes = school_by_city['Longitude']

grad_data = zip(latitudes, longitudes, grad_percent)

# Create a Heatmap layer
fig = gmaps.Map()

heat_layer = gmaps.heatmap_layer(locations, weights= grad_percent, 
                                 dissipating=False, max_intensity=grad_percent.max()*2,
                                 point_radius = 0.3)


fig.add_layer(heat_layer)

fig

Map(configuration={'api_key': 'AIzaSyBWHQdnW9ZeMfnJa55MMzCfztvQu3rRioQ'}, data_bounds=[(42.821592335498444, -9…

In [116]:
locations = clean_school[['Latitude', 'Longitude']]
grad_percent = clean_school['Four Year Percent']

latitudes = clean_school['Latitude']
longitudes = clean_school['Longitude']

grad_data = zip(latitudes, longitudes, grad_percent)

# Create a Heatmap layer
fig = gmaps.Map()

heat_layer = gmaps.heatmap_layer(locations, weights= grad_percent, 
                                 dissipating=False, max_intensity=0.1,
                                 point_radius = 0.1)


fig.add_layer(heat_layer)

fig

Map(configuration={'api_key': 'AIzaSyBWHQdnW9ZeMfnJa55MMzCfztvQu3rRioQ'}, data_bounds=[(43.017966379834135, -9…

In [157]:
crime_2014 = pd.read_csv("Resources/2014_MN_Crime_Rates_OUTPUT_FILE.csv")
crime_2014.dropna(inplace=True)

violent_crime = crime_2014['VC_Total'].astype(float)

crime_locations = crime[['Latitude', 'Longitude']]

# Create a poverty Heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(crime_locations, weights= violent_crime, 
                                 dissipating=False, max_intensity=violent_crime.max()/4,
                                 point_radius = 0.4)


fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [145]:
crime_2014.count()

Agency         91
Population     91
VC_Total       91
VC_Rate        91
PC_Total       91
PC_Rate        91
Crime_Total    91
Crime_Rate     91
dtype: int64

In [154]:
violent_crime.max()

4093.0