In [2]:
# Data 205 - Capstone in Data Science
# Jennifer Paraboschi     Fall 2020
# Inputs: data sets from dataMontgomery
# including Alcohol Beverage Licensing Violations
####


In [3]:
# Import packages. 
# Pull in API data set as json.
import json
import numpy as np
import pandas as pd
import re
import requests
import seaborn as sns
import matplotlib as plt
violations_data = pd.DataFrame(requests.get("https://data.montgomerycountymd.gov/resource/4tja-rkhg.json").json())
violations_data.head()

Unnamed: 0,facilityname,address,violationdate,violation,disposition,dispositiondate
0,CASPIAN HOUSE OF KABOB,"72 MARKET ST , GAITHERSBURG , MARYLAND - 20878",2020-08-05T00:00:00.000,5.4 ALCOHOL AWARENESS CERTIFICATION,$100 +ALERT,2020-10-10T00:00:00.000
1,CASPIAN HOUSE OF KABOB,"72 MARKET ST , GAITHERSBURG , MARYLAND - 20878",2020-08-05T00:00:00.000,5.5 RECORDS,$500 +ALERT,2020-10-10T00:00:00.000
2,VILLAGE WEST LIQUOR,"19520 WATERS ROAD , GERMANTOWN , MARYLAND - 20876",2020-05-12T00:00:00.000,5.5 RECORDS,$500 +ALERT,2020-10-10T00:00:00.000
3,Q' VIVA COCINA & LOUNGE,"2322 UNIVERSITY BLVD. , SILVER SPRING , MARYLA...",2020-03-28T00:00:00.000,"6.4 CONSUMPTION, POSSESSION, OR SALE DURING PR...",$1000 +ALERT,2020-09-24T00:00:00.000
4,FINNEGAN'S WAKE IRISH PUB,"100 GIBBS ST , ROCKVILLE , MD - 20850",2020-03-20T00:00:00.000,"6.4 CONSUMPTION, POSSESSION, OR SALE DURING PR...",$1000 +ALERT,2020-11-05T00:00:00.000


In [4]:
print(violations_data)

                  facilityname  \
0       CASPIAN HOUSE OF KABOB   
1       CASPIAN HOUSE OF KABOB   
2          VILLAGE WEST LIQUOR   
3      Q' VIVA COCINA & LOUNGE   
4    FINNEGAN'S WAKE IRISH PUB   
..                         ...   
932               JOSE'S GRILL   
933               JOSE'S GRILL   
934         SEVEN-ELEVEN STORE   
935         SEVEN-ELEVEN STORE   
936         SEVEN-ELEVEN STORE   

                                               address  \
0       72 MARKET ST , GAITHERSBURG , MARYLAND - 20878   
1       72 MARKET ST , GAITHERSBURG , MARYLAND - 20878   
2    19520 WATERS ROAD , GERMANTOWN , MARYLAND - 20876   
3    2322 UNIVERSITY BLVD. , SILVER SPRING , MARYLA...   
4                100 GIBBS ST , ROCKVILLE , MD - 20850   
..                                                 ...   
932             11423 GEORGIA AVE. , WHEATON, MD 20902   
933             11423 GEORGIA AVE. , WHEATON, MD 20902   
934  14101 GEORGIA AVE. , ASPEN HILL , MARYLAND - 2...   
935       1

In [5]:
violations_data.shape

(937, 6)

In [6]:
type(violations_data)

pandas.core.frame.DataFrame

In [7]:
# There are 937 rows and 6 variables/columns.

In [8]:
violations_data.dtypes

facilityname       object
address            object
violationdate      object
violation          object
disposition        object
dispositiondate    object
dtype: object

In [9]:
# Python pulled the variables in as objects.

In [10]:
violations_data.describe(include="all")

Unnamed: 0,facilityname,address,violationdate,violation,disposition,dispositiondate
count,937,937,937,937,935,934
unique,544,579,337,38,144,405
top,GAITHERSBURG SUPERMARKET,"220 EAST DIAMOND AVENUE , GAITHERSBURG, MD 20877",2018-08-17T00:00:00.000,SALE TO MINOR,SETTLED; $1000.00 + ALERT,2020-09-11T00:00:00.000
freq,11,11,16,381,96,18


In [11]:
# There are a few missing values for disposition and disposition date (935 and 934 versus 937 in total).
# The most frequent violation is for sale to minor.

In [12]:
violations_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 937 entries, 0 to 936
Data columns (total 6 columns):
facilityname       937 non-null object
address            937 non-null object
violationdate      937 non-null object
violation          937 non-null object
disposition        935 non-null object
dispositiondate    934 non-null object
dtypes: object(6)
memory usage: 44.0+ KB


In [13]:
violations_data.axes

[RangeIndex(start=0, stop=937, step=1),
 Index(['facilityname', 'address', 'violationdate', 'violation', 'disposition',
        'dispositiondate'],
       dtype='object')]

In [14]:
# Replace missing disposition values with "No Violation".
violations_data["disposition"]=violations_data["disposition"].fillna(value="No Violation", inplace=True)

In [None]:
# Note To Self: How do I verify this step? (QA)

In [15]:
# The disposition var inconsistently contains a $ amount of the penalty with inconsistent formatting. 
# Come back to cleaning the var "disposition". 

In [16]:
# Import the public high schools data set.
schools_data = pd.DataFrame(requests.get("https://data.montgomerycountymd.gov/resource/772q-4wm8.json").json())
schools_data.head()

Unnamed: 0,category,school_name,address,city,zip_code,phone,url,longitude,latitude,location,:@computed_region_vu5j_pcmz,:@computed_region_tx5f_5em3,:@computed_region_kbsp_ykn9,:@computed_region_d7bw_bq6x,:@computed_region_rbt8_3x7n
0,HIGH SCHOOLS,Walter Johnson HS,6400 Rock Spring Dr,Bethesda,20814,301-803-7100,http://www.montgomeryschoolsmd.org/schools/wjhs,-77.1301017226,39.0253918855,"{'latitude': '39.0253918855', 'longitude': '-7...",1,1,19,103,1
1,HIGH SCHOOLS,Bethesda-Chevy Chase HS,4301 East West Hwy,Bethesda,20814,240-497-6300,http://www.montgomeryschoolsmd.org/schools/bcchs,-77.0889699717,38.9868264765,"{'latitude': '38.9868264765', 'longitude': '-7...",1,1,12,111,1
2,ELEMENTARY SCHOOLS,Bethesda ES,7600 Arlington Rd,Bethesda,20814,301-657-4979,http://www.montgomeryschoolsmd.org/schools/bet...,-77.0998645245,38.9864595389,"{'latitude': '38.9864595389', 'longitude': '-7...",1,1,12,111,1
3,ELEMENTARY SCHOOLS,Garrett Park ES (at Grosvenor Center),5701 Grosvenor La,Bethesda,20814,301-929-2170,http://www.montgomeryschoolsmd.org/schools/gar...,-77.1141310594,39.0235338133,"{'latitude': '39.0235338133', 'longitude': '-7...",1,1,19,111,1
4,ELEMENTARY SCHOOLS,Somerset ES,5811 Warwick Pl,Chevy Chase,20815,301-657-4985,http://www.montgomeryschoolsmd.org/schools/som...,-77.092408162,38.9691922144,"{'latitude': '38.9691922144', 'longitude': '-7...",1,1,5,110,7


In [17]:
# Drop the unnecessary columns (i.e., category, elementary/middle schools, phone and url).

In [18]:
high_schools=schools_data[schools_data["category"] == "HIGH SCHOOLS"]
cols_drop=["category","phone","url"]
high_schools.drop(cols_drop, inplace=True, axis=1)
print(high_schools)

                 school_name                    address           city  \
0          Walter Johnson HS        6400 Rock Spring Dr       Bethesda   
1    Bethesda-Chevy Chase HS         4301 East West Hwy       Bethesda   
20           Walt Whitman HS          7100 Whittier Blv       Bethesda   
28            Poolesville HS           17501 Willard Rd    Poolesville   
37       Thomas S Wootton HS           2100 Wootton Pkw      Rockville   
40              Rockville HS          2100 Baltimore Rd      Rockville   
46     Richard Montgomery HS  250 Richard Montgomery Dr      Rockville   
60      Winston Churchill HS      11300 Gainsborough Rd        Potomac   
66     Col Zadok Magruder HS     5939 Muncaster Mill Rd      Rockville   
72               Sherwood HS  300 Olney Sandy Spring Rd   Sandy Spring   
75           Paint Branch HS     14121 Old Columbia Pik   Burtonsville   
76             Clarksburg HS              22500 Wims Rd     Clarksburg   
81               Damascus HS          

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [19]:
# Map the high school locations.

In [38]:
# Import the plotly express

import plotly.express as px

fig_schools = px.scatter_geo(high_schools, 
                     lon="longitude", 
                     lat="latitude",
                     # choose the map chart's projection
                     projection="albers usa",
                     center=dict(lon=-77.14, lat=39.098),
                     # columns which is in bold in the pop up
                     hover_name = "school_name",
                     # format of the popup not to display these columns' data
                     hover_data = {"longitude": False, "latitude": False})
fig_schools.show()



In [None]:
# Note To Self: having trouble zooming in on this map. I decided to use a different map (below) that includes streets.

In [36]:
# Follow instructions from here: https://plotly.com/python/mapbox-layers/#openstreetmap-tiles-no-token-needed
high_schools['latitude']=high_schools['latitude'].astype(float)
high_schools['longitude']=high_schools['longitude'].astype(float)

fig2 = px.scatter_mapbox(high_schools, lat="latitude", lon="longitude", hover_name="school_name", zoom=9, 
                         hover_data={"latitude":False, "longitude":False})
fig2.update_layout(mapbox_style="open-street-map")
fig2.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig2.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Note To Self: I don't know what to do about the warnings (above)
    # A value is trying to be set on a copy of a slice from a DataFrame.
    # Try using .loc[row_indexer,col_indexer] = value instead

In [None]:
# Note To Self: While this map is OK, I can't decide what would make it better. I was not able to make the points bigger.
# I tried zooming in closer but then not all of the high schools show up.
# I tried to get the city to display as well as the HS name but was not able to get this to work.

In [None]:
# Note To Self: I was able to clean the addresses in excel, upload to geocoio, then use that csv to map. 
# However, I want to be able to do this directly from the dataMontgomery API. 
# I tried again for the geolocation using geopy (below) following these instructions from towardsdatascience.com.
#  https://towardsdatascience.com/pythons-geocoding-convert-a-list-of-addresses-into-a-map-f522ef513fd6


In [39]:
# Import the violations data set
violations_data = pd.DataFrame(requests.get("https://data.montgomerycountymd.gov/resource/4tja-rkhg.json").json())
violations_data.head()

Unnamed: 0,facilityname,address,violationdate,violation,disposition,dispositiondate
0,CASPIAN HOUSE OF KABOB,"72 MARKET ST , GAITHERSBURG , MARYLAND - 20878",2020-08-05T00:00:00.000,5.4 ALCOHOL AWARENESS CERTIFICATION,$100 +ALERT,2020-10-10T00:00:00.000
1,CASPIAN HOUSE OF KABOB,"72 MARKET ST , GAITHERSBURG , MARYLAND - 20878",2020-08-05T00:00:00.000,5.5 RECORDS,$500 +ALERT,2020-10-10T00:00:00.000
2,VILLAGE WEST LIQUOR,"19520 WATERS ROAD , GERMANTOWN , MARYLAND - 20876",2020-05-12T00:00:00.000,5.5 RECORDS,$500 +ALERT,2020-10-10T00:00:00.000
3,Q' VIVA COCINA & LOUNGE,"2322 UNIVERSITY BLVD. , SILVER SPRING , MARYLA...",2020-03-28T00:00:00.000,"6.4 CONSUMPTION, POSSESSION, OR SALE DURING PR...",$1000 +ALERT,2020-09-24T00:00:00.000
4,FINNEGAN'S WAKE IRISH PUB,"100 GIBBS ST , ROCKVILLE , MD - 20850",2020-03-20T00:00:00.000,"6.4 CONSUMPTION, POSSESSION, OR SALE DURING PR...",$1000 +ALERT,2020-11-05T00:00:00.000


In [43]:
# I had to pip install geopy on the cmd line to get this to work.
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="sample app")
# Apply geolocator.geocode to the address column
violations_data["location"]=violations_data["address"].apply(geolocator.geocode)

In [44]:
violations_data["point"]=violations_data["location"].apply(lambda loc: tuple(loc.point) if loc else None)

In [None]:
# Then pull the values into 3 diff vars (the 3rd one is altitude)

In [47]:
violations_data[["latitude", "longitude", "altitude"]] = 
pd.DataFrame(violations_data["point"].to_list(), index=violations_data.index)

In [48]:
# Map the locations of the violations
fig3 = px.scatter_mapbox(violations_data, lat="latitude", lon="longitude", hover_name="facilityname", zoom=9, 
                         hover_data={"latitude":False, "longitude":False})
fig3.update_layout(mapbox_style="open-street-map")
fig3.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig3.show()
