# Exploring NYCDOH's Violations Dataset

Lucky, a software engineer from Arizona, is looking to move to New York! They're looking for an apartment to rent and, in doing so, want to know which neighborhoods to avoid. They've asked us to help with their research and to narrow down their StreetEasy filters, as... well, they're a fan of hot water and really hate rodents.

As a result, we're killing two birds with one stone:

 We get to work on our data wrangling and data visualization skills, and also help out a friend in need! So let's dive in!

In [16]:
import pandas as pd
import hvplot.pandas  
import geopandas as gpd

import plotly.express as px 
from bokeh.layouts import layout

import panel as pn
pn.extension()
import holoviews as hv
hv.extension('bokeh')
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Uncomment and run the lines below to regenerate the cleaned CSV file

# import data_cleaning
# closed_violations = data_cleaning.clean('Housing_Maintenance_Code_Violations.csv')
# closed_violations.to_csv('Housing_Maintenance_CV_cleaned.csv')

In [3]:
# Load the cleaned violations table (the CSV written by the commented-out cleaning cell).
closed_violations = pd.read_csv(
    filepath_or_buffer='Housing_Maintenance_CV_cleaned.csv',
)

We'll primarily be manipulating two columns to help us in our analysis: InspectionDate and OriginalCorrectByDate

**InspectionDate** column is when the violation was observed  <br>
**OriginalCorrectByDate** is when the owner was expected to correct the violation by

Out of those two, we'll be creating two new columns:

**ViolationLength**: The length of time from Inspection date to the Date that owner should have corrected their violation <br>
**ViolationYear**: The year in which the Inspection was given

This will help us with our visualizations and plotting things over time

Our first visualization shows the average violation length over time, broken down by class!

#### Length of Violations on Average per Year by Class

In [26]:
# Mean ViolationLength for every (Class, ViolationYear) pair, rounded to 2 decimals.
violation_length = (
    closed_violations
    .groupby(['Class', 'ViolationYear'])['ViolationLength']
    .mean()
    .reset_index()
    .round(2)
)

# Distinct classes present in the aggregate; used as the selector's options.
vio_length = list(violation_length['Class'].unique())


In [42]:
# Dropdown that picks which violation class the line chart displays.
first_select = pn.widgets.Select(
    name='Select Class:',
    value='A',
    options=vio_length,
)


@pn.depends(first_select)
def violation_length_plot(vio_length):
    """Return a line chart of average violation length per year for one class.

    `vio_length` receives the class currently chosen in `first_select`.
    Inside this function it shadows the module-level list of the same name.
    """
    rows_for_class = violation_length[violation_length['Class'] == vio_length]
    return rows_for_class.hvplot(
        'ViolationYear',
        'ViolationLength',
        kind='line',
        yformatter='%.0f',
        color="#EF830F",
        title='Average Violation Lengths by Year (and Class)',
    )

## Class Violations by Borough

In [28]:
# Number of non-null ViolationID values for every (Class, Borough) pair.
violations_by_borough = (
    closed_violations
    .groupby(['Class', 'Borough'])['ViolationID']
    .count()
    .reset_index(name='Count')
)

# Distinct classes present in the aggregate; used as the selector's options.
class_violations = list(violations_by_borough['Class'].unique())


In [41]:
# Dropdown that picks which violation class the bar chart displays.
second_select = pn.widgets.Select(
    name='Select Class:',
    value='C',
    options=class_violations,
)


@pn.depends(second_select)
def violations_by_borough_plot(class_violations):
    """Return a bar chart of violation counts per borough for one class.

    `class_violations` receives the class currently chosen in `second_select`.
    Inside this function it shadows the module-level list of the same name.
    """
    rows_for_class = violations_by_borough[
        violations_by_borough['Class'] == class_violations
    ]
    return rows_for_class.hvplot(
        'Borough',
        'Count',
        kind='bar',
        yformatter='%.0f',
        color="#ff6f69",
        title='Violations by Borough (and Class)',
    )

## Violations by Zipcode (Enter Zipcode)

In [51]:
# Free-text box for the zipcode to map.
uZip = pn.widgets.TextInput(name="Enter the zipcode", value='11213')


@pn.depends(uZip)
def plotly_vioMap(paramZip):
    """Return a Plotly scatter map of the violations recorded in one zipcode.

    Parameters
    ----------
    paramZip : str
        Text currently entered in the `uZip` widget.

    Returns
    -------
    A Plotly figure with one point per matching violation row, coloured by
    `Class`. The figure has no points when nothing matches.

    Notes
    -----
    The widget always supplies a string, whereas `Postcode` is loaded with
    `pd.read_csv`, which infers a numeric dtype for all-digit columns. A plain
    `==` between a numeric column and a string matches no rows, so the typed
    text is converted to the column's representation before comparing.

    The `Address` column used for the hover label is created in a later cell;
    that cell must have run before this function is called.
    """
    postcodes = closed_violations['Postcode']
    zip_text = str(paramZip).strip()

    if pd.api.types.is_numeric_dtype(postcodes):
        # Unparseable input becomes NaN, which equals no row -> empty map.
        matches = postcodes == pd.to_numeric(zip_text, errors='coerce')
    else:
        matches = postcodes.astype(str).str.strip() == zip_text

    postcode_Violations = closed_violations[matches]

    figMapbox = px.scatter_mapbox(
        postcode_Violations,
        title='Violations by Zipcode',
        lat=postcode_Violations.Latitude,
        lon=postcode_Violations.Longitude,
        color='Class',
        hover_name="Address",
        hover_data=['Class'],
        opacity=1,
        mapbox_style='carto-positron',
        zoom=13,
    )

    return figMapbox

In [9]:
# Class C rows only. This subset is taken before the Address column is added
# below, so it does not carry that column.
class_c_violations = closed_violations.loc[closed_violations['Class'] == 'C']

# Street address used as the hover label on the zipcode map.
closed_violations['Address'] = (
    closed_violations['HouseNumber'] + ' ' + closed_violations['StreetName']
)

# Class C violation count per neighbourhood (NTA), as a two-column frame.
ntaVIO = (
    class_c_violations
    .groupby(['NTA'])['BuildingID']
    .count()
    .reset_index(name='Count')
)

# Neighbourhood boundary shapes for the choropleth maps.
ntaShape = gpd.read_file("NTA map.geojson")

In [10]:
# Step 1: keep the neighbourhoods whose Class C count is at most 1,000,000.
vioAMOUNT = ntaVIO.loc[ntaVIO['Count'] <= 1000000]

# Step 2: total the counts per NTA; reset_index turns the result back into a DataFrame.
vioTotalAmt = (
    vioAMOUNT
    .groupby(['NTA'])['Count']
    .sum()
    .reset_index()
)

# Step 3: attach the totals to the NTA shapes. The shape file names the
# neighbourhood `ntaname`, the counts frame names it `NTA`.
vioAmtShape = pd.merge(
    ntaShape,
    vioTotalAmt,
    how='inner',
    left_on='ntaname',
    right_on='NTA',
)

# Index by NTA so the map's `locations` line up with the geometry rows.
vioAmtShape = vioAmtShape.set_index("NTA")

# Choropleth of Class C counts per neighbourhood.
figVioPxChoro = px.choropleth_mapbox(
    vioAmtShape,
    geojson=vioAmtShape.geometry,
    locations=vioAmtShape.index,
    color="Count",
    color_continuous_scale=px.colors.sequential.Teal,
    center={"lat": 40.754932, "lon": -73.984016},
    mapbox_style="carto-positron",
    zoom=9,
)

## Class C Violation Totals by Neighborhood

In [11]:
# Slider over per-neighbourhood Class C counts; its range runs from the
# smallest to the largest count in ntaVIO.
vioSlider = pn.widgets.IntSlider(
    name="Violation totals by neighborhood",
    start=int(ntaVIO['Count'].min()),
    end=int(ntaVIO['Count'].max()),
    value=2000,
)


@pn.depends(vioSlider)
def plotly_violationSliderChoroMap(paramSlider):
    """Return a choropleth of neighbourhoods at or below a violation count.

    `paramSlider` is the current `vioSlider` value. Only neighbourhoods whose
    Class C count is less than or equal to it are drawn.
    """
    # Neighbourhoods within the selected threshold.
    within_threshold = ntaVIO[ntaVIO['Count'] <= paramSlider]

    # Total per NTA, returned as a DataFrame rather than a Series.
    totals_by_nta = (
        within_threshold
        .groupby(['NTA'])['Count']
        .sum()
        .reset_index()
    )

    # Join totals onto the NTA shapes (`ntaname` on the shape side).
    shapes_with_totals = pd.merge(
        ntaShape,
        totals_by_nta,
        how='inner',
        left_on='ntaname',
        right_on='NTA',
    )

    # Index by NTA so `locations` lines up with the geometry rows.
    shapes_with_totals.set_index("NTA", inplace=True)

    return px.choropleth_mapbox(
        shapes_with_totals,
        title='Class C violations by neighborhood',
        geojson=shapes_with_totals.geometry,
        locations=shapes_with_totals.index,
        color="Count",
        color_continuous_scale=px.colors.sequential.Teal,
        center={"lat": 40.754932, "lon": -73.984016},
        mapbox_style="carto-positron",
        zoom=9,
    )

## Violation Count by Neighborhood

In [12]:
# Multi-select of neighbourhood names, pre-filled with two neighbourhoods.
uNTAcode = pn.widgets.MultiChoice(
    name='Select Neighborhood:',
    value=['Crown Heights North', 'Flatbush'],
    options=list(closed_violations['NTA'].unique()),
    solid=False,
    max_items=200,
)
uNTAcode


# A more useful choropleth: the user picks the specific neighbourhoods to show.
@pn.depends(uNTAcode)
def plotly_violationChoroMap(uSelect):
    """Return a choropleth of Class C counts for the chosen neighbourhoods.

    `uSelect` is the list of neighbourhood names currently selected in
    `uNTAcode`; only those NTAs are drawn.
    """
    # Rows for the selected neighbourhoods only.
    chosen_rows = ntaVIO[ntaVIO['NTA'].isin(uSelect)]

    # Total per NTA, returned as a DataFrame rather than a Series.
    totals_by_nta = (
        chosen_rows
        .groupby(['NTA'])['Count']
        .sum()
        .reset_index()
    )

    # Join totals onto the NTA shapes (`ntaname` on the shape side).
    shapes_with_totals = pd.merge(
        ntaShape,
        totals_by_nta,
        how='inner',
        left_on='ntaname',
        right_on='NTA',
    )

    # Index by NTA so `locations` lines up with the geometry rows.
    shapes_with_totals.set_index("NTA", inplace=True)

    return px.choropleth_mapbox(
        shapes_with_totals,
        title='Individual Class C violations by Neighborhood',
        geojson=shapes_with_totals.geometry,
        locations=shapes_with_totals.index,
        color="Count",
        color_continuous_scale=px.colors.sequential.Teal,
        center={"lat": 40.754932, "lon": -73.984016},
        mapbox_style="carto-positron",
        zoom=9,
    )

## Dashboard Code

In [54]:
# Shout-out to the HoloViz Discourse forums and user maximlt.

vanilla = pn.template.VanillaTemplate(
    title='NYC DOH Violations Dataset',
    header_background='#000',
)
page = pn.Column(sizing_mode='stretch_width')

# Each contentN holds the [widget, plot] pair shown when sidebar button N is clicked.
content1 = [
    pn.Row(first_select, margin=0),
    pn.Column(violation_length_plot, margin=0, align="center"),
]
content2 = [
    pn.Column(second_select, margin=0),
    pn.Column(violations_by_borough_plot, margin=0, align="center"),
]
content3 = [
    pn.Column(uZip, margin=0),
    pn.Column(plotly_vioMap, margin=0, align="center"),
]
content4 = [
    pn.Column(vioSlider, margin=0),
    pn.Column(plotly_violationSliderChoroMap, margin=0, align="center"),
]
content5 = [
    pn.Column(uNTAcode, margin=0),
    pn.Column(plotly_violationChoroMap, margin=0, align="center"),
]

# Sidebar buttons, one per view.
link1 = pn.widgets.Button(name='Average Violation Lengths by Year (and Class)')
link2 = pn.widgets.Button(name='Violation Counts by Borough (and Class)')
link3 = pn.widgets.Button(name='Violations by Zipcode')
link4 = pn.widgets.Button(name='Total Class C Violations by Neighborhood')
link5 = pn.widgets.Button(name='Individual Class C Violations by Neighborhood')

for sidebar_button in (link1, link2, link3, link4, link5):
    vanilla.sidebar.append(sidebar_button)

# `page` becomes vanilla.main[0]; the click handlers swap its contents.
vanilla.main.append(page)


def _make_loader(objects):
    """Build a click handler that replaces the main page's objects with `objects`."""
    def _on_click(event):
        vanilla.main[0].objects = objects
    return _on_click


# One handler per view; a factory is used so each handler keeps its own content list.
load_content1 = _make_loader(content1)
load_content2 = _make_loader(content2)
load_content3 = _make_loader(content3)
load_content4 = _make_loader(content4)
load_content5 = _make_loader(content5)

link1.on_click(load_content1)
link2.on_click(load_content2)
link3.on_click(load_content3)
link4.on_click(load_content4)
link5.on_click(load_content5)


vanilla.show()

Launching server at http://localhost:43931


<panel.io.server.Server at 0x7f1e09168f40>