[Data description](http://www.opendatanetwork.com/dataset/data.cityofnewyork.us/9w7m-hzhe) 

Articles

* [Lower fines](http://www.nydailynews.com/new-york/restaurants-cheer-plan-slash-penalties-article-1.1430407) (nice window photo)
* [Grading pooh-pooh](http://www.nydailynews.com/opinion/city-council-throws-rotten-tomatoes-restaurant-grades-article-1.1036545) (grading placards)

# To do

1. Hall of shame (high scorers)
2. Grade not posted

Other post(?):

1. Violations '7' (obstruction) and 
    1. 10A (no toilet paper)

# Requirements

In [1]:
from __future__ import division, print_function
import pandas as pd
import numpy as np
import bokeh as bk
import pickle

import my_functions as mf

from bokeh.plotting import figure, output_notebook, show, output_file
from bokeh.io import push_notebook, reset_output
from bokeh.models import ColumnDataSource, HoverTool, Span, Range1d
from bokeh.embed import components

In [2]:
# Direct Bokeh output to the notebook. The plotting cells further down call
# reset_output() and output_file(), which replaces this setting for those figures.
output_notebook()

# Load data

In [3]:
# File name of the full export; it is only recorded here, not read in the visible cells.
original_file = 'DOHMH_New_York_City_Restaurant_Inspection_Results_new.csv'

# Load inspected_2015.csv, parsing both date columns into datetimes.
df15 = pd.read_csv(
    'inspected_2015.csv',
    parse_dates=['INSPECTION DATE', 'GRADE DATE'],
)

In [4]:
# Restore the two objects that were pickled together in dicts.pickle.
# NOTE(review): pickle.load can execute arbitrary code -- only load this file
# if it comes from a trusted source.
# NOTE(review): presumably lookup dicts for action and violation codes; neither
# is used in the cells visible here -- confirm.
with open('dicts.pickle','rb') as f:
    dict_action, dict_violation = pickle.load(f)

In [17]:
# Weekday labels, Monday first; used as the categorical x-axis of the plots.
days_of_week = 'Mon Tue Wed Thu Fri Sat Sun'.split()

# Inspections by day of week

In [5]:
# For every inspection date, count the distinct CAMIS values seen on that date.
day_2015 = df15['CAMIS'].groupby(df15['INSPECTION DATE']).nunique()

In [7]:
# Give every calendar day of 2015 a row: dates with no inspections are added,
# their missing counts become 0, and the counts are converted back to integers.
day_2015 = (
    day_2015
    .reindex(pd.date_range('2015-01-01', '2015-12-31'))
    .fillna(0)
    .apply(int)
)

In [8]:
# Dates excluded from the daily counts, in order:
#   New Year, MLK, Presidents', Memorial, Independence Day (ID4),
#   Labor, Columbus, Election, Thanksgiving, Xmas
holidays = pd.to_datetime([
    '2015-01-01', '2015-01-19', '2015-02-16', '2015-05-25', '2015-07-04',
    '2015-09-07', '2015-10-12', '2015-11-03', '2015-11-26', '2015-12-25',
])

# Keep only the non-holiday rows. A boolean mask is used instead of
# drop(..., inplace=True) so the cell can be re-run safely: drop() raises
# KeyError once the holiday labels have already been removed.
day_2015 = day_2015[~day_2015.index.isin(holidays)]

In [9]:
# Turn the per-date counts into a frame with an 'INSPECTIONS' column,
# label the date index 'DATE', and add each date's weekday number
# (pandas dayofweek: Monday == 0) as 'DAY'.
day_2015 = day_2015.to_frame('INSPECTIONS')
day_2015.index.name = 'DATE'
day_2015['DAY'] = day_2015.index.dayofweek

In [12]:
# Figure: number of inspections per date, grouped by weekday, overlaid with
# the weekday mean (outlined bar) and +/- one standard deviation (vertical
# line). The result is written to images/day_inspection.html.
reset_output()

p = figure(title='No. of NYC restaurant inspections by day (2015)',
           x_axis_label='*NY state holidays are excluded.',
           x_range=days_of_week,  # shared weekday labels, same as the score plot
           y_range=Range1d(0,320))
hover = HoverTool(tooltips='@DATE')  # tooltip shows the source's DATE column
p.add_tools(hover)
p.title_text_font_size = '14pt'
p.xaxis.axis_label_text_font_size = '10pt'

# One point per date; DATE is overwritten with a formatted string for the tooltip.
source = ColumnDataSource(day_2015)
source.add(day_2015.index.strftime('%m/%d/%y'), name="DATE")

# DAY is 0-based while the axis range starts at 0, hence the +1 offset on x.
p.circle(day_2015['DAY']+1,'INSPECTIONS',
         size = 6, alpha=0.5, line_color=None,
         source = source,
         legend = 'Date(s)')

# Per-weekday summary statistics, computed from a single grouping.
by_weekday = day_2015.groupby("DAY")['INSPECTIONS']
mean = by_weekday.mean()
std = by_weekday.std()

# The mean values are stored under the DATE key so the same '@DATE' tooltip
# displays the mean when hovering over a bar.
source2 = ColumnDataSource({})
source2.add(mean.values,name="DATE")

# One vertical segment per weekday, from mean - std to mean + std.
err_x = [(x, x) for x in mean.index+1]
err_y = [(y-yerr, y+yerr) for y, yerr in zip(mean.values, std.values)]

s_color = 'coral'
s_width = 1.5
# Bars are centred at mean/2 with height mean, so they span 0..mean.
p.rect(mean.index+1, mean/2, width=.3, height=mean,
       fill_color=None, line_color=s_color, line_width=s_width,
       legend='Mean',
       source=source2)
p.multi_line(err_x,err_y,
             color=s_color,line_width=s_width,
             legend='Stdv')

p.xgrid.grid_line_color = None
p.xaxis.major_tick_line_color = None

output_file('images/day_inspection.html')
show(p)

# Score by day

In [13]:
# Highest SCORE recorded for each (inspection date, CAMIS) pair, then the
# average of those values over all CAMIS on the same date.
# groupby(level=0).mean() replaces .mean(level=0), which was deprecated and
# then removed in pandas 2.0; the result is the same Series.
score_day = (
    df15.groupby(['INSPECTION DATE', 'CAMIS'])['SCORE']
    .max()
    .groupby(level=0)
    .mean()
)

In [15]:
# Attach the per-date mean score as a 'SCORE' column, aligned on the date index.
# Column assignment keeps exactly the rows already in day_2015, so dates that
# were removed earlier (holidays) are not re-added with NaN DAY/INSPECTIONS as
# an outer concat would do. It also preserves the index name and can be re-run
# without creating a second 'SCORE' column.
day_2015['SCORE'] = score_day

In [18]:
# Figure: SCORE per date, grouped by weekday, overlaid with the weekday mean
# (outlined bar) and +/- one standard deviation (vertical line). The result is
# written to images/score_day.html.
# Span comes from the notebook's top import cell; no local re-import is needed.
reset_output()

p = figure(title = 'NYC inspection score by day',
           x_range=days_of_week, y_range=Range1d(0,25))
p.title_text_font_size = '14pt'

hover = HoverTool(tooltips='@DATE')  # tooltip shows the source's DATE column
p.add_tools(hover)

# One point per date; DATE is overwritten with a formatted string for the tooltip.
source = ColumnDataSource(day_2015)
source.add(day_2015.index.strftime('%m/%d/%y'), name="DATE")

# A-B line: dashed horizontal marker at a score of 13.5.
AB_threshold = Span(location=13.5, dimension='width',
                    line_color='#BBBBBB', line_dash='dashed')
p.renderers.extend([AB_threshold])

# Grade letters drawn just below ('A') and just above ('B') the line,
# positioned past the last weekday.
x_pos = 7.3
f_size = '11pt'
p.text(x_pos,12.5, text=['A'], text_font_style='bold', text_font_size=f_size,  text_color='royalblue')
p.text(x_pos,13.7, text=['B'], text_font_style='bold', text_font_size=f_size, text_color='forestgreen')

# DAY is 0-based, hence the +1 offset on x (same convention as the
# inspections-by-day figure).
c_alpha = 0.5
p.circle(day_2015['DAY']+1,'SCORE',
         fill_alpha=c_alpha,
         line_width=1,
         line_color=None,
         size = 6,
         legend = 'Date(s)',
         source = source)

# Per-weekday summary statistics, computed from a single grouping.
by_weekday = day_2015.groupby("DAY")['SCORE']
mean = by_weekday.mean()
std = by_weekday.std()

# The mean values are stored under the DATE key so the same '@DATE' tooltip
# displays the mean when hovering over a bar.
source2 = ColumnDataSource({})
source2.add(mean.values,name="DATE")

# One vertical segment per weekday, from mean - std to mean + std.
err_x = [(x, x) for x in mean.index+1]
err_y = [(y-yerr, y+yerr) for y, yerr in zip(mean.values, std.values)]

s_color = 'coral'
s_width = 1.5
# Bars are centred at mean/2 with height mean, so they span 0..mean.
p.rect(mean.index+1, mean/2, width=.3, height=mean,
       fill_color=None, line_color=s_color, line_width=s_width,
       legend='Mean',
       source=source2)
p.multi_line(err_x,err_y,
             color=s_color,line_width=s_width,
             legend='Stdv')

p.xgrid.grid_line_color = None
p.xaxis.major_tick_line_color = None

output_file('images/score_day.html')
show(p)

# Sandbox

In [130]:
# Relabel dfx's three columns as 'A', 'B', 'C'.
# NOTE(review): dfx is created in the cell below (In [128]), so on a fresh
# top-to-bottom run this line fails because dfx does not exist yet.
dfx.columns = ['A','B','C']

In [128]:
# Small toy frame for experiments: two integer columns and one label column.
d = dict(
    a=[1, 1, 2, 3, 1],
    b=[1, 2, 2, 1, 2],
    c=['x', 'y', 'z', 'y', 'x'],
)
dfx = pd.DataFrame(d)

In [197]:
# Display q with missing values replaced by 0; the result is not assigned.
# NOTE(review): q is only assigned in later cells of this notebook, so this
# cell depends on out-of-order execution.
q.fillna(0)

0    1
1    1
2    2
3    3
4    1
Name: A, dtype: int64

In [177]:
# Column 'A' of dfx. This needs the cell that renames dfx's columns to
# 'A', 'B', 'C' to have run after dfx was created.
q = dfx['A']

In [192]:
# Two-element list whose first slot is then replaced by q.
# list(...) keeps this working on Python 3, where range() returns an immutable
# range object that does not support item assignment.
x = list(range(2))
x[0] = q

In [189]:
# Take columns 'A' and 'B' from dfx and collect them in a two-element list.
a, b = dfx['A'], dfx['B']
Q = [a, b]

In [29]:
# For each (a, b) combination, collect the distinct values of column 'c'.
# NOTE(review): this uses lower-case column names, while another sandbox cell
# renames dfx's columns to 'A', 'B', 'C'; run in file order the lower-case
# keys would no longer exist -- confirm which dfx this was meant for.
a = dfx.groupby(['a','b'])['c'].unique()

In [109]:
# Join a list of single-character strings into one string.
list1 = list('123')
str1 = ''.join(list1)

In [118]:
# Display the .data attribute of x.
# NOTE(review): the recorded output is a dict with keys 'A', 'B', 'C', 'index',
# so x was presumably a ColumnDataSource when this ran, not the list assigned
# to x elsewhere in the sandbox -- confirm.
x.data

{'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9], 'index': [0, 1, 2]}

In [114]:
# 3x3 toy frame with columns A, B, C holding 1..9 column by column.
# (An experiment with df3.where(...) using callables was left disabled here.)
df3 = pd.DataFrame(dict(
    A=[1, 2, 3],
    B=[4, 5, 6],
    C=[7, 8, 9],
))

In [371]:
# Toy frame whose 'c' column is constant; the final expression displays how
# many rows fall into each (a, b) group.
d = dict(
    a=[1, 1, 2, 3, 1],
    b=[1, 2, 2, 1, 2],
    c=['x'] * 5,
)
df1 = pd.DataFrame(d)
df1.groupby(['a', 'b']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,c
a,b,Unnamed: 2_level_1
1,1,1
1,2,2
2,2,1
3,1,1


In [376]:
# Remove column 'c', then count how often each 'b' value occurs within each
# 'a' group.
# axis is passed by keyword: the positional form drop('c', 1) is no longer
# accepted as of pandas 2.0.
df2 = df1.drop('c', axis=1)
q = df2.groupby(['a'])['b'].value_counts()

In [377]:
# Show the type of the value_counts() result held in q (recorded output: a pandas Series).
type(q)

pandas.core.series.Series