In [None]:
# Library imports
import datetime
from matplotlib import pyplot as plt

# Import your module containing the analysis Python functions
# You will need to set your PYTHONPATH to your working directory for this import
import example_code_aaw as eca

In [None]:
# Display matplotlib graphs within Jupyter notebook
%matplotlib inline

In [None]:
# Relative path to the full restaurant-scores CSV file loaded by the cells below
RAW_DATA = '../data/sf_restaurant_scores_full.csv'

In [None]:
# Use eca.load_file() to pull the dataset at RAW_DATA into the Jupyter notebook
all_data = eca.load_file(RAW_DATA)

In [None]:
# Examine the dataset columns by listing the keys of the first record
# (assumes each record is dict-like and the dataset is non-empty)
all_data[0].keys()

In [None]:
# Keep only the rows selected by eca.filter_by_inspection_type()
# (presumably routine inspections only; confirm against the module's default)
filtered_data = eca.filter_by_inspection_type(all_data)

In [None]:
# Build custom function to reformat 'inspection_date' into a sortable 'YYYY-MM-DD' string
def reformat_date(inspection_date):
    """Convert a 'MM/DD/YYYY...' date string into zero-padded 'YYYY-MM-DD' form.

    Anything after the four-digit year (for example a time of day) is ignored.
    Month and day are zero-padded so the returned strings compare in
    chronological order, which is what min()/max() over them rely on.

    Args:
        inspection_date: String starting with a month/day/year date separated
            by '/', e.g. '06/01/2015 12:00:00 AM' or '6/1/2015'.

    Returns:
        The date as a 'YYYY-MM-DD' string.

    Raises:
        ValueError: If the string does not contain at least two '/' separators.
    """
    # Split on the first two slashes only; the third piece starts with the year
    month, day, remainder = inspection_date.strip().split('/', 2)
    year = remainder[:4]
    return f'{year}-{month.zfill(2)}-{day.zfill(2)}'

In [None]:
# Show the most recent 'inspection_date' in the filtered dataset
max(map(reformat_date, (row['inspection_date'] for row in filtered_data)))

In [None]:
# Show the oldest 'inspection_date' in the filtered dataset
min(map(reformat_date, (row['inspection_date'] for row in filtered_data)))

In [None]:
# Split the oldest 'inspection_date' back into its individual year, month, day parts
# (the parts are strings produced by str.split, not integers)
year, month, day = min(reformat_date(row['inspection_date']) for row in filtered_data).split('-')
print(year, month, day)

In [None]:
# Split the most recent 'inspection_date' back into its individual year, month, day parts
# NOTE: this rebinds the same year/month/day names assigned from the oldest date in the
# previous cell, so only the most recent date's parts remain afterwards
year, month, day = max(reformat_date(row['inspection_date']) for row in filtered_data).split('-')
print(year, month, day)

In [None]:
# Convert the oldest inspection date to a datetime date object.
# The date is recomputed here instead of reusing the notebook-level year/month/day
# variables: those are rebound by more than one cell, so their value depends on
# which cell ran last.
oldest_inspection_date = min(reformat_date(row['inspection_date']) for row in filtered_data)
start_date = datetime.date(*(int(part) for part in oldest_inspection_date.split('-')))

In [None]:
# Build custom function to iterate through all month/year combinations,
# from the start month/year to the end month/year (both inclusive)
# Yields tuples: (month, year)
def generate_month_year(start_month, start_year, end_month, end_year):
    """Yield (month, year) tuples from the start month through the end month.

    Months advance one at a time and roll over to month 1 of the next year
    after month 12. Both endpoints are included.

    Args:
        start_month: Month number of the first tuple yielded.
        start_year: Year of the first tuple yielded.
        end_month: Month number of the last tuple yielded.
        end_year: Year of the last tuple yielded.

    Yields:
        (month, year) tuples in chronological order. Nothing is yielded when
        the start is later than the end.
    """
    month = start_month
    year = start_year

    # Compare as (year, month) so the ordering is chronological. Bounding the
    # loop this way guarantees termination even when the end point is before
    # the start or is never hit exactly.
    while (year, month) <= (end_year, end_month):
        yield month, year

        month += 1
        if month > 12:
            month = 1
            year += 1

In [None]:
# Build a list of all month/year combinations in the range below, and
# a parallel list of the per-month results of eca.count_restaurants_by_risk_score()
# NOTE(review): the range is hard-coded; presumably it matches the oldest and most recent
# inspection months shown earlier in the notebook -- confirm if the dataset changes
FIRST_MONTH, FIRST_YEAR = 6, 2015
LAST_MONTH, LAST_YEAR = 6, 2018

dates_of_data = []
aggregated_data = []
# Distinct loop names keep this cell from rebinding the notebook-level month/year variables
for period_month, period_year in generate_month_year(FIRST_MONTH, FIRST_YEAR, LAST_MONTH, LAST_YEAR):
    monthly_rows = eca.filter_by_month(filtered_data, period_month, period_year)
    aggregated_data.append(eca.count_restaurants_by_risk_score(monthly_rows))
    # Represent each month by its first day so it can be used as an x-axis value
    dates_of_data.append(datetime.date(period_year, period_month, 1))

In [None]:
# Inspect the list of dates built above (one datetime.date, set to day 1, per month/year)
dates_of_data

In [None]:
# Inspect the aggregated results (one entry per month/year, in the same order as dates_of_data)
aggregated_data

In [None]:
# Draw one line per risk category with matplotlib
# X axis: month (dates_of_data); Y axis: the count for that category in aggregated_data
# The figure also gets axis labels, a title, and a legend
fig, ax = plt.subplots(figsize=(20, 6))
ax.set_title('Restaurant Violations by Risk Category by Month')
ax.set_xlabel('Month')
ax.set_ylabel('Number of Violations')
# Categories are plotted in a fixed order so each keeps the same default line colour
for risk_category in ('No Violations', 'Low Risk', 'Moderate Risk', 'High Risk'):
    monthly_counts = [row[risk_category] for row in aggregated_data]
    ax.plot(dates_of_data, monthly_counts, label=risk_category)
ax.legend()