# packages

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 2015-2018 LA restaurant inspections

Found a 2015-2018 dataset at https://data.lacity.org/Community-Economic-Development/Restaurant-and-Market-Health-Inspections/29fd-3paw and exported it as a CSV file. The entries can also be accessed as JSON via the site's API.

In [50]:
# Load the exported 2015-2018 inspection CSV (path is relative to the
# notebook's working directory) into the notebook-wide frame `df`.
df = pd.read_csv(
    filepath_or_buffer='Restaurant_and_Market_Health_Inspections.csv',
)

In [51]:
# Display the column labels of the frame loaded from the inspection CSV.
df.columns

Index(['serial_number', 'activity_date', 'facility_name', 'score', 'grade',
       'service_code', 'service_description', 'employee_id',
       'facility_address', 'facility_city', 'facility_id', 'facility_state',
       'facility_zip', 'owner_id', 'owner_name', 'pe_description',
       'program_element_pe', 'program_name', 'program_status', 'record_id'],
      dtype='object')

In [52]:
# Preview the street-address column (pandas truncates the display itself).
df["facility_address"]

0               3708 N EAGLE ROCK BLVD
1                  100 WORLD WAY # 120
2            6801 HOLLYWOOD BLVD # 253
3                   1814 W SUNSET BLVD
4                   2100 ECHO PARK AVE
                     ...              
67568    104 JAPANESE VILLAGE PLZ MALL
67569           4050 W WASHINGTON BLVD
67570                     4312 WALL ST
67571               2012 SAWTELLE BLVD
67572        3600 E CESAR E CHAVEZ AVE
Name: facility_address, Length: 67573, dtype: object

In [53]:
# Convert the activity_date column to pandas datetimes, replacing it in `df`.
# Bracket access makes it explicit that an existing column is being overwritten.
df["activity_date"] = pd.to_datetime(df["activity_date"])

In [54]:
# Report the earliest and latest inspection dates in the dataset.
# Series.min()/.max() are vectorised and skip NaT; the Python builtins iterate
# element by element and give order-dependent answers if a NaT is present.
print(df["activity_date"].min())
print(df["activity_date"].max())

2015-07-01 00:00:00
2018-07-31 00:00:00


In [55]:
# Number of distinct facility names; dropna=False keeps a missing name (if any)
# counted as its own value, matching len(pd.unique(...)).
df["facility_name"].nunique(dropna=False)

12071

In [65]:
# Summarise the score range (min / mean / max) within each letter grade.
# Aggregations are given by name: passing the builtins min/max or np.mean to
# .agg is deprecated in pandas >= 2.1, while the string forms produce the same
# table on every pandas version.
(
    df[["score", "grade"]]
    .groupby("grade")
    .agg({"score": ["min", "mean", "max"]})
)

Unnamed: 0_level_0,score,score,score
Unnamed: 0_level_1,min,mean,max
grade,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
,65,65.0,65
A,90,94.087338,100
B,80,83.788204,89
C,64,74.048193,79


In [22]:
# Preview the facility-name column.
df["facility_name"]

0          HABITAT COFFEE SHOP
1                     REILLY'S
2               STREET CHURROS
3            TRINITI ECHO PARK
4                       POLLEN
                 ...          
67568             RAMEN MARUYA
67569              KANG WON DO
67570             OASIS MARKET
67571         TEN TEN YU RAMEN
67572    SUPERIOR GROCERS #113
Name: facility_name, Length: 67573, dtype: object

In [23]:
# Count inspection rows per facility name, most frequent first.
# NOTE(review): names are not unique identifiers here (the saved outputs show
# fewer distinct names than facility ids), so chains are presumably pooled.
df.value_counts(subset='facility_name')

facility_name
DODGER STADIUM           464
STAPLES CENTER (LEVY)    290
LA COLISEUM              225
WHOLE FOODS MARKET       208
SUBWAY                   155
                        ... 
THE PIANO BAR              1
ANZIO CAFE                 1
FUEGO NIGHT CLUB           1
QDOBA MEXICAN GRILL        1
MARKS LIQUOR               1
Length: 12071, dtype: int64

In [26]:
# Count inspection rows per facility id, most frequent first.
df.value_counts(subset='facility_id')

facility_id
FA0019271    464
FA0006427    290
FA0065100    225
FA0024180    117
FA0156500    103
            ... 
FA0247928      1
FA0042091      1
FA0163817      1
FA0042050      1
FA0262447      1
Length: 13545, dtype: int64

In [30]:
# Boolean mask marking the rows whose address is exactly this street address.
df["facility_address"].eq("3708 N EAGLE ROCK BLVD")

0         True
1        False
2        False
3        False
4        False
         ...  
67568    False
67569    False
67570    False
67571    False
67572    False
Name: facility_address, Length: 67573, dtype: bool

# more recent LA restaurant inspections

A more recent but smaller dataset is available at https://ehservices.publichealth.lacounty.gov/ezsearch.

In [38]:
# Column labels are supplied by hand for inspections.csv.
# NOTE(review): the file appears to have no header row (a saved `df.columns`
# output further down shows a data record used as labels) - confirm.
columns = [
    'facility_name',
    'inspection_date',
    'score',
    'facility_address',
    'facility_city',
]

# header=None spells out what pandas already does when `names` is given.
# Rebinding `df` here discards the 2015-2018 frame loaded earlier.
df = pd.read_csv('inspections.csv', header=None, names=columns)

In [42]:
# IPython help lookup: shows the signature and docstring of DataFrame.reset_index.
# NOTE(review): interactive scratch work - consider removing before sharing.
?df.reset_index

[1;31mSignature:[0m
[0mdf[0m[1;33m.[0m[0mreset_index[0m[1;33m([0m[1;33m
[0m    [0mlevel[0m[1;33m:[0m [1;34m'Hashable | Sequence[Hashable] | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdrop[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0minplace[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mcol_level[0m[1;33m:[0m [1;34m'Hashable'[0m [1;33m=[0m [1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mcol_fill[0m[1;33m:[0m [1;34m'Hashable'[0m [1;33m=[0m [1;34m''[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m [1;33m->[0m [1;34m'DataFrame | None'[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Reset the index, or a level of it.

Reset the index of the DataFrame, and use the default one instead.
If the DataFrame has a MultiIndex, this method can remove one or more
levels.

Parameters
----------
level : int, str, tuple, or list, default None
    Only rem

In [46]:
# Show only the rows with no missing value in any column, renumbered from 0.
# The result is displayed but not assigned, so `df` itself is left unchanged.
df.loc[df.notna().all(axis="columns")].reset_index(drop=True)

Unnamed: 0,facility_name,inspection_date,score,facility_address,facility_city
0,EAGLE CATERING,2020-08-06,90.0,7782 SAN FERNANDO RD,SUN VALLEY
1,WORLD OIL,2021-04-08,97.0,478 W ARROW HWY,COVINA
2,LOWE'S #1852,2022-05-03,99.0,13500 PAXTON ST,PACOIMA
3,LA VERNE CAR WASH,2021-12-06,97.0,914 W FOOTHILL BLVD,LA VERNE
4,THE LOOP,2021-08-25,99.0,1100 W COVINA BLVD,SAN DIMAS
...,...,...,...,...,...
44997,E & J SEAFOOD EXPRESS,2022-05-20,91.0,424 E MANCHESTER BLVD,INGLEWOOD
44998,BANH MI MY THO,2022-05-20,96.0,7434 GARVEY,ROSEMEAD
44999,CARSON MINI TRUCK STOP,2022-05-20,98.0,101 W VICTORIA BLVD,GARDENA
45000,UNCLE TETSU #31008,2022-05-19,98.0,17170 COLIMA RD # D,HACIENDA HEIGHTS


In [45]:
# Display the full frame as loaded, including rows with a missing score.
df

Unnamed: 0,facility_name,inspection_date,score,facility_address,facility_city
0,ARIEL COURT APTS SPA POOL,2020-01-31,,535 GAYLEY AVE,LOS ANGELES
1,EAGLE CATERING,2020-08-06,90.0,7782 SAN FERNANDO RD,SUN VALLEY
2,WILSHIRE BORGATA SPA POOL,2018-08-08,,12222 WILSHIRE BLVD,LOS ANGELES
3,WORLD OIL,2021-04-08,97.0,478 W ARROW HWY,COVINA
4,NEWCASTLE GARDENS SPA POOL,2020-06-15,,5461 NEWCASTLE AVE,ENCINO
...,...,...,...,...,...
125214,CARSON MINI TRUCK STOP,2022-05-20,98.0,101 W VICTORIA BLVD,GARDENA
125215,720 03RD ST APTS,2022-05-05,,720 3RD ST,HERMOSA BEACH
125216,GILLMAN APTS.,2022-05-20,,2869 SAN MARINO ST,LOS ANGELES
125217,UNCLE TETSU #31008,2022-05-19,98.0,17170 COLIMA RD # D,HACIENDA HEIGHTS


In [36]:
# Display the column labels of `df`.
# NOTE(review): the saved output is from execution count 36, before the read
# that passes `names=` (count 38); it shows a data record used as the labels
# and does not reflect the current frame - rerun the notebook top to bottom.
df.columns

Index(['ARIEL COURT APTS SPA POOL', '2020-01-31', 'Unnamed: 2',
       '535 GAYLEY AVE ', 'LOS ANGELES'],
      dtype='object')

In [7]:
# Preview the street-address column of `df`.
# NOTE(review): the saved output (execution count 7, 67573 rows) repeats the
# 2015-2018 dataset's addresses, not those of inspections.csv - it is stale and
# should be regenerated with a fresh top-to-bottom run.
df["facility_address"]

0               3708 N EAGLE ROCK BLVD
1                  100 WORLD WAY # 120
2            6801 HOLLYWOOD BLVD # 253
3                   1814 W SUNSET BLVD
4                   2100 ECHO PARK AVE
                     ...              
67568    104 JAPANESE VILLAGE PLZ MALL
67569           4050 W WASHINGTON BLVD
67570                     4312 WALL ST
67571               2012 SAWTELLE BLVD
67572        3600 E CESAR E CHAVEZ AVE
Name: facility_address, Length: 67573, dtype: object

## etc