# Food Recalls in the United States

## Data-gathering and Preparation
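This section installs the required packages and loads the recall data (`recalls.csv`) into a pandas DataFrame.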

### Initial installations

In [None]:
# installing packages
pip install pandas bokeh pyproj
pip install "pandas>=1.2.0,<1.2.3" "bokeh>=2.0.0,<2.3.0" "pyproj>=3.0,<3.0.1"

### Using Pandas to Read in CSV Data

In [33]:
# read the recalls CSV into a DataFrame and print it
import pandas as pd

file = "recalls.csv"
df = pd.read_csv(filepath_or_buffer=file)
print(df)

      start_date    end_date               id  \
0     2010-01-09  2012-06-03         001-2010   
1     2010-01-11  2013-06-03         002-2010   
2     2010-01-15  2013-06-03         003-2010   
3     2010-01-18  2013-06-03         004-2010   
4     2010-01-20  2013-06-03         005-2010   
...          ...         ...              ...   
1333  2022-09-23         NaN         033-2022   
1334  2022-09-24         NaN         034-2022   
1335  2022-10-01         NaN  PHA-10012022-01   
1336  2022-10-13         NaN  PHA-10132022-01   
1337  2022-10-16         NaN         035-2022   

                                                  title  \
0     California Firm Recalls Pork Skin Products Due...   
1     Massachusetts Firm Recalls Beef Products Due t...   
2     New York Firm Recalls Beef Carcass That Contai...   
3     California Firm Recalls Beef Products Due to P...   
4     Indiana Firm Recalls Frozen Chicken Pot Pie Pr...   
...                                                 ...  

In [34]:
# column names of the loaded DataFrame, as a plain Python list
list(df.columns)

['start_date',
 'end_date',
 'id',
 'title',
 'url',
 'reasons',
 'status',
 'risk_level',
 'establishment_id',
 'establishment_slug',
 'establishment_name',
 'establishment_address',
 'establishment_telephone',
 'establishment_grant_date',
 'establishment_activities',
 'quantity_recovered',
 'quantity_unit',
 'states']

## Code
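The cells below total the recovered quantity for each risk level and plot the result as an interactive Bokeh bar chart.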

### Using Bokeh to Visualize the Data (Categorical Bar Chart)

In [35]:
# Bokeh imports used to build the categorical bar chart
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import Spectral5
from bokeh.plotting import figure, output_file, show
from bokeh.transform import factor_cmap

# direct Bokeh's output to an HTML file
output_file('risk_levels.html')

In [54]:
# total quantity_recovered per risk_level (result is indexed by risk_level)
grouped = df.groupby('risk_level').agg({'quantity_recovered': 'sum'})

In [55]:
# show the summed quantity_recovered for each risk level
print(grouped)

            quantity_recovered
risk_level                    
High                45877378.0
Low                  5924153.0
Marginal              814947.0


In [56]:
# Convert the per-risk-level totals to tons.
# The totals are recomputed from `df` instead of dividing `grouped` in place, so
# re-running this cell cannot divide the values a second time.
# NOTE(review): assumes quantity_recovered is recorded in pounds (2000 lb per ton)
# -- confirm against the quantity_unit column.
POUNDS_PER_TON = 2000
grouped = df.groupby('risk_level')[['quantity_recovered']].sum() / POUNDS_PER_TON

In [57]:
# wrap the grouped totals in a ColumnDataSource, then start a figure whose
# categorical x-range is the list of risk levels held in that source
source = ColumnDataSource(data=grouped)
risk_levels = source.data['risk_level'].tolist()
p = figure(x_range=risk_levels)

In [58]:
# colour each bar by its risk level, draw the bars, then set the title and axis labels
color_map = factor_cmap('risk_level', palette=Spectral5, factors=risk_levels)

p.vbar(
    x='risk_level',
    top='quantity_recovered',
    width=0.70,
    color=color_map,
    source=source,
)

p.title.text = 'Quantity of FDA Recalled Foods by Risk Level'
p.xaxis.axis_label = 'Risk Level'
p.yaxis.axis_label = 'Quantity Recovered (in tons)'

In [59]:
# attach a hover tooltip that reports each bar's total, then render the chart
hover = HoverTool(
    tooltips=[("Totals", "@quantity_recovered Quantity Recovered")],
    mode='vline',
)
p.add_tools(hover)

show(p)

## Artifacts
Data visualization (the interactive bar chart written to `risk_levels.html`) and map.

## Written Discussion
Contextualize your research question

Describe your data-set and data preparation

Discuss any issues with your code

Interpret your artifact

Provide a conclusion to the project