In [10]:
import requests
import pandas as pd
import plotly.express as px
from pymongo import MongoClient
from datetime import datetime

# ReplaceOne is only needed for the idempotent upsert below.
from pymongo import ReplaceOne

# Define the API endpoint and parameters.
# NOTE: `limit=500` caps the response at 500 records, so this is a sample of the
# requested date range rather than every recall reported in it.
url = "https://api.fda.gov/food/enforcement.json?search=report_date:[20200101+TO+20241005]&limit=500"

# Send a GET request to the API. The timeout keeps a stalled connection from
# hanging the kernel indefinitely.
response = requests.get(url, timeout=30)

if response.status_code == 200:
    data = response.json()
    recalls = data.get('results', [])
    recall_df = pd.DataFrame(recalls)

    # Give every column a human-readable, title-cased name
    clean_df = recall_df.rename(columns={
        "status": "Status", 
        "city": "City",
        "state": "State",
        "country":"Country",
        "classification":"Classification",
        "openfda":"Open FDA",
        "product_type":"Product Type",
        "event_id":"Event ID",
        "recalling_firm":"Recalling Firm",
        "address_1":"Address 1",
        "address_2":"Address 2",
        "postal_code":"Postal Code",
        "voluntary_mandated":"Voluntary Mandated",
        "initial_firm_notification":"Initial Firm Notification",
        "distribution_pattern":"Distribution Pattern",
        "recall_number":"Recall Number",
        "product_description":"Product Description",
        "product_quantity":"Product Quantity",
        "reason_for_recall":"Reason for Recall",
        "recall_initiation_date":"Recall Initiation Date",
        "center_classification_date":"Center Classification Date",
        "termination_date":"Termination Date",
        "report_date":"Report Date",
        "code_info":"Code Info",
        "more_code_info":"More Code Info",
    })

    # Convert DataFrame to dictionary format for MongoDB
    recall_records = clean_df.to_dict("records")

    # Connect to MongoDB
    client = MongoClient('mongodb://localhost:27017/')  # Or use your MongoDB URI
    try:
        db = client['fda_recall_data']  # Database name
        collection = db['recalls']  # Collection name

        if recall_records:
            # Upsert instead of insert_many so that re-running this cell refreshes the
            # stored documents rather than duplicating them (duplicates would inflate
            # every count computed from this collection).
            # Assumes "Recall Number" uniquely identifies a record -- TODO confirm.
            upserts = [
                ReplaceOne({"Recall Number": record["Recall Number"]}, record, upsert=True)
                for record in recall_records
            ]
            collection.bulk_write(upserts, ordered=False)
        else:
            # A bulk write with no operations raises, so skip it explicitly.
            print("The API returned no recall records; nothing was written to MongoDB.")

        # Example: Query data back from MongoDB
        recalls_from_db = list(collection.find())

        # Convert back to DataFrame if needed
        db_df = pd.DataFrame(recalls_from_db)
    finally:
        # Close the connection even if the write or the read-back fails
        client.close()
else:
    print(f"Failed to retrieve data: {response.status_code}")



In [11]:
import pandas as pd
import plotly.express as px
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')  # Replace with your MongoDB URI if necessary
try:
    db = client['fda_recall_data']  # Database name
    collection = db['recalls']  # Collection name

    # Query MongoDB and load the data into a pandas DataFrame
    recalls_cursor = collection.find({"State": {"$ne": ""}}, {"State": 1, "_id": 0})  # Fetch state data only
    recalls_df = pd.DataFrame(list(recalls_cursor))
finally:
    # The cursor has been fully materialised, so the connection can be released
    client.close()

# Only build the map when the query actually returned a 'State' column; an empty
# collection yields a frame with no columns and grouping on 'State' would raise.
if 'State' in recalls_df.columns:
    recalls_df['State'] = recalls_df['State'].str.upper()  # Ensure all state abbreviations are uppercase

    # Group by state and count the number of recalls per state
    state_counts = recalls_df.groupby('State').size().reset_index(name='Count')

    # Create Plotly choropleth map
    fig = px.choropleth(
        state_counts, 
        locations='State', 
        locationmode="USA-states",  # Use state abbreviations for location matching
        color='Count', 
        color_continuous_scale="YlGnBu",  # Green-Blue color scale
        scope="usa",  # Limit the map to USA
        title="FDA Recalls by State"
    )

    # Customize the layout (optional)
    fig.update_layout(
        geo=dict(
            lakecolor='rgb(255, 255, 255)'  # Set lake color to white
        )
    )

    # Save the figure as an interactive HTML file
    fig.write_html("./output_data/fda_recalls_by_state.html")

    # Optional: Show the figure in a notebook or browser (for testing or interactive use)
    fig.show()
else:
    print("No state data available in the dataset.")



In [12]:
import pandas as pd
from pymongo import MongoClient

# Postal abbreviations for the 50 U.S. states plus the District of Columbia
# (51 entries in total), kept in the same order as before.
us_states = (
    "AL AK AZ AR CA CO CT DE FL GA HI ID IL IN IA KS KY LA ME "
    "MD MA MI MN MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA "
    "RI SC SD TN TX UT VT VA WA WV WI WY DC"
).split()

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')  # Replace with your MongoDB URI if necessary
try:
    db = client['fda_recall_data']  # Database name
    collection = db['recalls']  # Collection name

    # Query MongoDB and load the data into a pandas DataFrame
    recalls_cursor = collection.find({"State": {"$ne": ""}}, {"State": 1, "_id": 0})  # Fetch state data only
    recalls_df = pd.DataFrame(list(recalls_cursor))
finally:
    # The cursor has been fully materialised, so the connection can be released
    client.close()

# Ensure the 'State' column exists and clean it
if 'State' in recalls_df.columns:
    recalls_df['State'] = recalls_df['State'].str.upper()  # Ensure all state abbreviations are uppercase

    # Get unique states present in the dataset (missing values are not states)
    states_in_dataset = recalls_df['State'].dropna().unique()

    # Find states that are missing from the dataset
    missing_states = sorted(set(us_states) - set(states_in_dataset))

    # Print the missing states
    print("States not present in the dataset:", missing_states)
else:
    print("No state data available in the dataset.")

States not present in the dataset: ['AK', 'DC', 'HI', 'LA', 'ME', 'MT', 'RI', 'SD', 'TN', 'WV']


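**In the data retrieved for Jan 2020 through Oct 2024, 10 jurisdictions (9 states plus Washington, D.C.) have no FDA food-related recalls. Note that the API request is capped at 500 records, so this reflects the retrieved sample rather than every recall in that period.**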
1. Alaska
2. Montana
3. South Dakota
4. Hawaii
5. Louisiana
6. Tennessee
7. Washington D.C.
8. Maine
9. West Virginia
10. Rhode Island

**The code below takes about 5 minutes to run before it creates the HTML file, because it uses the geopy library to look up latitude and longitude values from the 'City' and 'State' columns of the dataset.**

In [13]:
import requests
import pandas as pd
import plotly.express as px
from pymongo import MongoClient
from datetime import datetime
from geopy.geocoders import Nominatim
import time

# NOTE(review): this MongoDB handle is not used anywhere in this cell and targets a
# different database/collection than the earlier cells ('fda_recall_data' / 'recalls').
# Confirm whether it is still needed.
client = MongoClient('mongodb://localhost:27017/')
db = client['fda_recalls']
collection = db['food_enforcement']

# Drop coordinates left over from a previous run of this cell. Without this, the merge
# below would produce Latitude_x/Latitude_y columns and the map call would fail.
clean_df = clean_df.drop(columns=['Latitude', 'Longitude'], errors='ignore')

# Get unique cities and states for geocoding (copy so new columns can be added safely)
city_state_df = clean_df[['City', 'State']].drop_duplicates().copy()

# Geocoding cities to get latitude and longitude.
# A longer timeout than geopy's default avoids losing cities to slow responses.
geolocator = Nominatim(user_agent="fda_recall_app", timeout=10)
latitudes = []
longitudes = []
failed_lookups = []

for city, state in zip(city_state_df['City'], city_state_df['State']):
    location = None
    # Rows with a missing city or state cannot be geocoded meaningfully
    if pd.notna(city) and pd.notna(state):
        city_state = f"{city}, {state}, USA"
        try:
            location = geolocator.geocode(city_state)
        except Exception as exc:
            # Best effort: remember the failure and keep going. Unlike a bare
            # `except:`, this still lets Ctrl-C / kernel interrupt stop the loop.
            failed_lookups.append((city_state, exc))
        time.sleep(1)  # Add delay to avoid overwhelming the geocoding API

    latitudes.append(location.latitude if location else None)
    longitudes.append(location.longitude if location else None)

if failed_lookups:
    print(f"Geocoding raised an error for {len(failed_lookups)} location(s); they are left off the map.")

# Add latitude and longitude to the DataFrame
city_state_df['Latitude'] = latitudes
city_state_df['Longitude'] = longitudes

# Filter out rows where geocoding failed
city_state_df = city_state_df.dropna(subset=['Latitude', 'Longitude'])

# Merge latitude and longitude back to the main DataFrame
clean_df = pd.merge(clean_df, city_state_df, on=['City', 'State'], how='left')

# Create a map with city markers using Plotly
fig = px.scatter_mapbox(
    clean_df,
    lat="Latitude",
    lon="Longitude",
    hover_name="City",
    hover_data=["State", "Recall Number", "Reason for Recall"],
    zoom=3,
    height=1000,
    width=2000,
    title="FDA Recalls by City"
)

# Set mapbox style
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 50, "l": 0, "b": 0}  # Adjusted margins for full-width display
)

# Show or save the figure as an interactive HTML file
fig.write_html("./output_data/fda_recalls_by_city.html")
print("Map with city markers saved as HTML file.")

fig.show()

Map with city markers saved as HTML file.


In [14]:
# Keep only the rows whose 'Country' value is exactly "United States"
us_recall_df = clean_df.loc[clean_df['Country'].eq("United States")]

In [15]:
# Write the filtered recalls to CSV, leaving out the DataFrame index column
us_recall_df.to_csv(
    path_or_buf='output_data/fda_food_enforcement_data.csv',
    index=False,
)