In [22]:
import requests
import pandas as pd
import plotly.express as px
from pymongo import MongoClient
from datetime import datetime

# Define the API endpoint and parameters:
# food enforcement reports with report_date 20200101..20241005, at most 500 rows.
url = "https://api.fda.gov/food/enforcement.json?search=report_date:[20200101+TO+20241005]&limit=500"

# Display label for each snake_case field returned by the API.
# Used both to rename the DataFrame columns and to look the values up again
# when building the MongoDB documents (the records only carry the renamed keys).
column_labels = {
    "status": "Status",
    "city": "City",
    "state": "State",
    "country": "Country",
    "classification": "Classification",
    "openfda": "Open FDA",
    "product_type": "Product Type",
    "event_id": "Event ID",
    "recalling_firm": "Recalling Firm",
    "address_1": "Address 1",
    "address_2": "Address 2",
    "postal_code": "Postal Code",
    "voluntary_mandated": "Voluntary Mandated",
    "initial_firm_notification": "Initial Firm Notification",
    "distribution_pattern": "Distribution Pattern",
    "recall_number": "Recall Number",
    "product_description": "Product Description",
    "product_quantity": "Product Quantity",
    "reason_for_recall": "Reason for Recall",
    "recall_initiation_date": "Recall Initiation Date",
    "center_classification_date": "Center Classification Date",
    "termination_date": "Termination Date",
    "report_date": "Report Date",
    "code_info": "Code Info",
    "more_code_info": "More Code Info",
}

# snake_case fields copied onto every document as text
# (recall_number is the intended primary key).
text_fields = (
    'recall_number',
    'status',
    'product_description',
    'code_info',
    'recalling_firm',
    'reason_for_recall',
    'distribution_pattern',
    'state',
    'voluntary_mandated',
    'event_id',
)

# snake_case fields stored as datetime objects instead of 'YYYYMMDD' strings.
date_fields = ('recall_initiation_date', 'report_date')


def _text_or_empty(value):
    """Return ``value`` as-is, or ``''`` when it is missing (None or float NaN)."""
    if value is None:
        return ''
    if isinstance(value, float) and value != value:  # NaN is the only float != itself
        return ''
    return value


def _parse_fda_date(value):
    """Parse a 'YYYYMMDD' string into a datetime.

    Returns None for missing values (None, NaN, empty string).
    Raises ValueError for a non-empty string that is not in 'YYYYMMDD' form.
    """
    if isinstance(value, str) and value:
        return datetime.strptime(value, '%Y%m%d')
    return None


# Send a GET request to the API (the timeout keeps a stalled connection from hanging the kernel)
response = requests.get(url, timeout=30)

if response.status_code == 200:
    data = response.json()
    recalls = data.get('results', [])
    recall_df = pd.DataFrame(recalls)

    clean_df = recall_df.rename(columns=column_labels)

    # Convert DataFrame to dictionary format for MongoDB
    recall_records = clean_df.to_dict("records")

    # Add the snake_case fields to each record.
    # The values live under the renamed (display) keys, so look them up via
    # column_labels; reading the snake_case keys directly would always miss.
    for record in recall_records:
        for field in text_fields:
            record[field] = _text_or_empty(record.get(column_labels[field]))
        record['recall_classification'] = _text_or_empty(
            record.get(column_labels['classification'])
        )
        for field in date_fields:
            record[field] = _parse_fda_date(record.get(column_labels[field]))

    # Connect to MongoDB; the context manager closes the client even if a
    # database call below raises.
    with MongoClient('mongodb://localhost:27017/') as client:  # Or use your MongoDB URI
        db = client['fda_recall_data']  # Database name
        collection = db['recalls']  # Collection name

        # Insert modified data into MongoDB.
        # insert_many rejects an empty list, so skip it when nothing was returned.
        # NOTE: re-running this cell inserts the same recalls again.
        if recall_records:
            collection.insert_many(recall_records)

        # Example: Query data back from MongoDB
        recalls_from_db = list(collection.find())

    # Convert back to DataFrame if needed
    db_df = pd.DataFrame(recalls_from_db)
else:
    print(f"Failed to retrieve data: {response.status_code}")



In [27]:
import requests
import pandas as pd
import plotly.express as px

# Endpoint: food enforcement reports with report_date 20200101..20241005, limit 500
url = "https://api.fda.gov/food/enforcement.json?search=report_date:[20200101+TO+20241005]&limit=500"

# Fetch the recall reports
response = requests.get(url)

if response.status_code != 200:
    print(f"Failed to retrieve data. Status code: {response.status_code}")
else:
    data = response.json()
    recalls = data.get('results', [])
    recall_df = pd.DataFrame(recalls)

    # API field name -> display column name
    renamed_columns = {
        "status": "Status",
        "city": "City",
        "state": "State",
        "country": "Country",
        "classification": "Classification",
        "openfda": "Open FDA",
        "product_type": "Product Type",
        "event_id": "Event ID",
        "recalling_firm": "Recalling Firm",
        "address_1": "Address 1",
        "address_2": "Address 2",
        "postal_code": "Postal Code",
        "voluntary_mandated": "Voluntary Mandated",
        "initial_firm_notification": "Initial Firm Notification",
        "distribution_pattern": "Distribution Pattern",
        "recall_number": "Recall Number",
        "product_description": "Product Description",
        "product_quantity": "Product Quantity",
        "reason_for_recall": "Reason for Recall",
        "recall_initiation_date": "Recall Initiation Date",
        "center_classification_date": "Center Classification Date",
        "termination_date": "Termination Date",
        "report_date": "Report Date",
        "code_info": "Code Info",
        "more_code_info": "More Code Info",
    }
    clean_df = recall_df.rename(columns=renamed_columns)

    # Keep the rows whose Country is "United States", then count rows per state
    is_us_row = clean_df['Country'] == "United States"
    us_recall_df = clean_df[is_us_row]
    per_state = us_recall_df['State'].value_counts()
    state_counts = per_state.reset_index()
    state_counts.columns = ['State', 'Count']

    # Choropleth keyed on the values of the State column
    choropleth_options = {
        "locations": 'State',
        "locationmode": "USA-states",  # match locations against US state abbreviations
        "color": 'Count',
        "color_continuous_scale": "YlOrRd",  # yellow-to-red scale
        "scope": "usa",  # restrict the map to the US
        "title": "FDA Recalls by State",
    }
    fig = px.choropleth(state_counts, **choropleth_options)

    # Draw lakes in white
    fig.update_layout(geo={"lakecolor": 'rgb(255, 255, 255)'})

    # Render the interactive figure
    fig.show()


In [28]:
# Display the per-state recall counts (columns: State, Count) computed in the
# choropleth cell. To inspect the renamed source frame instead, use clean_df.head().
state_counts

Unnamed: 0,State,Count
0,CA,54
1,NY,48
2,MD,44
3,OH,42
4,IL,30
5,FL,28
6,GA,21
7,WA,19
8,OR,15
9,TX,15


In [None]:
# Keep only the rows whose Country column equals "United States".
is_us_row = clean_df['Country'] == "United States"
us_recall_df = clean_df[is_us_row]

In [None]:
# Write the US-only recalls to CSV (without the index column).
# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
from pathlib import Path

output_path = Path('output_data/fda_food_enforcement_data.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
us_recall_df.to_csv(output_path, index=False)