In [159]:
import requests
import pandas as pd
from pymongo import MongoClient
from datetime import datetime

# Fetch FDA food-enforcement (recall) reports from the openFDA API, normalise
# them, and store them in the local MongoDB collection `fda_recall_data.recalls`.

# Define the API endpoint and parameters
url = "https://api.fda.gov/food/enforcement.json?search=report_date:[20200101+TO+20241005]&limit=500"


def _parse_fda_date(value):
    """Parse a ``YYYYMMDD`` string into a ``datetime``.

    Args:
        value: Raw cell value taken from a record.

    Returns:
        The parsed ``datetime``, or ``None`` when ``value`` is not a
        non-empty string (for example ``None`` or a pandas ``NaN``).

    Raises:
        ValueError: If ``value`` is a non-empty string that does not match
            the ``%Y%m%d`` format.
    """
    if not isinstance(value, str) or not value:
        return None
    return datetime.strptime(value, '%Y%m%d')


# Send a GET request to the API (bounded so a stalled connection cannot hang the cell)
response = requests.get(url, timeout=30)

if response.status_code == 200:
    data = response.json()
    recalls = data.get('results', [])
    recall_df = pd.DataFrame(recalls)

    # Human-readable column names used by the analysis cells.
    clean_df = recall_df.rename(columns={
        "status": "Status",
        "city": "City",
        "state": "State",
        "country": "Country",
        "classification": "Classification",
        "openfda": "Open FDA",
        "product_type": "Product Type",
        "event_id": "Event ID",
        "recalling_firm": "Recalling Firm",
        "address_1": "Address 1",
        "address_2": "Address 2",
        "postal_code": "Postal Code",
        "voluntary_mandated": "Voluntary Mandated",
        "initial_firm_notification": "Initial Firm Notification",
        "distribution_pattern": "Distribution Pattern",
        "recall_number": "Recall Number",
        "product_description": "Product Description",
        "product_quantity": "Product Quantity",
        "reason_for_recall": "Reason for Recall",
        "recall_initiation_date": "Recall Initiation Date",
        "center_classification_date": "Center Classification Date",
        "termination_date": "Termination Date",
        "report_date": "Report Date",
        "code_info": "Code Info",
        "more_code_info": "More Code Info",
    })

    # Convert DataFrame to dictionary format for MongoDB
    recall_records = clean_df.to_dict("records")

    # (target key, source column in clean_df, parse as date?)
    # The source must be the *renamed* column: after the rename above the
    # snake_case keys no longer exist in the records, so reading them would
    # overwrite every normalised field with '' / None.
    field_map = [
        ('recall_number', 'Recall Number', False),               # Primary key
        ('recall_initiation_date', 'Recall Initiation Date', True),
        ('recall_classification', 'Classification', False),      # VARCHAR(50)
        ('status', 'Status', False),                             # VARCHAR(50)
        ('product_description', 'Product Description', False),   # TEXT
        ('code_info', 'Code Info', False),                       # TEXT
        ('recalling_firm', 'Recalling Firm', False),             # VARCHAR(50)
        ('reason_for_recall', 'Reason for Recall', False),       # TEXT
        ('distribution_pattern', 'Distribution Pattern', False), # TEXT
        ('state', 'State', False),                               # VARCHAR(50)
        ('report_date', 'Report Date', True),
        ('voluntary_mandated', 'Voluntary Mandated', False),     # VARCHAR(50)
        ('event_id', 'Event ID', False),                         # VARCHAR(50)
    ]

    # Modify records to ensure the correct structure
    for record in recall_records:
        for target_key, source_column, is_date in field_map:
            raw_value = record.get(source_column, '')
            record[target_key] = _parse_fda_date(raw_value) if is_date else raw_value

    # Connect to MongoDB
    client = MongoClient('mongodb://localhost:27017/')  # Or use your MongoDB URI
    try:
        db = client['fda_recall_data']  # Database name
        collection = db['recalls']  # Collection name

        # Upsert keyed on recall_number so re-running this cell does not
        # append another copy of every document (insert_many did, and it
        # also raises when given an empty list).
        for record in recall_records:
            recall_number = record.get('recall_number')
            if isinstance(recall_number, str) and recall_number:
                collection.replace_one(
                    {'recall_number': recall_number}, record, upsert=True
                )
            else:
                # No usable key to match on; store the record as-is.
                collection.insert_one(record)

        # Example: Query data back from MongoDB
        recalls_from_db = list(collection.find())

        # Convert back to DataFrame if needed
        db_df = pd.DataFrame(recalls_from_db)
    finally:
        # Close the connection even if a database call fails
        client.close()
else:
    print(f"Failed to retrieve data: {response.status_code}")
   


In [160]:
# Bar chart of how many recalls fall into each FDA classification.

# Guard: everything below reads the 'Classification' column
if 'Classification' not in df.columns:
    raise ValueError("Data is missing necessary column: 'Classification'.")

# Tally recalls per classification value
classification_counts = df['Classification'].value_counts()

# Report the tally for each class (0 when a class does not occur)
for class_label in ('Class I', 'Class II', 'Class III'):
    print(f"{class_label} count:", classification_counts.get(class_label, 0))

# Build the figure: class names on x, recall counts on y
bar_options = dict(
    x=classification_counts.index,
    y=classification_counts.values,
    labels={'x': 'Recall Classification', 'y': 'Total Recalls'},
    title="Total Recalls by Classification (Class I, II, III)",
)
fig = px.bar(classification_counts, **bar_options)

# Axis titles, centred heading and a fixed canvas size
layout_options = {
    'xaxis_title': "Classification",
    'yaxis_title': "Total Recalls",
    'title_x': 0.5,
    'height': 600,
    'width': 800,
}
fig.update_layout(**layout_options)

# Render the chart
fig.show()




Class I count: 9768
Class II count: 11088
Class III count: 1144


#Class I:
#Definition: A situation where there is a reasonable probability that the use of or exposure to a violative product will cause serious adverse health consequences or death.

#Class II:
#Definition: A situation where the use of or exposure to a violative product may cause temporary or medically reversible adverse health consequences, or where the probability of serious adverse health consequences is remote.

#Class III:
#Definition: A situation where the use of or exposure to a violative product is not likely to cause adverse health consequences.

In [163]:
# One US choropleth per recall classification, each saved as a standalone HTML
# file under ./output_data and then displayed.


def _build_class_choropleth(class_label, colorscale):
    """Build a USA choropleth of ``class_distribution[class_label]`` by state.

    Args:
        class_label: Column of ``class_distribution`` to plot (e.g. 'Class I');
            also used as the colour-bar title and inside the figure title.
        colorscale: Plotly colour-scale name for the map.

    Returns:
        The configured ``go.Figure``.
    """
    figure = go.Figure(go.Choropleth(
        locations=class_distribution['State'],
        locationmode='USA-states',
        z=class_distribution[class_label],
        colorscale=colorscale,
        colorbar_title=class_label,
        marker_line_color='white'
    ))
    figure.update_layout(
        title_text=f"Distribution of {class_label} Recalls across USA",
        geo=dict(
            scope='usa',
            projection=go.layout.geo.Projection(type='albers usa'),
            lakecolor='rgb(255, 255, 255)',
        ),
        margin={"r": 0, "t": 50, "l": 0, "b": 0}
    )
    return figure


def _save_map_html(figure, file_name, output_dir="./output_data"):
    """Write ``figure`` to ``output_dir/file_name``, creating the folder if needed."""
    # Local import keeps this cell self-contained.
    import os

    # write_html does not create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)
    figure.write_html(f"{output_dir}/{file_name}")


# Class I map (red)
fig_class_i = _build_class_choropleth('Class I', 'Reds')
_save_map_html(fig_class_i, "class_i_recalls.html")

# Class II map (blue)
fig_class_ii = _build_class_choropleth('Class II', 'Blues')
_save_map_html(fig_class_ii, "class_ii_recalls.html")

# Class III map (green)
fig_class_iii = _build_class_choropleth('Class III', 'Greens')
_save_map_html(fig_class_iii, "class_iii_recalls.html")

# Optionally, display the maps
fig_class_i.show()
fig_class_ii.show()
fig_class_iii.show()



In [162]:

# Heatmap of recall counts by classification for the ten states with the most
# recalls.
if 'State' in df.columns and 'Classification' in df.columns:
    class_labels = ['Class I', 'Class II', 'Class III']

    # Drop rows missing either field and upper-case the state codes.
    # .assign builds a new frame instead of writing into the dropna() result,
    # which is the pattern that triggers SettingWithCopyWarning.
    df_clean = (
        df.dropna(subset=['State', 'Classification'])
        .assign(State=lambda frame: frame['State'].str.upper())
    )

    # Find top 10 states by total number of recalls
    state_totals = df_clean.groupby('State').size().sort_values(ascending=False).head(10)

    # Keep only rows belonging to those states
    top_10_states_df = df_clean[df_clean['State'].isin(state_totals.index)]

    # Count by class: rows are states, columns are classification values
    heatmap_data = top_10_states_df.groupby(['State', 'Classification']).size().unstack(fill_value=0)

    # A class that never occurs in the top-10 states has no column after
    # unstack(); add it as zeros so the lookups below cannot raise KeyError.
    for class_label in class_labels:
        if class_label not in heatmap_data.columns:
            heatmap_data[class_label] = 0

    # Create heat map
    fig = go.Figure(data=go.Heatmap(
        z=[heatmap_data[class_label] for class_label in class_labels],  # one row of counts per class: Class I, II, III
        x=heatmap_data.index,
        y=class_labels,
        colorscale='Reds'
    ))

    # Title
    fig.update_layout(
        title="Heatmap of Recalls by State and Classification (Top 10 States)",
        xaxis_title="State",
        yaxis_title="Recall Classification",
        height=600,
        width=800
    )

    # Display
    fig.show()
else:
    print("Data does not contain 'State' or 'Classification' fields.")


