In [1]:
# Scrape LA County mpox case counts from the county public-health chart widget
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from datetime import datetime



In [2]:
def fetch_mpx_data(url, timeout=30):
    """Download the page at *url* and return the JSON embedded in it.

    The payload is read from the first ``<script type="application/json">``
    element found in the HTML.

    Args:
        url: Address of the HTML page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON payload (whatever ``json.loads`` produces for it).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If no JSON script tag with text content is present, or
            its content is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', {'type': 'application/json'})
    # ``.string`` is None when the tag is empty or has several children;
    # passing that to json.loads would raise an unhelpful TypeError.
    if script_tag is None or not script_tag.string:
        raise ValueError("Could not find the data in the script tag")
    return json.loads(script_tag.string)

def process_data(data, trace_index=1):
    """Build a gap-free daily case table from the widget's JSON payload.

    Args:
        data: Parsed payload. ``data['x']['data']`` must be a sequence of
            series, each a mapping with parallel ``'x'`` (date values) and
            ``'y'`` (case counts) entries.
        trace_index: Position of the series to read inside
            ``data['x']['data']``. Defaults to 1, the series this script
            has always used.

    Returns:
        A DataFrame with columns ``date`` and ``cases`` holding one row per
        day from the earliest to the latest date in the series, in ascending
        order. Days missing from the series get a count of 0.

    Raises:
        ValueError: If the selected series contains no usable dates.
        KeyError, IndexError: If the payload lacks the expected structure.
    """
    plot_data = data['x']['data'][trace_index]
    df = pd.DataFrame({
        'date': pd.to_datetime(plot_data['x']),
        'cases': plot_data['y'],
    })

    first_day = df['date'].min()
    last_day = df['date'].max()
    # min()/max() yield NaT for an empty (or all-NaT) column; date_range
    # would reject that with a message that hides the real cause.
    if pd.isna(first_day):
        raise ValueError("No data points found in the selected series")

    full_date_range = pd.date_range(start=first_day, end=last_day, freq='D')
    # Reindexing on an ascending range both fills the missing days and
    # orders the rows, so no separate sort is needed afterwards.
    return (
        df.set_index('date')
        .reindex(full_date_range, fill_value=0)
        .rename_axis('date')
        .reset_index()
    )

def main():
    """Fetch the mpox widget data, print the daily table, and save it as CSV.

    Any exception raised while downloading, processing, or writing is caught
    and reported on stdout rather than propagated.
    """
    source_url = "http://publichealth.lacounty.gov/media/monkeypox/data/mpx-graph-wkly-widget_Alltime.html"
    output_path = 'monkeypox_data.csv'

    try:
        frame = process_data(fetch_mpx_data(source_url))
        print(frame)
        frame.to_csv(output_path, index=False)
        print("Data saved to monkeypox_data.csv")
    except Exception as err:
        print(f"An error occurred: {err}")

# Entry point: run main() only when this module is executed directly,
# not when it is imported by other code.
if __name__ == "__main__":
    main()

          date  cases
0   2022-05-19      1
1   2022-05-20      0
2   2022-05-21      0
3   2022-05-22      0
4   2022-05-23      0
..         ...    ...
892 2024-10-27      0
893 2024-10-28      0
894 2024-10-29      0
895 2024-10-30      1
896 2024-10-31      0

[897 rows x 2 columns]
Data saved to monkeypox_data.csv
