In [7]:
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import requests

# Step 1: Define the API endpoint for the PM2.5
url = "https://api-open.data.gov.sg/v2/real-time/api/pm25"

# Step 2: Define the date range (both ends are included in the loop below)
start_date = datetime(2023, 10, 1)  # Start date
end_date = datetime(2024, 9, 30)    # End date

# Seconds to wait for the server before giving up on a single request.
# Without a timeout one stalled connection would hang the whole loop.
REQUEST_TIMEOUT = 30

# Where the combined readings are written. Built from the current user's
# home directory so the notebook is not tied to one machine's absolute path.
OUTPUT_CSV = Path.home() / "Downloads" / "pm25_data.csv"


def fetch_pm25_items(session, date_str):
    """Request one day of PM2.5 data and return the response's ``items`` list.

    Parameters
    ----------
    session : requests.Session
        Session used to send the GET request (reused across days so the
        underlying connection can be kept alive).
    date_str : str
        Day to query, formatted ``YYYY-MM-DD``; sent as the ``date`` parameter.

    Returns
    -------
    list
        The ``items`` found under ``data`` in the JSON response, or an empty
        list when the request fails, the status code is not 200, the body is
        not JSON, the response ``code`` is not 0, or no items are present.
        Every failure is reported with ``print`` instead of raising, so one
        bad day does not abort the whole date range.

    NOTE(review): only a single response per day is read. If the endpoint
    paginates its results, later pages are not fetched -- confirm against the
    API documentation.
    """
    try:
        response = session.get(url, params={"date": date_str}, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Connection errors, timeouts, etc.: skip this day and keep going.
        print(f"Failed to fetch data for {date_str}. Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data for {date_str}. Status code: {response.status_code}")
        return []

    try:
        json_data = response.json()
    except ValueError:
        # The JSON decode errors raised by requests are ValueError subclasses.
        print(f"Failed to parse data for {date_str}. Response body is not valid JSON.")
        return []

    payload = json_data.get("data")
    if json_data.get("code") != 0 or payload is None:
        print(f"No PM2.5 data found for {date_str}. Code: {json_data.get('code')}")
        return []

    # `or []` also covers an explicit null value for "items".
    items = payload.get("items") or []
    if not items:
        print(f"No PM2.5 readings returned for {date_str}.")
    return items


def extract_pm25_records(items, date_str):
    """Flatten API items into one record per (timestamp, region) reading.

    Parameters
    ----------
    items : list of dict
        Items as returned by ``fetch_pm25_items``. Each item's ``timestamp``
        and ``readings["pm25_one_hourly"]`` mapping are read.
    date_str : str
        The queried day, stored in every record under ``'date'``.

    Returns
    -------
    list of dict
        Records with keys ``'region'``, ``'value'``, ``'timestamp'``, ``'date'``.
    """
    records = []
    for item in items:
        timestamp = item.get("timestamp")
        # `or {}` guards against keys that are present but null.
        readings = (item.get("readings") or {}).get("pm25_one_hourly") or {}
        records.extend(
            {
                'region': region,
                'value': value,
                'timestamp': timestamp,
                'date': date_str,  # Add the date for each reading
            }
            for region, value in readings.items()
        )
    return records


# Step 3: Loop through the date range, one request per day
pm_data = []
with requests.Session() as session:
    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime("%Y-%m-%d")  # Use YYYY-MM-DD format
        pm_data.extend(extract_pm25_records(fetch_pm25_items(session, date_str), date_str))

        # Move to the next day
        current_date += timedelta(days=1)

# Step 4: Create a DataFrame from the collected data
if pm_data:
    combined_df = pd.DataFrame(pm_data)

    # Preview the DataFrame
    print(combined_df.head())
    # info() prints its own report and returns None, so it is not wrapped in print().
    combined_df.info()

    # Make sure the target folder exists before writing.
    OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)
    combined_df.to_csv(OUTPUT_CSV, index=False)  # Save to CSV file
else:
    print("No PM2.5 data collected.")

Failed to fetch data for 2023-10-08. Status code: 404
Failed to fetch data for 2023-11-04. Status code: 404
Failed to fetch data for 2023-11-05. Status code: 404
    region  value                  timestamp        date
0     west      8  2023-10-01T23:00:00+08:00  2023-10-01
1     east     21  2023-10-01T23:00:00+08:00  2023-10-01
2  central     29  2023-10-01T23:00:00+08:00  2023-10-01
3    south     13  2023-10-01T23:00:00+08:00  2023-10-01
4    north     11  2023-10-01T23:00:00+08:00  2023-10-01
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42768 entries, 0 to 42767
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   region     42768 non-null  object
 1   value      42768 non-null  int64 
 2   timestamp  42768 non-null  object
 3   date       42768 non-null  object
dtypes: int64(1), object(3)
memory usage: 1.3+ MB
None
