In [7]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# Step 1: Define the API endpoint and where the combined result is saved
url = "https://api.data.gov.sg/v1/environment/air-temperature"
output_path = '/Users/yvonne/Downloads/airtempacrosssg.csv'

# Seconds to wait for a single response before giving up on that day.
# Without a timeout one stalled connection would hang the whole run.
request_timeout = 30

# Step 2: Define the date range (both ends are included in the loop below)
start_date = datetime(2023, 10, 1) #YYYY-MM-DD
end_date = datetime(2024, 9, 30) #YYYY-MM-DD

# One DataFrame of readings is appended here per item returned by the API
data_frames = []

# Station metadata keyed by station id, gathered from EVERY successful
# response. Using only the final response would leave stations that are
# missing from it without name/location after the merge in Step 7.
stations_by_id = {}

# Step 3: Loop through the date range, sending one request per day.
# A Session reuses the underlying connection across the requests.
total_days = (end_date - start_date).days + 1
with requests.Session() as session:
    for day_offset in range(total_days):
        current_date = start_date + timedelta(days=day_offset)

        # The same time of day (12:00:00) is requested for every date
        date_time_str = current_date.strftime("%Y-%m-%dT12:00:00")

        # Step 4: Send a GET request to the API. A network failure on one
        # day is reported and skipped so the days already collected are kept.
        try:
            response = session.get(
                url,
                params={"date_time": date_time_str},
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            print(f"Request failed for {date_time_str}: {exc}")
            continue

        # Step 5: Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to fetch data for {date_time_str}. Status code: {response.status_code}")
            continue

        try:
            json_data = response.json()
        except ValueError as exc:
            # A 200 response whose body is not valid JSON
            print(f"Invalid JSON returned for {date_time_str}: {exc}")
            continue

        # Remember every station described by this response; when a station
        # appears in several responses the most recent description wins.
        for station in json_data.get("metadata", {}).get("stations", []):
            station_id = station.get("id")
            if station_id is not None:
                stations_by_id[station_id] = station

        # Check if 'items' is available to avoid missing data
        items = json_data.get("items", [])
        if not items:
            print(f"No data returned for {date_time_str}.")
            continue

        # Step 6: Parse the JSON to extract temperature readings
        date_label = current_date.strftime("%Y-%m-%d")
        for item in items:
            readings = item.get("readings", [])
            if not readings:
                print(f"No readings found for {date_time_str}.")
                continue

            temp_df = pd.DataFrame(readings)
            # Tag each reading with the requested date as a YYYY-MM-DD string
            temp_df['date'] = date_label
            data_frames.append(temp_df)

# Step 7: Combine all DataFrames into one
if data_frames:
    combined_df = pd.concat(data_frames, ignore_index=True)

    # Add metadata, like station information
    if stations_by_id:
        stations = pd.DataFrame(list(stations_by_id.values()))
        combined_df = combined_df.merge(stations, left_on="station_id", right_on="id", how="left")
        # 'id' duplicates 'station_id' after the merge
        combined_df = combined_df.drop(columns=["id"])
    else:
        print("No station metadata returned; saving readings without station details.")

    # Preview the DataFrame
    print(combined_df.head())
    # info() prints its own summary and returns None, so it is not wrapped in print()
    combined_df.info()
    combined_df.to_csv(output_path, index=False)  # Save to CSV file
else:
    print("No data collected.")

  station_id  value        date device_id                 name  \
0       S109   31.9  2023-10-01      S109  Ang Mo Kio Avenue 5   
1       S117   30.7  2023-10-01      S117          Banyan Road   
2       S107   30.0  2023-10-01      S107   East Coast Parkway   
3        S43   31.0  2023-10-01       S43       Kim Chuan Road   
4        S44   31.5  2023-10-01       S44       Nanyang Avenue   

                                        location  
0    {'latitude': 1.3764, 'longitude': 103.8492}  
1      {'latitude': 1.256, 'longitude': 103.679}  
2    {'latitude': 1.3135, 'longitude': 103.9625}  
3    {'latitude': 1.3399, 'longitude': 103.8878}  
4  {'latitude': 1.34583, 'longitude': 103.68166}  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4741 entries, 0 to 4740
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   station_id  4741 non-null   object 
 1   value       4741 non-null   float64
 2   date        4741 non-n