In [13]:
import requests  # For sending HTTP requests
import pandas as pd  # For data manipulation
from datetime import datetime, timedelta  # For handling dates
from sqlalchemy import create_engine, text  # For database operations

# Define constants
API_URL = "https://api.data.gov.sg/v1/environment/air-temperature"
DB_USER = 'postgres'  # Update with your PostgreSQL username
DB_PASS = 'admin'      # Update with your PostgreSQL password
DB_HOST = 'localhost'  # Update with your database host
DB_PORT = '5432'       # Update with your database port
DB_NAME = 'data_gov_project'  # Update with your PostgreSQL database name
START_DATE = datetime(2023, 10, 1)  # Start date for data collection
END_DATE = datetime(2024, 9, 30)    # End date for data collection

def fetch_air_temperature_data(api_url, start_date, end_date):
    """Fetch air temperature data from the API for a given date range."""
    data_frames = []  # List to store DataFrames for each day's temperature readings
    current_date = start_date

    while current_date <= end_date:
        # Format current date for the API request
        date_time_str = current_date.strftime("%Y-%m-%dT12:00:00")
        params = {"date_time": date_time_str}

        # Send a GET request to the API
        response = requests.get(api_url, params=params)

        if response.status_code == 200:
            json_data = response.json()
            print(json_data)  # Debugging: Print the full JSON response
            items = json_data.get("items", [])

            if not items:
                print(f"No data returned for {date_time_str}.")
            else:
                # Process the readings from the data structure
                process_items(items, json_data, data_frames)  # Pass json_data here
        else:
            print(f"Failed to fetch data for {date_time_str}. Status code: {response.status_code}")

        current_date += timedelta(days=1)

    return data_frames

def process_items(items, json_data, data_frames):
    """Process the fetched items and store temperature readings in DataFrames."""
    for item in items:
        readings = item.get('readings', [])
        timestamp = item.get('timestamp')  # Access the timestamp from the current item

        for sensor in readings:
            # Creating a DataFrame for each reading
            airtemp_df = create_reading_dataframe(sensor, timestamp)

            # Get station metadata
            station_metadata = get_station_metadata(sensor['station_id'], json_data)
            if station_metadata:
                station_info(airtemp_df, station_metadata)

            # Append the list of DataFrames
            data_frames.append(airtemp_df)

def create_reading_dataframe(sensor, timestamp):
    """Create a DataFrame from the reading."""
    return pd.DataFrame({
       'station_id': [sensor['station_id']],
        'temperature': [sensor['value']],
        'timestamp': [timestamp]  # Use the timestamp from the item directly
    })

def get_station_metadata(station_id, json_data):
    """Fetch station metadata for a given station ID."""
    for station in json_data['metadata']['stations']:
        if station['id'] == station_id:
            return {
                'name': station['name'],
                'latitude': station['location']['latitude'],
                'longitude': station['location']['longitude']
            }
    return None

def station_info(df, station_metadata):
    """Add latitude, longitude, and station_name to the DataFrame."""
    df['latitude'] = station_metadata.get('latitude')
    df['longitude'] = station_metadata.get('longitude')
    df['station_name'] = station_metadata.get('name')

def load_data_to_postgres(data_frame):
    """
    Load the provided pandas DataFrame into the 'air_temperature' table in your PostgreSQL database.
    """
    engine = create_engine(f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}')
    try:
        data_frame.to_sql('air_temperature', engine, if_exists='append', index=False)  # Append mode
        print(f"Successfully loaded {len(data_frame)} records to PostgreSQL table.")
    except Exception as e:
        print(f"Error loading data into PostgreSQL: {e}")
        
def verify_data_in_db():
    """
    Retrieves number of rows from 'air_temperature' table to verify data was loaded successfully.
    """
    engine = create_engine(f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}')
    try:
        with engine.connect() as connection:
            result = connection.execute(text("SELECT COUNT(*) FROM air_temperature"))
            count = result.fetchone()[0]
            print(f"Total records in 'air_temperature' table: {count}")  # Show count of rows
    except Exception as e:
        print(f"Error verifying data in PostgreSQL: {e}")

def verify_database_connection():
    """
    Verifies the database connection by printing a message if successful.
    """
    engine = create_engine(f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}')
    try:
        with engine.connect() as connection:
            print("Database connection successful")
    except Exception as e:
        print(f"Error connecting to PostgreSQL: {e}")

def create_table():
    """
    Creates the 'air_temperature' table in the database.
    """
    engine = create_engine(f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}')
    try:
        with engine.connect() as connection:
            connection.execute(text("""
                CREATE TABLE IF NOT EXISTS air_temperature (
                    id SERIAL PRIMARY KEY,
                    timestamp TIMESTAMP,
                    temperature REAL,
                    station_id VARCHAR(255),
                    station_name VARCHAR(255),
                    latitude REAL,
                    longitude REAL
                )
            """))
            print("Table created successfully")
    except Exception as e:
        print(f"Error creating table in PostgreSQL: {e}")

def main():
    print("Starting the script...")

    verify_database_connection() # Verify database connection

    create_table() # Create the table

    data_frames = fetch_air_temperature_data(API_URL, START_DATE, END_DATE) # Fetch the data
    print(f"Fetched data frames: {len(data_frames)}")

    if data_frames:
        combined_df = pd.concat(data_frames, ignore_index=True)

        # Clean up DataFrame
        combined_df['timestamp'] = pd.to_datetime(combined_df['timestamp'])  # Convert timestamp to datetime
        combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]  # Remove duplicates
        combined_df = combined_df[['timestamp', 'temperature','station_id','station_name', 'latitude', 'longitude']]  # Rearrange columns

        load_data_to_postgres(combined_df) # Load the data into PostgreSQL

        verify_data_in_db() # Verify the data in the database
    else:
        print("No data collected.")
    
    print("Script completed.")

if __name__ == "__main__":
    main()

Starting the script...
Database connection successful
Table created successfully
{'metadata': {'stations': [{'id': 'S109', 'device_id': 'S109', 'name': 'Ang Mo Kio Avenue 5', 'location': {'latitude': 1.3764, 'longitude': 103.8492}}, {'id': 'S117', 'device_id': 'S117', 'name': 'Banyan Road', 'location': {'latitude': 1.256, 'longitude': 103.679}}, {'id': 'S107', 'device_id': 'S107', 'name': 'East Coast Parkway', 'location': {'latitude': 1.3135, 'longitude': 103.9625}}, {'id': 'S43', 'device_id': 'S43', 'name': 'Kim Chuan Road', 'location': {'latitude': 1.3399, 'longitude': 103.8878}}, {'id': 'S44', 'device_id': 'S44', 'name': 'Nanyang Avenue', 'location': {'latitude': 1.34583, 'longitude': 103.68166}}, {'id': 'S121', 'device_id': 'S121', 'name': 'Old Choa Chu Kang Road', 'location': {'latitude': 1.37288, 'longitude': 103.72244}}, {'id': 'S111', 'device_id': 'S111', 'name': 'Scotts Road', 'location': {'latitude': 1.31055, 'longitude': 103.8365}}, {'id': 'S60', 'device_id': 'S60', 'name': 