In [None]:
from datetime import datetime, timedelta  # For handling dates

import pandas as pd  # For data manipulation
import requests  # For sending HTTP requests
from sqlalchemy import create_engine, text  # For database operations

# Script configuration: API endpoint, PostgreSQL connection settings and
# the inclusive date window to request.
API_URL = "https://api.data.gov.sg/v1/environment/air-temperature"

# PostgreSQL connection settings -- update these for your environment.
DB_USER = "postgres"          # PostgreSQL username
DB_PASS = "admin"             # PostgreSQL password
DB_HOST = "localhost"         # Database host
DB_PORT = "5432"              # Database port (kept as a string for the URL)
DB_NAME = "data_gov_project"  # PostgreSQL database name

# First and last timestamps of the window to fetch.
START_DATE = datetime(year=2023, month=10, day=1, hour=0, minute=0)
END_DATE = datetime(year=2024, month=9, day=30, hour=23, minute=59)

def fetch_air_temperature_data(api_url, start_date, end_date, timeout=30):
    """Fetch air temperature data from the API for a given date range.

    One GET request is sent per hour, starting at ``start_date`` and
    continuing while the current timestamp is ``<= end_date``. Each request
    passes the timestamp as the ``date_time`` query parameter, formatted as
    ``YYYY-MM-DDTHH:MM:SS``.

    A failure for one hour (network error, non-200 status, invalid JSON or
    an empty ``items`` list) is reported on stdout and skipped, so a single
    bad request does not discard the data already collected.

    Args:
        api_url: URL of the air-temperature endpoint.
        start_date: First ``datetime`` to request (inclusive).
        end_date: Last ``datetime`` to request (inclusive).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of DataFrames as appended by ``process_items``; empty when
        nothing could be fetched or the range is empty.
    """
    data_frames = []  # Filled in place by process_items

    # Nothing to request for an empty range; avoid opening a session at all.
    if start_date > end_date:
        return data_frames

    current_date = start_date

    # A single session reuses the underlying connection across the many
    # hourly requests instead of opening a new one each time.
    with requests.Session() as session:
        while current_date <= end_date:
            date_time_str = current_date.strftime("%Y-%m-%dT%H:%M:%S")
            # Advance now so every `continue` below moves on to the next hour.
            current_date += timedelta(hours=1)

            try:
                response = session.get(
                    api_url,
                    params={"date_time": date_time_str},
                    timeout=timeout,
                )
            except requests.RequestException as exc:
                print(f"Request failed for {date_time_str}: {exc}")
                continue

            if response.status_code != 200:
                print(f"Failed to fetch data for {date_time_str}. Status code: {response.status_code}")
                continue

            try:
                json_data = response.json()
            except ValueError as exc:
                # requests raises a ValueError subclass for undecodable bodies.
                print(f"Invalid JSON returned for {date_time_str}: {exc}")
                continue

            items = json_data.get("items", [])
            if not items:
                print(f"No data returned for {date_time_str}.")
                continue

            # Process the readings from the data structure
            process_items(items, json_data, data_frames)

    return data_frames

def process_items(items, json_data, data_frames):
    """Turn every reading in ``items`` into a DataFrame and collect it.

    Args:
        items: Iterable of dicts, each optionally holding a ``'timestamp'``
            and a ``'readings'`` list.
        json_data: The full response payload; accepted but not used here.
        data_frames: List that receives one DataFrame per reading. It is
            modified in place; nothing is returned.
    """
    for entry in items:
        # All readings of an entry share that entry's timestamp.
        observed_at = entry.get('timestamp')

        for reading in entry.get('readings', []):
            data_frames.append(create_reading_dataframe(reading, observed_at))

def create_reading_dataframe(sensor, timestamp):
    """Build a one-row DataFrame for a single station reading.

    Args:
        sensor: Mapping with ``'station_id'`` and ``'value'`` keys.
        timestamp: Value stored unchanged in the ``airtemp_date`` column.

    Returns:
        A DataFrame with columns ``station_id``, ``temperature`` and
        ``airtemp_date`` (in that order) and exactly one row.

    Raises:
        KeyError: If ``sensor`` lacks ``'station_id'`` or ``'value'``.
    """
    # Column names must match the target table's column names.
    row = {
        'station_id': [sensor['station_id']],
        'temperature': [sensor['value']],
        'airtemp_date': [timestamp],
    }
    return pd.DataFrame(row)

def load_data_to_postgres(data_frame):
    """Append the provided pandas DataFrame to the 'air_temp' table in PostgreSQL.

    The connection is built from the module-level ``DB_*`` settings. Errors
    raised while writing are reported on stdout rather than propagated.

    Args:
        data_frame: DataFrame whose rows are appended to ``air_temp``; the
            index is not written.
    """
    engine = create_engine(f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}')
    try:
        data_frame.to_sql('air_temp', engine, if_exists='append', index=False)  # Append mode
    except Exception as e:
        print(f"Error loading data into PostgreSQL: {e}")
    else:
        print(f"Successfully loaded {len(data_frame)} records to PostgreSQL table.")
    finally:
        # The engine is created per call, so release its connection pool here.
        engine.dispose()

def verify_data_in_db():
    """Print the number of rows in the 'air_temp' table.

    Used after loading to confirm that data reached the database. The
    connection is built from the module-level ``DB_*`` settings. Errors are
    reported on stdout rather than propagated.
    """
    engine = create_engine(f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}')
    try:
        with engine.connect() as connection:
            # text() wraps the raw SQL string in an executable construct.
            count = connection.execute(text("SELECT COUNT(*) FROM air_temp")).scalar()
            print(f"Total records in 'air_temp' table: {count}")  # Show count of rows
    except Exception as e:
        print(f"Error verifying data in PostgreSQL: {e}")
    finally:
        # The engine is created per call, so release its connection pool here.
        engine.dispose()

def verify_database_connection():
    """Check that a connection to the PostgreSQL database can be opened.

    The connection is built from the module-level ``DB_*`` settings. The
    outcome is printed to stdout and also returned so callers can react.

    Returns:
        True if a connection was opened successfully, False otherwise.
    """
    engine = create_engine(f'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}')
    try:
        # Opening (and immediately closing) a connection is the whole check.
        with engine.connect():
            print("Database connection successful")
        return True
    except Exception as e:
        print(f"Error connecting to PostgreSQL: {e}")
        return False
    finally:
        # The engine is created per call, so release its connection pool here.
        engine.dispose()

def main():
    """Run the pipeline: check the database, fetch readings, load and verify.

    Steps:
        1. Check database connectivity.
        2. Fetch readings for ``START_DATE``..``END_DATE`` from ``API_URL``.
        3. Combine the per-reading DataFrames, convert ``airtemp_date`` to
           datetime and append the result to PostgreSQL.
        4. Print the resulting row count of the target table.
    """
    print("Starting the script...")

    # Fetching the whole date range takes a long time, so stop early when the
    # connectivity check explicitly reports failure. A None result (no status
    # reported) is treated as "unknown" and the run continues.
    if verify_database_connection() is False:
        print("Aborting: database connection could not be established.")
        return

    data_frames = fetch_air_temperature_data(API_URL, START_DATE, END_DATE)  # Fetch the data
    print(f"Fetched data frames: {len(data_frames)}")

    if data_frames:
        combined_df = pd.concat(data_frames, ignore_index=True)

        # Clean up DataFrame
        combined_df['airtemp_date'] = pd.to_datetime(combined_df['airtemp_date'])  # Convert airtemp_date to datetime
        # Keep only the first occurrence of any repeated column label
        # (this drops duplicated columns, not duplicated rows).
        combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]
        combined_df = combined_df[['station_id', 'temperature', 'airtemp_date']]  # Rearrange columns

        load_data_to_postgres(combined_df)  # Load the data into PostgreSQL
        verify_data_in_db()  # Verify the data in the database
    else:
        print("No data collected.")

    print("Script completed.")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()