In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from sqlalchemy import create_engine, text
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define constants
API_URL = "https://api.data.gov.sg/v1/environment/air-temperature"
DB_CONFIG = {
    'user': 'postgres',
    'password': 'admin',
    'host': 'localhost',
    'port': '5432',
    'database': 'data_gov_project'
}
START_DATE = datetime(2023, 10, 1)
END_DATE = datetime(2024, 9, 30, 23, 59)

def fetch_air_temperature_data(date):
    """Fetch air temperature data from the API for a given date."""
    date_time_str = date.strftime("%Y-%m-%dT%H:%M:%S")
    params = {"date_time": date_time_str}
    
    try:
        response = requests.get(API_URL, params=params)
        response.raise_for_status()
        json_data = response.json()
        items = json_data.get("items", [])
        
        if not items:
            logging.warning(f"No data returned for {date_time_str}.")
            return None
        
        return process_items(items)
    except requests.RequestException as e:
        logging.error(f"Failed to fetch data for {date_time_str}. Error: {e}")
        return None

def process_items(items):
    """Process the fetched items and return a DataFrame."""
    data = []
    for item in items:
        readings = item.get('readings', [])
        timestamp = item.get('timestamp')
        for sensor in readings:
            data.append({
                'station_id': sensor['station_id'],
                'temperature': sensor['value'],
                'airtemp_date': timestamp
            })
    return pd.DataFrame(data)

def load_data_to_postgres(data_frame, engine):
    """Load the provided pandas DataFrame into the 'air_temp' table."""
    try:
        data_frame.to_sql('air_temp', engine, if_exists='append', index=False)
        logging.info(f"Successfully loaded {len(data_frame)} records to PostgreSQL table.")
    except Exception as e:
        logging.error(f"Error loading data into PostgreSQL: {e}")

def verify_data_in_db(engine):
    """Retrieves the number of rows from 'air_temp' table."""
    try:
        with engine.connect() as connection:
            result = connection.execute(text("SELECT COUNT(*) FROM air_temp"))
            count = result.fetchone()[0]
            logging.info(f"Total records in 'air_temp' table: {count}")
    except Exception as e:
        logging.error(f"Error verifying data in PostgreSQL: {e}")

def main():
    logging.info("Starting the script...")

    engine = create_engine(f'postgresql://{DB_CONFIG["user"]}:{DB_CONFIG["password"]}@{DB_CONFIG["host"]}:{DB_CONFIG["port"]}/{DB_CONFIG["database"]}')
    
    try:
        engine.connect()
        logging.info("Database connection successful")
    except Exception as e:
        logging.error(f"Error connecting to PostgreSQL: {e}")
        return

    date_range = pd.date_range(start=START_DATE, end=END_DATE, freq='H')
    
    with ThreadPoolExecutor(max_workers=10) as executor:
        future_to_date = {executor.submit(fetch_air_temperature_data, date): date for date in date_range}
        data_frames = []
        
        for future in as_completed(future_to_date):
            df = future.result()
            if df is not None:
                data_frames.append(df)

    if data_frames:
        combined_df = pd.concat(data_frames, ignore_index=True)
        combined_df['airtemp_date'] = pd.to_datetime(combined_df['airtemp_date'])
        combined_df = combined_df.drop_duplicates()
        
        load_data_to_postgres(combined_df, engine)
        verify_data_in_db(engine)
    else:
        logging.warning("No data collected.")

    logging.info("Script completed.")

if __name__ == "__main__":
    main()

2024-11-26 22:12:51,255 - INFO - Starting the script...
2024-11-26 22:12:51,321 - INFO - Database connection successful
  date_range = pd.date_range(start=START_DATE, end=END_DATE, freq='H')
2024-11-26 22:14:35,128 - INFO - Successfully loaded 113203 records to PostgreSQL table.
2024-11-26 22:14:35,135 - INFO - Total records in 'air_temp' table: 113203
2024-11-26 22:14:35,135 - INFO - Script completed.
