In [None]:
!pip install sqlalchemy
!pip install psycopg2

In [None]:
!pip install sqlalchemy_utils

In [88]:
# v2 imports temperature records for a date range across 1 year using the datetime library
import requests
import pandas as pd
from datetime import datetime, timedelta
import sqlalchemy as db
from sqlalchemy_utils import create_database

In [90]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# Step 1: Define the API endpoint
url = "https://api.data.gov.sg/v1/environment/air-temperature"

# Step 2: Define the date range
start_date = datetime(2024, 9, 1) #YYYY-MM-DD
end_date = datetime(2024, 9, 30) #YYYY-MM-DD

# Create an empty list to store DataFrames
data_frames = []

# Step 3: Loop through the date range
current_date = start_date
while current_date <= end_date:
    # Format date in correct ISO format
    date_time_str = current_date.strftime("%Y-%m-%dT12:00:00")  # Using noon for consistency
    params = {
        "date_time": date_time_str
    }

    # Step 4: Send a GET request to the API
    response = requests.get(url, params=params)

    # Step 5: Check if the request was successful
    if response.status_code == 200:
        json_data = response.json()

        # Check if 'items' is available to avoid missing data
        items = json_data.get("items", [])
        if not items:
            print(f"No data returned for {date_time_str}.")
            current_date += timedelta(days=1)  # Move to the next date
            continue  # Continue to the next iteration

        # Step 6: Parse the JSON to extract temperature readings
        for item in items:
            readings = item.get("readings", [])
            if readings:  # Only proceed if there are readings
                # Add the readings to the list as a DataFrame
                temp_df = pd.DataFrame(readings)

                # Add the date as a new column
                temp_df['date'] = current_date.date()  # Add current date for each reading
                
                # Append the DataFrame with the date to the list
                data_frames.append(temp_df)
            else:
                print(f"No readings found for {date_time_str}.")

    else:
        print(f"Failed to fetch data for {date_time_str}. Status code: {response.status_code}")

    # Move to the next day
    current_date += timedelta(days=1)



# Step 7: Combine all DataFrames into one
if data_frames:
    combined_df = pd.concat(data_frames, ignore_index=True)

    # Convert 'date' column to string format YYYY-MM-DD
    combined_df['date'] = pd.to_datetime(combined_df['date']).dt.strftime('%Y-%m-%d')

    # Add metadata, like station information
    stations = pd.DataFrame(json_data["metadata"]["stations"])
    combined_df = combined_df.merge(stations, left_on="station_id", right_on="id", how="left")
    combined_df.drop(columns=["id"], inplace=True)
    
    # Expand the 'location' column into separate columns
    combined_df[['latitude', 'longitude']] = pd.json_normalize(combined_df['location'])

    # Drop the original 'location' column if desired 
    combined_df.drop(columns=['location'], inplace=True) 

    combined_df = combined_df.reset_index()
    combined_df = combined_df.rename(columns={"index":"id"})
    combined_df['id'] = combined_df.index 

    combined_df = combined_df.rename(columns={'id':'id','station_Id':'station_id',
                        'value':'temperature','date':'weather_date','device_id':'device_id','name':'station_name',
                            'latitude':'latitude','longitude':'longitude'})
    
    # Preview the DataFrame
    display(combined_df.head())
    print(combined_df.info())
    #combined_df.to_csv('airtempacrosssg.csv', index=False)  # Save to CSV file
else:
    print("No data collected.")

Unnamed: 0,id,station_id,temperature,weather_date,device_id,station_name,latitude,longitude
0,0,S109,31.7,2024-09-01,S109,Ang Mo Kio Avenue 5,1.3764,103.8492
1,1,S117,31.2,2024-09-01,S117,Banyan Road,1.256,103.679
2,2,S50,31.3,2024-09-01,S50,Clementi Road,1.3337,103.7768
3,3,S107,29.4,2024-09-01,S107,East Coast Parkway,1.3135,103.9625
4,4,S43,31.8,2024-09-01,S43,Kim Chuan Road,1.3399,103.8878


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 408 entries, 0 to 407
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            408 non-null    int64  
 1   station_id    408 non-null    object 
 2   temperature   408 non-null    float64
 3   weather_date  408 non-null    object 
 4   device_id     379 non-null    object 
 5   station_name  379 non-null    object 
 6   latitude      379 non-null    float64
 7   longitude     379 non-null    float64
dtypes: float64(3), int64(1), object(4)
memory usage: 25.6+ KB
None


In [92]:
# Create connection engine

# Using username postgres, password admin, and EXISTING database testdb
engine = db.create_engine('postgresql://postgres:admin@localhost:5432/testdb') 

conn = engine.raw_connection()

# Create new tables in PostgreSQL

commands = (# TABLE 1: WEATHER
            '''
            DROP TABLE IF EXISTS weather;
            CREATE TABLE weather(id SERIAL PRIMARY KEY,
                                                station_id VARCHAR,
                                                temperature VARCHAR,
                                                weather_date DATE,
                                                device_id VARCHAR,
                                                station_name VARCHAR,
                                                latitude VARCHAR,
                                                longitude VARCHAR
                                                );
            ''')
            
# Initialize connection to PostgreSQL
cur = conn.cursor()

# Create cursor to execute SQL commands

#for command in commands:
cur.execute(commands)

# Commit changes
conn.commit()

# Close communication with server
cur.close()
conn.close()

In [94]:
combined_df.to_sql(name= 'weather', con = engine, if_exists= 'append', index= False) 

408