In [None]:
!pip install sqlalchemy
!pip install psycopg2

In [None]:
!pip install sqlalchemy_utils

In [1]:
# v2 imports temperature records for a date range across 1 year using the datetime library
import os
from datetime import datetime, timedelta

import pandas as pd
import requests
import sqlalchemy as db
from sqlalchemy_utils import create_database

In [6]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# Step 1: Define the API endpoint and request settings
url = "https://api.data.gov.sg/v1/environment/air-temperature"
REQUEST_TIMEOUT_SECONDS = 30  # fail fast instead of hanging the notebook on a stalled connection

# Inclusive start and end of the range; one request is sent per hour in between
start_date = datetime(2024, 10, 28, 0, 0)  # start_date = datetime(yyyy, mm, dd, hh, mm)
end_date = datetime(2024, 10, 31, 23, 0)  # end_date = datetime(yyyy, mm, dd, hh, mm)

# Columns (and their order) of the final table
WEATHER_COLUMNS = [
    "id", "station_id", "temperature", "station_name",
    "weather_date", "latitude", "longitude",
]

# One DataFrame per successfully fetched hour
all_data = []

# Step 2: Request the readings for every hour in the range
current_date = start_date
while current_date <= end_date:
    date_str = current_date.strftime("%Y-%m-%dT%H:%M:%S")  # Format expected by the 'date_time' parameter
    params = {"date_time": date_str}

    # Advance first so that every `continue` below still moves on to the next hour
    current_date += timedelta(hours=1)

    try:
        response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        # A single network failure should not discard the hours already collected
        print(f"Failed to fetch data for {date_str}: {exc}")
        continue

    if response.status_code != 200:
        print(f"Failed to fetch data for {date_str} with status code {response.status_code}")
        continue

    json_data = response.json()

    # Flatten the "readings" of every item into one list of {station_id, value} records
    items = json_data["items"]
    readings = [reading for item in items for reading in item["readings"]]
    if not readings:
        # An empty frame has no 'station_id' column, so the merge below would raise
        print(f"No readings returned for {date_str}")
        continue

    # Station metadata: json_normalize flattens the nested 'location' dict into
    # 'location.latitude' / 'location.longitude', so the coordinates are picked
    # by name rather than by column position. 'device_id' is intentionally not kept.
    stations = (
        pd.json_normalize(json_data["metadata"]["stations"])
        .rename(columns={
            "id": "station_id",
            "name": "station_name",
            "location.latitude": "latitude",
            "location.longitude": "longitude",
        })
        .loc[:, ["station_id", "station_name", "latitude", "longitude"]]
    )

    # Attach the metadata to each reading and tag the rows with the requested timestamp
    df = (
        pd.DataFrame(readings)
        .merge(stations, on="station_id", how="left")
        .rename(columns={"value": "temperature"})
        .assign(weather_date=date_str)
    )

    all_data.append(df)

# Step 3: Combine the hourly frames into one table
if all_data:
    final_df = (
        pd.concat(all_data, ignore_index=True)
        .assign(weather_date=lambda frame: pd.to_datetime(frame["weather_date"]))
        .rename_axis("id")   # the fresh 0..n-1 index becomes the 'id' column
        .reset_index()
        .loc[:, WEATHER_COLUMNS]
    )

    # Preview the final DataFrame
    print(final_df.head())
    final_df.info()  # info() prints by itself; wrapping it in print() adds a stray "None"

    # Optional: Save the data to a CSV file
    #final_df.to_csv('airtempacrosssg_hourly.csv', index=False)
else:
    # Define an empty frame so later cells neither fail with NameError nor
    # reuse a stale `final_df` left in the kernel by an earlier run.
    final_df = pd.DataFrame(columns=WEATHER_COLUMNS)
    print("No data was fetched during the specified date range.")


   id station_id  temperature         station_name weather_date  latitude  \
0   0       S109         30.3  Ang Mo Kio Avenue 5   2024-10-28    1.3764   
1   1       S117         29.2          Banyan Road   2024-10-28    1.2560   
2   2        S50         29.8        Clementi Road   2024-10-28    1.3337   
3   3       S107         29.6   East Coast Parkway   2024-10-28    1.3135   
4   4        S43         29.7       Kim Chuan Road   2024-10-28    1.3399   

   longitude  
0   103.8492  
1   103.6790  
2   103.7768  
3   103.9625  
4   103.8878  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1314 entries, 0 to 1313
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   id            1314 non-null   int64         
 1   station_id    1314 non-null   object        
 2   temperature   1314 non-null   float64       
 3   station_name  1314 non-null   object        
 4   weather_date  1314 non-null   dateti

In [8]:
# Create connection engine
#
# The connection URL can be supplied through the WEATHER_DB_URL environment
# variable so real credentials never have to be written into the notebook.
# The fallback is the local development setup: username postgres, password
# admin, and the EXISTING database testdb.
DATABASE_URL = os.environ.get(
    "WEATHER_DB_URL",
    "postgresql://postgres:admin@localhost:5432/testdb",
)
engine = db.create_engine(DATABASE_URL)

# SQL that (re)creates the target table.
# NOTE: this DROPs any existing `weather` table and its rows.
# Measurements and coordinates are stored as numbers so they can be
# compared and aggregated in SQL. `device_id` is kept in the schema although
# the fetch cell does not currently populate it.
commands = '''
    DROP TABLE IF EXISTS weather;
    CREATE TABLE weather(
        id SERIAL PRIMARY KEY,
        station_id VARCHAR,
        temperature DOUBLE PRECISION,
        weather_date TIMESTAMP,
        device_id VARCHAR,
        station_name VARCHAR,
        latitude DOUBLE PRECISION,
        longitude DOUBLE PRECISION
    );
'''

# Open a raw DBAPI connection to PostgreSQL
conn = engine.raw_connection()
try:
    # Create a cursor to execute the SQL commands
    cur = conn.cursor()
    try:
        cur.execute(commands)

        # Commit changes
        conn.commit()
    finally:
        cur.close()
finally:
    # Always release the connection, even if the DDL failed
    conn.close()

In [10]:
# Append the fetched readings to the `weather` table; the DataFrame index is not
# written as a column. The cell displays the value returned by `to_sql`.
final_df.to_sql(
    name='weather',
    con=engine,
    if_exists='append',
    index=False,
)

314