# Python script to extract data from data.gov.sg web API

* Extracts the relative humidity readings across Singapore for a given date range and loads them into data frames using Python
* Exports the data to CSV files
* URL where the data is extracted from: https://data.gov.sg/dataset/realtime-weather-readings?resource_id=59eb2883-2ceb-4d16-85f0-7e3a3176ef46



In [1]:
import requests
import json
import pandas as pd
import numpy as np
import datetime as date
import os


Ask the user for the start and end dates of the required time period, each in the format 'yyyy-mm-dd':

In [2]:
def _parse_date(text):
    """Return *text* parsed as a 'yyyy-mm-dd' calendar date, or None if invalid.

    A value is accepted only when it is exactly 10 characters long, names a
    real calendar date and its year is not later than the current year.
    """
    # Together with the strptime format below, the length check forces
    # zero-padded fields (4-digit year, 2-digit month, 2-digit day).
    if len(text) != 10:
        return None
    try:
        # `date` is the `datetime` module (imported as `date` at the top of the
        # file). strptime rejects non-numeric fields, month 0, day 0 and
        # impossible dates such as 2022-02-30 by raising ValueError.
        parsed = date.datetime.strptime(text, '%Y-%m-%d').date()
    except ValueError:
        return None
    # Upper bound on the year, computed at run time so the check does not go
    # stale (presumably the intent was "no years in the future" - confirm).
    if parsed.year > date.date.today().year:
        return None
    return parsed


# Keep asking until a valid start date is entered.
while True:
    start_date = input("Enter a start date in the format 'yyyy-mm-dd':")
    start_day = _parse_date(start_date)
    if start_day is not None:
        break

# Keep asking until a valid end date that is not before the start date is entered.
while True:
    end_date = input("Enter a end date in the format 'yyyy-mm-dd':")
    end_day = _parse_date(end_date)
    if end_day is not None and start_day <= end_day:
        break

Make a request and extract data from the web API:

In [3]:
#Create a list of dates from the input time period (one entry per day)
datelist = pd.period_range(start=start_date, end=end_date)

#dictionary for readings: one entry per (timestamp, station) measurement
readings = {"timestamp": [], "station_id": [], "value": []}
#dictionary for stations: one entry per distinct station id
stations = {"id": [], "device_id": [], "name": [], "latitude": [], "longitude": []}
#station ids already stored, so a multi-day range does not repeat stations
seen_station_ids = set()

#The loop variable is deliberately not called `date`: that name is the
#alias of the datetime module imported at the top of the file.
for day in datelist:
    #pull data from api for the current day; the timeout stops a stalled
    #connection from hanging the script forever
    response_API = requests.get(
        "https://api.data.gov.sg/v1/environment/relative-humidity",
        params={"date": str(day)},
        timeout=30,
    )
    #fail loudly on an HTTP error instead of parsing an error payload
    response_API.raise_for_status()
    data = response_API.text
    parse_json = json.loads(data)

    #Add readings to dictionary
    for item in parse_json["items"]:
        for reading in item["readings"]:
            readings["timestamp"].append(item["timestamp"])
            readings["station_id"].append(reading["station_id"])
            readings["value"].append(reading["value"])

    #Add stations to dictionary; the station metadata is read from every
    #daily response, so only ids not seen before are stored
    for station in parse_json["metadata"]["stations"]:
        if station["id"] in seen_station_ids:
            continue
        seen_station_ids.add(station["id"])
        stations["id"].append(station["id"])
        stations["device_id"].append(station["device_id"])
        stations["name"].append(station["name"])
        stations["latitude"].append(station["location"]["latitude"])
        stations["longitude"].append(station["location"]["longitude"])


Store the data in data frames using pandas:

In [4]:
#build one data frame per dictionary collected above
readings_df = pd.DataFrame(readings)
stations_df = pd.DataFrame(stations)

#replace the default 0-based row labels with labels counting from 1
for _frame in (readings_df, stations_df):
    _frame.index = np.arange(1, len(_frame) + 1)

View the data frames:

In [5]:
#show the first 10 rows of the readings data frame
readings_df.head(n=10)

Unnamed: 0,timestamp,station_id,value
1,2022-12-01T00:01:00+08:00,S109,91.3
2,2022-12-01T00:01:00+08:00,S50,97.3
3,2022-12-01T00:01:00+08:00,S107,87.0
4,2022-12-01T00:01:00+08:00,S43,88.6
5,2022-12-01T00:01:00+08:00,S108,99.4
6,2022-12-01T00:01:00+08:00,S44,92.9
7,2022-12-01T00:01:00+08:00,S121,98.6
8,2022-12-01T00:01:00+08:00,S111,87.9
9,2022-12-01T00:01:00+08:00,S102,84.3
10,2022-12-01T00:01:00+08:00,S60,86.9


In [6]:
#show the first 10 rows of the stations data frame
stations_df.head(n=10)

Unnamed: 0,id,device_id,name,latitude,longitude
1,S109,S109,Ang Mo Kio Avenue 5,1.3764,103.8492
2,S50,S50,Clementi Road,1.3337,103.7768
3,S107,S107,East Coast Parkway,1.3135,103.9625
4,S43,S43,Kim Chuan Road,1.3399,103.8878
5,S108,S108,Marina Gardens Drive,1.2799,103.8703
6,S44,S44,Nanyang Avenue,1.34583,103.68166
7,S121,S121,Old Choa Chu Kang Road,1.37288,103.72244
8,S111,S111,Scotts Road,1.31055,103.8365
9,S102,S102,Semakau Landfill,1.189,103.768
10,S60,S60,Sentosa,1.25,103.8279


Export the dataframes to CSV files:

In [None]:
#Write both data frames to the current working directory. The file names
#carry the start date of the requested period.
export_dir = os.getcwd()
#os.path.join uses the separator of the running platform; a hard-coded
#backslash only acts as a path separator on Windows.
readings_df.to_csv(os.path.join(export_dir, f'readings_df_{start_date}.csv'), index=True, header=True)
stations_df.to_csv(os.path.join(export_dir, f'stations_df_{start_date}.csv'), index=True, header=True)
print(f"Export completed! Files are in the path '{export_dir}'.")