# Read Me
* This file contains two sets of weather data for Singapore from January to June 2025. 
* The column names are described here: https://dev.meteostat.net/formats.html#meteorological-parameters
* External data "Cloud coverage (%)" and "solar irradiance (W/m²·h)" have been integrated into the weather data.
* Both datasets are located in the output folder named weather_data.

1. Changi Station Weather Data (weather_changi.csv)

* This dataset contains weather observations from the Changi meteorological station, which is the official and most representative station for Singapore’s overall weather.

* Changi Station data is straightforward and widely used as the standard reference due to its long-term reliability and comprehensive coverage. Use this dataset for general analysis where a single, authoritative source is sufficient to represent Singapore’s climate.

2. Five-Region Weather Data (weather_region.csv)

* This dataset includes weather data from five representative stations across Singapore’s main regions: East (Changi), West (Jurong), South (Harbourfront/Marina South), North (Sembawang), and Central (Marina Centre).

* It captures local variations and microclimates across different parts of the city. Use this dataset when regional weather differences are important, or when a more detailed spatial analysis of Singapore’s climate is needed.

* The Region column marks the source area of each data record.

## Changi Station Weather Data

In [6]:
# Cloud Coverage and Solar Irradiance
import pandas as pd
import requests
from datetime import datetime, timedelta

# Query point used for the "Changi Station" cloud cover / irradiance series.
# NOTE(review): this longitude (103.8198) lies west of the 'Central' point
# (103.8545) defined in region_coords further down this file, so it may not be
# Changi's real location - confirm before relying on the station label.
latitude = 1.3521
longitude = 103.8198

# Timeframe (calendar dates, passed to the API together with timezone=Asia/Singapore)
start_date = "2025-01-01"
end_date = "2025-06-30"

# Open-Meteo archive API URL
#   - hourly=cloudcover,shortwave_radiation
#   - timezone=Asia/Singapore
#   - start_date, end_date
api_url = (
    f"https://archive-api.open-meteo.com/v1/archive?"
    f"latitude={latitude}&longitude={longitude}"
    f"&start_date={start_date}&end_date={end_date}"
    f"&hourly=cloudcover,shortwave_radiation"
    f"&timezone=Asia/Singapore"
)

# Send the request. The timeout keeps the notebook from hanging forever on a
# stalled connection.
response = requests.get(api_url, timeout=60)

# Stop on failure instead of only printing: later cells read
# df_cr_30min_interpolated, and continuing would either raise a NameError
# there or silently reuse a stale frame from a previous run.
if response.status_code != 200:
    raise RuntimeError(
        f"Open-Meteo request failed with HTTP status {response.status_code}"
    )

data = response.json()

# Extract the timestamps and the two hourly metrics
times = data['hourly']['time']  # Time
cloudcover = data['hourly']['cloudcover']  # cloud cover, %
solar_radiation = data['hourly']['shortwave_radiation']  # shortwave radiation

df_cloud_rad = pd.DataFrame({
    'time': pd.to_datetime(times),
    'cloudcover (%)': cloudcover,
    'shortwave_radiation (W/m²·h)': solar_radiation
})

# Resample to 30 minutes with time-weighted linear interpolation.
# The 30-minute grid spans the first to the last hourly timestamp, so no
# half-hour slot is created after the final hourly value.
df_cloud_rad = df_cloud_rad.set_index('time')
time_30min = pd.date_range(
    start=df_cloud_rad.index.min(),
    end=df_cloud_rad.index.max(),
    freq='30min',
)
df_cr_30min = df_cloud_rad.reindex(time_30min)
df_cr_30min_interpolated = df_cr_30min.interpolate(method='time')

# Check data
print(df_cr_30min_interpolated.head())

                     cloudcover (%)  shortwave_radiation (W/m²·h)
2025-01-01 00:00:00           100.0                           0.0
2025-01-01 00:30:00           100.0                           0.0
2025-01-01 01:00:00           100.0                           0.0
2025-01-01 01:30:00           100.0                           0.0
2025-01-01 02:00:00           100.0                           0.0


In [7]:
# Regular Weather data from Meteostat
# pip install meteostat
from meteostat import Stations, Hourly, Point
import matplotlib.pyplot as plt

In [9]:
# Select Changi as the most representative weather station for Singapore.
# The station is whichever one Meteostat reports as nearest to this point.
# NOTE(review): this longitude (103.8198) lies west of the 'Central' point
# (103.8545) in region_coords further down this file - confirm that the
# nearest station to this point really is Changi.
lat, lon = 1.3521, 103.8198
stations = Stations().nearby(lat, lon)
changi_station = stations.fetch(1)            # Find the nearest station
changi_station_id = changi_station.index[0]          # Meteostat station ID

# Download hourly data. The fetched index is in UTC, so the query starts at
# 2024-12-31 16:00 UTC, which is 2025-01-01 00:00 in Singapore (UTC+8).
# The end bound is also a UTC instant, so it reaches into 2025-07-01 local
# time; the surplus is trimmed after resampling (see below).
start = datetime(2024, 12, 31, 16, 0)
end   = datetime(2025, 6, 30, 23, 59)
changi_data = Hourly(changi_station_id, start, end)
changi_df_hour = changi_data.fetch()                 # DataFrame, index is UTC

# Convert UTC to Singapore time (SGT = Asia/Singapore)
if changi_df_hour.index.tz is None:
    changi_df_hour = changi_df_hour.tz_localize('UTC').tz_convert('Asia/Singapore')
else:
    changi_df_hour = changi_df_hour.tz_convert('Asia/Singapore')

# Remove time zone information so the index matches the naive local
# timestamps of the cloud cover / irradiance frame
changi_df_hour.index = changi_df_hour.index.tz_localize(None)

# Resample to 30 minutes, linear interpolation
# NOTE(review): every column is interpolated, including wdir (a direction in
# degrees) and coco (a condition code) per the Meteostat parameter docs linked
# in the README; linear blending of those may not be meaningful - confirm.
changi_df_30 = changi_df_hour.resample('30min').interpolate(method='time')

# Keep only the documented January-June 2025 window in local time. Trimming
# after the resample keeps the 23:30 slot of the last day, which is
# interpolated towards the following midnight value.
changi_df_30 = changi_df_30.loc['2025-01-01':'2025-06-30'].copy()

# Add cloudcover and shortwave_radiation columns (aligned on the time index)
changi_df_30['cloudcover (%)'] = df_cr_30min_interpolated['cloudcover (%)']
changi_df_30['shortwave_radiation (W/m²·h)'] = df_cr_30min_interpolated['shortwave_radiation (W/m²·h)']

In [10]:
# Preview the first five rows of the merged 30-minute Changi frame
print(changi_df_30.head(n=5))

                     temp  dwpt  rhum  prcp  snow  wdir  wspd  wpgt    pres  \
time                                                                          
2025-01-01 00:00:00  26.0  24.0  89.0   0.0  <NA>  20.0  11.2  <NA>  1011.0   
2025-01-01 00:30:00  26.0  24.0  89.0   0.0  <NA>  25.0  11.2  <NA>  1011.0   
2025-01-01 01:00:00  26.0  24.0  89.0   0.0  <NA>  30.0  11.2  <NA>  1011.0   
2025-01-01 01:30:00  26.0  24.0  89.0   0.0  <NA>  20.0   9.4  <NA>  1010.5   
2025-01-01 02:00:00  26.0  24.0  89.0   0.0  <NA>  10.0   7.6  <NA>  1010.0   

                     tsun  coco  cloudcover (%)  shortwave_radiation (W/m²·h)  
time                                                                           
2025-01-01 00:00:00  <NA>   3.0           100.0                           0.0  
2025-01-01 00:30:00  <NA>   3.0           100.0                           0.0  
2025-01-01 01:00:00  <NA>   3.0           100.0                           0.0  
2025-01-01 01:30:00  <NA>   3.0           100.

In [11]:
# Replace the timestamp with DATE / PERIOD columns to align with USEP timestamps
changi_df_period = changi_df_30.reset_index()
timestamps = changi_df_period['time']

# DATE is the calendar day; PERIOD numbers the half-hour slots of a day
# from 1 to 48 (two slots per hour, +1 because numbering starts at 1)
changi_df_period = changi_df_period.assign(
    DATE=timestamps.dt.strftime('%Y/%m/%d'),
    PERIOD=timestamps.dt.hour * 2 + timestamps.dt.minute // 30 + 1,
)

# Put DATE and PERIOD first, keep every other column in its existing order,
# and leave out the raw timestamp
leading_cols = ['DATE', 'PERIOD']
is_payload = ~changi_df_period.columns.isin(['time'] + leading_cols)
cols = leading_cols + list(changi_df_period.columns[is_payload])
changi_df_period = changi_df_period[cols]

print(changi_df_period.head())

         DATE  PERIOD  temp  dwpt  rhum  prcp  snow  wdir  wspd  wpgt    pres  \
0  2025/01/01       1  26.0  24.0  89.0   0.0  <NA>  20.0  11.2  <NA>  1011.0   
1  2025/01/01       2  26.0  24.0  89.0   0.0  <NA>  25.0  11.2  <NA>  1011.0   
2  2025/01/01       3  26.0  24.0  89.0   0.0  <NA>  30.0  11.2  <NA>  1011.0   
3  2025/01/01       4  26.0  24.0  89.0   0.0  <NA>  20.0   9.4  <NA>  1010.5   
4  2025/01/01       5  26.0  24.0  89.0   0.0  <NA>  10.0   7.6  <NA>  1010.0   

   tsun  coco  cloudcover (%)  shortwave_radiation (W/m²·h)  
0  <NA>   3.0           100.0                           0.0  
1  <NA>   3.0           100.0                           0.0  
2  <NA>   3.0           100.0                           0.0  
3  <NA>   3.0           100.0                           0.0  
4  <NA>   3.0           100.0                           0.0  


In [12]:
# Write the Changi station table to weather_changi.csv (UTF-8, no index column)
changi_df_period.to_csv(
    'weather_changi.csv',
    encoding='utf-8',
    index=False,
)

## Five-Region Weather Data

In [40]:
# Query points for the five representative regions.
# NOTE(review): the 'East' longitude (103.8198) is smaller than the 'Central'
# one (103.8545) and identical to the 'South' one, so some of these points may
# not sit in the region they are labelled with - confirm the coordinates.
region_coords = {
    'East': (1.3521, 103.8198),          # East: Changi
    'West': (1.3327, 103.7432),          # West: Jurong
    'South': (1.2734, 103.8198),         # South: Harbourfront / Marina South
    'North': (1.4496, 103.8205),         # North: Sembawang
    'Central': (1.2790, 103.8545)        # Central: Marina Centre
}

# Cloud Coverage and Solar Irradiance
# Timeframe (calendar dates, passed to the API together with timezone=Asia/Singapore)
start_date = "2025-01-01"
end_date = "2025-06-30"

all_data_cr5 = []

for region, (lat, lon) in region_coords.items():

    # Open-Meteo archive API URL
    #   - hourly=cloudcover,shortwave_radiation
    #   - timezone=Asia/Singapore
    #   - start_date, end_date
    api_url = (
        f"https://archive-api.open-meteo.com/v1/archive?"
        f"latitude={lat}&longitude={lon}"
        f"&start_date={start_date}&end_date={end_date}"
        f"&hourly=cloudcover,shortwave_radiation"
        f"&timezone=Asia/Singapore"
    )

    # The timeout keeps one stalled connection from hanging the whole loop
    response = requests.get(api_url, timeout=60)

    # The regional dataset needs all five regions, so stop here rather than
    # silently producing a frame with a region missing.
    if response.status_code != 200:
        raise RuntimeError(
            f"{region}: Open-Meteo request failed with HTTP status {response.status_code}"
        )

    data = response.json()
    times = data['hourly']['time']
    cloudcover = data['hourly']['cloudcover']
    solar_radiation = data['hourly']['shortwave_radiation']

    df_cr5 = pd.DataFrame({
        'time': pd.to_datetime(times),
        'cloudcover (%)': cloudcover,
        'shortwave_radiation (W/m²·h)': solar_radiation
    })

    # Resample to 30 minutes with time-weighted linear interpolation; the
    # grid spans the first to the last hourly timestamp
    df_cr5 = df_cr5.set_index('time')
    time_30 = pd.date_range(start=df_cr5.index.min(), end=df_cr5.index.max(), freq='30min')
    df_cr5_30 = df_cr5.reindex(time_30)
    df_cr5_30_interpolated = df_cr5_30.interpolate(method='time')

    # Add a new column to label the region
    df_cr5_30_interpolated['Region'] = region
    all_data_cr5.append(df_cr5_30_interpolated)

# Merge all regions' data. The timestamp index has no name, so reset_index()
# exposes it as a column called 'index'.
df_all_cr5 = pd.concat(all_data_cr5)
df_all_cr5 = df_all_cr5.reset_index()
print(df_all_cr5.head())

                index  cloudcover (%)  shortwave_radiation (W/m²·h) Region
0 2025-01-01 00:00:00           100.0                           0.0   East
1 2025-01-01 00:30:00           100.0                           0.0   East
2 2025-01-01 01:00:00           100.0                           0.0   East
3 2025-01-01 01:30:00           100.0                           0.0   East
4 2025-01-01 02:00:00           100.0                           0.0   East


In [59]:
# Regular Weather data from Meteostat

# Time period. The fetched index is in UTC, so the query starts at
# 2024-12-31 16:00 UTC, which is 2025-01-01 00:00 in Singapore (UTC+8).
# The end bound is also a UTC instant, so it reaches into 2025-07-01 local
# time; the surplus is trimmed after resampling (see below).
start = datetime(2024, 12, 31, 16, 0)
end = datetime(2025, 6, 30, 23, 59)

all_data = []

for region, (lat, lon) in region_coords.items():
    # Find the nearest weather station ID for each region
    stations = Stations().nearby(lat, lon)
    station = stations.fetch(1)
    station_id = station.index[0]

    # Download hourly data
    data = Hourly(station_id, start, end)
    df_hour = data.fetch()

    # Convert time zone from UTC to Singapore time
    if df_hour.index.tz is None:
        df_hour = df_hour.tz_localize('UTC').tz_convert('Asia/Singapore')
    else:
        df_hour = df_hour.tz_convert('Asia/Singapore')

    # Remove time zone information (naive local timestamps)
    df_hour.index = df_hour.index.tz_localize(None)

    # Resample to 30 minutes, linear interpolation
    # NOTE(review): every column is interpolated, including wdir (a direction
    # in degrees) and coco (a condition code) per the Meteostat parameter docs
    # linked in the README; linear blending of those may not be meaningful.
    df_30 = df_hour.resample('30min').interpolate(method='time')

    # Keep only the documented January-June 2025 window in local time.
    # Trimming after the resample keeps the 23:30 slot of the last day.
    df_30 = df_30.loc['2025-01-01':'2025-06-30'].copy()

    # Add a new column to label the region
    df_30['Region'] = region

    all_data.append(df_30)

# Merge all regions' data; reset_index() turns the timestamp index into a column
df_all = pd.concat(all_data)
df_all_period = df_all.reset_index()

In [60]:
# Preview the first five rows of the combined regional frame
print(df_all_period.head(n=5))

                 time  temp  dwpt  rhum  prcp  snow  wdir  wspd  wpgt    pres  \
0 2025-01-01 00:00:00  26.0  24.0  89.0   0.0  <NA>  20.0  11.2  <NA>  1011.0   
1 2025-01-01 00:30:00  26.0  24.0  89.0   0.0  <NA>  25.0  11.2  <NA>  1011.0   
2 2025-01-01 01:00:00  26.0  24.0  89.0   0.0  <NA>  30.0  11.2  <NA>  1011.0   
3 2025-01-01 01:30:00  26.0  24.0  89.0   0.0  <NA>  20.0   9.4  <NA>  1010.5   
4 2025-01-01 02:00:00  26.0  24.0  89.0   0.0  <NA>  10.0   7.6  <NA>  1010.0   

   tsun  coco Region  
0  <NA>   3.0   East  
1  <NA>   3.0   East  
2  <NA>   3.0   East  
3  <NA>   3.0   East  
4  <NA>   3.0   East  


In [61]:
# Add cloudcover and shortwave_radiation columns.
# The two frames are matched on (time, Region) rather than on row position:
# both are concatenations of per-region blocks, and those blocks do not
# necessarily have the same number of rows, so a positional join would attach
# values to the wrong timestamp and region.
value_cols = ['cloudcover (%)', 'shortwave_radiation (W/m²·h)']

# df_all_cr5 holds its timestamps in the column that reset_index() named 'index'
cols_to_add = df_all_cr5.rename(columns={'index': 'time'})[['time', 'Region'] + value_cols].copy()
# Give both join keys the same datetime dtype so the merge compares like with like
cols_to_add['time'] = cols_to_add['time'].astype(df_all_period['time'].dtype)

# Dropping any previous copy of the value columns makes the cell safe to re-run.
# validate= fails loudly if a (time, Region) pair is duplicated in the lookup frame.
df_all_period = (
    df_all_period
    .drop(columns=value_cols, errors='ignore')
    .merge(cols_to_add, on=['time', 'Region'], how='left', validate='many_to_one')
)

# Keep the column layout: the two new columns sit directly before 'Region'
other_cols = [col for col in df_all_period.columns if col not in value_cols + ['Region']]
df_all_period = df_all_period[other_cols + value_cols + ['Region']]

print(df_all_period.head())

                 time  temp  dwpt  rhum  prcp  snow  wdir  wspd  wpgt    pres  \
0 2025-01-01 00:00:00  26.0  24.0  89.0   0.0  <NA>  20.0  11.2  <NA>  1011.0   
1 2025-01-01 00:30:00  26.0  24.0  89.0   0.0  <NA>  25.0  11.2  <NA>  1011.0   
2 2025-01-01 01:00:00  26.0  24.0  89.0   0.0  <NA>  30.0  11.2  <NA>  1011.0   
3 2025-01-01 01:30:00  26.0  24.0  89.0   0.0  <NA>  20.0   9.4  <NA>  1010.5   
4 2025-01-01 02:00:00  26.0  24.0  89.0   0.0  <NA>  10.0   7.6  <NA>  1010.0   

   tsun  coco  cloudcover (%)  shortwave_radiation (W/m²·h) Region  
0  <NA>   3.0           100.0                           0.0   East  
1  <NA>   3.0           100.0                           0.0   East  
2  <NA>   3.0           100.0                           0.0   East  
3  <NA>   3.0           100.0                           0.0   East  
4  <NA>   3.0           100.0                           0.0   East  


In [62]:
# Replace the timestamp with DATE / PERIOD columns to align with USEP timestamps
timestamps = df_all_period['time']

# DATE is the calendar day; PERIOD numbers the half-hour slots of a day
# from 1 to 48 (two slots per hour, +1 because numbering starts at 1)
df_all_period = df_all_period.assign(
    DATE=timestamps.dt.strftime('%Y/%m/%d'),
    PERIOD=timestamps.dt.hour * 2 + timestamps.dt.minute // 30 + 1,
)

# Put DATE and PERIOD first, keep every other column in its existing order,
# and leave out the raw timestamp
leading_cols = ['DATE', 'PERIOD']
is_payload = ~df_all_period.columns.isin(['time'] + leading_cols)
cols = leading_cols + list(df_all_period.columns[is_payload])
df_all_period = df_all_period[cols]

print(df_all_period.head())

         DATE  PERIOD  temp  dwpt  rhum  prcp  snow  wdir  wspd  wpgt    pres  \
0  2025/01/01       1  26.0  24.0  89.0   0.0  <NA>  20.0  11.2  <NA>  1011.0   
1  2025/01/01       2  26.0  24.0  89.0   0.0  <NA>  25.0  11.2  <NA>  1011.0   
2  2025/01/01       3  26.0  24.0  89.0   0.0  <NA>  30.0  11.2  <NA>  1011.0   
3  2025/01/01       4  26.0  24.0  89.0   0.0  <NA>  20.0   9.4  <NA>  1010.5   
4  2025/01/01       5  26.0  24.0  89.0   0.0  <NA>  10.0   7.6  <NA>  1010.0   

   tsun  coco  cloudcover (%)  shortwave_radiation (W/m²·h) Region  
0  <NA>   3.0           100.0                           0.0   East  
1  <NA>   3.0           100.0                           0.0   East  
2  <NA>   3.0           100.0                           0.0   East  
3  <NA>   3.0           100.0                           0.0   East  
4  <NA>   3.0           100.0                           0.0   East  


In [63]:
# Write the five-region table to weather_region.csv (UTF-8, no index column)
df_all_period.to_csv(
    'weather_region.csv',
    encoding='utf-8',
    index=False,
)