# Get hourly rainfall values and station locations from 2024-03-30 to 2024-04-05





In [None]:
import requests
import pandas as pd

# Download one day of rainfall data per date in `dates_list` and write two
# CSV files per day:
#   stations_data_<MMDD>.csv          - id, device_id, name, latitude, longitude
#   rainfall_hourly_value_<MMDD>.csv  - readings summed per station per hour

# API endpoint
api_endpoint = "https://api.data.gov.sg/v1/environment/rainfall"

# List of dates
dates_list = ["2024-03-30", "2024-03-31", "2024-04-01", "2024-04-02", "2024-04-03", "2024-04-04", "2024-04-05"]

# Seconds to wait for the server. Without a timeout, requests.get can block
# forever on a stalled connection and freeze the whole loop.
request_timeout = 30

for specified_date in dates_list:
    api_url = f"{api_endpoint}?date={specified_date}"
    try:
        response = requests.get(
            api_url,
            headers={'accept': 'application/json'},
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        # Network failure or timeout: report it and move on to the next date.
        print(f"Error: request for {specified_date} failed: {exc}")
        continue

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        continue

    data = response.json()
    stations_data = data['metadata']['stations']
    # One row per (timestamp, station) reading.
    flattened_data = [
        {'timestamp': item['timestamp'], 'station_id': reading['station_id'], 'value': reading['value']}
        for item in data['items']
        for reading in item['readings']
    ]
    if not stations_data or not flattened_data:
        # A successful response can still carry no stations or readings; the
        # column lookups below would raise KeyError on an empty frame.
        print(f"Error: no rainfall data returned for {specified_date}")
        continue

    # Output file suffix: "2024-03-30" -> "0330".
    date_substr = specified_date[5:].replace("-", "")

    # Get the locations of rainfall stations
    stations_df = pd.DataFrame(stations_data)
    stations_df['latitude'] = stations_df['location'].apply(lambda loc: loc['latitude'])
    stations_df['longitude'] = stations_df['location'].apply(lambda loc: loc['longitude'])
    # Selecting the output columns also discards the nested 'location' column.
    stations_df = stations_df[['id', 'device_id', 'name', 'latitude', 'longitude']]
    stations_df.to_csv(f'stations_data_{date_substr}.csv', index=False)

    # Get hourly rainfall value of each station
    df = pd.DataFrame(flattened_data)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)
    # 'h' is the hour alias; the upper-case 'H' spelling is deprecated in
    # recent pandas releases.
    hourly_df = df.groupby([df.index.floor('h'), 'station_id']).sum()
    hourly_df.reset_index(inplace=True)
    hourly_df['datehour'] = hourly_df['timestamp'].dt.strftime('%Y-%m-%d %H')
    final_hourly_df = hourly_df[['datehour', 'station_id', 'value']]
    final_hourly_df.to_csv(f'rainfall_hourly_value_{date_substr}.csv', index=False)

In [None]:
# Get all the rainfall hourly data from 0330 to 0405
# Daily input files: 0330-0331 for March, 0401-0405 for April.
file_names = [f"rainfall_hourly_value_03{i:02d}.csv" for i in range(30, 32)]
file_names += [f"rainfall_hourly_value_04{i:02d}.csv" for i in range(1, 6)]

# Read every daily file and concatenate them in a single call. Growing a
# frame with pd.concat inside a loop re-copies all earlier rows on each pass,
# and seeding it with an empty DataFrame relies on pandas' deprecated
# handling of empty frames when determining the result dtypes.
rainfall_hourly_value_0330to0405 = pd.concat(
    [pd.read_csv(file_name) for file_name in file_names],
    ignore_index=True,
)

# Remove fully identical rows; the remaining rows keep their index labels.
rainfall_hourly_value_0330to0405 = rainfall_hourly_value_0330to0405.drop_duplicates()
rainfall_hourly_value_0330to0405.to_csv('0330to0405_rainfall_hourly_value.csv', index=False)

print(rainfall_hourly_value_0330to0405)

            datehour station_id  value
0      2024-03-30 00        S07    0.0
1      2024-03-30 00        S08    0.0
2      2024-03-30 00       S104    0.0
3      2024-03-30 00       S107    0.0
4      2024-03-30 00       S109    0.0
...              ...        ...    ...
10513  2024-04-05 23        S88    0.0
10514  2024-04-05 23        S89    0.0
10515  2024-04-05 23        S90    0.0
10516  2024-04-05 23       S900    0.0
10517  2024-04-05 23        S94    0.0

[10518 rows x 3 columns]


In [None]:
# Get all the rainfall station info
# Daily station files: 0330-0331 for March, 0401-0405 for April.
file_names = [f"stations_data_033{i}.csv" for i in range(0, 2)] + [f"stations_data_040{i}.csv" for i in range(1, 6)]

# Read every daily file and concatenate them in a single call rather than
# growing a frame from an empty DataFrame inside a loop (which re-copies
# earlier rows each pass and relies on deprecated empty-frame concat handling).
station_data = pd.concat(
    [pd.read_csv(file_name) for file_name in file_names],
    ignore_index=True,
)

# The same station appears in each daily file; keep one copy of each
# identical row and renumber the index from 0.
station_data = station_data.drop_duplicates().reset_index(drop=True)
station_data.to_csv('Rainfall_Station_Data.csv', index=False)

print(station_data)

# Check if 'id' column has duplicates
# Rows that share an id but differ in another column survive the
# drop_duplicates() above, so a repeated id here means a station's
# details changed between days.
duplicates = station_data['id'].duplicated().any()

if duplicates:
    print("\nThere are duplicates in the 'id' column.")
else:
    print("\nNo duplicates found in the 'id' column.")

      id device_id                         name  latitude  longitude
0    S77       S77               Alexandra Road   1.29370  103.81250
1   S109      S109          Ang Mo Kio Avenue 5   1.37640  103.84920
2   S117      S117                  Banyan Road   1.25600  103.67900
3    S64       S64           Bukit Panjang Road   1.38240  103.76030
4    S90       S90             Bukit Timah Road   1.31910  103.81910
..   ...       ...                          ...       ...        ...
58   S69       S69  Upper Peirce Reservoir Park   1.37000  103.80500
59   S08       S08           Upper Thomson Road   1.37010  103.82710
60  S116      S116           West Coast Highway   1.28100  103.75400
61  S104      S104           Woodlands Avenue 9   1.44387  103.78538
62   S60       S60                      Sentosa   1.25000  103.82790

[63 rows x 5 columns]

No duplicates found in the 'id' column.
