Import ONI data

In [23]:
import pandas as pd
import os

# Load the ONI table, which is whitespace-delimited text.
# The separator is written as a raw string so the regex token "\s" reaches
# pandas unchanged; in a normal string literal "\s" is an invalid escape
# sequence and triggers a warning on recent Python versions.
oni_data = pd.read_csv("lawas/oni.txt", sep=r"\s+")
oni_data

Unnamed: 0,YR,MON,TOTAL,ClimAdjust,ANOM
0,1950,1,24.56,26.18,-1.62
1,1950,2,25.07,26.39,-1.32
2,1950,3,25.88,26.95,-1.07
3,1950,4,26.29,27.39,-1.11
4,1950,5,26.19,27.56,-1.37
...,...,...,...,...,...
893,2024,6,27.91,27.73,0.18
894,2024,7,27.34,27.29,0.05
895,2024,8,26.74,26.86,-0.11
896,2024,9,26.47,26.72,-0.25


Import DID data

In [24]:
import pandas as pd
import os
import re

# Gather every rainfall export in "lawas" whose name contains
# "rainfall-data<N>.csv" and stack them into a single frame.
#
# os.listdir() returns names in arbitrary, filesystem-dependent order, so the
# files are ordered by their numeric suffix (then by name) to make the row
# order of the combined frame reproducible between runs and machines.
rainfall_files = []
for file in os.listdir("lawas"):
    found = re.search(r"rainfall-data(\d+)\.csv", file)
    if found:
        rainfall_files.append((int(found.group(1)), file))

# Read each file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies all previously accumulated rows on every pass.
frames = [pd.read_csv(f"lawas/{name}") for _, name in sorted(rainfall_files)]

# pd.concat raises ValueError on an empty list, so keep the original
# behaviour of yielding an empty DataFrame when no file matches.
precipitation_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [25]:
# Convert the "YYYYMMDD HHMM" strings in DateTime to real timestamps and keep
# only the two columns used by the rest of the notebook.
precipitation_data = precipitation_data.assign(
    DateTime=lambda frame: pd.to_datetime(frame["DateTime"], format="%Y%m%d %H%M")
).loc[:, ["DateTime", "Rainfall (mm)"]]
precipitation_data

Unnamed: 0,DateTime,Rainfall (mm)
0,1998-07-13 12:00:00,1.0
1,1998-07-13 13:00:00,2.0
2,1998-07-13 14:00:00,0.0
3,1998-07-13 15:00:00,0.0
4,1998-07-13 16:00:00,0.0
...,...,...
200096,2022-04-07 20:00:00,1.0
200097,2022-04-07 21:00:00,0.0
200098,2022-04-07 22:00:00,0.0
200099,2022-04-07 23:00:00,0.0


Merge DID and ONI data

In [26]:
# Attach the ONI anomaly (ANOM) to every rainfall record by matching the
# record's calendar year and month against the YR / MON columns of the ONI
# table. The YR / MON helper columns are dropped afterwards and DateTime
# becomes the index.
precipitation_data = (
    precipitation_data
    .merge(
        oni_data[["YR", "MON", "ANOM"]],
        how="inner",  # pandas default, spelled out: rows without an ONI match are dropped
        left_on=[
            precipitation_data["DateTime"].dt.year,
            precipitation_data["DateTime"].dt.month,
        ],
        right_on=["YR", "MON"],
    )
    .drop(columns=["YR", "MON"])
    .set_index("DateTime")
)
precipitation_data


Unnamed: 0_level_0,Rainfall (mm),ANOM
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1
1998-07-13 12:00:00,1.0,-0.86
1998-07-13 13:00:00,2.0,-0.86
1998-07-13 14:00:00,0.0,-0.86
1998-07-13 15:00:00,0.0,-0.86
1998-07-13 16:00:00,0.0,-0.86
...,...,...
2022-04-07 20:00:00,1.0,-1.11
2022-04-07 21:00:00,0.0,-1.11
2022-04-07 22:00:00,0.0,-1.11
2022-04-07 23:00:00,0.0,-1.11


Import Wunderground data

In [27]:
# Load the Wunderground feature table and clean it in one pipeline:
#   1. rename "Time" to "DateTime",
#   2. parse the offset-aware timestamps, then strip the timezone so the
#      index holds naive wall-clock times,
#   3. index by DateTime,
#   4. discard the "Wind Gust" and "Precip." columns,
#   5. treat missing "Wind Speed" as 0, then forward-fill every remaining gap.
feature_data = (
    pd.read_csv("lawas/rainfall-feature-wunderground.csv")
    .rename(columns={"Time": "DateTime"})
    .assign(
        DateTime=lambda frame: pd.to_datetime(
            frame["DateTime"], format="%Y-%m-%d %H:%M:%S%z"
        ).dt.tz_localize(None)
    )
    .set_index("DateTime")
    .drop(columns=["Wind Gust", "Precip."])
    .fillna({"Wind Speed": 0})
    .ffill()
)
feature_data

Unnamed: 0_level_0,Temperature,Dew Point,Humidity,Wind,Wind Speed,Pressure,Condition
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2002-08-01 00:00:00,79.0,75.0,89.0,E,3.0,29.69,Mostly Cloudy
2002-08-01 01:00:00,79.0,73.0,83.0,E,2.0,29.69,Mostly Cloudy
2002-08-01 02:00:00,77.0,75.0,94.0,W,2.0,29.66,Mostly Cloudy
2002-08-01 03:00:00,77.0,73.0,89.0,CALM,0.0,29.66,Mostly Cloudy
2002-08-01 04:00:00,79.0,73.0,83.0,NE,1.0,29.63,Mostly Cloudy
...,...,...,...,...,...,...,...
2022-04-07 19:00:00,79.0,75.0,89.0,W,3.0,29.66,Fair
2022-04-07 20:00:00,79.0,77.0,94.0,VAR,1.0,29.69,Fair
2022-04-07 21:00:00,79.0,77.0,94.0,VAR,2.0,29.72,Fair
2022-04-07 22:00:00,77.0,77.0,100.0,CALM,0.0,29.72,Rain


Combine all data

In [28]:
# Left-join the weather features onto the rainfall/ONI records by their shared
# DateTime index, then discard every row that has a missing value in any column.
lawas_rainfall = precipitation_data.join(feature_data, how="left").dropna()

Export as .csv file

In [29]:
# Write the combined dataset to disk, including the index as a column.
lawas_rainfall.to_csv(path_or_buf="lawas/lawas-rainfall.csv", index=True)