Topographic data

In [None]:
# Station lookup keyed by site name. Each entry carries the site's latitude,
# longitude and elevation.
# NOTE(review): lat/lon look like decimal degrees and elev like metres - confirm units.
topo_loc = {
    "lawas": dict(lat=4.847301, lon=115.406703, elev=6.5),
    "mulu": dict(lat=4.049213, lon=114.810996, elev=28.7),
    "kuching": dict(lat=1.487123, lon=110.341599, elev=22.3),
    "kota-kinabalu": dict(lat=5.923283, lon=116.051239, elev=5.4),
    "kuantan": dict(lat=3.780726, lon=103.215062, elev=16),
}

Import ONI data

In [1]:
import pandas as pd
import os

# Load the ONI table (columns YR, MON, TOTAL, ClimAdjust, ANOM) from a
# whitespace-delimited text file. The separator is a regular expression, so it
# is written as a raw string: "\s" inside a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
oni_data = pd.read_csv("oni.txt", sep=r"\s+")
oni_data

Unnamed: 0,YR,MON,TOTAL,ClimAdjust,ANOM
0,1950,1,24.56,26.18,-1.62
1,1950,2,25.07,26.39,-1.32
2,1950,3,25.88,26.95,-1.07
3,1950,4,26.29,27.39,-1.11
4,1950,5,26.19,27.56,-1.37
...,...,...,...,...,...
893,2024,6,27.91,27.73,0.18
894,2024,7,27.34,27.29,0.05
895,2024,8,26.74,26.86,-0.11
896,2024,9,26.47,26.72,-0.25


Import DID/MET data

In [2]:
import pandas as pd
import os
import re
# Station folder; also reused later as the prefix of the exported CSV names.
folder_path = "mulu"

# Pick up every rainfall export whose name contains "rainfall-data<digits>.csv".
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the row order of the combined frame reproducible between runs.
rainfall_files = sorted(
    file for file in os.listdir(folder_path)
    if re.search(r"rainfall-data\d+\.csv", file)
)

# Read all files first and concatenate once. Growing a DataFrame inside the
# loop re-copies every accumulated row on each iteration, and concatenating
# onto an empty DataFrame is deprecated behaviour in recent pandas.
rainfall_frames = [pd.read_csv(f"{folder_path}/{file}") for file in rainfall_files]

# pd.concat() raises on an empty list, so fall back to an empty frame when no
# file matched (the same result the incremental loop produced).
precipitation_data = (
    pd.concat(rainfall_frames, ignore_index=True) if rainfall_frames else pd.DataFrame()
)

In [3]:
# Parse the "YYYYMMDD HHMM" timestamps, keep only the timestamp and rainfall
# columns, and shorten the rainfall column name to "Rainfall".
precipitation_data = (
    precipitation_data
    .assign(DateTime=lambda frame: pd.to_datetime(frame["DateTime"], format="%Y%m%d %H%M"))
    .loc[:, ["DateTime", "Rainfall (mm)"]]
    .rename(columns={"Rainfall (mm)": "Rainfall"})
)
precipitation_data

Unnamed: 0,DateTime,Rainfall
0,2020-01-01 00:00:00,0.0
1,2020-01-01 01:00:00,0.0
2,2020-01-01 02:00:00,0.0
3,2020-01-01 03:00:00,0.0
4,2020-01-01 04:00:00,0.0
...,...,...
22274,2022-07-17 07:00:00,0.0
22275,2022-07-17 08:00:00,0.0
22276,2022-07-17 09:00:00,0.0
22277,2022-07-17 10:00:00,0.0


Merge DID and ONI data

In [4]:
# Attach the monthly ONI values to every hourly rainfall row by matching the
# row's calendar year and month against the ONI "YR"/"MON" columns.
# pd.merge defaults to an inner join, so rows without a matching ONI month
# are dropped.
obs_year = precipitation_data["DateTime"].dt.year
obs_month = precipitation_data["DateTime"].dt.month
oni_columns = oni_data[["YR", "MON", "TOTAL", "ClimAdjust", "ANOM"]]

precipitation_data = (
    precipitation_data
    .merge(oni_columns, left_on=[obs_year, obs_month], right_on=["YR", "MON"])
    .drop(columns=["YR", "MON"])  # join keys are no longer needed
    .set_index("DateTime")
)
precipitation_data


Unnamed: 0_level_0,Rainfall,TOTAL,ClimAdjust,ANOM
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01 00:00:00,0.0,27.16,26.55,0.6
2020-01-01 01:00:00,0.0,27.16,26.55,0.6
2020-01-01 02:00:00,0.0,27.16,26.55,0.6
2020-01-01 03:00:00,0.0,27.16,26.55,0.6
2020-01-01 04:00:00,0.0,27.16,26.55,0.6
...,...,...,...,...
2022-07-17 07:00:00,0.0,26.60,27.29,-0.7
2022-07-17 08:00:00,0.0,26.60,27.29,-0.7
2022-07-17 09:00:00,0.0,26.60,27.29,-0.7
2022-07-17 10:00:00,0.0,26.60,27.29,-0.7


Import Wunderground data

In [5]:
# Load the Wunderground feature table and normalise it in one pass:
#   1. rename "Time" to "DateTime" and parse it (timestamps carry a UTC offset),
#   2. strip the timezone so the index is naive,
#   3. drop the "Wind Gust" and "Precip." columns,
#   4. treat a missing wind speed as 0, then forward-fill every remaining gap,
#   5. remove the spaces from the column names.
feature_data = (
    pd.read_csv(f"{folder_path}/rainfall-feature-wunderground.csv")
    .rename(columns={"Time": "DateTime"})
    .assign(
        DateTime=lambda frame: pd.to_datetime(
            frame["DateTime"], format="%Y-%m-%d %H:%M:%S%z"
        ).dt.tz_localize(None)
    )
    .set_index("DateTime")
    .drop(columns=["Wind Gust", "Precip."])
    .fillna({"Wind Speed": 0})
    .ffill()
    .rename(columns={"Dew Point": "DewPoint", "Wind Speed": "WindSpeed"})
)
feature_data

Unnamed: 0_level_0,Temperature,DewPoint,Humidity,Wind,WindSpeed,Pressure,Condition
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-01 00:00:00,79.0,77.0,94.0,WSW,2.0,29.69,Mostly Cloudy
2010-01-01 01:00:00,77.0,77.0,100.0,NNW,3.0,29.69,Mostly Cloudy
2010-01-01 02:00:00,77.0,75.0,94.0,NNW,3.0,29.69,Mostly Cloudy
2010-01-01 03:00:00,77.0,75.0,94.0,WNW,1.0,29.66,Mostly Cloudy
2010-01-01 04:00:00,77.0,75.0,94.0,SW,2.0,29.66,Mostly Cloudy
...,...,...,...,...,...,...,...
2022-07-17 19:00:00,81.0,79.0,94.0,WSW,3.0,29.73,Partly Cloudy
2022-07-17 20:00:00,81.0,77.0,89.0,VAR,2.0,29.73,Partly Cloudy
2022-07-17 21:00:00,79.0,77.0,94.0,W,3.0,29.73,Partly Cloudy
2022-07-17 22:00:00,79.0,77.0,94.0,W,2.0,29.73,Fair


Combine all data

In [6]:
# Left-join the weather features onto the rainfall/ONI rows by timestamp and
# discard any row that still has a missing value in any column.
# NOTE(review): the variable is named "lawas_*" although folder_path is "mulu" - confirm naming.
lawas_rainfall = precipitation_data.join(feature_data).dropna()
lawas_rainfall

Unnamed: 0_level_0,Rainfall,TOTAL,ClimAdjust,ANOM,Temperature,DewPoint,Humidity,Wind,WindSpeed,Pressure,Condition
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-01 00:00:00,0.0,27.16,26.55,0.6,79.0,77.0,94.0,NE,3.0,29.78,Mostly Cloudy
2020-01-01 01:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,VAR,2.0,29.78,Mostly Cloudy
2020-01-01 02:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,VAR,2.0,29.75,Mostly Cloudy
2020-01-01 03:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,VAR,1.0,29.75,Mostly Cloudy
2020-01-01 04:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,VAR,2.0,29.72,Mostly Cloudy
...,...,...,...,...,...,...,...,...,...,...,...
2022-07-17 07:00:00,0.0,26.60,27.29,-0.7,77.0,75.0,94.0,ESE,2.0,29.79,Fair
2022-07-17 08:00:00,0.0,26.60,27.29,-0.7,81.0,77.0,89.0,SSW,5.0,29.79,Partly Cloudy
2022-07-17 09:00:00,0.0,26.60,27.29,-0.7,75.0,73.0,94.0,SW,12.0,29.82,Partly Cloudy
2022-07-17 10:00:00,0.0,26.60,27.29,-0.7,77.0,75.0,94.0,SSW,3.0,29.82,Light Rain


Check for duplicate data

In [7]:
# Report timestamps that occur more than once in the index (every repeat after
# the first occurrence is flagged).
is_repeat = lawas_rainfall.index.duplicated()
duplicates = lawas_rainfall.index[is_repeat]
if len(duplicates) > 0:
    print("Duplicate timestamps found:", duplicates)

In [8]:
numeric_cols = lawas_rainfall.select_dtypes(include=['number']).columns
non_numeric_cols = lawas_rainfall.select_dtypes(exclude=['number']).columns

# Collapse rows that share a timestamp into one row:
#   - numeric columns are averaged,
#   - non-numeric columns keep their first value.
# Numeric columns are listed first, so they also come first in the result.
dedup_rules = dict.fromkeys(numeric_cols, 'mean')
dedup_rules.update(dict.fromkeys(non_numeric_cols, 'first'))

lawas_rainfall = lawas_rainfall.groupby(lawas_rainfall.index).agg(dedup_rules)
lawas_rainfall

Unnamed: 0_level_0,Rainfall,TOTAL,ClimAdjust,ANOM,Temperature,DewPoint,Humidity,WindSpeed,Pressure,Wind,Condition
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-01 00:00:00,0.0,27.16,26.55,0.6,79.0,77.0,94.0,3.0,29.78,NE,Mostly Cloudy
2020-01-01 01:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,2.0,29.78,VAR,Mostly Cloudy
2020-01-01 02:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,2.0,29.75,VAR,Mostly Cloudy
2020-01-01 03:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,1.0,29.75,VAR,Mostly Cloudy
2020-01-01 04:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,2.0,29.72,VAR,Mostly Cloudy
...,...,...,...,...,...,...,...,...,...,...,...
2022-07-17 07:00:00,0.0,26.60,27.29,-0.7,77.0,75.0,94.0,2.0,29.79,ESE,Fair
2022-07-17 08:00:00,0.0,26.60,27.29,-0.7,81.0,77.0,89.0,5.0,29.79,SSW,Partly Cloudy
2022-07-17 09:00:00,0.0,26.60,27.29,-0.7,75.0,73.0,94.0,12.0,29.82,SW,Partly Cloudy
2022-07-17 10:00:00,0.0,26.60,27.29,-0.7,77.0,75.0,94.0,3.0,29.82,SSW,Light Rain


Find missing timestamps (training input)

In [9]:
# Build the complete hourly grid between the first and last observation, then
# list the hours that have no row in the data.
first_obs = lawas_rainfall.index.min()
last_obs = lawas_rainfall.index.max()
datetime_range = pd.date_range(start=first_obs, end=last_obs, freq='1h')

missing_date = datetime_range.difference(lawas_rainfall.index)
missing_date

DatetimeIndex(['2020-01-03 09:00:00', '2020-06-06 00:00:00',
               '2020-06-06 01:00:00', '2020-06-06 02:00:00',
               '2020-06-06 03:00:00', '2020-06-06 04:00:00',
               '2020-06-06 05:00:00', '2020-06-06 06:00:00',
               '2020-06-06 07:00:00', '2020-06-06 08:00:00',
               ...
               '2021-11-19 01:00:00', '2022-01-15 14:00:00',
               '2022-01-16 01:00:00', '2022-02-01 08:00:00',
               '2022-02-01 09:00:00', '2022-02-01 10:00:00',
               '2022-02-14 01:00:00', '2022-03-04 12:00:00',
               '2022-05-18 02:00:00', '2022-05-18 03:00:00'],
              dtype='datetime64[ns]', length=112, freq=None)

In [10]:
# Window of the training input (both bounds inclusive).
start_date_input = "2020-01-01"
end_date_input = "2022-07-17 11:00:00"
# NOTE(review): an earlier note here mentioned a major data loss from Feb 2021 to
# Dec 2021 and a usable period of Aug 2002 until Jan 2021; that does not match
# the window above - confirm which period is intended.

# Missing hours that fall inside the training window.
in_window = (missing_date >= start_date_input) & (missing_date <= end_date_input)
missing_date_input = missing_date[in_window]
missing_date_input

DatetimeIndex(['2020-01-03 09:00:00', '2020-06-06 00:00:00',
               '2020-06-06 01:00:00', '2020-06-06 02:00:00',
               '2020-06-06 03:00:00', '2020-06-06 04:00:00',
               '2020-06-06 05:00:00', '2020-06-06 06:00:00',
               '2020-06-06 07:00:00', '2020-06-06 08:00:00',
               ...
               '2021-11-19 01:00:00', '2022-01-15 14:00:00',
               '2022-01-16 01:00:00', '2022-02-01 08:00:00',
               '2022-02-01 09:00:00', '2022-02-01 10:00:00',
               '2022-02-14 01:00:00', '2022-03-04 12:00:00',
               '2022-05-18 02:00:00', '2022-05-18 03:00:00'],
              dtype='datetime64[ns]', length=112, freq=None)

In [11]:
# Put the data on a gap-free hourly grid covering the training window; hours
# with no observation take the values of the most recent earlier row.
# NOTE(review): this also forward-fills Rainfall, so a gap repeats the last
# observed rainfall amount - confirm this is intended before summing.
input_hours = pd.date_range(start=start_date_input, end=end_date_input, freq='1h')
lawas_rainfall_input = (
    lawas_rainfall
    .reindex(input_hours, method='ffill')
    .rename_axis("DateTime")
)
lawas_rainfall_input

Unnamed: 0_level_0,Rainfall,TOTAL,ClimAdjust,ANOM,Temperature,DewPoint,Humidity,WindSpeed,Pressure,Wind,Condition
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-01 00:00:00,0.0,27.16,26.55,0.6,79.0,77.0,94.0,3.0,29.78,NE,Mostly Cloudy
2020-01-01 01:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,2.0,29.78,VAR,Mostly Cloudy
2020-01-01 02:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,2.0,29.75,VAR,Mostly Cloudy
2020-01-01 03:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,1.0,29.75,VAR,Mostly Cloudy
2020-01-01 04:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,2.0,29.72,VAR,Mostly Cloudy
...,...,...,...,...,...,...,...,...,...,...,...
2022-07-17 07:00:00,0.0,26.60,27.29,-0.7,77.0,75.0,94.0,2.0,29.79,ESE,Fair
2022-07-17 08:00:00,0.0,26.60,27.29,-0.7,81.0,77.0,89.0,5.0,29.79,SSW,Partly Cloudy
2022-07-17 09:00:00,0.0,26.60,27.29,-0.7,75.0,73.0,94.0,12.0,29.82,SW,Partly Cloudy
2022-07-17 10:00:00,0.0,26.60,27.29,-0.7,77.0,75.0,94.0,3.0,29.82,SSW,Light Rain


In [12]:
# Sanity-check the first and last rows of the training input.
head_preview = lawas_rainfall_input.head()
tail_preview = lawas_rainfall_input.tail()
print(f"Head Data:\n{head_preview}\n")
print(f"Tail Data:\n{tail_preview}")

Head Data:
                     Rainfall  TOTAL  ClimAdjust  ANOM  Temperature  DewPoint  \
DateTime                                                                        
2020-01-01 00:00:00       0.0  27.16       26.55   0.6         79.0      77.0   
2020-01-01 01:00:00       0.0  27.16       26.55   0.6         77.0      77.0   
2020-01-01 02:00:00       0.0  27.16       26.55   0.6         77.0      77.0   
2020-01-01 03:00:00       0.0  27.16       26.55   0.6         77.0      77.0   
2020-01-01 04:00:00       0.0  27.16       26.55   0.6         77.0      77.0   

                     Humidity  WindSpeed  Pressure Wind      Condition  
DateTime                                                                
2020-01-01 00:00:00      94.0        3.0     29.78   NE  Mostly Cloudy  
2020-01-01 01:00:00     100.0        2.0     29.78  VAR  Mostly Cloudy  
2020-01-01 02:00:00     100.0        2.0     29.75  VAR  Mostly Cloudy  
2020-01-01 03:00:00     100.0        1.0     29.75  VAR 

In [13]:
# List the column names of the training input; the resampling step refers to them by name.
lawas_rainfall_input.columns

Index(['Rainfall', 'TOTAL', 'ClimAdjust', 'ANOM', 'Temperature', 'DewPoint',
       'Humidity', 'WindSpeed', 'Pressure', 'Wind', 'Condition'],
      dtype='object')

Downsample data - daily, weekly

In [14]:
def _most_frequent(values):
    """Return the most common value of a Series, or NaN when it has none.

    ``Series.mode()`` may return several values on a tie; the first one is
    used, which matches ``values.mode()[0]``. An empty result (a bin with no
    non-missing values) yields NaN instead of raising.
    """
    modes = values.mode()
    if modes.empty:
        return float("nan")
    return modes.iloc[0]


# Single aggregation spec shared by every resampling frequency, so the daily
# and weekly tables cannot drift apart:
#   - Rainfall is accumulated over the bin,
#   - the remaining numeric columns are averaged,
#   - the categorical columns keep their most frequent value.
_RESAMPLE_AGG = {
    'Rainfall': 'sum',
    'TOTAL': 'mean',
    'ClimAdjust': 'mean',
    'ANOM': 'mean',
    'Temperature': 'mean',
    'DewPoint': 'mean',
    'Humidity': 'mean',
    'WindSpeed': 'mean',
    'Pressure': 'mean',
    'Wind': _most_frequent,
    'Condition': _most_frequent,
}

# Daily and weekly views of the hourly training input.
lawas_rainfall_input_daily = lawas_rainfall_input.resample('D').agg(_RESAMPLE_AGG)
lawas_rainfall_input_weekly = lawas_rainfall_input.resample('W').agg(_RESAMPLE_AGG)

In [15]:
# Display the daily aggregate.
lawas_rainfall_input_daily

Unnamed: 0_level_0,Rainfall,TOTAL,ClimAdjust,ANOM,Temperature,DewPoint,Humidity,WindSpeed,Pressure,Wind,Condition
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-01,21.0,27.16,26.55,0.6,81.833333,78.583333,90.250000,5.083333,29.77625,VAR,Mostly Cloudy
2020-01-02,2.5,27.16,26.55,0.6,82.000000,77.833333,88.041667,6.708333,29.80125,NE,Mostly Cloudy
2020-01-03,2.0,27.16,26.55,0.6,81.250000,78.083333,90.416667,6.666667,29.76375,NE,Mostly Cloudy
2020-01-04,0.5,27.16,26.55,0.6,80.625000,76.916667,89.041667,9.250000,29.70875,NE,Mostly Cloudy
2020-01-05,0.0,27.16,26.55,0.6,81.833333,77.916667,88.583333,4.750000,29.70000,E,Mostly Cloudy
...,...,...,...,...,...,...,...,...,...,...,...
2022-07-13,16.5,26.60,27.29,-0.7,77.333333,75.083333,93.583333,5.166667,29.71125,WSW,Partly Cloudy
2022-07-14,34.5,26.60,27.29,-0.7,79.458333,75.750000,89.041667,4.666667,29.72250,WNW,Partly Cloudy
2022-07-15,0.0,26.60,27.29,-0.7,81.708333,75.666667,83.666667,2.500000,29.70250,CALM,Partly Cloudy
2022-07-16,0.0,26.60,27.29,-0.7,83.083333,75.750000,79.541667,3.666667,29.70625,VAR,Fair


In [16]:
# Display the weekly aggregate.
lawas_rainfall_input_weekly

Unnamed: 0_level_0,Rainfall,TOTAL,ClimAdjust,ANOM,Temperature,DewPoint,Humidity,WindSpeed,Pressure,Wind,Condition
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-05,26.0,27.160000,26.550000,0.600000,81.508333,77.866667,89.266667,6.491667,29.750000,NE,Mostly Cloudy
2020-01-12,148.0,27.160000,26.550000,0.600000,80.678571,77.571429,90.767857,5.392857,29.697679,VAR,Mostly Cloudy
2020-01-19,52.5,27.160000,26.550000,0.600000,80.690476,77.714286,90.916667,4.065476,29.737857,VAR,Mostly Cloudy
2020-01-26,18.0,27.160000,26.550000,0.600000,81.922619,78.571429,89.928571,4.678571,29.760536,VAR,Mostly Cloudy
2020-02-02,125.5,27.151429,26.610000,0.534286,81.672619,77.017857,86.910714,6.559524,29.768155,NE,Mostly Cloudy
...,...,...,...,...,...,...,...,...,...,...,...
2022-06-19,146.5,26.980000,27.730000,-0.750000,79.952381,76.690476,90.273810,4.404762,29.731786,VAR,Partly Cloudy
2022-06-26,147.5,26.980000,27.730000,-0.750000,80.345238,76.660714,88.976190,4.172619,29.706250,VAR,Partly Cloudy
2022-07-03,68.5,26.817143,27.541429,-0.728571,82.255952,77.119048,85.285714,5.077381,29.666250,VAR,Fair
2022-07-10,64.5,26.600000,27.290000,-0.700000,80.982143,75.982143,85.494048,5.773810,29.701786,VAR,Partly Cloudy


In [17]:
# Display the hourly training input for comparison with the aggregates.
lawas_rainfall_input

Unnamed: 0_level_0,Rainfall,TOTAL,ClimAdjust,ANOM,Temperature,DewPoint,Humidity,WindSpeed,Pressure,Wind,Condition
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-01 00:00:00,0.0,27.16,26.55,0.6,79.0,77.0,94.0,3.0,29.78,NE,Mostly Cloudy
2020-01-01 01:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,2.0,29.78,VAR,Mostly Cloudy
2020-01-01 02:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,2.0,29.75,VAR,Mostly Cloudy
2020-01-01 03:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,1.0,29.75,VAR,Mostly Cloudy
2020-01-01 04:00:00,0.0,27.16,26.55,0.6,77.0,77.0,100.0,2.0,29.72,VAR,Mostly Cloudy
...,...,...,...,...,...,...,...,...,...,...,...
2022-07-17 07:00:00,0.0,26.60,27.29,-0.7,77.0,75.0,94.0,2.0,29.79,ESE,Fair
2022-07-17 08:00:00,0.0,26.60,27.29,-0.7,81.0,77.0,89.0,5.0,29.79,SSW,Partly Cloudy
2022-07-17 09:00:00,0.0,26.60,27.29,-0.7,75.0,73.0,94.0,12.0,29.82,SW,Partly Cloudy
2022-07-17 10:00:00,0.0,26.60,27.29,-0.7,77.0,75.0,94.0,3.0,29.82,SSW,Light Rain


Data for validation

In [18]:
# start_date_valid = "2022-01-01"
# end_date_valid = "2022-04-07 23:00:00"
# # There are major data losses from Feb 2021 to Dec 2021. The data will be used from Jan 2002 until Apr 2021

# missing_date_valid = missing_date[(missing_date >= start_date_valid) & (missing_date <= end_date_valid)]

In [19]:
# lawas_rainfall_valid = lawas_rainfall.reindex(
#                     pd.date_range(start=start_date_valid, end=end_date_valid,
#                     freq='1h'),
#                     method='ffill'
#                     )
# lawas_rainfall_valid.index.name = "DateTime"
# lawas_rainfall_valid

In [20]:
# # Check on start and end data
# print(f"Head Data:\n{lawas_rainfall_valid.head()}\n")
# print(f"Tail Data:\n{lawas_rainfall_valid.tail()}")

In [21]:
# lawas_rainfall_valid.columns

Downsample for validation data

In [22]:
# lawas_rainfall_valid_daily = lawas_rainfall_valid.resample('D').agg({
#     'Rainfall': 'sum',
#     'TOTAL': 'mean',
#     'ClimAdjust': 'mean',
#     'ANOM': 'mean',
#     'Temperature': 'mean',
#     'DewPoint': 'mean',
#     'Humidity': 'mean',
#     'WindSpeed': 'mean',
#     'Pressure': 'mean',
#     'Wind': lambda x: x.mode()[0],
#     'Condition': lambda x: x.mode()[0]
# })

# lawas_rainfall_valid_weekly = lawas_rainfall_valid.resample('W').agg({
#     'Rainfall': 'sum',
#     'TOTAL': 'mean',
#     'ClimAdjust': 'mean',
#     'ANOM': 'mean',
#     'Temperature': 'mean',
#     'DewPoint': 'mean',
#     'Humidity': 'mean',
#     'WindSpeed': 'mean',
#     'Pressure': 'mean',
#     'Wind': lambda x: x.mode()[0],
#     'Condition': lambda x: x.mode()[0]
# })

In [23]:
# lawas_rainfall_valid_daily

In [24]:
# lawas_rainfall_valid_weekly

Export as .csv file

In [25]:
# Write the hourly, daily and weekly training inputs into the station folder,
# each prefixed with the station name and with the DateTime index included.
hourly_csv = f"{folder_path}/{folder_path}-rainfall.csv"
daily_csv = f"{folder_path}/{folder_path}-rainfall-daily.csv"
weekly_csv = f"{folder_path}/{folder_path}-rainfall-weekly.csv"

lawas_rainfall_input.to_csv(hourly_csv, index=True)
lawas_rainfall_input_daily.to_csv(daily_csv, index=True)
lawas_rainfall_input_weekly.to_csv(weekly_csv, index=True)

# # valid data
# lawas_rainfall_valid.to_csv(f"{folder_path}/{folder_path}-rainfall-valid.csv",index=True)
# lawas_rainfall_valid_daily.to_csv(f"{folder_path}/{folder_path}-rainfall-valid-daily.csv",index=True)
# lawas_rainfall_valid_weekly.to_csv(f"{folder_path}/{folder_path}-rainfall-valid-weekly.csv",index=True)