## Problem Statement
Dalam pengerjaan koreksi data satelit, perlu dilakukan windowing berdasarkan kategori hujan, sehingga dibutuhkan tahapan untuk mengklasifikasikan data hujan berdasarkan kategori hujan dari BMKG. Koreksi data satelit membutuhkan data hujan observasi yang berperan sebagai __corrector__, dan ada pula data satelit yang ingin dikoreksi di daerah irigasi. Biasanya data satelit digunakan untuk area yang tidak memiliki data curah hujan (CH) observasi, sehingga CH satelit dikoreksi menggunakan data CH dari stasiun observasi di lokasi terdekat.

## Objective
- Script ini digunakan untuk mengklasifikasikan data hujan ke dalam kategori ringan, sedang, lebat, dan sangat lebat.

In [8]:
import pandas as pd
import numpy as np
import openpyxl
import glob
from datetime import datetime, timedelta

In [33]:
# Folder that holds the satellite rainfall workbooks
path_satellite = r'D:\Happy\satellite_correction\data_satellite'
# Collect every .xlsx workbook found directly inside that folder
rainfall_files = glob.glob(f"{path_satellite}/*.xlsx")

In [36]:
# Satellite rainfall workbook, read from the satellite data folder
sat_workbook = f"{path_satellite}/Hasil_CHIRPSStasiunJeneberang.xlsx"
rainfall_sat = pd.read_excel(sat_workbook)

# Observed rainfall workbook; the 'Unnamed: 0' column is discarded after loading
obs_workbook = 'D:/Happy/satellite_correction/uji_data_hujan_obs/rainfall_QC.xlsx'
rainfall_obs = pd.read_excel(obs_workbook).drop(columns=['Unnamed: 0'])

# Display the satellite table as the cell output
rainfall_sat

Unnamed: 0,date,PCH STASIUN LIMBUNGA,PCH STASIUN MANGEMPANG,PCH STASIUN LENGKESE,PCH STASIUN KAMPILI,PCH STASIUN KD1,PCH STASIUN MALINO,PCH STASIUN PANAIKANG,PCH STASIUN POS1,PCH STASIUN BILI-BILI,PCH STASIUN MACINI SOMBALA
0,1981-01-01,12.907978,13.778302,11.190503,11.194236,13.111584,11.280024,11.280024,10.579425,10.099457,14.523473
1,1981-01-02,15.914540,16.987581,13.797025,21.036146,16.165569,13.907397,13.907397,13.043613,19.308737,19.694216
2,1981-01-03,4.914572,5.245938,4.260662,4.827044,4.992093,4.294746,4.294746,4.028001,5.600019,13.550919
3,1981-01-04,7.415222,7.915195,6.428586,9.992168,7.532186,6.480013,6.480013,6.077542,9.678645,10.989858
4,1981-01-05,0.000000,2.360672,0.000000,7.231175,2.246442,0.000000,0.000000,0.000000,7.024302,7.261736
...,...,...,...,...,...,...,...,...,...,...,...
15486,2023-05-27,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
15487,2023-05-28,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
15488,2023-05-29,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
15489,2023-05-30,0.000000,0.000000,8.590149,3.345497,0.000000,7.826845,7.826845,12.674468,0.000000,0.000000


In [38]:
# Analysis window for the satellite series.
# The lower bound is exclusive and the upper bound inclusive, so the first
# retained day is the day after start_date (the recorded output starts at 2014-01-02).
# NOTE(review): the observed table shown later in this notebook also starts at
# 2014-01-02 and is paired with the satellite rows by index - re-check that
# alignment before making the lower bound inclusive.
start_date = '2014-01-01'
end_date = '2018-12-31'

# Keep only the rows whose date falls inside (start_date, end_date]
mask = (rainfall_sat['date'] > start_date) & (rainfall_sat['date'] <= end_date)

# errors='ignore' keeps this cell re-runnable: rainfall_sat is reassigned here,
# so on a second run the excluded station column is already gone and a plain
# drop would raise KeyError.
rainfall_sat = (
    rainfall_sat.loc[mask]
    .drop(columns=['PCH STASIUN MACINI SOMBALA'], errors='ignore')
    .reset_index(drop=True)
)
rainfall_sat.head()

Unnamed: 0,date,PCH STASIUN LIMBUNGA,PCH STASIUN MANGEMPANG,PCH STASIUN LENGKESE,PCH STASIUN KAMPILI,PCH STASIUN KD1,PCH STASIUN MALINO,PCH STASIUN PANAIKANG,PCH STASIUN POS1,PCH STASIUN BILI-BILI
0,2014-01-02,9.390553,14.190951,0.0,12.058479,0.0,0.0,0.0,0.0,11.155343
1,2014-01-03,9.390553,0.0,10.02809,12.058479,0.0,9.724977,9.724977,8.186147,11.155343
2,2014-01-04,18.781105,0.0,20.056181,24.116959,14.426726,19.449955,19.449955,24.558439,22.310686
3,2014-01-05,18.781105,28.381903,20.056181,24.116959,28.853453,19.449955,19.449955,16.372293,22.310686
4,2014-01-06,14.561491,11.964704,10.550665,19.995438,11.64155,10.292566,10.292566,9.130646,25.466198


In [48]:
# Preview one satellite station series.
# The station is named explicitly instead of relying on the loop variable `i`,
# which is only assigned by a loop in a later cell and is therefore undefined
# when the notebook runs top to bottom. The name matches the recorded output.
rainfall_sat['PCH STASIUN BILI-BILI']

0       11.155343
1       11.155343
2       22.310686
3       22.310686
4       25.466198
          ...    
1820    47.869717
1821    47.869717
1822    15.956573
1823     0.000000
1824    47.869717
Name: PCH STASIUN BILI-BILI, Length: 1825, dtype: float64

In [49]:
# Preview one observed station series.
# The station is named explicitly instead of relying on the loop variable `i`,
# which is only assigned by a loop in a later cell and is therefore undefined
# when the notebook runs top to bottom. The name matches the recorded output.
rainfall_obs['PCH STASIUN BILI-BILI']

0        7
1        0
2       22
3        0
4       22
        ..
1820    12
1821    24
1822     6
1823     0
1824     8
Name: PCH STASIUN BILI-BILI, Length: 1825, dtype: int64

In [46]:
# Classify satellite rainfall per station, split each station table by class,
# and export one workbook per class with one sheet per station.
#
# Inputs (defined in earlier cells): rainfall_sat (a 'date' column followed by
# one column per station) and rainfall_obs (observed rainfall with the same
# station column names).

# Class labels, in the same order as the threshold conditions built below
values = ['Ringan','Sedang','Lebat','Sangat_Lebat']

# Label for rows that satisfy none of the conditions (NaN or negative rainfall).
# Without an explicit default np.select falls back to the integer 0, which is
# mixed with the string labels and is rejected with a TypeError by newer NumPy.
unclassified_label = 'Tidak_Terklasifikasi'

# Output workbooks are named <prefix><class label>.xlsx
sat_output_prefix = ('D:/Happy/satellite_correction/klasifikasi_hujan/classified_CHIRPS/acuan_koreksi/'
                     'Hasil_CHIRPSStasiunJeneberang_classified_')

# Every column after the first ('date') is treated as a station series
sat_stations = list(rainfall_sat.columns[1:])

# --- Rainfall classification -------------------------------------------------
# The loop works on station names rather than column positions. The loop
# variable keeps the name `i` because a later cell reads it after this loop.
sat_frames = {}
for i in sat_stations:
    gage = rainfall_sat[i]

    # Per-station table: date plus the satellite rainfall value
    sat_tmp = {"date": rainfall_sat.loc[:, "date"], 'satellite': gage}
    station_frame = pd.DataFrame(sat_tmp)

    # Half-open intervals [0, 20), [20, 50), [50, 100) and [100, inf)
    conditions = [
        (gage >= 0.) & (gage < 20),
        (gage >= 20) & (gage < 50),
        (gage >= 50) & (gage < 100),
        (gage >= 100)
        ]

    # Assign the class label of the first matching condition to each row
    station_frame['Klasifikasi'] = np.select(conditions, values, default=unclassified_label)

    # Attach the observed series of the same station; rows are paired by index.
    # NOTE(review): this assumes rainfall_obs covers the same dates in the same
    # order as rainfall_sat - confirm.
    station_frame[i] = rainfall_obs[i]

    sat_frames[i] = station_frame
    # Also publish under the original dynamic global name (station name without
    # its first 12 characters) so cells that look it up via globals() still work.
    # NOTE(review): the observation classification cell writes the same global
    # names, so whichever cell runs last wins.
    globals()["gage_"+i[12:]] = station_frame

    print ('Done: '+i)

# --- Split each station table by rainfall class ------------------------------
sat_by_class = {}
for k in sat_stations:
    station_frame = sat_frames[k]
    for value in values:
        subset = station_frame[station_frame['Klasifikasi'] == value].reset_index(drop=True)
        sat_by_class[(value, k)] = subset
        # Original dynamic global name, kept for backward compatibility
        globals()[value+"_gage"+k[12:]] = subset

# --- Export: one workbook per class, one sheet per station -------------------
for value in values:
    with pd.ExcelWriter(sat_output_prefix + value + '.xlsx') as writer:
        for gage_name in sat_stations:
            sat_by_class[(value, gage_name)].to_excel(writer, sheet_name=gage_name)
    
    

Done: PCH STASIUN LIMBUNGA
Done: PCH STASIUN MANGEMPANG
Done: PCH STASIUN LENGKESE
Done: PCH STASIUN KAMPILI
Done: PCH STASIUN KD1
Done: PCH STASIUN MALINO
Done: PCH STASIUN PANAIKANG
Done: PCH STASIUN POS1
Done: PCH STASIUN BILI-BILI


In [32]:
# Show the per-station table of whichever station the loop variable `i` last
# pointed at; the global name is "gage_" plus the station name with its
# 12-character "PCH STASIUN " prefix removed.
# NOTE(review): the recorded output has a 'gage' column, which is what the
# observation classification cell builds, not the satellite cell above -
# the output was probably produced by out-of-order execution; confirm.
globals()["gage_" + i[len("PCH STASIUN "):]]

Unnamed: 0,date,gage,Klasifikasi
0,2014-01-02,7,Ringan
1,2014-01-03,69,Lebat
2,2014-01-04,43,Sedang
3,2014-01-05,20,Sedang
4,2014-01-06,53,Lebat
...,...,...,...
1820,2018-12-27,12,Ringan
1821,2018-12-28,52,Lebat
1822,2018-12-29,48,Sedang
1823,2018-12-30,26,Sedang


## Kebutuhan Air Irigasi

In [27]:
# Load the observed rainfall workbook and discard its 'Unnamed: 0' column
obs_workbook = 'D:/Happy/satellite_correction/uji_data_hujan_obs/rainfall_QC.xlsx'
rainfall_obs = pd.read_excel(obs_workbook).drop(columns=['Unnamed: 0'])

# Display the observed table as the cell output
rainfall_obs

Unnamed: 0,TANGGAL,PCH STASIUN BILI-BILI,PCH STASIUN KAMPILI,PCH STASIUN KD1,PCH STASIUN LENGKESE,PCH STASIUN LIMBUNGA,PCH STASIUN MALINO,PCH STASIUN MANGEMPANG,PCH STASIUN PANAIKANG,PCH STASIUN POS1
0,2014-01-02,7,1.0,13,4,11,3,0.011297,0,7
1,2014-01-03,0,2.0,17,53,1,21,0.111354,29,69
2,2014-01-04,22,5.0,15,46,16,29,0.069395,29,43
3,2014-01-05,0,0.0,9,28,14,16,0.032277,12,20
4,2014-01-06,22,1.0,9,77,14,46,9.000000,62,53
...,...,...,...,...,...,...,...,...,...,...
1820,2018-12-27,12,4.0,6,10,10,0,0.000000,0,12
1821,2018-12-28,24,17.0,50,50,53,0,0.000000,1,52
1822,2018-12-29,6,4.0,28,44,30,0,0.000000,0,48
1823,2018-12-30,0,0.0,14,25,12,0,0.000000,4,26


In [30]:
# Classify observed rainfall per station, split each station table by class,
# and export one workbook per class with one sheet per station.
#
# Input (defined in an earlier cell): rainfall_obs, whose first column is
# 'TANGGAL' (the date) followed by one column per station.

# Class labels, in the same order as the threshold conditions built below
values = ['Ringan','Sedang','Lebat','Sangat_Lebat']

# Label for rows that satisfy none of the conditions (NaN or negative rainfall).
# Without an explicit default np.select falls back to the integer 0, which is
# mixed with the string labels and is rejected with a TypeError by newer NumPy.
unclassified_label = 'Tidak_Terklasifikasi'

# Output workbooks are named <prefix><class label>.xlsx
obs_output_prefix = 'D:/Happy/satellite_correction/klasifikasi_hujan/classified_obs/Rainfall_obs_classified_'

# Every column after the first ('TANGGAL') is treated as a station series
obs_stations = list(rainfall_obs.columns[1:])

# --- Rainfall classification -------------------------------------------------
# The loop works on station names rather than column positions. The loop
# variable keeps the name `i` because other cells read it after this loop.
obs_frames = {}
for i in obs_stations:
    gage = rainfall_obs[i]

    # Per-station table: date plus the observed rainfall value
    obs_tmp = {"date": rainfall_obs.loc[:, "TANGGAL"], 'gage': gage}
    station_frame = pd.DataFrame(obs_tmp)

    # Half-open intervals [0, 20), [20, 50), [50, 100) and [100, inf)
    conditions = [
        (gage >= 0.) & (gage < 20),
        (gage >= 20) & (gage < 50),
        (gage >= 50) & (gage < 100),
        (gage >= 100)
        ]

    # Assign the class label of the first matching condition to each row
    station_frame['Klasifikasi'] = np.select(conditions, values, default=unclassified_label)

    obs_frames[i] = station_frame
    # Also publish under the original dynamic global name (station name without
    # its first 12 characters) so cells that look it up via globals() still work.
    # NOTE(review): the satellite classification cell writes the same global
    # names, so whichever cell runs last wins.
    globals()["gage_"+i[12:]] = station_frame

    print ('Done: '+i)

# --- Split each station table by rainfall class ------------------------------
obs_by_class = {}
for k in obs_stations:
    station_frame = obs_frames[k]
    for value in values:
        subset = station_frame[station_frame['Klasifikasi'] == value].reset_index(drop=True)
        obs_by_class[(value, k)] = subset
        # Original dynamic global name, kept for backward compatibility
        globals()[value+"_gage"+k[12:]] = subset

# --- Export: one workbook per class, one sheet per station -------------------
# The subset is looked up with the export loop's own variable `n`. The previous
# code used the stale `value` left over from the split loop, so every workbook
# received the 'Sangat_Lebat' subsets regardless of its file name.
for n in values:
    with pd.ExcelWriter(obs_output_prefix + n + '.xlsx') as writer:
        for gage_name in obs_stations:
            obs_by_class[(n, gage_name)].to_excel(writer, sheet_name=gage_name)
    
    

Done: PCH STASIUN BILI-BILI
Done: PCH STASIUN KAMPILI
Done: PCH STASIUN KD1
Done: PCH STASIUN LENGKESE
Done: PCH STASIUN LIMBUNGA
Done: PCH STASIUN MALINO
Done: PCH STASIUN MANGEMPANG
Done: PCH STASIUN PANAIKANG
Done: PCH STASIUN POS1
