In [69]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from tqdm.auto import tqdm

In [70]:
DATA = Path("../../data")

In [71]:
train = pd.read_csv(DATA / "fog_train_time-interpolation.csv")
test = pd.read_csv(DATA / "fog_test_time-interpolation.csv")

In [72]:
train

Unnamed: 0,년도,월,일,시간,분,지점번호,지점번호1,지점번호2,NaN유무,시정구간,년월일시분,10분평균풍향,10분평균풍속,10분평균기온,강수유무,10분평균상대습도,10분일사량합,10분평균지면온도,10분평균시정
0,I,1,1,0,10,AA,A,A,0,4,2020-01-01 00:10:00,0.0,0.0,-6.4,0.0,38.9,0.0,-2.8,20000.0
1,I,1,1,0,20,AA,A,A,0,4,2020-01-01 00:20:00,0.0,0.0,-6.3,0.0,37.9,0.0,-2.7,20000.0
2,I,1,1,0,30,AA,A,A,0,4,2020-01-01 00:30:00,0.0,0.0,-6.3,0.0,40.0,0.0,-2.6,20000.0
3,I,1,1,0,40,AA,A,A,0,4,2020-01-01 00:40:00,27.8,0.4,-6.2,0.0,39.5,0.0,-2.6,20000.0
4,I,1,1,0,50,AA,A,A,0,4,2020-01-01 00:50:00,59.7,0.5,-6.1,0.0,39.8,0.0,-2.5,20000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3156455,K,12,31,23,10,EC,E,C,0,4,2022-12-31 23:10:00,305.3,7.7,2.5,0.0,50.2,0.0,-0.9,20000.0
3156456,K,12,31,23,20,EC,E,C,0,4,2022-12-31 23:20:00,293.8,5.7,2.3,0.0,50.1,0.0,-1.3,20000.0
3156457,K,12,31,23,30,EC,E,C,0,4,2022-12-31 23:30:00,274.2,4.9,2.2,0.0,51.0,0.0,-1.4,20000.0
3156458,K,12,31,23,40,EC,E,C,0,4,2022-12-31 23:40:00,270.3,4.6,2.1,0.0,51.7,0.0,-1.6,20000.0


In [73]:
# 일교차 (일 최고기온과 일 최저기온을 data leakage로 인해 확인 할 수 없으므로 실시간 일교차를 계산함)

def cal_real_time_tempt(temperature, mode='hour'):
    min_temperature = []
    max_temperature = []
    dtf = []
    mean_temperature = []
    day_temperature = []
    count = 0
    
    temperature_iterator = tqdm(enumerate(temperature), total = len(temperature), desc=f"real-time calculating(mode:{mode})")
    if mode == 'hour':
        for _, temp in temperature_iterator:
            if count == 6:
                count = 0
                day_temperature = []
            day_temperature.append(temp)
            max_temperature.append(max(day_temperature))
            min_temperature.append(min(day_temperature))
            dtf.append(max(day_temperature) - min(day_temperature))
            mean_temperature.append(np.mean(np.array(day_temperature)))
            count += 1
    
    elif mode == 'day':
        for _, temp in temperature_iterator:
            if count == 6 * 24:
                count = 0
                day_temperature = []
            day_temperature.append(temp)
            max_temperature.append(max(day_temperature))
            min_temperature.append(min(day_temperature))
            dtf.append(max(day_temperature) - min(day_temperature))
            mean_temperature.append(np.mean(np.array(day_temperature)))
            count += 1
    
    return min_temperature, max_temperature, mean_temperature, dtf

In [74]:
h_min_t, h_max_t, h_mean_t, h_dtf =  cal_real_time_tempt(train["10분평균기온"], mode='hour')

real-time calculating(mode:hour):   0%|          | 0/3156460 [00:00<?, ?it/s]

In [75]:
train["1시간실시간최저기온"] = h_min_t
train["1시간실시간최고기온"] = h_max_t
train["1시간실시간평균기온"] = h_mean_t
train["1시간실시간일교차"] = h_dtf

In [76]:
train[["1시간실시간최저기온","1시간실시간최고기온","1시간실시간평균기온","1시간실시간일교차"]]

Unnamed: 0,1시간실시간최저기온,1시간실시간최고기온,1시간실시간평균기온,1시간실시간일교차
0,-6.4,-6.4,-6.400000,0.0
1,-6.4,-6.3,-6.350000,0.1
2,-6.4,-6.3,-6.333333,0.1
3,-6.4,-6.2,-6.300000,0.2
4,-6.4,-6.1,-6.260000,0.3
...,...,...,...,...
3156455,2.3,2.5,2.350000,0.2
3156456,2.3,2.3,2.300000,0.0
3156457,2.2,2.3,2.250000,0.1
3156458,2.1,2.3,2.200000,0.2


In [77]:
d_min_t, d_max_t, d_mean_t, d_dtf =  cal_real_time_tempt(train["10분평균기온"], mode='day')

real-time calculating(mode:day):   0%|          | 0/3156460 [00:00<?, ?it/s]

In [78]:
train["1일실시간최저기온"] = d_min_t
train["1일실시간최고기온"] = d_max_t
train["1일실시간평균기온"] = d_mean_t
train["1일실시간일교차"] = d_dtf

In [79]:
train[["1일실시간최저기온","1일실시간최고기온","1일실시간평균기온","1일실시간일교차"]]

Unnamed: 0,1일실시간최저기온,1일실시간최고기온,1일실시간평균기온,1일실시간일교차
0,-6.4,-6.4,-6.400000,0.0
1,-6.4,-6.3,-6.350000,0.1
2,-6.4,-6.3,-6.333333,0.1
3,-6.4,-6.2,-6.300000,0.2
4,-6.4,-6.1,-6.260000,0.3
...,...,...,...,...
3156455,-0.1,6.8,3.128333,6.9
3156456,-0.1,6.8,3.121488,6.9
3156457,-0.1,6.8,3.113934,6.9
3156458,-0.1,6.8,3.105691,6.9


In [80]:
train

Unnamed: 0,년도,월,일,시간,분,지점번호,지점번호1,지점번호2,NaN유무,시정구간,...,10분평균지면온도,10분평균시정,1시간실시간최저기온,1시간실시간최고기온,1시간실시간평균기온,1시간실시간일교차,1일실시간최저기온,1일실시간최고기온,1일실시간평균기온,1일실시간일교차
0,I,1,1,0,10,AA,A,A,0,4,...,-2.8,20000.0,-6.4,-6.4,-6.400000,0.0,-6.4,-6.4,-6.400000,0.0
1,I,1,1,0,20,AA,A,A,0,4,...,-2.7,20000.0,-6.4,-6.3,-6.350000,0.1,-6.4,-6.3,-6.350000,0.1
2,I,1,1,0,30,AA,A,A,0,4,...,-2.6,20000.0,-6.4,-6.3,-6.333333,0.1,-6.4,-6.3,-6.333333,0.1
3,I,1,1,0,40,AA,A,A,0,4,...,-2.6,20000.0,-6.4,-6.2,-6.300000,0.2,-6.4,-6.2,-6.300000,0.2
4,I,1,1,0,50,AA,A,A,0,4,...,-2.5,20000.0,-6.4,-6.1,-6.260000,0.3,-6.4,-6.1,-6.260000,0.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3156455,K,12,31,23,10,EC,E,C,0,4,...,-0.9,20000.0,2.3,2.5,2.350000,0.2,-0.1,6.8,3.128333,6.9
3156456,K,12,31,23,20,EC,E,C,0,4,...,-1.3,20000.0,2.3,2.3,2.300000,0.0,-0.1,6.8,3.121488,6.9
3156457,K,12,31,23,30,EC,E,C,0,4,...,-1.4,20000.0,2.2,2.3,2.250000,0.1,-0.1,6.8,3.113934,6.9
3156458,K,12,31,23,40,EC,E,C,0,4,...,-1.6,20000.0,2.1,2.3,2.200000,0.2,-0.1,6.8,3.105691,6.9


In [90]:
train["10분평균기온차"] = train["10분평균기온"].diff()
train.loc[0, "10분평균기온차"] = 0

In [89]:
train["10분평균지면온도차"] = train["10분평균지면온도"].diff()
train.loc[0, "10분평균지면온도차"] = 0

In [92]:
train["1분당기온변화율"] = train["10분평균기온차"] / 10

In [94]:
train["1분당지면온도변화율"] = train["10분평균지면온도차"] / 10

In [97]:
train["10분평균상대습도차"] = train["10분평균상대습도"].diff()
train.loc[0, "10분평균상대습도차"] = 0

In [99]:
train["1분당상대습도변화율"] = train["10분평균상대습도차"] / 10

In [100]:
train

Unnamed: 0,년도,월,일,시간,분,지점번호,지점번호1,지점번호2,NaN유무,시정구간,...,1일실시간최저기온,1일실시간최고기온,1일실시간평균기온,1일실시간일교차,10분평균기온차,10분평균지면온도차,1분당기온변화율,1분당지면온도변화율,10분평균상대습도차,1분당상대습도변화율
0,I,1,1,0,10,AA,A,A,0,4,...,-6.4,-6.4,-6.400000,0.0,0.0,0.0,0.00,0.00,0.0,0.00
1,I,1,1,0,20,AA,A,A,0,4,...,-6.4,-6.3,-6.350000,0.1,0.1,0.1,0.01,0.01,-1.0,-0.10
2,I,1,1,0,30,AA,A,A,0,4,...,-6.4,-6.3,-6.333333,0.1,0.0,0.1,0.00,0.01,2.1,0.21
3,I,1,1,0,40,AA,A,A,0,4,...,-6.4,-6.2,-6.300000,0.2,0.1,0.0,0.01,0.00,-0.5,-0.05
4,I,1,1,0,50,AA,A,A,0,4,...,-6.4,-6.1,-6.260000,0.3,0.1,0.1,0.01,0.01,0.3,0.03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3156455,K,12,31,23,10,EC,E,C,0,4,...,-0.1,6.8,3.128333,6.9,0.1,0.0,0.01,0.00,-0.1,-0.01
3156456,K,12,31,23,20,EC,E,C,0,4,...,-0.1,6.8,3.121488,6.9,-0.2,-0.4,-0.02,-0.04,-0.1,-0.01
3156457,K,12,31,23,30,EC,E,C,0,4,...,-0.1,6.8,3.113934,6.9,-0.1,-0.1,-0.01,-0.01,0.9,0.09
3156458,K,12,31,23,40,EC,E,C,0,4,...,-0.1,6.8,3.105691,6.9,-0.1,-0.2,-0.01,-0.02,0.7,0.07


In [129]:
import math
train["10분평균포화수증기압"] = train["10분평균기온"].apply(lambda x: 6.112*math.exp((17.67*x)))

In [130]:
train

Unnamed: 0,년도,월,일,시간,분,지점번호,지점번호1,지점번호2,NaN유무,시정구간,...,1일실시간평균기온,1일실시간일교차,10분평균기온차,10분평균지면온도차,1분당기온변화율,1분당지면온도변화율,10분평균상대습도차,1분당상대습도변화율,10분평균절대습도,10분평균포화수증기압
0,I,1,1,0,10,AA,A,A,0,4,...,-6.400000,0.0,0.0,0.0,0.00,0.00,0.0,0.00,1.199016,4.706402e-49
1,I,1,1,0,20,AA,A,A,0,4,...,-6.350000,0.1,0.1,0.1,0.01,0.01,-1.0,-0.10,1.176724,2.754783e-48
2,I,1,1,0,30,AA,A,A,0,4,...,-6.333333,0.1,0.0,0.1,0.00,0.01,2.1,0.21,1.241925,2.754783e-48
3,I,1,1,0,40,AA,A,A,0,4,...,-6.300000,0.2,0.1,0.0,0.01,0.00,-0.5,-0.05,1.235348,1.612448e-47
4,I,1,1,0,50,AA,A,A,0,4,...,-6.260000,0.3,0.1,0.1,0.01,0.01,0.3,0.03,1.253804,9.438088e-47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3156455,K,12,31,23,10,EC,E,C,0,4,...,3.128333,6.9,0.1,0.0,0.01,0.00,-0.1,-0.01,2.887060,9.357118e+19
3156456,K,12,31,23,20,EC,E,C,0,4,...,3.121488,6.9,-0.2,-0.4,-0.02,-0.04,-0.1,-0.01,2.842656,2.731149e+18
3156457,K,12,31,23,30,EC,E,C,0,4,...,3.113934,6.9,-0.1,-0.1,-0.01,-0.01,0.9,0.09,2.874223,4.666025e+17
3156458,K,12,31,23,40,EC,E,C,0,4,...,3.105691,6.9,-0.1,-0.2,-0.01,-0.02,0.7,0.07,2.894023,7.971658e+16


In [106]:

def cal_abs_humidity(data):
    return (data["10분평균상대습도"]*6.112*math.exp((17.67*data["10분평균기온"])/(data["10분평균기온"]+243.5))*2.1674) / (data["10분평균기온"] + 273.15)

In [107]:
train["10분평균절대습도"] = train[["10분평균상대습도", "10분평균기온"]].apply(cal_abs_humidity, axis=1)

In [108]:
train

Unnamed: 0,년도,월,일,시간,분,지점번호,지점번호1,지점번호2,NaN유무,시정구간,...,1일실시간최고기온,1일실시간평균기온,1일실시간일교차,10분평균기온차,10분평균지면온도차,1분당기온변화율,1분당지면온도변화율,10분평균상대습도차,1분당상대습도변화율,10분평균절대습도
0,I,1,1,0,10,AA,A,A,0,4,...,-6.4,-6.400000,0.0,0.0,0.0,0.00,0.00,0.0,0.00,1.199016
1,I,1,1,0,20,AA,A,A,0,4,...,-6.3,-6.350000,0.1,0.1,0.1,0.01,0.01,-1.0,-0.10,1.176724
2,I,1,1,0,30,AA,A,A,0,4,...,-6.3,-6.333333,0.1,0.0,0.1,0.00,0.01,2.1,0.21,1.241925
3,I,1,1,0,40,AA,A,A,0,4,...,-6.2,-6.300000,0.2,0.1,0.0,0.01,0.00,-0.5,-0.05,1.235348
4,I,1,1,0,50,AA,A,A,0,4,...,-6.1,-6.260000,0.3,0.1,0.1,0.01,0.01,0.3,0.03,1.253804
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3156455,K,12,31,23,10,EC,E,C,0,4,...,6.8,3.128333,6.9,0.1,0.0,0.01,0.00,-0.1,-0.01,2.887060
3156456,K,12,31,23,20,EC,E,C,0,4,...,6.8,3.121488,6.9,-0.2,-0.4,-0.02,-0.04,-0.1,-0.01,2.842656
3156457,K,12,31,23,30,EC,E,C,0,4,...,6.8,3.113934,6.9,-0.1,-0.1,-0.01,-0.01,0.9,0.09,2.874223
3156458,K,12,31,23,40,EC,E,C,0,4,...,6.8,3.105691,6.9,-0.1,-0.2,-0.01,-0.02,0.7,0.07,2.894023


In [131]:
train[train["10분평균상대습도"]==0]["10분평균상대습도"]
# train[train["10분평균상대습도"]<=2]

2893505    0.0
Name: 10분평균상대습도, dtype: float64

In [138]:
train.loc[2893505, "10분평균상대습도"] = round(64.8 + (67.7-64.8)*(1/2), 1)

In [None]:
train["10분평균상대습도"]

In [136]:
train.loc[2893505, "10분평균상대습도"]

66.25