In [35]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import warnings

In [36]:
# Korean
plt.rcParams['font.family'] = 'NanumGothic'

In [37]:
#load csv
train1 = pd.read_csv('data/OBS_AWS_TIM_20231112111131.csv', encoding='cp949')
train2 = pd.read_csv('data/OBS_AWS_TIM_20231112110248.csv', encoding='cp949')

In [38]:
merged_df = pd.concat([train1, train2], ignore_index=True)

In [39]:
merged_df

Unnamed: 0,지점,지점명,일시,기온(°C),풍속(m/s),강수량(mm),습도(%)
0,116,관악(레),2022-10-01 01:00,19.8,1.4,0.0,21.0
1,116,관악(레),2022-10-01 02:00,19.1,0.9,0.0,20.0
2,116,관악(레),2022-10-01 03:00,18.9,0.2,0.0,20.0
3,116,관악(레),2022-10-01 04:00,18.5,0.9,0.0,19.0
4,116,관악(레),2022-10-01 05:00,18.9,2.3,0.0,23.0
...,...,...,...,...,...,...,...
271957,889,현충원,2023-11-10 20:00,3.2,1.8,0.0,42.0
271958,889,현충원,2023-11-10 21:00,2.5,1.6,0.0,47.0
271959,889,현충원,2023-11-10 22:00,2.3,1.9,0.0,47.0
271960,889,현충원,2023-11-10 23:00,2.0,1.7,0.0,47.0


In [40]:
merged_df.to_csv('data/weather.csv', index=False)

In [41]:
train = pd.read_csv('data/weather.csv')

In [42]:
train.head()

Unnamed: 0,지점,지점명,일시,기온(°C),풍속(m/s),강수량(mm),습도(%)
0,116,관악(레),2022-10-01 01:00,19.8,1.4,0.0,21.0
1,116,관악(레),2022-10-01 02:00,19.1,0.9,0.0,20.0
2,116,관악(레),2022-10-01 03:00,18.9,0.2,0.0,20.0
3,116,관악(레),2022-10-01 04:00,18.5,0.9,0.0,19.0
4,116,관악(레),2022-10-01 05:00,18.9,2.3,0.0,23.0


In [43]:
train.tail()

Unnamed: 0,지점,지점명,일시,기온(°C),풍속(m/s),강수량(mm),습도(%)
271957,889,현충원,2023-11-10 20:00,3.2,1.8,0.0,42.0
271958,889,현충원,2023-11-10 21:00,2.5,1.6,0.0,47.0
271959,889,현충원,2023-11-10 22:00,2.3,1.9,0.0,47.0
271960,889,현충원,2023-11-10 23:00,2.0,1.7,0.0,47.0
271961,889,현충원,2023-11-11 00:00,1.5,1.6,0.0,50.0


In [44]:
train.shape

(271962, 7)

In [45]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271962 entries, 0 to 271961
Data columns (total 7 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   지점       271962 non-null  int64  
 1   지점명      271962 non-null  object 
 2   일시       271962 non-null  object 
 3   기온(°C)   271393 non-null  float64
 4   풍속(m/s)  271507 non-null  float64
 5   강수량(mm)  269944 non-null  float64
 6   습도(%)    251409 non-null  float64
dtypes: float64(4), int64(1), object(2)
memory usage: 14.5+ MB


In [46]:
train1.isnull().sum()

지점             0
지점명            0
일시             0
기온(°C)       325
풍속(m/s)      360
강수량(mm)     1777
습도(%)      18293
dtype: int64

In [47]:
# create a new 'date' & 'time' column by slicing the '일시' column
train['date'] = train['일시'].str.slice(0,10)
train['time'] = train['일시'].str.slice(11,16)

In [48]:
train.head()

Unnamed: 0,지점,지점명,일시,기온(°C),풍속(m/s),강수량(mm),습도(%),date,time
0,116,관악(레),2022-10-01 01:00,19.8,1.4,0.0,21.0,2022-10-01,01:00
1,116,관악(레),2022-10-01 02:00,19.1,0.9,0.0,20.0,2022-10-01,02:00
2,116,관악(레),2022-10-01 03:00,18.9,0.2,0.0,20.0,2022-10-01,03:00
3,116,관악(레),2022-10-01 04:00,18.5,0.9,0.0,19.0,2022-10-01,04:00
4,116,관악(레),2022-10-01 05:00,18.9,2.3,0.0,23.0,2022-10-01,05:00


In [49]:
train['date'] = pd.to_datetime(train['date']).dt.strftime('%Y%m%d')
train = train.drop('일시', axis=1)

In [50]:
train.head()

Unnamed: 0,지점,지점명,기온(°C),풍속(m/s),강수량(mm),습도(%),date,time
0,116,관악(레),19.8,1.4,0.0,21.0,20221001,01:00
1,116,관악(레),19.1,0.9,0.0,20.0,20221001,02:00
2,116,관악(레),18.9,0.2,0.0,20.0,20221001,03:00
3,116,관악(레),18.5,0.9,0.0,19.0,20221001,04:00
4,116,관악(레),18.9,2.3,0.0,23.0,20221001,05:00


## Rain or Not

In [51]:
train['rain'] = (train['강수량(mm)'] > 0).astype(int)
# train = train.drop('강수량(mm)', axis=1)

In [52]:
train.head()

Unnamed: 0,지점,지점명,기온(°C),풍속(m/s),습도(%),date,time,rain
0,116,관악(레),19.8,1.4,21.0,20221001,01:00,0
1,116,관악(레),19.1,0.9,20.0,20221001,02:00,0
2,116,관악(레),18.9,0.2,20.0,20221001,03:00,0
3,116,관악(레),18.5,0.9,19.0,20221001,04:00,0
4,116,관악(레),18.9,2.3,23.0,20221001,05:00,0


## Hot or Hottest / Cold or Coldest

### 체감온도

In [53]:
def calculate_wind_chill(temperature, relative_humidity, wind_speed):
    # 기온과 상대습도로 습구온도(Tw) 계산
    Tw = temperature * np.arctan(0.151977 * np.sqrt(relative_humidity + 8.313659)) + \
         np.arctan(temperature + relative_humidity) - np.arctan(relative_humidity - 1.67633) + \
         0.00391838 * np.power(relative_humidity, 1.5) * np.arctan(0.023101 * relative_humidity) - 4.686035

    # 체감온도 공식 계산
    wind_chill = -0.2442 + 0.55399 * Tw + 0.45535 * temperature - \
                  0.0022 * np.power(Tw, 2) + 0.00278 * Tw * temperature + 3.0
    
    return wind_chill

### 폭염 주의보
폭염으로 인하여 다음 중 어느 하나에 해당하는 경우  
① 일최고체감온도 33℃이상인 상태가 2일 이상 지속될 것으로 예상될 때  
② 급격한 체감온도 상승 또는 폭염 장기화 등으로 중대한 피해발생이 예상될 때  

In [54]:
def check_heat_warning(wind_chill, consecutive_days, severe_heat):
    # 일최고체감온도가 33℃ 이상인 상태가 2일 이상 지속될 것으로 예상되는 경우
    condition1 = wind_chill >= 33 and consecutive_days >= 2

    # 급격한 체감온도 상승 또는 폭염 장기화 등으로 중대한 피해 발생이 예상되는 경우
    condition2 = severe_heat

    return 1 if condition1 or condition2 else 0


### 폭염 경보
폭염으로 인하여 다음 중 어느 하나에 해당하는 경우  
① 일최고체감온도 35℃이상인 상태가 2일 이상 지속될 것으로 예상될 때  
② 급격한 체감온도 상승 또는 폭염 장기화 등으로 광범위한 지역에서 중대한 피해발생이 예상될 때  

In [55]:
def check_heat_alert(wind_chill, consecutive_days, severe_heat):
    # 일최고체감온도가 35℃ 이상인 상태가 2일 이상 지속될 것으로 예상되는 경우
    condition1 = wind_chill >= 35 and consecutive_days >= 2

    # 급격한 체감온도 상승 또는 폭염 장기화 등으로 광범위한 지역에서 중대한 피해 발생이 예상되는 경우
    condition2 = severe_heat

    return 2 if condition1 or condition2 else 0

In [56]:
train['wind_chill'] = calculate_wind_chill(train['기온(°C)'], train['습도(%)'], train['풍속(m/s)'])

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [57]:
train.head()

Unnamed: 0,지점,지점명,기온(°C),풍속(m/s),습도(%),date,time,rain,wind_chill
0,116,관악(레),19.8,1.4,21.0,20221001,01:00,0,17.156784
1,116,관악(레),19.1,0.9,20.0,20221001,02:00,0,16.444482
2,116,관악(레),18.9,0.2,20.0,20221001,03:00,0,16.271167
3,116,관악(레),18.5,0.9,19.0,20221001,04:00,0,15.823618
4,116,관악(레),18.9,2.3,23.0,20221001,05:00,0,16.56754


### Hot or Hottest

In [58]:
train['heat_warning'] = train.apply(lambda row: check_heat_warning(row['wind_chill'], 2, False), axis=1)
train['heat_alert'] = train.apply(lambda row: check_heat_alert(row['wind_chill'], 2, False), axis=1)


In [59]:
train.loc[train['heat_alert'] == 2, 'heat_warning'] = 0

In [60]:
train

Unnamed: 0,지점,지점명,기온(°C),풍속(m/s),습도(%),date,time,rain,wind_chill,heat_warning,heat_alert
0,116,관악(레),19.8,1.4,21.0,20221001,01:00,0,17.156784,0,0
1,116,관악(레),19.1,0.9,20.0,20221001,02:00,0,16.444482,0,0
2,116,관악(레),18.9,0.2,20.0,20221001,03:00,0,16.271167,0,0
3,116,관악(레),18.5,0.9,19.0,20221001,04:00,0,15.823618,0,0
4,116,관악(레),18.9,2.3,23.0,20221001,05:00,0,16.567540,0,0
...,...,...,...,...,...,...,...,...,...,...,...
271957,889,현충원,3.2,1.8,42.0,20231110,20:00,0,3.518115,0,0
271958,889,현충원,2.5,1.6,47.0,20231110,21:00,0,3.033929,0,0
271959,889,현충원,2.3,1.9,47.0,20231110,22:00,0,2.847590,0,0
271960,889,현충원,2.0,1.7,47.0,20231110,23:00,0,2.568197,0,0


### 한파 주의보
10월~4월에 다음 중 하나에 해당하는 경우  
① 아침최저기온이 전날보다 10℃ 이상 하강하여 3℃ 이하이고 평년값보다 3℃가 낮을 것으로 예상될때  
② 아침최저기온이 -12℃ 이하가 2일 이상 지속될 것이 예상될 때  
③ 급격한 저온현상으로 중대한 피해가 예상될 때  

In [61]:
def check_cold_warning(temperature, forecast_temperature, consecutive_days):
    # 아침 최저기온이 전날보다 10℃ 이상 하강하여 3℃ 이하이고 평년값보다 3℃가 낮을 경우
    condition1 = temperature - forecast_temperature >= 10 and temperature <= 3 and forecast_temperature < -3

    # 아침 최저기온이 -12℃ 이하가 2일 이상 지속될 경우
    condition2 = temperature <= -12 and consecutive_days >= 2

    return -1 if condition1 or condition2 else 0

### 한파경보
10월~4월에 다음 중 하나에 해당하는 경우  
① 아침최저기온이 전날보다 15℃ 이상 하강하여 3℃ 이하이고 평년값보다 3℃가 낮을 것으로 예상될 때  
② 아침최저기온이 -15℃ 이하가 2일 이상 지속될 것이 예상될 때  
③ 급격한 저온현상으로 광범위한 지역에서 중대한 피해가 예상될 때  

In [62]:
def check_cold_alert(temperature, forecast_temperature, consecutive_days):
    # 아침 최저기온이 전날보다 15℃ 이상 하강하여 3℃ 이하이고 평년값보다 3℃가 낮을 경우
    condition1 = temperature - forecast_temperature >= 15 and temperature <= 3 and forecast_temperature < -3

    # 아침 최저기온이 -15℃ 이하가 2일 이상 지속될 경우
    condition2 = temperature <= -15 and consecutive_days >= 2

    return -2 if condition1 or condition2 else 0

In [63]:
train['cold_warning'] = train.apply(lambda row: check_cold_warning(row['기온(°C)'], row['기온(°C)'], 3), axis=1)
train['cold_alert'] = train.apply(lambda row: check_cold_alert(row['기온(°C)'], row['기온(°C)'], 3), axis=1)


In [64]:
train.loc[train['cold_alert'] == -2, 'cold_warning'] = 0

In [65]:
train

Unnamed: 0,지점,지점명,기온(°C),풍속(m/s),습도(%),date,time,rain,wind_chill,heat_warning,heat_alert,cold_warning,cold_alert
0,116,관악(레),19.8,1.4,21.0,20221001,01:00,0,17.156784,0,0,0,0
1,116,관악(레),19.1,0.9,20.0,20221001,02:00,0,16.444482,0,0,0,0
2,116,관악(레),18.9,0.2,20.0,20221001,03:00,0,16.271167,0,0,0,0
3,116,관악(레),18.5,0.9,19.0,20221001,04:00,0,15.823618,0,0,0,0
4,116,관악(레),18.9,2.3,23.0,20221001,05:00,0,16.567540,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
271957,889,현충원,3.2,1.8,42.0,20231110,20:00,0,3.518115,0,0,0,0
271958,889,현충원,2.5,1.6,47.0,20231110,21:00,0,3.033929,0,0,0,0
271959,889,현충원,2.3,1.9,47.0,20231110,22:00,0,2.847590,0,0,0,0
271960,889,현충원,2.0,1.7,47.0,20231110,23:00,0,2.568197,0,0,0,0


In [66]:
train.head()

Unnamed: 0,지점,지점명,기온(°C),풍속(m/s),습도(%),date,time,rain,wind_chill,heat_warning,heat_alert,cold_warning,cold_alert
0,116,관악(레),19.8,1.4,21.0,20221001,01:00,0,17.156784,0,0,0,0
1,116,관악(레),19.1,0.9,20.0,20221001,02:00,0,16.444482,0,0,0,0
2,116,관악(레),18.9,0.2,20.0,20221001,03:00,0,16.271167,0,0,0,0
3,116,관악(레),18.5,0.9,19.0,20221001,04:00,0,15.823618,0,0,0,0
4,116,관악(레),18.9,2.3,23.0,20221001,05:00,0,16.56754,0,0,0,0


In [67]:
train.to_csv('preprocessed_weather.csv', index=False)

In [68]:
train

Unnamed: 0,지점,지점명,기온(°C),풍속(m/s),습도(%),date,time,rain,wind_chill,heat_warning,heat_alert,cold_warning,cold_alert
0,116,관악(레),19.8,1.4,21.0,20221001,01:00,0,17.156784,0,0,0,0
1,116,관악(레),19.1,0.9,20.0,20221001,02:00,0,16.444482,0,0,0,0
2,116,관악(레),18.9,0.2,20.0,20221001,03:00,0,16.271167,0,0,0,0
3,116,관악(레),18.5,0.9,19.0,20221001,04:00,0,15.823618,0,0,0,0
4,116,관악(레),18.9,2.3,23.0,20221001,05:00,0,16.567540,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
271957,889,현충원,3.2,1.8,42.0,20231110,20:00,0,3.518115,0,0,0,0
271958,889,현충원,2.5,1.6,47.0,20231110,21:00,0,3.033929,0,0,0,0
271959,889,현충원,2.3,1.9,47.0,20231110,22:00,0,2.847590,0,0,0,0
271960,889,현충원,2.0,1.7,47.0,20231110,23:00,0,2.568197,0,0,0,0
