In [1]:
import os
import numpy as np
import pandas as pd

from datetime import datetime

import matplotlib.pyplot as plt 
import seaborn as sns

In [2]:
import platform
import matplotlib.font_manager as fm

_os_name = platform.system()

# font_path는 사용할 폰트에 따라 변경하면 됨
_font_path = 'c:/Windows/Fonts/malgun.ttf' if _os_name=='Windows' \
    else '/usr/share/fonts/truetype/nanum/NanumGothic.ttf'
_font_family = fm.FontProperties(fname=_font_path).get_name()

# font 설정
plt.rcParams['font.family'] = _font_family

# 폰트캐시까지 삭제 후 한글폰트가 사용될 수 있도록 캐시 삭제
# --> 이렇게 해야 비로소 한글이 표현되는 경우가 많음
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = [_font_family]

_fe = fm.FontEntry(
    fname='/usr/share/fonts/truetype/nanum/NanumGothic.ttf',
    name='NanumGothic'
)
fm.fontManager.ttflist.insert(0, _fe)

In [3]:
_PATH_BASE = os.path.join(os.getcwd(), 'data', '41-turbin')
get_path = lambda turbin: os.path.join(_PATH_BASE, f'{turbin}_turbin.csv')

In [4]:
df_data = pd.read_csv(get_path('b'))

In [5]:
# 처음 2개와 마지막 4개
df_data = df_data.iloc[2:-4]

# 기존 인덱스를 컬럼으로 변경하고 새 인덱스를 생성함
df_data.reset_index(inplace=True)
# 기존 인덱스 제거
df_data.drop('index', axis=1, inplace=True)

In [6]:
df_data_back = df_data.copy()

In [7]:
target_col = 'ACTIVE_POWER'

In [8]:
# 결측치 처리
# 발전량이 음수인 컬럼의 풍향/풍속/발전량 값을 NaN으로 변경
df_data.loc[df_data[target_col] < 0, ['WIND_SPEED', 'WIND_DIR']+[target_col]] = np.nan

In [9]:
# 시계열성값을 이용한 결측치 처리
df_data = df_data.interpolate()

In [10]:
df_data.isna().sum()

TURBINE_TIME    0
WIND_SPEED      0
WIND_DIR        0
ACTIVE_POWER    0
month           0
hour            0
dtype: int64

In [19]:
df_temp = df_data.head(24*10)

In [20]:
df_temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240 entries, 0 to 239
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   TURBINE_TIME  240 non-null    object 
 1   WIND_SPEED    240 non-null    float64
 2   WIND_DIR      240 non-null    float64
 3   ACTIVE_POWER  240 non-null    float64
 4   month         240 non-null    int64  
 5   hour          240 non-null    int64  
dtypes: float64(3), int64(2), object(1)
memory usage: 11.4+ KB


In [21]:
df_temp['TURBINE_TIME'] = pd.to_datetime(df_temp['TURBINE_TIME'])
df_temp.set_index('TURBINE_TIME', inplace=True)
df_temp.index.name='index'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp['TURBINE_TIME'] = pd.to_datetime(df_temp['TURBINE_TIME'])


In [22]:
df_temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 240 entries, 2020-01-16 00:00:00 to 2020-01-25 23:00:00
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   WIND_SPEED    240 non-null    float64
 1   WIND_DIR      240 non-null    float64
 2   ACTIVE_POWER  240 non-null    float64
 3   month         240 non-null    int64  
 4   hour          240 non-null    int64  
dtypes: float64(3), int64(2)
memory usage: 11.2 KB


In [25]:
df_temp_daily = df_temp.resample('D').agg({
    'WIND_SPEED': ['mean', 'sum', 'min', 'max'], 
    'WIND_DIR': ['mean', 'sum', 'min', 'max'], 
    'ACTIVE_POWER': ['mean', 'sum', 'min', 'max']
})

df_temp_daily.columns = [f'{col}_{stat}' for col, stat in df_temp_daily.columns]

In [24]:
df_temp_daily.head()

Unnamed: 0_level_0,WIND_SPEED,WIND_SPEED,WIND_SPEED,WIND_SPEED,WIND_DIR,WIND_DIR,WIND_DIR,WIND_DIR,ACTIVE_POWER,ACTIVE_POWER,ACTIVE_POWER,ACTIVE_POWER
Unnamed: 0_level_1,mean,sum,min,max,mean,sum,min,max,mean,sum,min,max
index,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2020-01-16,5.292346,127.016299,3.59,7.61,0.160285,3.846831,-4.751667,5.603333,616.109519,14786.628457,125.82,1693.653333
2020-01-17,5.445016,130.680395,3.231667,7.315,0.026999,0.647966,-3.991667,5.363333,696.514745,16716.35387,84.915,1493.343333
2020-01-18,4.53094,108.742548,2.958065,6.525,-1.173481,-28.163552,-10.466667,2.945,379.876276,9117.030624,29.477778,1023.4
2020-01-19,4.192735,100.625641,2.7,6.881667,-0.484117,-11.618802,-11.2,4.105,296.911928,7125.88626,2.9,1152.365
2020-01-20,5.905877,141.741045,4.381667,7.446667,0.624342,14.984209,-3.241667,5.63,790.340208,18968.165,284.12,1551.218333
