In [20]:
import pandas as pd
import calendar as cal
import datetime
import re
from glob import glob
from PIL import Image

## 0. 데이터 가져오기
- 1) 당진시 날씨 예보 데이터
- 2) 당진시 날씨 관측 데이터
- 3) 동서발전 발전소별 발전량 데이터
- 4) sample
- 5) 울산광역시 날씨 예보 데이터
- 6) 울산광역시 날씨 관측 데이터

In [21]:
path = "./data/"
files = sorted(glob(path+"*.csv"))
files

['./data\\dangjin_fcst_data.csv',
 './data\\dangjin_obs_data.csv',
 './data\\energy.csv',
 './data\\sample_submission.csv',
 './data\\site_info.csv',
 './data\\ulsan_fcst_data.csv',
 './data\\ulsan_obs_data.csv']

In [22]:
# 1. 당진 발전소 동네 날씨 정보
dj_fcst = pd.read_csv(files[0])
dj_fcst.head()

# Forecast time: 예보 발표 시점 2018-03-01 11:00:00
# forcast: 예보 시간 11시에 발표해서 forcast 만큼 지난 시간 +4면 15:00:00
# Temperature: 온도 (도씨)
# Humidity: 습도 (%)
# WindSpeed: 풍속 (m/s)
# WindDirection: 풍향
# Cloud: 하늘 상태
# 1-맑음 / 2-구름보통 / 3-구름많음 / 4-흐림


Unnamed: 0,Forecast time,forecast,Temperature,Humidity,WindSpeed,WindDirection,Cloud
0,2018-03-01 11:00:00,4.0,0.0,60.0,7.3,309.0,2.0
1,2018-03-01 11:00:00,7.0,-2.0,60.0,7.1,314.0,1.0
2,2018-03-01 11:00:00,10.0,-2.0,60.0,6.7,323.0,1.0
3,2018-03-01 11:00:00,13.0,-2.0,55.0,6.7,336.0,1.0
4,2018-03-01 11:00:00,16.0,-4.0,55.0,5.5,339.0,1.0


In [23]:
# 2. 당진 발전소 인근 기상 관측 정보
dj_obs = pd.read_csv(files[1])
dj_obs.head()

# 지점: 지점 코드
# 지점명: 관측소 지점 (서산)
# 일시: 관측 시간 Y-M-D H:M
# 기온 , 풍속, 풍향, 습도
# 전운량: 낮을수록 구름이 적다.

Unnamed: 0,지점,지점명,일시,기온(°C),풍속(m/s),풍향(16방위),습도(%),전운량(10분위)
0,129,서산,2018-03-01 00:00,3.1,3.6,340.0,96.0,
1,129,서산,2018-03-01 01:00,2.8,0.7,140.0,97.0,
2,129,서산,2018-03-01 02:00,2.6,3.2,320.0,95.0,
3,129,서산,2018-03-01 03:00,2.0,1.9,230.0,97.0,
4,129,서산,2018-03-01 04:00,2.2,2.1,180.0,97.0,


In [24]:
# 3. 발전소별 발전량
energy = pd.read_csv(files[2])
energy[8:18]

# time: 1시간 단위 계량된 시간 1:00:00 이면 0:00:00 ~ 1:00:00
# dangjin_floating: 당진수상태양광 발전량(KW)
# dangjin_warehouse: 당진자재창고태양광 발전량(KW)
# dangjin: 당진태양광 발전량(KW)
# ulsan: 울산태양광 발전량(KW)


Unnamed: 0,time,dangjin_floating,dangjin_warehouse,dangjin,ulsan
8,2018-03-01 9:00:00,36.0,33.0,37,35
9,2018-03-01 10:00:00,313.0,209.0,318,71
10,2018-03-01 11:00:00,532.0,296.0,490,82
11,2018-03-01 12:00:00,607.0,315.0,550,334
12,2018-03-01 13:00:00,614.0,474.0,727,372
13,2018-03-01 14:00:00,608.0,544.0,733,346
14,2018-03-01 15:00:00,641.0,496.0,672,318
15,2018-03-01 16:00:00,536.0,391.0,546,258
16,2018-03-01 17:00:00,348.0,271.0,364,160
17,2018-03-01 18:00:00,134.0,80.0,110,30


In [25]:
# 4. 예측한 발전량 제출 양식
sample_sub = pd.read_csv(files[3])
sample_sub.head(2)

# time: 지난 한시간동안 발전량 예측
# dangjin_floating: 당진수상태양광 예측 발전량 (KW)
# dangjin_warehouse: 당진자재창고태양광 예측 발전량 (KW)
# ulsan: 울산태양광 예측 발전량

Unnamed: 0,time,dangjin_floating,dangjin_warehouse,dangjin,ulsan
0,2021-02-01 01:00:00,0,0,0,0
1,2021-02-01 02:00:00,0,0,0,0


In [26]:
# 5. 발전소 정보
site = pd.read_csv(files[4])
site

# Id: 발전소 식별자
# Capacity: 발전소 발전용량(MW)
# Address: 주소
# InstallationAngle: 설치각
# IncidentAngle: 입사각
# Latitude: 위도
# Longitude: 경도

Unnamed: 0,Id,Capacity,Address,InstallationAngle,IncidentAngle,Latitude,Longitude
0,당진수상태양광,1.0,충남 당진시 석문면 교로길 30,30.0,30.0,37.050753,126.510299
1,당진자재창고태양광,0.7,충남 당진시 석문면 교로길 30,30.0,30.0,37.050753,126.510299
2,당진태양광,1.0,충남 당진시 석문면 교로길 30,30.0,30.0,37.050753,126.510299
3,울산태양광,0.5,울산광역시 남구 용잠로 623,20.0,20.0,35.477651,129.380778


In [27]:
# 6. 울산 발전소 동네 날씨 정보
us_fcst = pd.read_csv(files[5])
us_fcst.head()

# Forecast time: 예보 발표 시점 2018-03-01 11:00:00
# forcast: 예보 시간 11시에 발표해서 forcast 만큼 지난 시간 +4면 15:00:00
# Temperature: 온도 (도씨)
# Humidity: 습도 (%)
# WindSpeed: 풍속 (m/s)
# WindDirection: 풍향
# Cloud: 하늘 상태
# 1-맑음 / 2-구름보통 / 3-구름많음 / 4-흐림


Unnamed: 0,Forecast time,forecast,Temperature,Humidity,WindSpeed,WindDirection,Cloud
0,2018-03-01 11:00:00,4.0,8.0,20.0,14.0,298.0,2.0
1,2018-03-01 11:00:00,7.0,4.0,20.0,4.3,298.0,2.0
2,2018-03-01 11:00:00,10.0,3.0,30.0,1.9,309.0,2.0
3,2018-03-01 11:00:00,13.0,0.0,40.0,1.5,318.0,2.0
4,2018-03-01 11:00:00,16.0,-1.0,45.0,1.8,308.0,2.0


In [28]:
# 7. 울산 발전소 인근 기상 관측 정보
us_obs = pd.read_csv(files[6])
us_obs.head()

# 지점: 지점 코드
# 지점명: 관측소 지점 (울산)
# 일시: 관측 시간 Y-M-D H:M
# 기온 , 풍속, 풍향, 습도
# 전운량: 낮을수록 구름이 적다.

Unnamed: 0,지점,지점명,일시,기온(°C),풍속(m/s),풍향(16방위),습도(%),전운량(10분위)
0,152,울산,2018-03-01 00:00,8.2,3.9,340.0,98.0,10.0
1,152,울산,2018-03-01 01:00,7.0,4.1,320.0,97.0,10.0
2,152,울산,2018-03-01 02:00,6.5,5.9,290.0,80.0,
3,152,울산,2018-03-01 03:00,6.2,4.6,320.0,79.0,3.0
4,152,울산,2018-03-01 04:00,6.7,4.5,320.0,73.0,1.0


---

## 1. 데이터 정제
### 1) 날씨 정보 컬럼명 변경

In [39]:
# 울산광역시 us_fcst
# 날씨 예보 데이터 컬럼명 변경
# 읽기 쉽게 한글로 변경
us_fcst.rename(columns={us_fcst.columns[0]: "예보발표시간",
us_fcst.columns[1]:"시간", # 예측시각
us_fcst.columns[2]:"기온",
us_fcst.columns[3]:"습도",
us_fcst.columns[4]:"풍속",
us_fcst.columns[5]:"풍향",
us_fcst.columns[6]:"날씨코드"}, inplace=True)
us_fcst.head()


Unnamed: 0,예보발표시간,시간,기온,습도,풍속,풍향,날씨코드
0,2018-03-01 11:00:00,4.0,8.0,20.0,14.0,298.0,2.0
1,2018-03-01 11:00:00,7.0,4.0,20.0,4.3,298.0,2.0
2,2018-03-01 11:00:00,10.0,3.0,30.0,1.9,309.0,2.0
3,2018-03-01 11:00:00,13.0,0.0,40.0,1.5,318.0,2.0
4,2018-03-01 11:00:00,16.0,-1.0,45.0,1.8,308.0,2.0


In [38]:
# 울산광역시 us_obs
# 날씨 관측 데이터 컬럼명 변경
us_obs.rename(columns={us_obs.columns[0]: "지점코드",
us_obs.columns[1]:"지점명",
us_obs.columns[2]:"시간", # 관측시간
us_obs.columns[3]:"기온",
us_obs.columns[4]:"풍속",
us_obs.columns[5]:"풍향",
us_obs.columns[6]:"습도",
us_obs.columns[7]:"전운량"}, inplace=True)
us_obs.head()

Unnamed: 0,지점코드,지점명,시간,기온,풍속,풍향,습도,전운량
0,152,울산,2018-03-01 00:00,8.2,3.9,340.0,98.0,10.0
1,152,울산,2018-03-01 01:00,7.0,4.1,320.0,97.0,10.0
2,152,울산,2018-03-01 02:00,6.5,5.9,290.0,80.0,
3,152,울산,2018-03-01 03:00,6.2,4.6,320.0,79.0,3.0
4,152,울산,2018-03-01 04:00,6.7,4.5,320.0,73.0,1.0


----
### 2) 예보시간 TimeStamp로 나타내기
- 2018-03-01 11시에 관측하여
- 시간 컬럼이 만약에 4라면
- 2018-03-01 15시 의 timestamp 값을 가지도록

In [16]:
# 울산 광역시 us_fcst
# 날씨 예보 데이터 시간 컬럼 -> TimeStamp
us_fcst.head()

Unnamed: 0,예보발표시간,시간,기온,습도,풍속,풍향,날씨코드
0,2018-03-01 11:00:00,4.0,8.0,20.0,14.0,298.0,2.0
1,2018-03-01 11:00:00,7.0,4.0,20.0,4.3,298.0,2.0
2,2018-03-01 11:00:00,10.0,3.0,30.0,1.9,309.0,2.0
3,2018-03-01 11:00:00,13.0,0.0,40.0,1.5,318.0,2.0
4,2018-03-01 11:00:00,16.0,-1.0,45.0,1.8,308.0,2.0


In [33]:
# 예보발표시간 날짜에 해당하는 월의 마지막 일 수 구하기
#  2018 3월 ~ 2021 3월
months = [i for i in range(1, 13)]
lastDays = {2018: [], 2019: [], 2020: [], 2021: []}

days = []
for year in lastDays:
    for month in months:
        lastDays[year].append(cal.monthrange(year, month)[1])

print(lastDays)

{2018: [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], 2019: [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], 2020: [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], 2021: [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]}


In [39]:
# 울산광역시 us_fcst
# 반복문을 사용하여 timestamp로 변경

# # for i in range(162147, 162152):
# # for i in range(153414, 153417):
# for i in range(162208):
#     announceTime = us_fcst["예보발표시간"][i]
#     predictTime = us_fcst["시간"][i]
#     annTS = pd.Timestamp(announceTime) # 예보발표시간 to TimeStamp

#     year = annTS.year
#     month = annTS.month
#     day = annTS.day
#     hour = annTS.hour

#     preHour = int(hour + predictTime) # 예보시간의 시간 구하기
#     # print(year, month, day, hour)
#     # print("preHour:", preHour)

#     plusDay = preHour // 24 # 더해지는 일수
#     hour = preHour % 24 # 나머지 시간
#     # print("plusDay:", plusDay,"hour:", hour)

#     if(plusDay > 0):
#         day += plusDay # 해당 일 + 더해지는 일 수

#         # 만약 month가 1, 3, 5, 7, 8, 10, 12 라면: 31일
#         if(month == 1 or month == 3 or month == 5 or month == 7 or month == 8 or month == 10 or month == 12):
#             if(day > 31):
#                 month += 1
#                 day %= 31
#                 if(month> 12):
#                     year += 1
#                     month = 1
#             # print("month:", month,"day:", day)
#         # 만약 month가 2, 4, 6, 9, 11 이라면: 31일 X
#         else:
#             lastDay = lastDays[year][month-1]
#             if(day > lastDay):
#                 month += 1
#                 day %= lastDay
#                 if(month> 12):
#                     year += 1
#                     month = 1
#             # print("month:", month,"day:", day)

#         preTS = pd.Timestamp(year, month, day, hour)
#         preTS = datetime.datetime.strftime(preTS, '%Y-%m-%d %H:%M')
#         # print(preTS)
#         us_fcst['시간'][i] = preTS
#     else:
#         preTS = pd.Timestamp(year, month, day, hour)
#         preTS = datetime.datetime.strftime(preTS, '%Y-%m-%d %H:%M')
#         # print(preTS)
#         us_fcst['시간'][i] = preTS

#     # print(us_fcst['예보시간'][i])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_fcst['시간'][i] = preTS
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_fcst['시간'][i] = preTS


In [40]:
# 시간 컬럼 -> TimeStamp 변경 결과
us_fcst.head()

Unnamed: 0,예보발표시간,시간,기온,습도,풍속,풍향,날씨코드
0,2018-03-01 11:00:00,2018-03-01 15:00,8.0,20.0,14.0,298.0,2.0
1,2018-03-01 11:00:00,2018-03-01 18:00,4.0,20.0,4.3,298.0,2.0
2,2018-03-01 11:00:00,2018-03-01 21:00,3.0,30.0,1.9,309.0,2.0
3,2018-03-01 11:00:00,2018-03-02 00:00,0.0,40.0,1.5,318.0,2.0
4,2018-03-01 11:00:00,2018-03-02 03:00,-1.0,45.0,1.8,308.0,2.0


In [41]:
# 변경된 데이터 파일로 저장하기
# # 파일로 변환
# us_fcst.to_csv('ulsan_fcst_convert.csv', mode='w', encoding='utf-8-sig', index=False)

In [40]:
# 저장된 파일 가져오기
us_fcst_convert = pd.read_csv('./ulsan_fcst_convert.csv')
us_fcst_convert.head()

Unnamed: 0,예보발표시간,시간,기온,습도,풍속,풍향,날씨코드
0,2018-03-01 11:00:00,2018-03-01 15:00,8.0,20.0,14.0,298.0,2.0
1,2018-03-01 11:00:00,2018-03-01 18:00,4.0,20.0,4.3,298.0,2.0
2,2018-03-01 11:00:00,2018-03-01 21:00,3.0,30.0,1.9,309.0,2.0
3,2018-03-01 11:00:00,2018-03-02 00:00,0.0,40.0,1.5,318.0,2.0
4,2018-03-01 11:00:00,2018-03-02 03:00,-1.0,45.0,1.8,308.0,2.0


------
### 3) 날씨 예보 데이터 & 날씨 관측 데이터 Merge
- 필요없는 컬럼명 drop

In [21]:
# merge_df = pd.merge(us_fcst_convert, us_obs, on="시간")
# merge_df.head()
df1 = pd.DataFrame(us_obs.loc[15])
df2 = pd.DataFrame(us_fcst_convert.loc[0])
df3 = pd.DataFrame({'시간': df1.loc['시간'], '기온':1})
df4 = pd.DataFrame({'시간': df2.loc['시간'], '기온':3})
# merge_df = pd.merge(df1, df2, on='시간')
time1 = df3['시간']
time2 = df4['시간']

time3 = df3['시간']
print(time1)
print(time2)

15    2018-03-01 15:00
Name: 시간, dtype: object
0    2018-03-01 00:00
Name: 시간, dtype: object


In [22]:
# # 시간 컬럼을 기준으로 merge 한다.
# merge_us = pd.merge(us_fcst_convert, us_obs, on='시간')
# merge_us.head()

Unnamed: 0,예보발표시간,시간,기온_x,습도_x,풍속_x,풍향_x,날씨코드,지점코드,지점명,기온_y,풍속_y,풍향_y,습도_y,전운량
0,2018-03-01 11:00:00,2018-03-01 00:00,8.0,20.0,14.0,298.0,2.0,152,울산,8.2,3.9,340.0,98.0,10.0
1,2018-03-01 11:00:00,2018-03-01 00:00,4.0,20.0,4.3,298.0,2.0,152,울산,8.2,3.9,340.0,98.0,10.0
2,2018-03-01 11:00:00,2018-03-01 00:00,3.0,30.0,1.9,309.0,2.0,152,울산,8.2,3.9,340.0,98.0,10.0
3,2018-03-01 14:00:00,2018-03-01 00:00,4.0,20.0,4.3,298.0,2.0,152,울산,8.2,3.9,340.0,98.0,10.0
4,2018-03-01 14:00:00,2018-03-01 00:00,3.0,30.0,1.9,309.0,2.0,152,울산,8.2,3.9,340.0,98.0,10.0


In [41]:
# 울산광역시 us_obs
# 날씨 관측 데이터 필요없는 컬럼 삭제
# 지점코드, 지점명 삭제
us_obs_drop = us_obs.drop(['지점코드', '지점명'], axis=1)
us_obs_drop.head()

Unnamed: 0,시간,기온,풍속,풍향,습도,전운량
0,2018-03-01 00:00,8.2,3.9,340.0,98.0,10.0
1,2018-03-01 01:00,7.0,4.1,320.0,97.0,10.0
2,2018-03-01 02:00,6.5,5.9,290.0,80.0,
3,2018-03-01 03:00,6.2,4.6,320.0,79.0,3.0
4,2018-03-01 04:00,6.7,4.5,320.0,73.0,1.0


In [42]:
# 울산광역시 us_fcst
# 날씨 관측 데이터 필요없는 컬럼명 삭제
# 예보발표시간
us_fcst_drop = us_fcst_convert.drop(['예보발표시간'], axis=1)
us_fcst_drop.head()

Unnamed: 0,시간,기온,습도,풍속,풍향,날씨코드
0,2018-03-01 15:00,8.0,20.0,14.0,298.0,2.0
1,2018-03-01 18:00,4.0,20.0,4.3,298.0,2.0
2,2018-03-01 21:00,3.0,30.0,1.9,309.0,2.0
3,2018-03-02 00:00,0.0,40.0,1.5,318.0,2.0
4,2018-03-02 03:00,-1.0,45.0,1.8,308.0,2.0


In [43]:
# 울산광역시 us_fcst, us_obs
# 예보 데이터 , 관측 데이터 시간 컬럼을 기준으로 Merge
# _x : 예보 데이터
# _y : 관측 데이터
merge_us = pd.merge(us_fcst_drop, us_obs_drop, how='left', left_on='시간', right_on='시간')
merge_us.head()

Unnamed: 0,시간,기온_x,습도_x,풍속_x,풍향_x,날씨코드,기온_y,풍속_y,풍향_y,습도_y,전운량
0,2018-03-01 15:00,8.0,20.0,14.0,298.0,2.0,9.9,7.2,290.0,19.0,5.0
1,2018-03-01 18:00,4.0,20.0,4.3,298.0,2.0,6.6,2.3,360.0,19.0,2.0
2,2018-03-01 21:00,3.0,30.0,1.9,309.0,2.0,4.0,2.8,340.0,25.0,8.0
3,2018-03-02 00:00,0.0,40.0,1.5,318.0,2.0,1.3,3.4,340.0,37.0,7.0
4,2018-03-02 03:00,-1.0,45.0,1.8,308.0,2.0,-0.9,2.0,20.0,48.0,9.0


In [44]:
# merge한 데이터 컬럼명 변경
# 예측 / 관측 컬럼
merge_us = merge_us.rename(columns={merge_us.columns[1]:"예측기온", merge_us.columns[2]:"예측습도",merge_us.columns[3]:"예측풍속",
merge_us.columns[4]:"예측풍향",
merge_us.columns[6]:"관측기온",merge_us.columns[7]:"관측풍속",
merge_us.columns[8]:"관측풍향",merge_us.columns[9]:"관측습도",})
merge_us

Unnamed: 0,시간,예측기온,예측습도,예측풍속,예측풍향,날씨코드,관측기온,관측풍속,관측풍향,관측습도,전운량
0,2018-03-01 15:00,8.0,20.0,14.0,298.0,2.0,9.9,7.2,290.0,19.0,5.0
1,2018-03-01 18:00,4.0,20.0,4.3,298.0,2.0,6.6,2.3,360.0,19.0,2.0
2,2018-03-01 21:00,3.0,30.0,1.9,309.0,2.0,4.0,2.8,340.0,25.0,8.0
3,2018-03-02 00:00,0.0,40.0,1.5,318.0,2.0,1.3,3.4,340.0,37.0,7.0
4,2018-03-02 03:00,-1.0,45.0,1.8,308.0,2.0,-0.9,2.0,20.0,48.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...
162203,2021-03-03 12:00,9.0,45.0,3.1,83.0,3.0,,,,,
162204,2021-03-03 15:00,9.0,45.0,3.0,111.0,3.0,,,,,
162205,2021-03-03 18:00,8.0,55.0,2.2,122.0,3.0,,,,,
162206,2021-03-03 21:00,6.0,65.0,0.9,131.0,3.0,,,,,


In [45]:
# merge한 데이터 NaN 값 삭제
# 행 기준
merge_us = merge_us.dropna(axis=0)
merge_us

Unnamed: 0,시간,예측기온,예측습도,예측풍속,예측풍향,날씨코드,관측기온,관측풍속,관측풍향,관측습도,전운량
0,2018-03-01 15:00,8.0,20.0,14.0,298.0,2.0,9.9,7.2,290.0,19.0,5.0
1,2018-03-01 18:00,4.0,20.0,4.3,298.0,2.0,6.6,2.3,360.0,19.0,2.0
2,2018-03-01 21:00,3.0,30.0,1.9,309.0,2.0,4.0,2.8,340.0,25.0,8.0
3,2018-03-02 00:00,0.0,40.0,1.5,318.0,2.0,1.3,3.4,340.0,37.0,7.0
4,2018-03-02 03:00,-1.0,45.0,1.8,308.0,2.0,-0.9,2.0,20.0,48.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...
157917,2021-01-31 18:00,9.0,50.0,2.5,191.0,3.0,9.3,3.4,180.0,43.0,0.0
157918,2021-01-31 21:00,8.0,65.0,2.2,219.0,4.0,8.4,2.4,230.0,51.0,7.0
157936,2021-01-31 18:00,9.0,50.0,2.5,191.0,3.0,9.3,3.4,180.0,43.0,0.0
157937,2021-01-31 21:00,8.0,65.0,2.2,219.0,4.0,8.4,2.4,230.0,51.0,7.0


---
### 4) 에너지 데이터 가공
- 울산 데이터

In [61]:
# # 울산광역시 us_fcst, us_obs
# # 병합한 예보 / 관측 데이터 파일로 저장
# merge_us_test.to_csv('predict_observe.csv', mode='w', encoding='utf-8-sig', index=False)

In [55]:
# 동서발전 발전소별 발전량 데이터 energy
# 울산 데이터만 뽑기
energy_us = energy.drop(['dangjin_floating', 'dangjin_warehouse', 'dangjin'], axis=1)
energy_us = energy_us.rename(columns={energy_us.columns[0]:'시간',energy_us.columns[1]:'울산발전량'}, )
energy_us.head()


Unnamed: 0,시간,울산발전량
0,2018-03-01 1:00:00,0
1,2018-03-01 2:00:00,0
2,2018-03-01 3:00:00,0
3,2018-03-01 4:00:00,0
4,2018-03-01 5:00:00,0


In [56]:
# 에너지 데이터 시간 변경
# 타임스탬프 변경
# 24:00:00 시 변경해야함 그러려면
for i in range(len(energy_us)):
    energy_us['시간'][i] = re.sub("24:00:00", "00:00:00", energy_us['시간'][i])
    time = pd.Timestamp(energy_us['시간'][i])
    time = datetime.datetime.strftime(time, '%Y-%m-%d %H:%M')
    energy_us['시간'][i] = time

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  energy_us['시간'][i] = re.sub("24:00:00", "00:00:00", energy_us['시간'][i])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  energy_us['시간'][i] = time


In [57]:
# 데이터 변경 확인
energy_us.loc[22:26]

Unnamed: 0,시간,울산발전량
22,2018-03-01 23:00,0
23,2018-03-01 00:00,0
24,2018-03-02 01:00,0
25,2018-03-02 02:00,0
26,2018-03-02 03:00,0


In [59]:
# 발전량이 0인 데이터 삭제
energy_us = energy_us[energy_us['울산발전량'] != 0]
energy_us.head()

Unnamed: 0,시간,울산발전량
7,2018-03-01 08:00,4
8,2018-03-01 09:00,35
9,2018-03-01 10:00,71
10,2018-03-01 11:00,82
11,2018-03-01 12:00,334


In [159]:
# import re
# data = "2021-04-01 24:00:00"
# # p = re.compile('(\d{2}):(\d{2}):(\d{2})')
# # d = p.sub("\g<1> 00:00:00", data)
# # s = p.search(data)
# d = re.sub('24:00:00', '00:00:00', data)
# d

'2021-04-01 00:00:00'

---
### 5) energy 데이터와 병합

In [54]:
pre_obs = pd.read_csv('./predict_observe.csv')
pre_obs.head()
# merge_us = pd.merge(us_fcst_drop, us_obs_drop, how='left', left_on='시간', right_on='시간')
# merge_us.head()

Unnamed: 0,시간,예측기온,예측습도,예측풍속,예측풍향,날씨코드,관측기온,관측풍속,관측풍향,관측습도,전운량
0,2018-03-01 15:00,8.0,20.0,14.0,298.0,2.0,9.9,7.2,290.0,19.0,5.0
1,2018-03-01 18:00,4.0,20.0,4.3,298.0,2.0,6.6,2.3,360.0,19.0,2.0
2,2018-03-01 21:00,3.0,30.0,1.9,309.0,2.0,4.0,2.8,340.0,25.0,8.0
3,2018-03-02 00:00,0.0,40.0,1.5,318.0,2.0,1.3,3.4,340.0,37.0,7.0
4,2018-03-02 03:00,-1.0,45.0,1.8,308.0,2.0,-0.9,2.0,20.0,48.0,9.0


In [60]:
# 시간 별 날씨와 발전량
wheather_energy = pd.merge(pre_obs, energy_us, how='left', left_on='시간', right_on='시간')
wheather_energy

Unnamed: 0,시간,예측기온,예측습도,예측풍속,예측풍향,날씨코드,관측기온,관측풍속,관측풍향,관측습도,전운량,울산발전량
0,2018-03-01 15:00,8.0,20.0,14.0,298.0,2.0,9.9,7.2,290.0,19.0,5.0,318.0
1,2018-03-01 18:00,4.0,20.0,4.3,298.0,2.0,6.6,2.3,360.0,19.0,2.0,30.0
2,2018-03-01 21:00,3.0,30.0,1.9,309.0,2.0,4.0,2.8,340.0,25.0,8.0,
3,2018-03-02 00:00,0.0,40.0,1.5,318.0,2.0,1.3,3.4,340.0,37.0,7.0,
4,2018-03-02 03:00,-1.0,45.0,1.8,308.0,2.0,-0.9,2.0,20.0,48.0,9.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
157760,2021-01-31 18:00,9.0,50.0,2.5,191.0,3.0,9.3,3.4,180.0,43.0,0.0,8.0
157761,2021-01-31 21:00,8.0,65.0,2.2,219.0,4.0,8.4,2.4,230.0,51.0,7.0,
157762,2021-01-31 18:00,9.0,50.0,2.5,191.0,3.0,9.3,3.4,180.0,43.0,0.0,8.0
157763,2021-01-31 21:00,8.0,65.0,2.2,219.0,4.0,8.4,2.4,230.0,51.0,7.0,


In [62]:
# NaN 값 삭제
wheather_energy = wheather_energy.dropna(axis=0)
wheather_energy

Unnamed: 0,시간,예측기온,예측습도,예측풍속,예측풍향,날씨코드,관측기온,관측풍속,관측풍향,관측습도,전운량,울산발전량
0,2018-03-01 15:00,8.0,20.0,14.0,298.0,2.0,9.9,7.2,290.0,19.0,5.0,318.0
1,2018-03-01 18:00,4.0,20.0,4.3,298.0,2.0,6.6,2.3,360.0,19.0,2.0,30.0
6,2018-03-02 09:00,0.0,35.0,1.9,321.0,1.0,-0.1,2.7,340.0,46.0,1.0,102.0
7,2018-03-02 12:00,6.0,30.0,1.4,324.0,1.0,6.2,3.0,230.0,28.0,0.0,278.0
8,2018-03-02 15:00,9.0,30.0,0.8,40.0,1.0,8.7,3.8,180.0,50.0,0.0,319.0
...,...,...,...,...,...,...,...,...,...,...,...,...
157756,2021-01-31 15:00,12.0,30.0,3.7,175.0,3.0,10.6,3.9,140.0,39.0,0.0,259.0
157757,2021-01-31 18:00,9.0,50.0,2.5,191.0,3.0,9.3,3.4,180.0,43.0,0.0,8.0
157759,2021-01-31 15:00,12.0,30.0,3.7,175.0,3.0,10.6,3.9,140.0,39.0,0.0,259.0
157760,2021-01-31 18:00,9.0,50.0,2.5,191.0,3.0,9.3,3.4,180.0,43.0,0.0,8.0


In [63]:
wheather_energy.to_csv('날씨별발전량_울산.csv', mode='w', encoding='utf-8-sig', index=False)