# Data & Library

In [None]:
!pip install bayesian-optimization
import numpy as np
import pandas as pd
import random
import os
import glob
import warnings
warnings.filterwarnings("ignore")

from lightgbm import LGBMRegressor
#from bayes_opt import BayesianOptimization

from tqdm.notebook import tqdm

Collecting bayesian-optimization
  Downloading https://files.pythonhosted.org/packages/bb/7a/fd8059a3881d3ab37ac8f72f56b73937a14e8bb14a9733e68cc8b17dbe3c/bayesian-optimization-1.2.0.tar.gz
Building wheels for collected packages: bayesian-optimization
  Building wheel for bayesian-optimization (setup.py) ... [?25l[?25hdone
  Created wheel for bayesian-optimization: filename=bayesian_optimization-1.2.0-cp36-none-any.whl size=11685 sha256=9359f16ea50e5971ee8231023a207f67468c4c8c35bcd4fb6aebca3b3803fede
  Stored in directory: /root/.cache/pip/wheels/5a/56/ae/e0e3c1fc1954dc3ec712e2df547235ed072b448094d8f94aec
Successfully built bayesian-optimization
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.2.0


In [None]:
# Load the training CSV and print its (rows, columns) shape.
train_csv_path = '/content/drive/MyDrive/DACON/태양광/train.csv'
train = pd.read_csv(train_csv_path)
print(train.shape)

(52560, 9)


In [None]:
import re

# The capturing group makes split() keep the digit runs as separate tokens.
numbers = re.compile(r'(\d+)')


def numericalSort(value):
    """Build a sort key that compares embedded digit runs as integers.

    ``value`` is split on runs of digits. Because the pattern captures the
    digits, they end up at the odd positions of the split result and are
    converted to ``int``; the text pieces in between stay strings.
    """
    return [
        int(token) if position % 2 else token
        for position, token in enumerate(numbers.split(value))
    ]


# Collect the test file paths ordered by the number in the file name
# (0.csv, 1.csv, ..., 10.csv) instead of plain string order.
test_files = sorted(
    glob.glob('/content/drive/MyDrive/DACON/태양광/test/*csv'),
    key=numericalSort,
)

In [None]:
test_files

['/content/drive/MyDrive/DACON/태양광/test/0.csv',
 '/content/drive/MyDrive/DACON/태양광/test/1.csv',
 '/content/drive/MyDrive/DACON/태양광/test/2.csv',
 '/content/drive/MyDrive/DACON/태양광/test/3.csv',
 '/content/drive/MyDrive/DACON/태양광/test/4.csv',
 '/content/drive/MyDrive/DACON/태양광/test/5.csv',
 '/content/drive/MyDrive/DACON/태양광/test/6.csv',
 '/content/drive/MyDrive/DACON/태양광/test/7.csv',
 '/content/drive/MyDrive/DACON/태양광/test/8.csv',
 '/content/drive/MyDrive/DACON/태양광/test/9.csv',
 '/content/drive/MyDrive/DACON/태양광/test/10.csv',
 '/content/drive/MyDrive/DACON/태양광/test/11.csv',
 '/content/drive/MyDrive/DACON/태양광/test/12.csv',
 '/content/drive/MyDrive/DACON/태양광/test/13.csv',
 '/content/drive/MyDrive/DACON/태양광/test/14.csv',
 '/content/drive/MyDrive/DACON/태양광/test/15.csv',
 '/content/drive/MyDrive/DACON/태양광/test/16.csv',
 '/content/drive/MyDrive/DACON/태양광/test/17.csv',
 '/content/drive/MyDrive/DACON/태양과

In [None]:
# Read every test file once and stack them with a single concat call.
# Calling pd.concat inside the loop copies the accumulated frame again on
# every iteration, and seeding the loop with an empty frame adds an empty
# operand to each of those concats.
# ignore_index is not passed, so each file keeps its own row labels.
test_parts = [pd.read_csv(path) for path in test_files]
test0 = test_parts[0]  # first test file, still available under its old name
test = pd.concat(test_parts)

In [None]:
test.shape

(27216, 9)

# Feature Engineering

DHI+DNI 변수 생성

In [None]:
# Add 'DHI_DNI' (element-wise DHI + DNI) to both the train and test frames.
for frame in (train, test):
    frame['DHI_DNI'] = frame['DHI'] + frame['DNI']

태양의 유무(binary variable) 변수 생성


---
* TARGET, DHI, DNI 모두 태양의 유무와 직접적인 관계를 지닌다고 판단해서 3개 변수에 대해 생성
* 변수명은 'Sun_{}'으로 통일

In [None]:
# Create the sun-presence columns: start each 'Sun_{}' column as a copy of
# its source column (train first, then test).
for frame in (train, test):
    for source_col in ('TARGET', 'DHI', 'DNI', 'DHI_DNI'):
        frame['Sun_' + source_col] = frame[source_col]


def SUN(x):
    """Sun-presence flag: return 0 when ``x`` equals 0, otherwise 1."""
    return 0 if x == 0 else 1

# Turn every 'Sun_{}' column into a 0/1 flag by applying SUN element-wise
# (train first, then test).
for frame in (train, test):
    for sun_col in ('Sun_TARGET', 'Sun_DHI', 'Sun_DNI', 'Sun_DHI_DNI'):
        frame[sun_col] = frame[sun_col].apply(SUN)

해가 떠 있는 시간에 대한 변수 생성


---
* TARGET, DHI, DNI 모두 해가 떠 있는 시간과 직접적인 관계를 지닌다고 판단해서 3개 변수에 대해 생성
* 변수명은 'Sun_{}_hour'로 통일
* 변수명은 hour이지만 실제 단위는 30min

In [None]:
# Temporarily add a 'Day_temp' column to the test set so the 'Sun_{}_hour'
# features can be computed per day.
# The key is derived from the row position (48 rows per day) rather than
# copied from the first 567*48 values of train['Day']: the copy only lines
# up while train is stored day by day starting at Day 0 and test has exactly
# 567*48 rows; with any other length the index-aligned concat would leave
# NaN keys or add NaN rows.
test = test.reset_index(drop=True)
day_temp = pd.DataFrame({'Day_temp': np.arange(len(test)) // 48}, index=test.index)
test = pd.concat([test, day_temp], axis=1)

# 'Sun_{}_hour': number of rows of the same Day whose 'Sun_{}' flag is set,
# repeated on every row of that Day (one row is a 30-minute slot, so the
# unit is half-hours despite the name).
# groupby/transform handles whatever Day values are present; looping over a
# fixed range(1095) would leave the raw 0/1 flag on any Day outside 0..1094
# and scans the whole frame once per day.
for name in ('TARGET', 'DHI', 'DNI', 'DHI_DNI'):
    train['Sun_' + name + '_hour'] = train.groupby('Day')['Sun_' + name].transform('sum')



# 'Sun_{}_hour' for the test set: number of rows sharing the same 'Day_temp'
# whose 'Sun_{}' flag is set, repeated on every row of that day.
# groupby/transform handles whatever day keys are present; looping over a
# fixed range(567) would leave the raw 0/1 flag on any other key and scans
# the whole frame once per day.
for name in ('TARGET', 'DHI', 'DNI', 'DHI_DNI'):
    test['Sun_' + name + '_hour'] = test.groupby('Day_temp')['Sun_' + name].transform('sum')

DNI_DHI_plus 변수 생성

---
* DHI와 DNI가 0이 아닌 행의 개수로 해가 떠 있는 누적 시간을 구하고 싶었으나,
결측치처럼 중간에 빈 값이 존재하여 두 값을 더해 누적 시간을 구하기로 함
* TARGET이 0이 아닐때 DHI,DNI 두 값이 동시에 0인 경우는 없음.

* 정리: 1. DHI_DNI의 이름을 가진 변수 : 처음부터 두 값을 합쳐서 산출한 값
*       2. DHI_DNI_plus의 이름을 가진 변수 : DHI와 DNI가 0이 아닌 여부를 따로 구하여 이 여부의 값을 합쳐 만든 변수

(Sun_DHI가 1이고 Sun_DNI가 1인 경우, 1번 변수(Sun_DHI_DNI)는 1의 값을 가지지만,
2번 변수(Sun_DHI_DNI_plus)는 2의 값을 가짐)

(따라서 2번 변수는 각각의 DHI와 DNI의 상태를 반영)

In [None]:
for frame in (train, test):
    # Sum of the two sun-presence flags for the row.
    frame['Sun_DHI_DNI_plus'] = frame['Sun_DHI'] + frame['Sun_DNI']
    # Sum of the two per-day sun-slot counts.
    frame['Sun_DHI_DNI_plus_hour'] = frame['Sun_DHI_hour'] + frame['Sun_DNI_hour']

해당 시점이 하루 중 해가 떠있는 몇 시간째인가

In [None]:
# 'Sun_{}_accumulate' for train: for each row, the sum of the 'Sun_{}'
# values of the same Day up to and including that row; rows whose own
# 'Sun_{}' value is 0 get 0.
# A grouped cumulative sum yields these running totals in one pass per
# column, instead of re-summing the day's slice for every single row and
# writing each cell through .loc.
for name in ['TARGET', 'DNI', 'DHI', 'DHI_DNI', 'DHI_DNI_plus']:
    indicator = train['Sun_' + name]
    running_total = indicator.groupby(train['Day']).cumsum()
    # Stored as float, the dtype a column gets when it is created through
    # cell-by-cell .loc assignment.
    train['Sun_' + name + '_accumulate'] = running_total.where(indicator != 0, 0).astype(float)

HBox(children=(FloatProgress(value=0.0, max=1095.0), HTML(value='')))




In [None]:
# 'Sun_{}_accumulate' for test: for each row, the sum of the 'Sun_{}'
# values sharing the same 'Day_temp' up to and including that row; rows
# whose own 'Sun_{}' value is 0 get 0.
# A grouped cumulative sum yields these running totals in one pass per
# column, instead of re-summing the day's slice for every single row and
# writing each cell through .loc.
for name in ['TARGET', 'DNI', 'DHI', 'DHI_DNI', 'DHI_DNI_plus']:
    indicator = test['Sun_' + name]
    running_total = indicator.groupby(test['Day_temp']).cumsum()
    # Stored as float, the dtype a column gets when it is created through
    # cell-by-cell .loc assignment.
    test['Sun_' + name + '_accumulate'] = running_total.where(indicator != 0, 0).astype(float)

HBox(children=(FloatProgress(value=0.0, max=567.0), HTML(value='')))




해당 시점이 하루 중 해가 떠있는 몇 시간째인가/하루 전체 동안 해가 떠있는 시간


---
*  해당 시점에 같은 시간 해가 떠있다고 해도 여름에 해가 3시간째 떠있을 때의 발전량과 겨울에 3시간째 떠있을 때의 발전량이 다르다고 판단
*  따라서 이러한 차이를 고려하기 위해 하루 동안 해가 떠 있는 시간을 100이라 했을 때, 해당 시점이 전체 해 뜬 시간의 몇 퍼센트에 위치하는지 나타내는 파생 변수 생성

In [None]:
train.columns

Index(['Day', 'Hour', 'Minute', 'DHI', 'DNI', 'WS', 'RH', 'T', 'TARGET',
       'DHI_DNI', 'Sun_TARGET', 'Sun_DHI', 'Sun_DNI', 'Sun_DHI_DNI',
       'Sun_TARGET_hour', 'Sun_DHI_hour', 'Sun_DNI_hour', 'Sun_DHI_DNI_hour',
       'Sun_DHI_DNI_plus', 'Sun_DHI_DNI_plus_hour', 'Sun_TARGET_accumulate',
       'Sun_DNI_accumulate', 'Sun_DHI_accumulate', 'Sun_DHI_DNI_accumulate',
       'Sun_DHI_DNI_plus_accumulate'],
      dtype='object')

In [None]:
# 'Sun_{}_per' for train: accumulated count / day total * 100, and 0 on
# every row whose accumulated count is 0 (which also covers days whose
# total is 0, so the 0/0 results never reach the output).
# Computed column-wise, so it does not depend on train's index being
# exactly 0..len(train)-1 and avoids one scalar .loc read/write per cell.
for name in ['TARGET', 'DNI', 'DHI', 'DHI_DNI', 'DHI_DNI_plus']:
    accumulated = train['Sun_' + name + '_accumulate']
    day_total = train['Sun_' + name + '_hour']
    share = accumulated / day_total * 100
    train['Sun_' + name + '_per'] = share.where(accumulated != 0, 0)

HBox(children=(FloatProgress(value=0.0, max=52560.0), HTML(value='')))




In [None]:
# 'Sun_{}_per' for test: accumulated count / day total * 100, and 0 on
# every row whose accumulated count is 0 (which also covers days whose
# total is 0, so the 0/0 results never reach the output).
# Computed column-wise, so it does not depend on test's index being
# exactly 0..len(test)-1 and avoids one scalar .loc read/write per cell.
for name in ['TARGET', 'DNI', 'DHI', 'DHI_DNI', 'DHI_DNI_plus']:
    accumulated = test['Sun_' + name + '_accumulate']
    day_total = test['Sun_' + name + '_hour']
    share = accumulated / day_total * 100
    test['Sun_' + name + '_per'] = share.where(accumulated != 0, 0)

HBox(children=(FloatProgress(value=0.0, max=27216.0), HTML(value='')))




In [None]:
# Remove the temporary 'Day_temp' helper column from test.
del test['Day_temp']

# 재구조화

* 하나의 row에 7일 + 2일(TARGET)을 넣기 위해 데이터 재구조화

In [None]:
# Reshape train so that one row holds, for a single 30-minute slot, every
# feature on 7 consecutive days ('<col>_day0'..'<col>_day6') plus TARGET on
# the two days after that ('TARGET_day7', 'TARGET_day8').
# Output row j reads train rows j, j+48, ..., j+48*8 (by position), so the
# last 8*48 rows of train cannot start a window.
# Columns are built from array slices; filling the frame through .loc makes
# one pandas assignment (with frame enlargement) per individual cell.
n_windows = max(len(train) - 8 * 48, 0)
window_columns = {}
for col in train.columns[3:]:  # skip the first three columns (Day, Hour, Minute)
    col_values = train[col].to_numpy(dtype=float)
    # Only TARGET is extended to day7 and day8.
    n_days = 9 if col == 'TARGET' else 7
    for n in range(n_days):
        window_columns[col + '_day' + str(n)] = col_values[48 * n:48 * n + n_windows]
# Passing the key list as columns pins the column order to insertion order.
train_df = pd.DataFrame(window_columns, columns=list(window_columns))

HBox(children=(FloatProgress(value=0.0, max=27.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52176.0), HTML(value='')))





In [None]:
# Minutes since midnight (Hour*60 + Minute) as a one-column 'time' frame,
# cut to the length of train_df and placed in front of its columns.
temp_Hour = (train['Hour'] * 60 + train['Minute']).to_frame(name='time')
temp_Hour = temp_Hour.iloc[:len(train_df)]
train_df = pd.concat([temp_Hour, train_df], axis=1)
# Show the first 48 rows.
train_df.head(48)

Unnamed: 0,time,DHI_day0,DHI_day1,DHI_day2,DHI_day3,DHI_day4,DHI_day5,DHI_day6,DNI_day0,DNI_day1,DNI_day2,DNI_day3,DNI_day4,DNI_day5,DNI_day6,WS_day0,WS_day1,WS_day2,WS_day3,WS_day4,WS_day5,WS_day6,RH_day0,RH_day1,RH_day2,RH_day3,RH_day4,RH_day5,RH_day6,T_day0,T_day1,T_day2,T_day3,T_day4,T_day5,T_day6,TARGET_day0,TARGET_day1,TARGET_day2,TARGET_day3,...,Sun_DHI_DNI_plus_accumulate_day2,Sun_DHI_DNI_plus_accumulate_day3,Sun_DHI_DNI_plus_accumulate_day4,Sun_DHI_DNI_plus_accumulate_day5,Sun_DHI_DNI_plus_accumulate_day6,Sun_TARGET_per_day0,Sun_TARGET_per_day1,Sun_TARGET_per_day2,Sun_TARGET_per_day3,Sun_TARGET_per_day4,Sun_TARGET_per_day5,Sun_TARGET_per_day6,Sun_DNI_per_day0,Sun_DNI_per_day1,Sun_DNI_per_day2,Sun_DNI_per_day3,Sun_DNI_per_day4,Sun_DNI_per_day5,Sun_DNI_per_day6,Sun_DHI_per_day0,Sun_DHI_per_day1,Sun_DHI_per_day2,Sun_DHI_per_day3,Sun_DHI_per_day4,Sun_DHI_per_day5,Sun_DHI_per_day6,Sun_DHI_DNI_per_day0,Sun_DHI_DNI_per_day1,Sun_DHI_DNI_per_day2,Sun_DHI_DNI_per_day3,Sun_DHI_DNI_per_day4,Sun_DHI_DNI_per_day5,Sun_DHI_DNI_per_day6,Sun_DHI_DNI_plus_per_day0,Sun_DHI_DNI_plus_per_day1,Sun_DHI_DNI_plus_per_day2,Sun_DHI_DNI_plus_per_day3,Sun_DHI_DNI_plus_per_day4,Sun_DHI_DNI_plus_per_day5,Sun_DHI_DNI_plus_per_day6
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.5,1.6,2.2,1.8,2.1,1.5,1.9,69.08,90.66,73.8,77.76,72.1,77.41,86.51,-12.0,-10.0,-8.0,-14.0,-5.0,1.0,-2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.5,1.6,2.1,2.0,2.0,1.7,1.8,69.06,90.68,68.2,77.77,72.1,77.42,86.54,-12.0,-10.0,-8.0,-13.0,-5.0,1.0,-2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,60,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.6,1.6,2.1,2.2,2.0,1.9,1.7,71.78,88.11,69.06,77.69,72.14,76.77,85.72,-12.0,-11.0,-8.0,-13.0,-5.0,1.0,-3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,90,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.6,1.6,2.1,2.2,2.0,2.0,1.4,71.75,88.11,69.04,71.57,72.11,76.77,85.73,-12.0,-11.0,-8.0,-13.0,-5.0,1.0,-3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,120,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.6,1.6,2.2,2.2,2.0,2.2,1.1,75.2,90.85,69.38,72.62,72.94,76.39,87.04,-12.0,-11.0,-8.0,-13.0,-5.0,1.0,-4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,150,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.5,1.6,2.2,2.2,2.0,2.2,1.0,69.29,90.84,69.36,72.61,72.92,76.37,87.04,-11.0,-11.0,-7.0,-12.0,-5.0,0.0,-4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.5,1.7,2.2,2.2,2.0,2.3,0.9,72.56,93.78,70.03,73.02,74.04,76.21,90.47,-11.0,-12.0,-7.0,-12.0,-5.0,0.0,-4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.4,1.7,2.2,2.2,2.0,2.4,0.9,72.55,93.77,70.02,73.01,68.57,76.2,90.47,-11.0,-12.0,-7.0,-12.0,-5.0,0.0,-4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,240,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.3,1.7,2.3,2.3,2.0,2.5,0.9,74.62,90.46,72.18,72.77,70.64,81.63,87.82,-11.0,-12.0,-7.0,-12.0,-5.0,0.0,-4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,270,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.3,1.6,2.4,2.2,2.1,2.6,0.9,74.61,90.46,66.78,72.77,70.63,81.64,87.84,-11.0,-12.0,-7.0,-12.0,-4.0,0.0,-4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Reshape test like train: the frame is read as consecutive chunks of 48*7
# rows (presumably one test file each), and every output row holds one
# 30-minute slot of a chunk with its value on '<col>_day0'..'<col>_day6'.
# Output row chunk*48 + slot takes day d from chunk row slot + 48*d.
# The chunk count comes from the frame length instead of a hard-coded 81,
# and columns are built from array reshapes instead of one .loc assignment
# per cell.
rows_per_chunk = 48 * 7
n_chunks = len(test) // rows_per_chunk
test_window_columns = {}
for col in test.columns[3:]:  # skip the first three columns (Day, Hour, Minute)
    col_values = test[col].to_numpy(dtype=float)
    # Axes: (chunk, day, slot). Rows past the last complete chunk are ignored.
    cube = col_values[:n_chunks * rows_per_chunk].reshape(n_chunks, 7, 48)
    for day in range(7):
        test_window_columns[col + '_day' + str(day)] = cube[:, day, :].reshape(-1)
# Passing the key list as columns pins the column order to insertion order.
test_df = pd.DataFrame(test_window_columns, columns=list(test_window_columns))

HBox(children=(FloatProgress(value=0.0, max=27.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))





In [None]:
# Minutes since midnight (Hour*60 + Minute) as a one-column 'time' frame,
# cut to the length of test_df and placed in front of its columns.
test_temp_Hour = (test['Hour'] * 60 + test['Minute']).to_frame(name='time')
test_temp_Hour = test_temp_Hour.iloc[:len(test_df)]
test_df = pd.concat([test_temp_Hour, test_df], axis=1)
test_df

Unnamed: 0,time,DHI_day0,DHI_day1,DHI_day2,DHI_day3,DHI_day4,DHI_day5,DHI_day6,DNI_day0,DNI_day1,DNI_day2,DNI_day3,DNI_day4,DNI_day5,DNI_day6,WS_day0,WS_day1,WS_day2,WS_day3,WS_day4,WS_day5,WS_day6,RH_day0,RH_day1,RH_day2,RH_day3,RH_day4,RH_day5,RH_day6,T_day0,T_day1,T_day2,T_day3,T_day4,T_day5,T_day6,TARGET_day0,TARGET_day1,TARGET_day2,TARGET_day3,...,Sun_DHI_DNI_plus_accumulate_day2,Sun_DHI_DNI_plus_accumulate_day3,Sun_DHI_DNI_plus_accumulate_day4,Sun_DHI_DNI_plus_accumulate_day5,Sun_DHI_DNI_plus_accumulate_day6,Sun_TARGET_per_day0,Sun_TARGET_per_day1,Sun_TARGET_per_day2,Sun_TARGET_per_day3,Sun_TARGET_per_day4,Sun_TARGET_per_day5,Sun_TARGET_per_day6,Sun_DNI_per_day0,Sun_DNI_per_day1,Sun_DNI_per_day2,Sun_DNI_per_day3,Sun_DNI_per_day4,Sun_DNI_per_day5,Sun_DNI_per_day6,Sun_DHI_per_day0,Sun_DHI_per_day1,Sun_DHI_per_day2,Sun_DHI_per_day3,Sun_DHI_per_day4,Sun_DHI_per_day5,Sun_DHI_per_day6,Sun_DHI_DNI_per_day0,Sun_DHI_DNI_per_day1,Sun_DHI_DNI_per_day2,Sun_DHI_DNI_per_day3,Sun_DHI_DNI_per_day4,Sun_DHI_DNI_per_day5,Sun_DHI_DNI_per_day6,Sun_DHI_DNI_plus_per_day0,Sun_DHI_DNI_plus_per_day1,Sun_DHI_DNI_plus_per_day2,Sun_DHI_DNI_plus_per_day3,Sun_DHI_DNI_plus_per_day4,Sun_DHI_DNI_plus_per_day5,Sun_DHI_DNI_plus_per_day6
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.7,1.7,1.3,1.1,2.4,2.1,0.8,34.42,26.93,40.27,59.09,57.25,52.83,80.92,0.0,3.6,3.1,0.1,-6.0,-4.4,-2.8,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.7,1.7,1.3,1.2,2.4,2.0,0.9,34.17,27.12,40.55,61.20,57.25,54.44,81.53,0.1,3.5,3.0,-0.4,-6.0,-4.8,-2.9,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,60,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.7,1.7,1.2,1.3,2.5,1.9,1.0,34.23,28.00,40.27,65.76,55.26,52.78,79.91,0.2,3.4,2.9,-0.8,-6.1,-5.1,-3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,90,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.7,1.7,1.2,1.4,2.5,1.8,0.9,33.99,28.40,40.56,66.24,55.26,53.59,79.91,0.3,3.2,2.8,-0.9,-6.1,-5.3,-3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,120,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,1.7,1.2,1.5,2.6,1.7,0.9,33.97,30.53,41.81,69.70,54.25,52.63,77.20,0.4,3.0,2.6,-1.0,-6.0,-5.5,-3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3883,1290,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,1.2,0.8,1.1,1.0,0.5,0.8,70.25,68.24,64.85,79.32,61.43,74.13,63.35,6.3,9.3,11.1,12.2,13.1,12.5,13.7,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3884,1320,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.2,1.0,1.2,1.0,0.7,0.7,68.66,69.78,66.71,80.41,62.61,73.54,64.82,6.3,8.7,10.3,11.7,12.5,12.0,13.1,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3885,1350,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,1.3,1.2,1.1,1.0,0.9,0.7,71.07,71.70,68.97,83.10,64.77,75.01,66.10,5.8,8.3,9.8,11.2,12.0,11.7,12.8,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3886,1380,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8,1.4,1.3,1.1,1.0,1.1,0.6,72.80,71.52,69.70,84.37,65.31,74.47,67.64,5.3,7.9,9.3,10.8,11.5,11.3,12.4,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
