# 태양광 발전량 예측 AI 경진대회

### Column

- Hour - 시간
- Minute - 분
- DHI - 수평면 산란일사량(Diffuse Horizontal Irradiance (W/m2))
- DNI - 직달일사량(Direct Normal Irradiance (W/m2))
- WS - 풍속(Wind Speed (m/s))
- RH - 상대습도(Relative Humidity (%))
- T - 기온(Temperature (Degree C))
- **Target - 태양광 발전량 (kW)**


In [1]:
import pandas as pd 
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from lightgbm import LGBMRegressor

In [2]:
train = pd.read_csv('train.csv')

In [3]:
test = pd.read_csv('test.csv')

In [4]:
sub = pd.read_csv('sample_submission.csv')

In [5]:
for col in train.columns:
    if train[col].skew() >= 1 and col != "TARGET":
        print("{}의 로그 전환 전 skew값 : {}".format(col, train[col].skew()))
        
        print("{}의 로그 전환 후 skew값 : {}".format(col,np.log1p(train[col].skew())))
        
        print("###### \n")

DHI의 로그 전환 전 skew값 : 2.1486595965487445
DHI의 로그 전환 후 skew값 : 1.146976837370967
###### 

DNI의 로그 전환 전 skew값 : 1.1198749113362643
DNI의 로그 전환 후 skew값 : 0.7513570828564436
###### 

WS의 로그 전환 전 skew값 : 1.199682831261863
WS의 로그 전환 후 skew값 : 0.7883131823628775
###### 



In [5]:
del test['sep_day']

In [None]:
train['DHI'] = np.log1p(train['DHI'])
train['DNI'] = np.log1p(train['DNI'])
train['WS'] = np.log1p(train['WS'])

test['DHI'] = np.log1p(test['DHI'])
test['DNI'] = np.log1p(test['DNI'])
test['WS'] = np.log1p(test['WS'])

In [6]:
train['TARGET1'] = train['TARGET'].shift(-48)
train['TARGET2'] = train['TARGET'].shift(-96)

In [8]:
X_train = train.drop(['Day','Minute'],axis = 1)

X_train1 = X_train.loc[X_train['TARGET1'].notnull(),:].iloc[:,:-1]
X_train2 = X_train.loc[X_train['TARGET2'].notnull(),:]
del X_train2['TARGET1']

In [9]:
y_train1 = X_train1['TARGET1']
y_train2 = X_train2['TARGET2']

In [10]:
X_tr1,X_val1, y_tr1, y_val1 = train_test_split(X_train1.iloc[:,:-1],y_train1,test_size = 0.25,shuffle = False)
X_tr2, X_val2, y_tr2, y_val2 = train_test_split(X_train2.iloc[:,:-1], y_train2,test_size = 0.25,shuffle = False)

In [11]:
X_test = test.loc[test['Day'] == 6,:].drop(['Day','Minute'],axis = 1)

In [12]:
test0 = test.loc[test['Day'] == 0,].drop(['Day','Minute'],axis = 1)
test1 = test.loc[test['Day'] == 1,].drop(['Day','Minute'],axis = 1)
test2 = test.loc[test['Day'] == 2,].drop(['Day','Minute'],axis = 1)
test3 = test.loc[test['Day'] == 3,].drop(['Day','Minute'],axis = 1)
test4 = test.loc[test['Day'] == 4,].drop(['Day','Minute'],axis = 1)

test_list = [test0,test1,test2,test3,test4]

In [13]:
test_target0 = test.loc[test['Day'] == 1,:]['TARGET']
test_target1 = test.loc[test['Day'] == 2,:]['TARGET']
test_target2 = test.loc[test['Day'] == 3,:]['TARGET']
test_target3 = test.loc[test['Day'] == 4,:]['TARGET']
test_target4 = test.loc[test['Day'] == 5,:]['TARGET']
test_target5 = test.loc[test['Day'] == 6,:]['TARGET']

target_list = [[test_target0,test_target1],[test_target1,test_target2],
              [test_target2,test_target3],[test_target3,test_target4],
              [test_target4,test_target5]]

In [14]:
X_tr = [X_tr1,X_tr2]
X_val = [X_val1,X_val2]
y_tr = [y_tr1,y_tr2]
y_val = [y_val1,y_val2]

In [15]:
def pb_loss(true, pred, q) :
    
    L_list = []
    
    for i in range(len(true)) :
        
        if true.iloc[i] >= pred[i] :
            L = true.iloc[i] - pred[i]
            L *= q
            
        else :
            L1 = pred[i] - true.iloc[i]
            L2 = 1 - q
            L = L1 * L2
            
        L_list.append(L)
        
    return np.mean(L_list)

In [18]:
def model_learning(model,quantiles,train_X,train_y,test_list,target_list) :
    allpbloss = []
    print('{} model Start!\n'.format(str(model).split('(')[0]))
    
    for t, target in enumerate(zip(test_list,target_list)) :
        pbloss = []
        print('{}일째 데이터로 {}일,{}일 예측 시작'.format(t,t+1,t+2))
        
        for i in range(2) :
            print("\n{} Start Learning...\n".format(i+1))
            
            for q in quantiles :
                if str(model).split('(')[0] == 'LGBMRegressor' or str(model).split('(')[0] == 'GradientBoostingRegressor':
                    params = {'alpha' : q}        
                    models = model.set_params(**params)
                    
                else:
                    modelcopy = model.copy()
                    params = {'loss_function' : 'Quantile:alpha={}'.format(q)}
                    models = modelcopy.set_params(**params)
                
                models.fit(train_X[i],train_y[i])
                pred = models.predict(test_list[t])
                
                for a in range(81) :
                    pred[48*a : 48*(a+1)][:10] = 0
                    pred[48*a : 48*(a+1)][-9:] = 0
                    
                loss = pb_loss(target_list[t][i],pred,q)
                pbloss.append(loss)
                allpbloss.append(np.mean(pbloss))
                
                print("{} quantile pb_loss : {}".format(q,np.mean(pbloss)))
                
        print("{} Finish\n".format(i+1))
        print('평균 pb_loss : {}\n'.format(np.mean(pbloss)))
    
    print("All Finish!\n")
    print("전체 평균 pb_loss : {}".format(np.mean(allpbloss)))

In [19]:
def result_value(model,train_X,train_y,quantiles) :
    print("Start Learning...\n")
    predicted1 = []
    predicted2 = []
    predict_list = [predicted1,predicted2]
    
    for i in range(2) :
        for q in quantiles :
            params = {'alpha': q}
            models = model.set_params(**params)
            models.fit(train_X[i],train_y[i])
            pred = models.predict(X_test)
            predict_list[i].append(pred) 
            
            for pr in predict_list[i] :
                for a in range(81) :
                    pr[48*a : 48*(a+1)][:10] = 0
                    pr[48*a : 48*(a+1)][-9:] = 0
        
    for k in range(9):
        for v in range(2) :
            predict_list[v][k] = np.array([0 if x<0 else x for x in predict_list[v][k]])
            
        day7.iloc[:,k+1] = predicted1[k]
        day8.iloc[:,k+1] = predicted2[k]
        
    sub = pd.concat([day7,day8]).sort_index()
    print("Finish!")
    
    return sub

In [20]:
day7 = sub.loc[sub.id.str.contains('Day7')]
day8 = sub.loc[sub.id.str.contains('Day8')]

In [21]:
lgbm_params = { 'learning_rate': 0.05, 'max_depth': 9, 'n_estimators': 500, 'num_leaves': 9}
lgbm = LGBMRegressor(n_jobs=-1,random_state=1218,**lgbm_params,metric = 'quantile',objective = 'quantile')

In [23]:
model_learning(lgbm,quant2,X_tr,y_tr,test_list,target_list)

0일째 데이터로 1일,2일 예측 시작

1 Start Learning...

0.05 quantile pb_loss : 0.7830925923760502
0.16 quantile pb_loss : 1.3754928003289184
0.25 quantile pb_loss : 1.7203009589917393
0.38 quantile pb_loss : 1.9496307855860628
0.47 quantile pb_loss : 2.0728772013966683
0.58 quantile pb_loss : 2.109292072624489
0.66 quantile pb_loss : 2.0961293051484953
0.78 quantile pb_loss : 2.0236722724545526
0.87 quantile pb_loss : 1.9167772449541642

2 Start Learning...

0.05 quantile pb_loss : 1.8047933414917026
0.16 quantile pb_loss : 1.8241634043421007
0.25 quantile pb_loss : 1.8838613483570734
0.38 quantile pb_loss : 1.9536781304988347
0.47 quantile pb_loss : 2.0107310700788394
0.58 quantile pb_loss : 2.0397261595252347
0.66 quantile pb_loss : 2.045799709397516
0.78 quantile pb_loss : 2.0175752418074584
0.87 quantile pb_loss : 1.9661321643554335
2 Finish

평균 pb_loss : 1.9661321643554335

1일째 데이터로 2일,3일 예측 시작

1 Start Learning...

0.05 quantile pb_loss : 0.783614376321362
0.16 quantile pb_loss : 1.377182299

KeyboardInterrupt: 

## 파생변수 만들기

#### 습도

In [221]:
train['rain'] = [1 if i == 100 else 0 for i in train['RH']]
test['rain'] = [1 if i == 100 else 0 for i in test['RH']]

#### 포화 증기압

In [None]:
train['vp'] = (611 * np.exp((17.27 * train['T']) / (237.3 + train['T']))) / 1000
test['vp'] = (611 * np.exp((17.27 * test['T']) / (237.3 + test['T']))) / 1000

#### 이슬점 온도

In [None]:
train['dewpoint'] = train['T'] - ((100 - train['RH']) / 5)
test['dewpoint'] = test['T'] - ((100 - test['RH']) / 5)

In [None]:
def make_dewpoint(df,col) :
    dewpoint = []
    b = 17.62
    c = 243.12
    
    gamma = (b * df['T'] / (c + df['T']) + np.log(df[col] / 100.0))
    dewpoint_T = (c * gamma) / (b - gamma)
    return dewpoint_T

train.insert(7,'Dewpoint',make_dewpoint(train,'RH'))
test.insert(7,'Dewpoint',make_dewpoint(test,'RH'))

#### 온도 - 이슬점온도

In [None]:
train.insert(9,'Td',train['T'] - train['Dewpoint'])
test.insert(9,'Td',test['T'] - test['Dewpoint'])

#### 이슬점온도 (RH역수로 만든 것)

In [None]:
train['Dewpoint_1'] = make_dewpoint(train,'RH_1') ## (RH_1)
test['Dewpoint_1'] = make_dewpoint(test,'RH_1') ## (RH_1)

In [None]:
def make_dewpoint(df) :
    dewpoint = []
    b = 17.62
    c = 243.12
    
    gamma = (b * df['T'] / (c + df['T']) + np.log(df['RH'] / 100.0))
    dewpoint_T = (c * gamma) / (b - gamma)
    
    for dew in dewpoint_T :
        if dew >= 26 :
            dewpoint.append('high')
        elif 24<= dew < 26:
            dewpoint.append('extremely')
        elif 21 <= dew < 24:
            dewpoint.append('very')
        elif 18<= dew < 21:
            dewpoint.append('uncomfortable')
        elif 16<= dew < 18:
            dewpoint.append('ok')
        elif 13<= dew < 16:
            dewpoint.append('comfortable')
        elif 10<= dew < 13:
            dewpoint.append('very comfortable')
        else :
            dewpoint.append('dry')
    
    return dewpoint

In [None]:
train['Dewpoint'] = make_dewpoint(train)
test['Dewpoint'] = make_dewpoint(test)

train = pd.get_dummies(train,columns=['Dewpoint'])
test = pd.get_dummies(test,columns=['Dewpoint'])

#### 일조시간

In [None]:
def make_sunlight_time(df):
    start = []
    end = []
    result = []
    for day in df['Day'].unique() :
        start.append(int(df.loc[(df['Day'] == day) & ((df['DHI']!= 0) | (df['DNI'] != 0))].index[0]))
        end.append(int(df.loc[(df['Day']==day) & ((df['DHI'] !=0) | (df['DNI'] !=0))].index[len(df.loc[(df['Day']==day) & ((df['DHI'] !=0) | (df['DNI'] !=0))])-1]))
        
        result.append((end[day] - start[day]) / 2)
    return result

In [None]:
train_list = []
train_list.extend(make_sunlight_time(train))
train['Sunlight_time'] = train_list * 48

In [None]:
start = []
end = []
result = []
for i in range(81) :
    df = test[int(len(test) / 81 *i) : int(len(test) / 81 * (i+1))]
    for day in df['Day'].unique() :
        start.append(int(df.loc[(df['Day'] == day) & ((df['DHI']!= 0) | (df['DNI'] != 0))].index[0]))
        end.append(int(df.loc[(df['Day']==day) & ((df['DHI'] !=0) | (df['DNI'] !=0))].index[len(df.loc[(df['Day']==day) & ((df['DHI'] !=0) | (df['DNI'] !=0))])-1]))
        
        result.append((end[day] - start[day]) / 2)

test['Sunlight_time'] = result * 48

In [None]:
train['Sunlight_time'] = train['Sunlight_time'] * 0.7
test['Sunlight_time'] = test['Sunlight_time'] * 0.7

#### GHI

In [None]:
def make_GHI(df):
    start = []
    rise = []
    sunset = []
    end = []
    weight = []
    
    for v in range(df['Day'].nunique()):
        start.append(int(df.loc[df['Day']==v].index[0]))
        rise.append(int(df.loc[(df['Day']==v) & ((df['DHI'] !=0) | (df['DNI'] !=0))].index[0]))
        sunset.append(int(df.loc[(df['Day']==v) & ((df['DHI'] !=0) | (df['DNI'] !=0))].index[len(df.loc[(df['Day']==v) & ((df['DHI'] !=0) | (df['DNI'] !=0))])-1]))
        end.append(int(df.loc[(df['Day'] ==v)].index[47]))
        
    
        a = round((180 / (sunset[v] - rise[v])),2)
        b = 0
    
        for i in range(start[v],rise[v]+1) :
            weight.append(0)
        
        for i in range(rise[v]+1, sunset[v]):
            b+=a
            weight.append(round(b,2))
    
        for i in range(sunset[v], end[v]+1) :
            weight.append(0)
            
        for i in range(len(sunset)) :
            weight[sunset[i]] = 180
     
    angle = []
    for value in weight :
        if value >0 :
            angle.append(round(value - 90,2))
        else :
            angle.append(value)
        
    return angle

In [None]:
train_list =[]
train_list.extend(make_GHI(train))
train_list=[0 if x< 0 else x for x in train_list]
train.insert(5,'GHI',train['DNI'] + train['DHI'] *  np.cos(train_list))

In [None]:
start = []
rise = []
sunset = []
end = []
weight = []
for i in range(81) :
    df = test[int(len(test) / 81*i) : int(len(test) / 81 * (i+1))]
    
    for v in range(df['Day'].nunique()):
        start.append(int(df.loc[df['Day']==v].index[0]))
        rise.append(int(df.loc[(df['Day']==v) & ((df['DHI'] !=0) | (df['DNI'] !=0))].index[0]))
        sunset.append(int(df.loc[(df['Day']==v) & ((df['DHI'] !=0) | (df['DNI'] !=0))].index[len(df.loc[(df['Day']==v) & ((df['DHI'] !=0) | (df['DNI'] !=0))])-1]))
        end.append(int(df.loc[(df['Day'] ==v)].index[47]))
        
        a = round((180 / (sunset[v] - rise[v])),2)
        b = 0
        
        for i in range(start[v],rise[v]+1) :
            weight.append(0)
          
        for i in range(rise[v]+1,sunset[v]):
            b+= a
            weight.append(round(b,2))
        
        for i in range(sunset[v], end[v]+1) :
            weight.append(0)
            
        for i in range(len(sunset)):
            weight[sunset[i]] = 180
            
    angle = []
    for value in weight :
        if value >0 :
            angle.append(round(value - 90,2))
        else :
            angle.append(value)
            
angle = [0 if x < 0 else x for x in angle]

test.insert(5,'GHI',test['DNI'] + test['DHI'] * np.cos(angle))

### version2

In [None]:
 def make_GHI(df):
    start = []
    rise = []
    high = []
    sunset = []
    end = []
    weight = []
    
    for v in range(df['Day'].nunique()):
        
        start.append(int(df.loc[df['Day']==v].index[0]))
        rise.append(int(df.loc[(df['Day']==v) & ((df['DHI'] !=0) | (df['DNI'] !=0))].index[0]))
        high.append(int(df.loc[(df['Day'] == v) & (df['Hour']==12)& (df['Minute']==0)].index[0]))
        sunset.append(int(df.loc[(df['Day']==v) & ((df['DHI'] !=0) | (df['DNI'] !=0))].index[-1]))
        end.append(int(df.loc[(df['Day'] ==v)].index[47]))
        
        sunrise = 0
        noon = 90
        
        first = round((90 / (high[v] - rise[v])),2)
        last = round((90 / (sunset[v] - high[v])),2)
        
        for i in range(start[v],rise[v]) :
            weight.append(0)
        
        weight.append(90)
        
        for i in range(rise[v]+1,high[v]) :
            sunrise += first
            weight.append(90 - round(sunrise,2))
            
        weight.append(noon - 90)
        
        for i in range(high[v]+1,sunset[v]):
            noon -= last
            weight.append(90 - round(noon,2))
        
        weight.append(90)
        
        for i in range(sunset[v]+1,end[v]+1):
            weight.append(0)
    
    df.insert(5,'GHI',df['DNI'] + df['DHI'] * np.cos(weight))
    return df.head()

### DNI DHI ()

In [223]:
train['DI_mean'] = (train['DHI'] + train['DNI']) / 2

test['DI_mean'] = (test['DHI'] + test['DNI']) / 2

In [143]:
def make_sun(df):
    start = []
    rise = []
    high = []
    sunset = []
    weight = []
    end = []
    for v in range(df['Day'].nunique()):
        
        start.append(int(df.loc[df['Day']==v].index[0]))
        rise.append(int(df.loc[(df['Day'] == v) & (df['DI_mean']!=0)].index[0]))
        high.append(int(df.loc[(df['Day'] == v) & (df['Hour']==12)& (df['Minute']==0)].index[0]))
        sunset.append(int(df.loc[(df['Day'] == v) & (df['DI_mean']!=0)].index[len(df.loc[(train['Day'] == v) & (df['DI_mean']!=0)])-1]))
        end.append(int(df.loc[(df['Day'] ==v)].index[47]))
        
        
        sunrise = 1 # 시작
        noon = 2 # 12시
        
        first = round((1 / (high[v] - rise[v])),2)
        last = round((1 / (sunset[v] - high[v])),2)
    
        for i in range(start[v],rise[v]):
            weight.append(0)
            
        weight.append(1)
        for i in range(rise[v]+1,high[v]) :
            sunrise += first
            weight.append(round(sunrise,2))
    
        weight.append(noon)
    
        for i in range(high[v],sunset[v]) :
            noon -= last
            weight.append(round(noon,2))
        
        for i in range(sunset[v],end[v]):
            weight.append(0)
            
    return weight

In [None]:
start = []
rise = []
high = []
sunset = []
weight = []
end = []

for i in range(81) :
    df = test[int(len(test) / 81*i) : int(len(test) / 81 * (i+1))]
    for v in range(df['Day'].nunique()):
        
        start.append(int(df.loc[df['Day']==v].index[0]))
        rise.append(int(df.loc[(df['Day'] == v) & (df['DI_mean']!=0)].index[0]))
        high.append(int(df.loc[(df['Day'] == v) & (df['Hour']==12)& (df['Minute']==0)].index[0]))
        sunset.append(int(df.loc[(df['Day'] == v) & (df['DI_mean']!=0)].index[len(df.loc[(train['Day'] == v) & (df['DI_mean']!=0)])-1]))
        end.append(int(df.loc[(df['Day'] ==v)].index[47]))
        
        
        sunrise = 1 # 시작
        noon = 2 # 12시
        
        first = round((1 / (high[v] - rise[v])),2)
        last = round((1 / (sunset[v] - high[v])),2)
    
        for i in range(start[v],rise[v]):
            weight.append(0)
            
        weight.append(1)
        
        for i in range(rise[v]+1,high[v]) :
            sunrise += first
            weight.append(round(sunrise,2))
    
        weight.append(noon)
    
        for i in range(high[v],sunset[v]) :
            noon -= last
            weight.append(round(noon,2))
        
        for i in range(sunset[v],end[v]):
            weight.append(0)
    

In [None]:
test['sun'] = weight
del test['DI_mean']

#### 온도풍속계수

In [226]:
val = []
for i in range(len(train)) :
    val.append(round((742.9 + 176.5 * train['T'][i] + 3.562 * train['WS'][i] - 13.14*train['T'][i]**2 - 0.7466*train['T'][i]*train['WS'][i] - 0.151*train['WS'][i]**2),2))
    
train['option'] = val    

In [227]:
val = []
for i in range(len(test)) :
    val.append(round((742.9 + 176.5 * test['T'][i] + 3.562 * test['WS'][i] - 13.14*test['T'][i]**2 - 0.7466*test['T'][i]*test['WS'][i] - 0.151*test['WS'][i]**2),2))
    
test['option'] = val    

#### eff_gap

In [None]:
train['eff_gap'] = [.05 * (x - 25) if x > 25 else 0 for x in train['T']]
test['eff_gap'] = [.05 * (x - 25) if x > 25 else 0 for x in test['T']]

In [None]:
def temp_efficiency(df) :
    temp = []
    for t in df['T'] :
        if t > 25:
            temp.append(round((t * (0.05 **(t-25))),2))
        else:
            temp.append(0)
    
    return temp

train['T_efficiency'] = temp_efficiency(train)
test['T_efficiency'] = temp_efficiency(test)

#### MeanEncoding

In [229]:
train['Hour_mean'] = train['Hour'].map(train.groupby(['Hour'])['TARGET'].mean())
test['Hour_mean'] = test['Hour'].map(test.groupby(['Hour'])['TARGET'].mean())

train['day_mean'] = train.Day.map(train.groupby('Day').TARGET.mean())
test['day_mean'] = test.Day.map(test.groupby('Day').TARGET.mean())

train['T_mean'] = train['T'].map(train.groupby('T').TARGET.mean())
test['T_mean'] = test['T'].map(test.groupby('T').TARGET.mean())

train['Td_mean'] = train['Td'].map(train.groupby('Td').TARGET.mean())
test['Td_mean'] = test['Td'].map(test.groupby('Td').TARGET.mean())

train['WS_mean'] = train['WS'].map(train.groupby('WS')['TARGET'].mean())
test['WS_mean'] = test['WS'].map(test.groupby('WS')['TARGET'].mean())

#### Season

In [231]:
def season(x) :
    if x >= 24 :
        return 'summer'
    elif 5 <= x < 24 :
        return 'spring_or_fall'
    else :
        return 'winter'

In [232]:
train_temp_mean = train.groupby('Day')['T'].mean().reset_index(name = 'temp_mean')
train_temp_mean['season'] = train_temp_mean['temp_mean'].apply(season)
train = pd.merge(train, train_temp_mean, how = 'left', on = 'Day')

test_temp_mean = test.groupby('sep_day')['T'].mean().reset_index(name = 'temp_mean')
test_temp_mean['season'] = test_temp_mean['temp_mean'].apply(season)
test = pd.merge(test, test_temp_mean, how = 'left', on = 'sep_day')

train = pd.get_dummies(train,columns=['season'])

test = pd.get_dummies(test,columns=['season'])

del test['sep_day']

In [233]:
train.Minute = train.Minute.map({30 : 1, 0: 0})
test.Minute = test.Minute.map({30 : 1, 0: 0})

#### 일조율

In [None]:
try_list = []
for d in train['Day'].unique() :
    try_list.extend(round(train.loc[train['Day']==d]['GHI2'] / train.groupby('Day')['GHI2'].sum()[d],2))
    
train['sunshine'] = try_list
train['sunshine'] = round(train['sunshine'],2)

try_list = []
for i in range(81) :
    df = test[int(len(test) / 81*i) : int(len(test) / 81 * (i+1))]
    for d in df['Day'].unique() :
        try_list.extend(round(df.loc[test['Day']==d]['GHI2'] / df.groupby('Day')['GHI2'].sum()[d],2))
        
test['sunshine'] = try_list
test['sunshine'] = round(test['sunshine'],2)

In [None]:
try_list = []
for i in range(81) :
    df = test[int(len(test) / 81*i) : int(len(test) / 81 * (i+1))]
    for d in df['Day'].unique() :
        try_list.extend(round(df.loc[test['Day']==d]['x'] / df.groupby('Day')['x'].sum()[d],2))
        
test['sunshine'] = try_list
test['sunshine'] = round(test['sunshine'],2)

#### 습도와 온도차이

In [None]:
def rain_function(df,name) :
    
    rain = []
    for i,v in enumerate(df['RH']) :
        if v == 100 :
            rain.append(1)
        else :
            rain.append(0)
    
        rain[i] = rain[i] - v
        
    df[name] = rain
    
    return df.head()

In [None]:
rain_function(train,'RH_diff')
rain_function(test,'RH_diff')

### 시간에 따른 변수들의 차이

In [None]:
def diff_col_time_train(col) :
    interval = [12,24,48,72]
    for a in range(len(interval)) :
        diff_temper = []
        for i in range(len(train)) :
            if i > interval[a] :
                diff = train[col].iloc[i] - train[col].iloc[i - interval[a]]
                diff_temper.append(diff)
            else :
                diff = train[col].iloc[i] - train[col].iloc[0]
                diff_temper.append(diff)
        train['Diff_{}{}'.format(col,interval[a])] = diff_temper
    return train.head()

In [None]:
diff_col_time_train('T')

In [None]:
def diff_col_time_test(col):
    diff_temp1 = []
    diff_temp2 = []
    diff_temp3 = []
    diff_temp4 = []
    diff_temps = [diff_temp1,diff_temp2,diff_temp3,diff_temp4]
    interval = [12,24,48,72]
    
    for t in range(81) :
        df = test[int(len(test) / 81*t) : int(len(test) / 81 * (t+1))]
        for a in range(len(interval)) :
            diff_temper = []
            for i in range(len(df)) :
                if i> interval[a] :
                    diff = round(df[col].iloc[i] - df[col].iloc[i - interval[a]],2)
                    diff_temper.append(diff)
                else:
                    diff = round(df[col].iloc[i] - df[col].iloc[0],2)
                    diff_temper.append(diff)
            diff_temps[a].extend(diff_temper)
    
    for a in range(len(interval)) :
        test['Diff_{}{}'.format(col,interval[a])] = diff_temps[a]
        
    return test.head()

In [None]:
diff_col_time_train('RH')

#### 이동평균

In [None]:
def moving_average_function(data,col,windows,centers) :
    
    data['{}_{}'.format(col,windows)] = data[col].rolling(window = windows,center = centers,min_periods = 1).mean()
    
    return data

In [None]:
train['RH_1'] = 1 / train['RH']
test['RH_1'] = 1 / test['RH']

In [None]:
train['WS'] = np.sqrt(train['WS'])
test['WS'] = np.sqrt(test['WS'])

In [None]:
train['R_WS'] = train['WS']**2
test['R_WS'] = test['WS']**2