# Data Rolling & Difference

- 현재 값뿐만 아니라 과거 값도 함께 모델에 반영한다면 조금 더 세밀한 분석을 할 수 있을 것이라고 생각했다.

In [2]:
import os
import numpy as np 
import pandas as pd

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

import itertools

# Matplotlib defaults shared by every figure drawn below.
plt.rcParams.update({
    # Font family (original note: chosen for a Windows setup)
    'font.family': 'NanumGothic',
    # Figure size
    'figure.figsize': (12, 6),
    # Text size
    'font.size': 14,
    # Keep the minus sign from rendering as a broken glyph once a custom font is set
    'axes.unicode_minus': False,
})

# Keep warning messages the author considered unnecessary out of the output.
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Directory that holds the input workbook; joined with the file name when the data is loaded.
BASE_DIR = './data'

In [4]:
# Location of the source workbook inside BASE_DIR.
train_path = os.path.join(
    BASE_DIR,
    '2021 빅콘테스트_데이터분석분야_퓨처스리그_홍수ZERO_댐유입량,강우,수위데이터_210902_update.xlsx',
)

# Read the sheet, drop its first row and renumber the remaining rows from 0.
# NOTE(review): the dropped row is presumably a second header line - confirm against the workbook.
data = pd.read_excel(train_path)[1:].reset_index(drop = True)

# Convert everything from the 7th column onward to numeric values.
numeric_part = data.iloc[:, 6:].apply(pd.to_numeric)
data.iloc[:, 6:] = numeric_part

# Six leading columns, then seven measurements for each of the groups 1-6.
data.columns = [
    '홍수사상번호', '연', '월', '일', '시간', '유입량',
    '1_유역평균강수', '1_강우(A지역)', '1_강우(B지역)', '1_강우(C지역)', '1_강우(D지역)', '1_수위(E지역)', '1_수위(D지역)',
    '2_유역평균강수', '2_강우(A지역)', '2_강우(B지역)', '2_강우(C지역)', '2_강우(D지역)', '2_수위(E지역)', '2_수위(D지역)',
    '3_유역평균강수', '3_강우(A지역)', '3_강우(B지역)', '3_강우(C지역)', '3_강우(D지역)', '3_수위(E지역)', '3_수위(D지역)',
    '4_유역평균강수', '4_강우(A지역)', '4_강우(B지역)', '4_강우(C지역)', '4_강우(D지역)', '4_수위(E지역)', '4_수위(D지역)',
    '5_유역평균강수', '5_강우(A지역)', '5_강우(B지역)', '5_강우(C지역)', '5_강우(D지역)', '5_수위(E지역)', '5_수위(D지역)',
    '6_유역평균강수', '6_강우(A지역)', '6_강우(B지역)', '6_강우(C지역)', '6_강우(D지역)', '6_수위(E지역)', '6_수위(D지역)',
]

# 집단 7을 대상으로 Rolling & Difference

- 집단 7은 각 시점마다 집단 1~6의 값 중 최댓값과 최솟값을 하나씩 제외한 나머지 4개 값의 평균으로 계산한다.
- 수위(E지역)의 경우 모든 집단에서 같은 값을 가지므로 집단 7을 만들지 않았음

In [7]:
# Feature engineering continues on the loaded frame under the name data_feat.
data_feat = data

# Measurements combined across the six groups (columns '1_<measure>' .. '6_<measure>').
group7_measures = ['유역평균강수', '강우(A지역)', '강우(B지역)', '강우(C지역)', '강우(D지역)', '수위(D지역)']


def _trimmed_group_mean(frame, measure, groups=range(1, 7)):
    """Return the per-row mean of ``measure`` across ``groups`` without its extremes.

    For every row, the values of the columns ``'<group>_<measure>'`` are sorted,
    the single largest and the single smallest value are discarded, and the
    remaining values are averaged.

    Args:
        frame: DataFrame holding the ``'<group>_<measure>'`` columns.
        measure: Column-name suffix shared by the groups, e.g. ``'유역평균강수'``.
        groups: Group numbers to combine; at least three are needed so that
            something is left after trimming.

    Returns:
        1-D float ndarray with one trimmed mean per row of ``frame``.
    """
    columns = ['{}_{}'.format(group, measure) for group in groups]
    ordered = np.sort(frame[columns].to_numpy(dtype=float), axis=1)
    # NOTE: np.sort places NaN last, so a lone missing value is what gets dropped as the "maximum";
    # two or more missing values in a row yield NaN.
    return ordered[:, 1:-1].mean(axis=1)


# One '7_<measure>' column per measurement, aligned with the rows of data_feat.
data_feat_7 = pd.DataFrame(
    np.column_stack([_trimmed_group_mean(data_feat, measure) for measure in group7_measures]),
    columns=['7_{}'.format(measure) for measure in group7_measures],
    index=data_feat.index,
)
data_feat = pd.concat([data_feat, data_feat_7], axis = 1)

In [8]:
# Bare expression: the notebook renders the frame (now including the 7_* columns) as the cell output.
data_feat

Unnamed: 0,홍수사상번호,연,월,일,시간,유입량,1_유역평균강수,1_강우(A지역),1_강우(B지역),1_강우(C지역),...,6_강우(C지역),6_강우(D지역),6_수위(E지역),6_수위(D지역),7_유역평균강수,7_강우(A지역),7_강우(B지역),7_강우(C지역),7_강우(D지역),7_수위(D지역)
0,1.0,2006.0,7.0,10.0,8.0,189.100000,6.4000,7,7,7,...,8,8,2.54,122.610,6.375000,7.0,7.0,7.50,8.00,122.597188
1,1.0,2006.0,7.0,10.0,9.0,216.951962,6.3000,7,8,7,...,10,10,2.53,122.600,6.850000,7.0,8.0,8.50,9.00,122.592208
2,1.0,2006.0,7.0,10.0,10.0,251.424419,6.4000,7,9,7,...,10,11,2.53,122.590,7.750000,7.0,9.0,8.75,9.50,122.587750
3,1.0,2006.0,7.0,10.0,11.0,302.812199,7.3000,7,10,7,...,15,14,2.53,122.585,9.750000,8.0,10.0,12.00,11.50,122.586667
4,1.0,2006.0,7.0,10.0,12.0,384.783406,8.2000,7,12,8,...,18,16,2.53,122.575,12.850000,10.5,12.0,14.00,13.25,122.582250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3046,26.0,2018.0,7.0,7.0,17.0,,2.3689,1,0,0,...,0,0,3.16,129.950,2.368900,1.0,0.0,0.25,0.00,129.969104
3047,26.0,2018.0,7.0,7.0,18.0,,2.3689,1,0,0,...,0,0,3.15,129.970,2.368900,1.0,0.0,0.25,0.00,129.982313
3048,26.0,2018.0,7.0,7.0,19.0,,2.3689,1,0,0,...,0,0,3.13,129.980,2.368900,1.0,0.0,0.25,0.00,129.989375
3049,26.0,2018.0,7.0,7.0,20.0,,2.3689,1,0,0,...,0,0,3.11,129.990,2.363875,1.0,0.0,0.25,0.00,129.996438


## rolling

In [9]:
# rolling 1
# Lag-1 features: every listed series moved down by one row (shift(1)).
event_col = '홍수사상번호'
# (new lag column, source column). No group-7 series exists for the E-region level,
# so group 1's column is used as its source.
lag_sources = [
    ('7_유역평균강수_shift_1', '7_유역평균강수'),
    ('7_강우(A지역)_shift_1', '7_강우(A지역)'),
    ('7_강우(B지역)_shift_1', '7_강우(B지역)'),
    ('7_강우(C지역)_shift_1', '7_강우(C지역)'),
    ('7_강우(D지역)_shift_1', '7_강우(D지역)'),
    ('7_수위(D지역)_shift_1', '7_수위(D지역)'),
    ('7_수위(E지역)_shift_1', '1_수위(E지역)'),
]

# .copy(): the lag columns are added to an independent frame, not to a selection of data_feat.
data_rolling = data_feat[[event_col]].copy()
for lag_col, source_col in lag_sources:
    data_rolling[lag_col] = data_feat[source_col].shift(1)

# shift(1) ignores event boundaries, so the first row of an event would receive a value from the
# row before it (NaN for the very first row). That entry is replaced by extending the event's own
# 2nd and 3rd lagged values one step backwards in a straight line, floored at 0.
# Event ids are read from the data instead of assuming the fixed range 1..26.
lst = []  # label of the first row of each event (variable name kept from the original cell)
for event_id in sorted(data_rolling[event_col].dropna().unique()):
    event_rows = data_rolling[data_rolling[event_col] == event_id]  # filtered once per event
    first_label = event_rows.index[0]
    lst.append(first_label)
    for lag_col, _ in lag_sources:
        # Raises IndexError when an event has fewer than three rows.
        second = event_rows[lag_col].iloc[1]
        third = event_rows[lag_col].iloc[2]
        dx = second - (third - second)
        data_rolling.loc[first_label, lag_col] = 0 if dx < 0 else dx

data_rolling = data_rolling.drop([event_col], axis = 1)

data_feat = pd.concat([data_feat, data_rolling], axis = 1)

In [11]:
# rolling 2
# Lag-2 features: the lag-1 columns moved down by one more row (shift(1)).
event_col = '홍수사상번호'
lag_bases = ['7_유역평균강수', '7_강우(A지역)', '7_강우(B지역)', '7_강우(C지역)',
             '7_강우(D지역)', '7_수위(D지역)', '7_수위(E지역)']
lag_cols = [base + '_shift_2' for base in lag_bases]

# .copy(): the lag columns are added to an independent frame, not to a selection of data_feat.
data_rolling = data_feat[[event_col]].copy()
for base, lag_col in zip(lag_bases, lag_cols):
    data_rolling[lag_col] = data_feat[base + '_shift_1'].shift(1)

# shift(1) ignores event boundaries, so the first row of an event would receive a value from the
# row before it (NaN for the very first row). That entry is replaced by extending the event's own
# 2nd and 3rd lagged values one step backwards in a straight line, floored at 0.
# Event ids are read from the data instead of assuming the fixed range 1..26.
lst = []  # label of the first row of each event (variable name kept from the original cell)
for event_id in sorted(data_rolling[event_col].dropna().unique()):
    event_rows = data_rolling[data_rolling[event_col] == event_id]  # filtered once per event
    first_label = event_rows.index[0]
    lst.append(first_label)
    for lag_col in lag_cols:
        # Raises IndexError when an event has fewer than three rows.
        second = event_rows[lag_col].iloc[1]
        third = event_rows[lag_col].iloc[2]
        dx = second - (third - second)
        data_rolling.loc[first_label, lag_col] = 0 if dx < 0 else dx

data_rolling = data_rolling.drop([event_col], axis = 1)
data_feat = pd.concat([data_feat, data_rolling], axis = 1)

In [12]:
# rolling 3
# Lag-3 features: the lag-2 columns moved down by one more row (shift(1)).
event_col = '홍수사상번호'
lag_bases = ['7_유역평균강수', '7_강우(A지역)', '7_강우(B지역)', '7_강우(C지역)',
             '7_강우(D지역)', '7_수위(D지역)', '7_수위(E지역)']
lag_cols = [base + '_shift_3' for base in lag_bases]

# .copy(): the lag columns are added to an independent frame, not to a selection of data_feat.
data_rolling = data_feat[[event_col]].copy()
for base, lag_col in zip(lag_bases, lag_cols):
    data_rolling[lag_col] = data_feat[base + '_shift_2'].shift(1)

# shift(1) ignores event boundaries, so the first row of an event would receive a value from the
# row before it (NaN for the very first row). That entry is replaced by extending the event's own
# 2nd and 3rd lagged values one step backwards in a straight line, floored at 0.
# Event ids are read from the data instead of assuming the fixed range 1..26.
lst = []  # label of the first row of each event (variable name kept from the original cell)
for event_id in sorted(data_rolling[event_col].dropna().unique()):
    event_rows = data_rolling[data_rolling[event_col] == event_id]  # filtered once per event
    first_label = event_rows.index[0]
    lst.append(first_label)
    for lag_col in lag_cols:
        # Raises IndexError when an event has fewer than three rows.
        second = event_rows[lag_col].iloc[1]
        third = event_rows[lag_col].iloc[2]
        dx = second - (third - second)
        data_rolling.loc[first_label, lag_col] = 0 if dx < 0 else dx

data_rolling = data_rolling.drop([event_col], axis = 1)
data_feat = pd.concat([data_feat, data_rolling], axis = 1)

In [13]:
# Bare expression: the notebook renders the frame (now including the *_shift_N columns) as the cell output.
data_feat

Unnamed: 0,홍수사상번호,연,월,일,시간,유입량,1_유역평균강수,1_강우(A지역),1_강우(B지역),1_강우(C지역),...,7_강우(D지역)_shift_2,7_수위(D지역)_shift_2,1_수위(E지역)_shift_2,7_유역평균강수_shift_3,7_강우(A지역)_shift_3,7_강우(B지역)_shift_3,7_강우(C지역)_shift_3,7_강우(D지역)_shift_3,7_수위(D지역)_shift_3,1_수위(E지역)_shift_3
0,1.0,2006.0,7.0,10.0,8.0,189.100000,6.4000,7,7,7,...,6.0,122.607146,2.56,4.950000,7.0,4.0,4.50,5.0,122.612125,2.57
1,1.0,2006.0,7.0,10.0,9.0,216.951962,6.3000,7,8,7,...,7.0,122.602167,2.55,5.425000,7.0,5.0,5.50,6.0,122.607146,2.56
2,1.0,2006.0,7.0,10.0,10.0,251.424419,6.4000,7,9,7,...,8.0,122.597188,2.54,5.900000,7.0,6.0,6.50,7.0,122.602167,2.55
3,1.0,2006.0,7.0,10.0,11.0,302.812199,7.3000,7,10,7,...,9.0,122.592208,2.53,6.375000,7.0,7.0,7.50,8.0,122.597188,2.54
4,1.0,2006.0,7.0,10.0,12.0,384.783406,8.2000,7,12,8,...,9.5,122.587750,2.53,6.850000,7.0,8.0,8.50,9.0,122.592208,2.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3046,26.0,2018.0,7.0,7.0,17.0,,2.3689,1,0,0,...,0.0,129.939375,3.19,2.611575,1.0,0.0,0.75,0.0,129.925417,3.21
3047,26.0,2018.0,7.0,7.0,18.0,,2.3689,1,0,0,...,0.0,129.957229,3.18,2.399350,1.0,0.0,0.50,0.0,129.939375,3.19
3048,26.0,2018.0,7.0,7.0,19.0,,2.3689,1,0,0,...,0.0,129.969104,3.16,2.368900,1.0,0.0,0.50,0.0,129.957229,3.18
3049,26.0,2018.0,7.0,7.0,20.0,,2.3689,1,0,0,...,0.0,129.982313,3.15,2.368900,1.0,0.0,0.25,0.0,129.969104,3.16


## diff

In [14]:
# First difference of each series: current value minus its lag-1 value.
# (current column, lag-1 column, new difference column). The E-region level has no
# '7_' current-value column, so group 1's column supplies the current value.
diff_specs = [
    ('7_유역평균강수', '7_유역평균강수_shift_1', '7_유역평균강수_diff'),
    ('7_강우(A지역)', '7_강우(A지역)_shift_1', '7_강우(A지역)_diff'),
    ('7_강우(B지역)', '7_강우(B지역)_shift_1', '7_강우(B지역)_diff'),
    ('7_강우(C지역)', '7_강우(C지역)_shift_1', '7_강우(C지역)_diff'),
    ('7_강우(D지역)', '7_강우(D지역)_shift_1', '7_강우(D지역)_diff'),
    ('7_수위(D지역)', '7_수위(D지역)_shift_1', '7_수위(D지역)_diff'),
    ('1_수위(E지역)', '7_수위(E지역)_shift_1', '7_수위(E지역)_diff'),
]
for current_col, lag_col, diff_col in diff_specs:
    # The subtraction already yields a Series aligned with data_feat; assign it directly.
    data_feat[diff_col] = data_feat[current_col] - data_feat[lag_col]

In [15]:
# Bare expression: the notebook renders the frame (now including the *_diff columns) as the cell output.
data_feat

Unnamed: 0,홍수사상번호,연,월,일,시간,유입량,1_유역평균강수,1_강우(A지역),1_강우(B지역),1_강우(C지역),...,7_강우(D지역)_shift_3,7_수위(D지역)_shift_3,1_수위(E지역)_shift_3,7_유역평균강수_diff,7_강우(A지역)_diff,7_강우(B지역)_diff,7_강우(C지역)_diff,7_강우(D지역)_diff,7_수위(D지역)_diff,1_수위(E지역)_diff
0,1.0,2006.0,7.0,10.0,8.0,189.100000,6.4000,7,7,7,...,5.0,122.612125,2.57,0.475000,0.0,1.0,1.00,1.00,-0.004979,-0.01
1,1.0,2006.0,7.0,10.0,9.0,216.951962,6.3000,7,8,7,...,6.0,122.607146,2.56,0.475000,0.0,1.0,1.00,1.00,-0.004979,-0.01
2,1.0,2006.0,7.0,10.0,10.0,251.424419,6.4000,7,9,7,...,7.0,122.602167,2.55,0.900000,0.0,1.0,0.25,0.50,-0.004458,0.00
3,1.0,2006.0,7.0,10.0,11.0,302.812199,7.3000,7,10,7,...,8.0,122.597188,2.54,2.000000,1.0,1.0,3.25,2.00,-0.001083,0.00
4,1.0,2006.0,7.0,10.0,12.0,384.783406,8.2000,7,12,8,...,9.0,122.592208,2.53,3.100000,2.5,2.0,2.00,1.75,-0.004417,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3046,26.0,2018.0,7.0,7.0,17.0,,2.3689,1,0,0,...,0.0,129.925417,3.21,0.000000,0.0,0.0,-0.25,0.00,0.011875,-0.02
3047,26.0,2018.0,7.0,7.0,18.0,,2.3689,1,0,0,...,0.0,129.939375,3.19,0.000000,0.0,0.0,0.00,0.00,0.013208,-0.01
3048,26.0,2018.0,7.0,7.0,19.0,,2.3689,1,0,0,...,0.0,129.957229,3.18,0.000000,0.0,0.0,0.00,0.00,0.007062,-0.02
3049,26.0,2018.0,7.0,7.0,20.0,,2.3689,1,0,0,...,0.0,129.969104,3.16,-0.005025,0.0,0.0,0.00,0.00,0.007063,-0.02
