## **Code : Compute per-variable hourly averages**
- Writer : Donghyeon Kim
- Update : 2022.11.11.
- 대상 : 태양광 사용 가구 & 미사용 가구
- 분석사항 : 보정된 데이터 1시간 단위 - 가구별 데이터가 아닌, 사용/미사용 카테고리에서 1시간 단위 평균치 산출

<hr>

## **Prior Settings**

In [1]:
# 라이브러리 설정
import os
import pandas as pd
import numpy as np
import openpyxl

In [2]:
# 사용자 함수 호출
# get_project_root : 파일의 상위-상위 경로 호출
# get_name_use_final : 태양광 사용자 이름 호출(최종)
# get_name_not_final : 태양광 미사용자 이름 호출(최종)
from pack_utils import get_project_root, get_name_use_final, get_name_not_final

### **Data**
#### Hour(1시간 단위)

In [3]:
# Load the complete hourly dataset from the 'data_final' folder under the
# project root.
root = get_project_root()
folder_root = os.path.join(root, 'data_final')
file_name = os.path.join(folder_root, 'final_data_hour.xlsx')
df_all_hour = pd.read_excel(file_name)

# Split the rows by the value of the 'type' column.
# (1) Households using solar
df_all_hour_use = df_all_hour.loc[df_all_hour['type'] == 'use']

# (2) Households not using solar
df_all_hour_not = df_all_hour.loc[df_all_hour['type'] == 'not']

In [4]:
# Print a summary of the full hourly dataset (columns, non-null counts, dtypes).
df_all_hour.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 337509 entries, 0 to 337508
Data columns (total 25 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   가구번호                    337509 non-null  int64  
 1   연도                      337509 non-null  int64  
 2   월                       337509 non-null  int64  
 3   일                       337509 non-null  int64  
 4   시간                      337509 non-null  int64  
 5   설비용량(kW)                107389 non-null  object 
 6   발전량(kWh)                107389 non-null  float64
 7   전력소비량(kWh)              337509 non-null  float64
 8   수전전력량(kWh)              337509 non-null  float64
 9   잉여전력량(kWh)              107389 non-null  float64
 10  잉여전력량/발전량               107389 non-null  float64
 11  자가소비율                   107389 non-null  float64
 12  자가공급률                   107389 non-null  float64
 13  temperature             337509 non-null  float64
 14  uws_10m             

In [5]:
# Preview the first rows of the solar-user subset; the 'id_hh', 'id_hs' and
# 'owner' columns are dropped from this displayed copy only.
df_all_hour_use.drop(columns=['id_hh', 'id_hs', 'owner']).head()

Unnamed: 0,가구번호,연도,월,일,시간,설비용량(kW),발전량(kWh),전력소비량(kWh),수전전력량(kWh),잉여전력량(kWh),...,자가공급률,temperature,uws_10m,vws_10m,ghi,precipitation,relative_humidity_1p5m,specific_humidity_1p5m,ym,type
0,10,2021,3,1,0,3kW,0.0,0.200001,0.200001,0.0,...,0.0,10.322687,0.858208,0.041718,0.0,0.0,60.644928,0.004421,2021/3,use
1,10,2021,3,1,1,3kW,0.0,0.25,0.25,0.0,...,0.0,10.029108,0.135828,-1.085083,0.0,0.0,56.432068,0.004188,2021/3,use
2,10,2021,3,1,2,3kW,0.0,0.210003,0.210003,0.0,...,0.0,9.844873,0.362611,-0.180941,0.0,0.0,62.081512,0.004626,2021/3,use
3,10,2021,3,1,3,3kW,0.0,0.18,0.18,0.0,...,0.0,9.587915,-0.411156,0.997797,0.0,0.0,56.335152,0.004261,2021/3,use
4,10,2021,3,1,4,3kW,0.0,0.190002,0.190002,0.0,...,0.0,9.564752,-0.462263,-0.222837,0.0,0.0,61.451012,0.004599,2021/3,use


<hr>

## **Obtain the average of the values**
- 구분 : 태양광 사용 / 미사용
- 변수 : 태양광 발전 관련 변수 한정

### (1) Using Solar

In [6]:
# Result dictionary: one independent, initially empty list per output column,
# in the column order of the final DataFrame.
data_time = {
    column: []
    for column in (
        '연도',
        '월',
        '일',
        '시간',
        '발전량(kWh)',
        '전력소비량(kWh)',
        '수전전력량(kWh)',
        '잉여전력량(kWh)',
        '자가소비율',
        '자가공급률',
        'type',
    )
}

In [7]:
# Hourly averages over all solar-using households.
# For every (year, month, day, hour) slot, the power-related columns are
# averaged across households, and the self-consumption rate (SCR) and
# self-sufficiency rate (SSR) are derived from those averaged values.


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator rounded to 3 decimals, or 0 if the denominator is 0.

    The zero check is explicit because NumPy floats do not raise
    ZeroDivisionError on division by zero (they yield nan/inf with a warning),
    so an ``except ZeroDivisionError`` fallback would depend on whether the
    mean happens to be a Python float or a NumPy float.
    """
    if denominator == 0:
        return 0
    return round(numerator / denominator, 3)


# Year, month and day keep their order of first appearance in the data
# (sort=False); hours within a day are visited in ascending order.
for y, rows_year in df_all_hour_use.groupby('연도', sort=False):
    for m, rows_month in rows_year.groupby('월', sort=False):
        for d, rows_day in rows_month.groupby('일', sort=False):
            for h, rows_hour in rows_day.groupby('시간'):
                # Time keys of this slot
                data_time['연도'].append(y)
                data_time['월'].append(m)
                data_time['일'].append(d)
                data_time['시간'].append(h)

                # Column means across households (NaN values are skipped)
                power = rows_hour['발전량(kWh)'].mean()          # generation
                consum = rows_hour['전력소비량(kWh)'].mean()      # consumption
                grid_consum = rows_hour['수전전력량(kWh)'].mean()  # grid import
                export = rows_hour['잉여전력량(kWh)'].mean()      # surplus/export

                data_time['발전량(kWh)'].append(power)
                data_time['전력소비량(kWh)'].append(consum)
                data_time['수전전력량(kWh)'].append(grid_consum)
                data_time['잉여전력량(kWh)'].append(export)

                # SCR = (generation - export) / generation
                data_time['자가소비율'].append(_safe_ratio(power - export, power))

                # SSR = (generation - export) / consumption
                data_time['자가공급률'].append(_safe_ratio(power - export, consum))

                # Category label: solar user
                data_time['type'].append('use')

# Build the result once, after all slots are collected; this also yields an
# empty (but defined) DataFrame when there are no solar-user rows.
data_frame_time = pd.DataFrame(data_time)

In [8]:
# Show up to the first 24 rows of the solar-user hourly averages.
data_frame_time.head(24)

Unnamed: 0,연도,월,일,시간,발전량(kWh),전력소비량(kWh),수전전력량(kWh),잉여전력량(kWh),자가소비율,자가공급률,type
0,2021,3,1,0,0.0,0.329999,0.329999,0.0,0.0,0.0,use
1,2021,3,1,1,0.0,0.3525,0.3525,0.0,0.0,0.0,use
2,2021,3,1,2,0.0,0.320001,0.320001,0.0,0.0,0.0,use
3,2021,3,1,3,0.0,0.305,0.305,0.0,0.0,0.0,use
4,2021,3,1,4,0.0,0.3375,0.3375,0.0,0.0,0.0,use
5,2021,3,1,5,0.0,0.33,0.33,0.0,0.0,0.0,use
6,2021,3,1,6,0.0,0.3925,0.3925,0.0,0.0,0.0,use
7,2021,3,1,7,0.032501,0.37,0.362501,0.025002,0.231,0.02,use
8,2021,3,1,8,0.234997,0.579998,0.522499,0.177498,0.245,0.099,use
9,2021,3,1,9,0.167502,0.384999,0.309999,0.092503,0.448,0.195,use


### (2) Not Using Solar

In [9]:
# Result dictionary for the non-solar group: one independent, initially empty
# list per output column, in the column order of the final DataFrame.
data_time2 = {
    column: []
    for column in (
        '연도',
        '월',
        '일',
        '시간',
        '발전량(kWh)',
        '전력소비량(kWh)',
        '수전전력량(kWh)',
        '잉여전력량(kWh)',
        '자가소비율',
        '자가공급률',
        'type',
    )
}

In [10]:
# Hourly averages over all households without solar.
# For every (year, month, day, hour) slot, consumption and grid import are
# averaged across households; the generation-related columns do not apply to
# this group and are filled with NaN.

# Year, month and day keep their order of first appearance in the data
# (sort=False); hours within a day are visited in ascending order.
for y, rows_year in df_all_hour_not.groupby('연도', sort=False):
    for m, rows_month in rows_year.groupby('월', sort=False):
        for d, rows_day in rows_month.groupby('일', sort=False):
            for h, rows_hour in rows_day.groupby('시간'):
                # Time keys of this slot
                data_time2['연도'].append(y)
                data_time2['월'].append(m)
                data_time2['일'].append(d)
                data_time2['시간'].append(h)

                # Column means across households (NaN values are skipped)
                consum = rows_hour['전력소비량(kWh)'].mean()       # consumption
                grid_consum = rows_hour['수전전력량(kWh)'].mean()   # grid import
                data_time2['전력소비량(kWh)'].append(consum)
                data_time2['수전전력량(kWh)'].append(grid_consum)

                # Generation, surplus, SCR and SSR are not defined here
                data_time2['발전량(kWh)'].append(np.nan)
                data_time2['잉여전력량(kWh)'].append(np.nan)
                data_time2['자가소비율'].append(np.nan)
                data_time2['자가공급률'].append(np.nan)

                # Category label: not a solar user
                data_time2['type'].append('not')

# Build the result once, after all slots are collected; this also yields an
# empty (but defined) DataFrame when there are no non-solar rows.
data_frame_time2 = pd.DataFrame(data_time2)

In [11]:
# Show up to the first 24 rows of the non-solar hourly averages.
data_frame_time2.head(24)

Unnamed: 0,연도,월,일,시간,발전량(kWh),전력소비량(kWh),수전전력량(kWh),잉여전력량(kWh),자가소비율,자가공급률,type
0,2021,3,4,0,,0.455001,0.455001,,,,not
1,2021,3,4,1,,0.41,0.41,,,,not
2,2021,3,4,2,,0.39,0.39,,,,not
3,2021,3,4,3,,0.345,0.345,,,,not
4,2021,3,4,4,,0.425,0.425,,,,not
5,2021,3,4,5,,0.4425,0.4425,,,,not
6,2021,3,4,6,,0.255001,0.255001,,,,not
7,2021,3,4,7,,0.485,0.485,,,,not
8,2021,3,4,8,,0.4975,0.4975,,,,not
9,2021,3,4,9,,0.325,0.325,,,,not


### (3) Making xlsx file
변수는 동일하므로 Merging을 실행할 수 있으나, 별개 데이터 생성이 주목적이므로 생략함.

In [12]:
# Write the solar-user hourly averages to an Excel workbook in the data
# folder: a single sheet named 'hour_average', without the index column.
xlsx_name = os.path.join(folder_root, 'final_data_hour_use_average.xlsx')
data_frame_time.to_excel(
    excel_writer=xlsx_name,
    index=False,
    sheet_name='hour_average',
)

In [13]:
# Write the non-solar hourly averages to an Excel workbook in the data
# folder: a single sheet named 'hour_average', without the index column.
xlsx_name2 = os.path.join(folder_root, 'final_data_hour_not_average.xlsx')
data_frame_time2.to_excel(
    excel_writer=xlsx_name2,
    index=False,
    sheet_name='hour_average',
)