# 프로세스 준비

In [1]:
import pandas as pd
import warnings, os, shutil
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [2]:
# Suppress every warning category for the remainder of the session.
warnings.simplefilter('ignore')
# Remove the column-count limit so wide frames are displayed in full.
pd.set_option('display.max_columns', None)

## 데이터 확인

In [3]:
# Input and output folders, resolved relative to the current working directory.
dataDir = "4S data"
S2DataDir = "2022-01-01-2022-12-01_Time-Series"
outputDir = "result data"

# Keep only Excel workbooks from the 4S folder and only CSV files from the
# Sentinel-2 folder; anything else found in those folders is ignored.
files_list_4S = [entry for entry in os.listdir(dataDir) if entry.endswith('.xlsx')]
files_list_STNL = [entry for entry in os.listdir(S2DataDir) if entry.endswith('.csv')]

In [None]:
# Print both discovered file lists and their sizes, separated by a ruler line.
print(
    f"4S file list : {files_list_4S}",
    f"4S file : {len(files_list_4S)}개",
    '=' * 100,
    f"S2 file list : {files_list_STNL}",
    f"Sentinel-2 file : {len(files_list_STNL)}개",
    sep='\n',
)

In [None]:
# Location names are the 4S workbook names without their extension.
# os.path.splitext removes exactly the extension; str.strip('.xlsx') would
# instead strip any of the characters '.', 'x', 'l', 's' from BOTH ends and
# mangle names that start or end with one of them.
loc_list = [os.path.splitext(file_name)[0] for file_name in files_list_4S]

# A Sentinel-2 file is considered joinable when a location name occurs
# anywhere in its file name (substring match, one entry per match).
files_list_STNL_matched = [
    s2_file
    for location_4S in loc_list
    for s2_file in files_list_STNL
    if location_4S in s2_file
]

print(f"Location 목록 : {loc_list}")
print(f"조인 가능 데이터 목록 : {files_list_STNL_matched}")

## 4S 데이터 로드

In [6]:
# Load every 4S workbook into a global DataFrame named "<Location>_4S".
# loc_list is derived from files_list_4S in the same order, so zipping the two
# pairs each workbook with exactly the key that later cells use to look the
# frame up again (no second, possibly diverging, name derivation here).
# os.path.join builds the path with the platform's separator instead of a
# hard-coded Windows backslash.
for location_name, file_name in zip(loc_list, files_list_4S):
    globals()[f'{location_name}_4S'] = pd.read_excel(os.path.join(dataDir, file_name))

## 센티넬 데이터 로드

In [7]:
# Load every matched Sentinel-2 CSV into a global DataFrame named after the
# file without its extension (e.g. "<name>.csv" -> global "<name>").
# os.path.splitext removes exactly the extension, whereas str.strip('.csv')
# would strip any of the characters '.', 'c', 's', 'v' from both ends.
# os.path.join keeps the path portable instead of hard-coding a backslash.
for file in files_list_STNL_matched:
    frame_name = os.path.splitext(file)[0]
    globals()[frame_name] = pd.read_csv(os.path.join(S2DataDir, file), encoding='utf-8')

---

# 데이터 전처리

## 4S 데이터 전처리 및 센티넬 데이터와 조인

In [8]:
# Recreate the output folder from scratch.
# NOTE: everything already inside outputDir is deleted on each run.
if os.path.exists(outputDir):
    shutil.rmtree(outputDir)
os.makedirs(outputDir)

# Sentinel-2 value columns and the buffer distances present in BUFF_DIST.
s2_band_cols = ['Blue', 'Green', 'Red', 'NIR', 'NDVI']
buffer_dists = [5, 10, 15, 20, 25, 30]

for location in loc_list:

    # ---- 4S (field) data preprocessing ----
    df_4S = globals()[f'{location}_4S']
    # drop() returns a new frame, so the globally stored raw frame is untouched.
    # errors='ignore' tolerates workbooks saved without the stray index column.
    df_4S = df_4S.drop(columns="Unnamed: 0", errors='ignore')
    df_4S = df_4S.dropna()

    # Assemble a zero-padded "YYYY-MM-DD" string from the Year / Month / Date
    # columns (the 'Date' column holds the day of month) and parse it.
    # pd.to_datetime replaces astype('datetime64'), whose unit-less dtype is
    # rejected by recent pandas versions.
    date_parts = df_4S[['Year', 'Month', 'Date']].astype('uint64').astype(str)
    df_4S['DATE'] = pd.to_datetime(
        date_parts['Year']
        + '-' + date_parts['Month'].str.zfill(2)
        + '-' + date_parts['Date'].str.zfill(2),
        format='%Y-%m-%d',
    )

    # Ref_PAR is optional: keep it (in its original position) only when the
    # workbook has it. An explicit check replaces the former bare `except:`,
    # which would also have hidden unrelated errors.
    field_cols = ['Ref_BLU', 'Ref_GRN', 'Ref_RED', 'Ref_NIR', 'NDVI']
    if 'Ref_PAR' in df_4S.columns:
        field_cols.insert(field_cols.index('NDVI'), 'Ref_PAR')
    df_4S = df_4S[['DATE'] + field_cols].set_index('DATE')

    # ---- Sentinel-2 data: one per-date mean table per buffer distance ----
    df_S2 = globals()[f'{location}_S2']
    # The parsed dates are written back into the globally stored frame.
    df_S2['DATE'] = pd.to_datetime(df_S2['DATE'])

    buffer_frames = []
    for dist in buffer_dists:
        # e.g. 'Blue' -> 'BLUE_5M', 'NDVI' -> 'NDVI_5M'
        renamed_cols = {band: f'{band.upper()}_{dist}M' for band in s2_band_cols}
        buffer_frames.append(
            df_S2.loc[df_S2['BUFF_DIST'] == dist, ['DATE'] + s2_band_cols]
            .rename(columns=renamed_cols)
            .groupby('DATE')
            .mean()
        )

    # ---- Join and save ----
    # Inner join on the DATE index: only dates present in the field data AND
    # in every buffer table survive.
    result_df = pd.concat([df_4S, *buffer_frames], axis=1, join='inner')
    result_df.to_excel(os.path.join(outputDir, f'{location}_total.xlsx'), engine='xlsxwriter')
    globals()[f'{location}_total'] = result_df.copy()

---

# 통계 데이터 생성

## 평균

- TODO
    - 버퍼별 평균 - 필드 평균 : 오차 테이블 생성
    - 지역별 통계 시트에 나눠서 작성
    - 플로팅

In [61]:
# For every location, build a summary table of column means and store it as
# the global "<Location>_mean". Rows: the six buffer sizes plus 'Field' (the
# 4S measurement); columns: BLUE, GREEN, RED, NIR, NDVI.
buffer_labels = ['5M', '10M', '15M', '20M', '25M', '30M']
# Output column -> name of the matching field (4S) column in "<Location>_total".
field_col_by_band = {
    'BLUE': 'Ref_BLU',
    'GREEN': 'Ref_GRN',
    'RED': 'Ref_RED',
    'NIR': 'Ref_NIR',
    'NDVI': 'NDVI',
}

for Location in loc_list:
    total_df = globals()[f'{Location}_total']

    band_means = []
    for band, field_col in field_col_by_band.items():
        # Buffer columns first, field column last -> rows 5M..30M, then Field.
        source_cols = [f'{band}_{label}' for label in buffer_labels] + [field_col]
        band_mean = total_df[source_cols].mean()
        band_mean.index = buffer_labels + ['Field']
        band_means.append(band_mean)

    mean_df = pd.concat(band_means, axis=1)
    mean_df.columns = list(field_col_by_band)
    globals()[f'{Location}_mean'] = mean_df.copy()

## 표준편차

In [62]:
# For every location, build a summary table of column standard deviations
# (pandas default, i.e. sample standard deviation) and store it as the global
# "<Location>_std". Rows: the six buffer sizes plus 'Field' (the 4S
# measurement); columns: BLUE, GREEN, RED, NIR, NDVI.
buffer_labels = ['5M', '10M', '15M', '20M', '25M', '30M']
# Output column -> name of the matching field (4S) column in "<Location>_total".
field_col_by_band = {
    'BLUE': 'Ref_BLU',
    'GREEN': 'Ref_GRN',
    'RED': 'Ref_RED',
    'NIR': 'Ref_NIR',
    'NDVI': 'NDVI',
}

for Location in loc_list:
    total_df = globals()[f'{Location}_total']

    band_stds = []
    for band, field_col in field_col_by_band.items():
        # Buffer columns first, field column last -> rows 5M..30M, then Field.
        source_cols = [f'{band}_{label}' for label in buffer_labels] + [field_col]
        band_std = total_df[source_cols].std()
        band_std.index = buffer_labels + ['Field']
        band_stds.append(band_std)

    std_df = pd.concat(band_stds, axis=1)
    std_df.columns = list(field_col_by_band)
    globals()[f'{Location}_std'] = std_df.copy()

# 데이터 시각화

## 전체일자 지역별 평균

In [63]:
# Dropdown listing every location; used as the default of the callback below.
location = widgets.Dropdown(options=loc_list)


@interact
def showByLocationMean(Location = location):
    """Return the global "<Location>_mean" table for the selected location."""
    table_name = f'{Location}_mean'
    return globals()[table_name]

interactive(children=(Dropdown(description='Location', options=('AMD', 'GCK', 'HAWS1', 'JJ', 'PYC'), value='AM…

## 전체일자 지역별 표준편차

In [64]:
# Second, independent dropdown for the standard-deviation view.
location2 = widgets.Dropdown(options=loc_list)


# NOTE(review): the function name is kept as-is, but it returns the *std*
# table and rebinds the name used by the mean viewer defined earlier —
# consider renaming it once nothing depends on the current name.
@interact
def showByLocationMean(Location = location2):
    """Return the global "<Location>_std" table for the selected location."""
    table_name = f'{Location}_std'
    return globals()[table_name]

interactive(children=(Dropdown(description='Location', options=('AMD', 'GCK', 'HAWS1', 'JJ', 'PYC'), value='AM…