#### 설비 개수가 계산된 학습용 전처리 데이터에 전주 좌표 추가

In [2]:
import pandas as pd

import matplotlib.pyplot as plt
from freeman.plt_setting import plt_settings
from freeman.aiddd.data_manager import read_data, write_data

# Enable Korean text support in charts via the project's plt_settings helper.
plt_settings()

#### 데이터 불러오기

In [3]:
# Dataset names handed to read_data.
counts_dataset = '3rd pp counts-base-on-cons-1st'
pole_dataset = '3rd provide pole'

# Training data that already has the equipment counts added.
df_data = read_data(counts_dataset)
# Third-delivery pole data that has meaningful coordinates added.
df_pole = read_data(pole_dataset)

#### 데이터 전처리

##### 공사비 레코드에 전주 좌표 추가

In [4]:
# Take an independent copy of just the pole columns needed to attach
# coordinates: construction number, computerization number, GIS id and the
# combined X/Y coordinate string.
pole_columns = ['공사번호', '전산화번호', 'GISID', 'X좌표-Y좌표']
df_pole_xy = df_pole.loc[:, pole_columns].copy()

In [5]:
# Rename the Korean column headers to short English names that are easier to
# work with in code.
rename_columns = dict(zip(
    ['공사번호', '전산화번호', 'GISID', 'X좌표-Y좌표'],
    ['cons_no', 'comp_no', 'gisid', 'position'],
))
df_pole_xy = df_pole_xy.rename(columns=rename_columns)

In [9]:
# Split the comma-separated `position` string into separate columns: the
# first two pieces become `x` and `y`, the remaining two land in `temp1` and
# `temp2`. The four-column assignment expects the split to yield four pieces.
coord_parts = df_pole_xy['position'].str.split(',', expand=True)
df_pole_xy[['x', 'y', 'temp1', 'temp2']] = coord_parts

In [10]:
# Order the pole rows by construction number, then by computerization number.
#  - Author's observation: with the computerization-number ordering, the row
#    index increases steadily within each construction number.
#  - Author's observation: ordering by GISID does not give that steady pattern.
sort_keys = ['cons_no', 'comp_no']
df_pole_xy = df_pole_xy.sort_values(sort_keys)

In [11]:
# Keep only pole rows whose construction number also appears in the
# construction-cost (training) data; drop every other row.
in_training_data = df_pole_xy['cons_no'].isin(df_data['cons_no'])
df_pole_xy = df_pole_xy.loc[in_training_data]

In [12]:
# Build one row per construction number holding the (x, y) coordinates of its
# first MAX_POLE_COUNTS poles, taken in the current row order of df_pole_xy.
# Constructions with fewer poles are padded with 0.0; extra poles are dropped.
MAX_POLE_COUNTS = 10
merge_data = []

# groupby(sort=False) hands over each construction's rows exactly once, in
# order of first appearance (the same order `unique()` yields), instead of
# re-filtering the whole frame with a boolean mask per construction number.
for cons_no, pole_rows in df_pole_xy.groupby('cons_no', sort=False):
    first_poles = pole_rows[['x', 'y']].head(MAX_POLE_COUNTS)
    coords = []
    for x, y in zip(first_poles['x'], first_poles['y']):
        # Coordinates come out of the string split as text; store them as floats.
        coords += [float(x), float(y)]
    # Pad so every row has exactly MAX_POLE_COUNTS (x, y) pairs.
    coords += [0.0] * (2 * MAX_POLE_COUNTS - len(coords))
    merge_data.append([cons_no, *coords])

# Column names: cons_no, pole1_x, pole1_y, ..., pole<MAX_POLE_COUNTS>_x/_y.
column_names = ['cons_no']
for pole_no in range(1, MAX_POLE_COUNTS + 1):
    column_names += [f'pole{pole_no}_x', f'pole{pole_no}_y']

# Assemble the wide per-construction coordinate table.
df_position = pd.DataFrame(merge_data, columns=column_names)

In [13]:
# Preview the first rows of the per-construction pole coordinate table.
df_position.head()

Unnamed: 0,cons_no,pole1_x,pole1_y,pole2_x,pole2_y,pole3_x,pole3_y,pole4_x,pole4_y,pole5_x,...,pole6_x,pole6_y,pole7_x,pole7_y,pole8_x,pole8_y,pole9_x,pole9_y,pole10_x,pole10_y
0,442720173009,127.497907,36.638881,127.49573,36.647452,127.495709,36.647546,127.495254,36.644221,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,442720174402,127.534065,36.725321,127.533852,36.725459,127.533855,36.725922,127.534358,36.725188,127.534667,...,127.5345,36.725774,127.534287,36.726108,0.0,0.0,0.0,0.0,0.0,0.0
2,442720193782,127.526735,36.563426,127.526971,36.563041,127.527371,36.563285,127.52706,36.56345,127.527329,...,127.527288,36.563843,127.52753,36.563658,0.0,0.0,0.0,0.0,0.0,0.0
3,442720193806,127.547019,36.561259,127.547143,36.560796,127.541254,36.559375,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,442720194411,127.491587,36.664314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Attach the pole coordinate columns to the preprocessed records with a left
# join on `cons_no`, so every existing df_data row is kept.
# NOTE(review): construction numbers missing from df_position end up with NaN
# in the pole columns (not the 0.0 padding used for short pole lists) —
# confirm downstream code handles that.
df_data = df_data.merge(df_position, on='cons_no', how='left')

In [15]:
# Hand the merged records to the project's write_data helper under this
# dataset name.
output_dataset = '3rd pp pole-position-on-cons-1st'
write_data(output_dataset, df_data)