#### 공사비 공사번호 기준 설비 카운팅 + 좌표추가
* 전주 1~10, 전선 1~11

In [1]:
import pandas as pd

import matplotlib.pyplot as plt
from freeman.plt_setting import plt_settings
from freeman.aiddd.data_manager import read_data, write_data

In [2]:
# Apply the project's plot settings so charts can display Korean text.
plt_settings()

#### 데이터 불러오기

In [3]:
# Dataset keys passed to the project's read_data helper.
CONS_COUNTS_KEY = '2nd pp counts-base-on-cons-1st'
POLE_KEY = '2nd provide pole'

# Per-construction base table, then the pole records that carry coordinates.
df_data = read_data(CONS_COUNTS_KEY)
df_pole = read_data(POLE_KEY)

#### 데이터 전처리

In [4]:
# Keep only the columns needed below; work on an independent copy so later
# column assignments never touch df_pole.
pole_columns = ['공사번호', '전산화번호', 'GISID', 'X좌표-Y좌표']
df_pole_xy = df_pole.loc[:, pole_columns].copy()

In [5]:
# Rename the Korean column headers to short English names for easier use.
rename_columns = {
    '공사번호': 'const_no',
    '전산화번호': 'compute_no',
    'GISID': 'gisid',
    'X좌표-Y좌표': 'position',
}
df_pole_xy = df_pole_xy.rename(columns=rename_columns)

In [6]:
# Add coordinate columns: split the comma-separated 'position' string into
# four pieces. Only 'x' and 'y' are used later; the values remain strings here.
position_parts = df_pole_xy['position'].str.split(',', expand=True)
df_pole_xy[['x', 'y', 'temp1', 'temp2']] = position_parts

In [7]:
# Order rows by construction number, then by computerized number within it.
sort_keys = ['const_no', 'compute_no']
df_pole_xy = df_pole_xy.sort_values(sort_keys)

In [8]:
# Keep only poles whose construction number also appears in df_data;
# every other record is dropped.
in_cost_data = df_pole_xy['const_no'].isin(df_data['공사번호'])
df_pole_xy = df_pole_xy[in_cost_data]

In [9]:
# Build one row per construction number holding the x/y coordinates of its
# first MAX_POLE_COUNTS poles (in the current row order of df_pole_xy).
# Constructions with fewer poles are padded with 0.0 so every row has the
# same width.
MAX_POLE_COUNTS = 10

# Output columns: const_no, pole1_x, pole1_y, ..., pole<MAX>_x, pole<MAX>_y
column_names = ['const_no']
for index in range(1, MAX_POLE_COUNTS + 1):
    column_names += [f'pole{index}_x', f'pole{index}_y']

merge_data = []
# A single groupby pass replaces re-filtering the whole frame once per
# construction number. sort=False keeps groups in order of first appearance,
# matching the order of df_pole_xy['const_no'].unique().
# NOTE: groupby skips rows whose const_no is NaN, so no row is emitted for them.
for const_no, pole_rows in df_pole_xy.groupby('const_no', sort=False):
    kept_rows = pole_rows.iloc[:MAX_POLE_COUNTS]
    new_data = [const_no]
    for x_value, y_value in zip(kept_rows['x'], kept_rows['y']):
        # Coordinates are still strings from the split step; convert here.
        new_data += [float(x_value), float(y_value)]
    # Pad the missing pole slots with zeros up to the full row width.
    new_data += [0.0] * (len(column_names) - len(new_data))
    merge_data.append(new_data)

# Assemble the wide coordinate table.
df_position = pd.DataFrame(merge_data, columns=column_names)

In [10]:
# Preview the first rows of the per-construction pole coordinate table.
df_position.head()

Unnamed: 0,const_no,pole1_x,pole1_y,pole2_x,pole2_y,pole3_x,pole3_y,pole4_x,pole4_y,pole5_x,...,pole6_x,pole6_y,pole7_x,pole7_y,pole8_x,pole8_y,pole9_x,pole9_y,pole10_x,pole10_y
0,442720173009,14193000.0,4388890.0,14192760.0,4390079.0,14192760.0,4390092.0,14192710.0,4389631.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,442720174402,14197030.0,4400889.0,14197000.0,4400908.0,14197000.0,4400972.0,14197060.0,4400870.0,14197090.0,...,14197080.0,4400952.0,14197050.0,4400998.0,0.0,0.0,0.0,0.0,0.0,0.0
2,442720193782,14196210.0,4378428.0,14196240.0,4378374.0,14196280.0,4378408.0,14196250.0,4378431.0,14196280.0,...,14196270.0,4378485.0,14196300.0,4378460.0,0.0,0.0,0.0,0.0,0.0,0.0
3,442720193806,14198470.0,4378127.0,14198480.0,4378063.0,14197830.0,4377866.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,442720194411,14192300.0,4392419.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Left-join the pole coordinates onto the base table by construction number.
# Every df_data row is kept; rows with no matching const_no in df_position
# get NaN in the pole columns (not the 0.0 padding used inside df_position).
df_pp_data = df_data.merge(
    df_position,
    how='left',
    left_on='공사번호',
    right_on='const_no',
)

In [12]:
# Preview the first rows of the merged result.
df_pp_data.head()

Unnamed: 0,공사번호,총공사비,최종변경일시,year,month,day,dayofweek,dayofyear,최종변경자사번,사번코드,...,pole6_x,pole6_y,pole7_x,pole7_y,pole8_x,pole8_y,pole9_x,pole9_y,pole10_x,pole10_y
0,477420183473,37874518,2021-02-25 09:10:18,2021,2,25,3,56,1988103181,AAA,...,14293060.0,4452301.0,14292770.0,4451661.0,14292800.0,4451671.0,14292810.0,4451665.0,0.0,0.0
1,477420183616,31478585,2021-02-25 13:52:47,2021,2,25,3,56,MDE1706013,MDE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,477420183728,30268679,2021-04-12 08:11:24,2021,4,12,0,102,MDP2100086,MDP,...,14306510.0,4449207.0,14306580.0,4449207.0,14306690.0,4449164.0,14306670.0,4449210.0,14306650.0,4449241.0
3,477420193243,11598900,2022-11-21 17:45:02,2022,11,21,0,325,MDE1900011,MDE,...,14292740.0,4415728.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,477420193349,23535217,2020-10-19 09:06:23,2020,10,19,0,293,MDE1900011,MDE,...,14287680.0,4459526.0,14287650.0,4459496.0,14287690.0,4459523.0,0.0,0.0,0.0,0.0


In [13]:
# Hand the merged frame to the project's write_data helper under the given
# name (storage format and location are decided by that helper).
write_data('2nd pp pole-position-on-cons-1st', df_pp_data)