#### 전주 순번 및 전주와 지선주 개수 계산

In [1]:
import ast
import pandas as pd
from datetime import datetime

from freeman.aiddd.data_manager import read_data, write_data

#### 데이터 불러오기

In [6]:
def _read_4th(name):
    """Load one 4th-stage preprocessed table, reading cons_id as a string."""
    return read_data(name, process_seq='4th', dtype={'cons_id': str})


# Timestamp taken before loading so the elapsed load time can be reported.
_start_time = datetime.now()

# 4th preprocessing stage: construction cost merged with pole and line data
_df_cons = _read_4th('1d-merge-cons-pole-line')

# 4th preprocessing stage: pole data (used together with the line data to
# order the poles and assign coordinates)
_df_pole = _read_4th('1c-preprocessed-pole')

# 4th preprocessing stage: line data
_df_line = _read_4th('1d-preprocessed-line')

# Report the load time and the shape of each loaded table.
print(
    f'Total Elapsed Time for Data Load: {datetime.now() - _start_time}\n'
    f'Preprocessed Data(cons) Shape: {_df_cons.shape}\n'
    f'Preprocessed Data(pole) Shape: {_df_pole.shape}\n'
    f'Preprocessed Data(line) Shape: {_df_line.shape}'
)

Total Elapsed Time for Data Load: 0:00:00.232162
Preprocessed Data(cons) Shape: (14728, 81)
Preprocessed Data(pole) Shape: (26920, 20)
Preprocessed Data(line) Shape: (29704, 51)


In [7]:
# Work on copies of the loaded frames; the underscore-prefixed originals
# are left as loaded.
df_cons = _df_cons.copy()
df_pole = _df_pole.copy()
df_line = _df_line.copy()

#### df_pole 전산화번호 dict 생성
* {
    'comp_id': [x, y]
  }

In [8]:
# Map each pole's comp_id to its [x, y] coordinate pair.
_pole_xy = df_pole[['comp_id', 'x', 'y']].set_index('comp_id')
dict_pole = _pole_xy.transpose().to_dict(orient='list')

# Check: look up the coordinates of the second pole row
dict_pole[df_pole['comp_id'].iloc[1]]

[128.393165784208, 36.8303489085259]

#### 공사번호별 전선 순서 계산

In [59]:
# Distinct construction ids present in the construction-cost data
unique_cons_ids = df_cons['cons_id'].unique()
# Collects one flat pole-path record per construction id
cons_id_pole_paths = list()

In [60]:
def get_xy(comp_id, pole_xy=None):
    """Return the (x, y) coordinates registered for a pole.

    Args:
        comp_id: Identifier of the pole to look up.
        pole_xy: Optional mapping of comp_id -> [x, y]. When omitted, the
            notebook-level ``dict_pole`` is used.

    Returns:
        A tuple ``(x, y)``; ``(0, 0)`` when the pole is not in the mapping.
    """
    lookup = dict_pole if pole_xy is None else pole_xy
    xy = lookup.get(comp_id, [0, 0])
    return xy[0], xy[1]


def append_path(paths, df_curr_pole, comp_id, pole_xy=None):
    """Append one pole entry (is_new, comp_id, x, y) to ``paths`` in place.

    Args:
        paths: List that receives the four values, in that order.
        df_curr_pole: Pole rows of the current construction; must have a
            ``comp_id`` column.
        comp_id: Identifier of the pole being appended.
        pole_xy: Optional mapping of comp_id -> [x, y] forwarded to
            ``get_xy``. When omitted, ``dict_pole`` is used.

    Returns:
        None. ``paths`` is mutated.
    """
    # A pole listed among the current construction's poles is new (1);
    # otherwise it is an existing pole (0). The check uses the pole being
    # appended rather than a loop variable from the calling cell, so each
    # entry carries its own flag and the function works outside that loop.
    is_new = 1 if (df_curr_pole.comp_id == comp_id).any() else 0
    x, y = get_xy(comp_id, pole_xy)
    paths.extend([is_new, comp_id, x, y])
        

In [61]:
# For every construction id, compare the comp_ids found in the pole and line
# data and build one flat record, collected in cons_id_pole_paths:
#   [cons_id, real_pole_count, support_pole_count,
#    <pole>, span, <pole>, span, ..., <pole>, 0, ...]
# where each <pole> is the group of values written by append_path.
for cons_id in unique_cons_ids:
    new_pole_paths = [cons_id]
    df_curr_pole = df_pole[df_pole.cons_id == cons_id]
    df_curr_line = df_line[df_line.cons_id == cons_id]

    # Poles that sit on neither end of any line are counted as support
    # poles; real pole count = total pole count - support pole count.
    df_curr_not_line = df_curr_pole[
        ~df_curr_pole.comp_id.isin(df_curr_line.comp_id) &
        ~df_curr_pole.comp_id.isin(df_curr_line.from_comp_id)
    ]
    # Register the real pole count and the support pole count
    curr_pole_counts = df_curr_pole.shape[0]
    support_pols_counts = df_curr_not_line.shape[0]
    new_pole_paths.append(curr_pole_counts - support_pols_counts)
    new_pole_paths.append(support_pols_counts)

    # Poles connected by lines: from_comp_id (source side) -> comp_id
    comp_id_values = df_curr_line.comp_id.tolist()
    from_comp_id_values = df_curr_line.from_comp_id.tolist()
    # Distance between the two ends of each line
    span_values = df_curr_line.span.tolist()

    # Poles that appear only as comp_id or only as from_comp_id.
    # Sets keep each membership test O(1) instead of scanning a list.
    comp_id_set = set(comp_id_values)
    from_comp_id_set = set(from_comp_id_values)
    only_comp_id_values = [
        item for item in comp_id_values if item not in from_comp_id_set
    ]
    only_from_comp_id_values = [
        item for item in from_comp_id_values if item not in comp_id_set
    ]

    processed_comp_id = []
    is_exception = False
    for start_pole in only_from_comp_id_values:
        # A start pole seen twice means one pole fans out to several poles;
        # such constructions are excluded from the result.
        if start_pole in processed_comp_id:
            is_exception = True
            break
        processed_comp_id.append(start_pole)
        append_path(new_pole_paths, df_curr_pole, comp_id=start_pole)

        # Assign the pole order by following the chain of lines.
        # One extra iteration is allowed so that a fully walked chain still
        # reaches the failed lookup below, which appends the closing span 0.
        # NOTE(review): if the lines contain a cycle, the loop ends without
        # that closing 0 — confirm whether such data can occur.
        loop_size = len(from_comp_id_values) + 1
        next_value = start_pole
        for _ in range(loop_size):
            try:
                next_index = from_comp_id_values.index(next_value)
            except ValueError:
                # No line starts at this pole: a span of 0 marks the point
                # where poles are not connected by a line.
                next_span = 0
                new_pole_paths.append(next_span)
                break
            next_span = span_values[next_index]
            new_pole_paths.append(next_span)

            next_value = comp_id_values[next_index]
            append_path(new_pole_paths, df_curr_pole, comp_id=next_value)

    if not is_exception:
        cons_id_pole_paths.append(new_pole_paths)

In [None]:
# Build the column names for the path records; only the leading cons_id
# column is defined so far.
column_names = ['cons_id']


In [62]:
# Show the pole rows of one construction id
df_pole.loc[df_pole['cons_id'] == '477420193349']

Unnamed: 0,cons_id,comp_id,x,y,pole_shape_G,pole_shape_O,pole_shape_V,pole_type_1,pole_type_B,pole_type_C,pole_type_E,pole_type_H,pole_type_M,pole_spec_6.0,pole_spec_8.0,pole_spec_10.0,pole_spec_11.0,pole_spec_12.0,pole_spec_14.0,pole_spec_16.0
6,477420193349,7103S622,128.345893,37.146079,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0
7,477420193349,7103S723,128.346364,37.145936,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0
8,477420193349,7103S724,128.346539,37.146082,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1
9,477420193349,7103S725,128.346487,37.146335,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1
10,477420193349,7103S821,128.348403,37.146904,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0
11,477420193349,7103S921,128.348422,37.146354,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0
12,477420193349,7103Y021,128.348137,37.146136,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0
13,477420193349,7103Y121,128.348501,37.146327,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0


In [64]:
# Debug view: print the pole and line comp_ids of a single construction id.
for cons_id in ['477420193827']:
    new_pole_paths = [cons_id]
    df_curr_pole = df_pole.loc[df_pole['cons_id'] == cons_id]
    df_curr_line = df_line.loc[df_line['cons_id'] == cons_id]

    # Support poles are the poles on neither end of any line;
    # real pole count = total pole count - support pole count.
    on_any_line = (
        df_curr_pole['comp_id'].isin(df_curr_line['comp_id'])
        | df_curr_pole['comp_id'].isin(df_curr_line['from_comp_id'])
    )
    df_curr_not_line = df_curr_pole[~on_any_line]

    # Poles connected by lines
    comp_id_values = df_curr_line['comp_id'].tolist()
    from_comp_id_values = df_curr_line['from_comp_id'].tolist()
    # Distance between the two ends of each line
    span_values = df_curr_line['span'].tolist()
    # Poles that appear only as comp_id or only as from_comp_id
    only_comp_id_values = [
        pole_id for pole_id in comp_id_values
        if pole_id not in from_comp_id_values
    ]
    only_from_comp_id_values = [
        pole_id for pole_id in from_comp_id_values
        if pole_id not in comp_id_values
    ]

    # Lines printed: all poles, support poles, comp_ids followed by the
    # comp_id-only ones, from_comp_ids followed by the from-only ones.
    print(
        f'{df_curr_pole.comp_id.tolist()}\n'
        f'{df_curr_not_line.comp_id.tolist()}\n'
        f'{comp_id_values}{only_comp_id_values}\n'
        f'{from_comp_id_values}{only_from_comp_id_values}'
    )

['7696C162', '7696C171', '7696C262']
[]
['7696C162', '7696C171', '7696C262']['7696C171']
['7696C262', '7696C162', '7696C351']['7696C351']


In [58]:
# Show the collected path record(s) of one construction id
list(filter(lambda record: record[0] == '476920213814', cons_id_pole_paths))

[['476920213814',
  0,
  '32594442',
  0,
  0,
  28,
  0,
  '3259Y871',
  127.718301135788,
  36.3552077923443,
  0,
  0,
  '32574983',
  0,
  0,
  52,
  0,
  '3357P061',
  127.724966302741,
  36.3230749349085,
  0,
  0,
  '3455F142',
  0,
  0,
  38,
  0,
  '3455F053',
  0,
  0,
  0,
  0,
  '3359H932',
  0,
  0,
  46,
  0,
  '3459C032',
  127.747669758426,
  36.3619745481094,
  0,
  0,
  '3557G851',
  0,
  0,
  38,
  0,
  '3557G752',
  127.784727522574,
  36.3267722161864,
  0,
  0,
  '3560X641',
  0,
  0,
  52,
  0,
  '3560X741',
  0,
  0,
  39,
  0,
  '3560X841',
  0,
  0,
  0,
  0,
  '3757Q641',
  0,
  0,
  45,
  0,
  '3757Q631',
  0,
  0,
  38,
  0,
  '37573862',
  0,
  0,
  0]]