In [11]:
import pandas as pd
import re

import _codes
import _util
import _config
#re
pattern = re.compile(r'\d/4')

#file_path setting
files = _util.get_files(_config.hospital_read_path)

df_list = []
for file in files:
    df_list.append(pd.read_csv(_config.hospital_read_path + '\\' + file, encoding=_config.euc_kr))
df = pd.concat(df_list, ignore_index=False)

_cols = {}
for col in df.columns:
    val = str(col)
    if col.count('20') > 0 and col.count('.1/4') > 0: val = val.replace('.1/4', '')
    elif col.count('시군구') > 0: val = 'sgg_nm'
    elif col.count('Unnamed') > 0: continue

    _cols[col] = val

df.rename(columns=_cols, inplace=True)
df = df[list(_cols.values())]
df = pd.DataFrame(df)

num_cols = [col for col in df.columns if col.isnumeric()]
not_num_cols = [col for col in df.columns if not col.isnumeric()]

df = df.melt(
    id_vars=not_num_cols
    , value_vars=num_cols
    , var_name='year'
    , value_name='개수'
    , ignore_index=False
)
df.reset_index(drop=True, inplace=True)
# df.to_csv(_config.hospital_write_path + '\\' + 'hospital' + '.csv', encoding=_config.euc_kr, index=False)

sido_cd = None
for idx, row in df.iterrows():
    sgg = str(row['sgg_nm']).strip()

    if sgg == '고양시 일산 동구': 
        sgg = '고양시 일산동구'
        df.loc[idx, 'sgg_nm'] = sgg
    if sgg == '고양시 일산 서구': 
        sgg = '고양시 일산서구'
        df.loc[idx, 'sgg_nm'] = sgg

    sgg_split = sgg.split(' ')
    if len(sgg_split) > 1: 
        sgg = sgg_split[1]
        # print(sgg)

    #year
    year = str(row['year'])
    year = int(year.strip())

    case_no = 1
    sido_sgg_cd_map = None
    # if 2005 < year: code = _codes.sgg_codes_2000
    # elif 2010 < year: code = _codes.sgg_codes_2005
    # elif 2015 < year: code = _codes.sgg_codes_2010
    # elif 2020 < year: code = _codes.sgg_codes_2015
    # elif 2023 < year: code = _codes.sgg_codes_2020
    # elif 2023 >= year: code = _codes.sgg_codes_2023

    code = _codes.sgg_codes_2023
    sido_sgg_cd_map = _util.process_sgg_codes(code, case_no)

    #sido_cd mapping
    sido_cd = _util.sido_cd_map.get(sgg, sido_cd)

    #sgg_cd mapping2
    sido = sido_sgg_cd_map.get(sido_cd, {})
    sido = dict(sido)
    sgg_cd = sido.get(sgg, None)

    #예외
    if sgg == '전라북도': sgg_cd = '35'

    df.loc[idx, 'sgg_cd'] = f'`{str(sgg_cd)}'
    df.loc[idx, 'sido_cd'] = f'`{sido_cd}'

df = df.pivot_table(
    index=['sido_cd', 'sgg_cd', 'sgg_nm', 'year']
    , columns='요양기관종별'
    , values='개수'
    , aggfunc='sum'
)
df.reset_index(drop=False, inplace=False)

_list = []
for idx, row in df.iterrows():
    cnt = row['병원']
    
    import math
    if math.isnan(cnt): continue
    elif cnt == 0: continue

    _list.append(row)

df = pd.DataFrame(_list)
df.reset_index(drop=False, inplace=False)

_list = []
for col in df.columns:
    if col.count('계') > 0: continue
    _list.append(col)

df = df[_list]
df.to_csv(_config.hospital_write_path + '\\' + 'hospital' + '.csv', encoding=_config.euc_kr, index=False)
print("end write file")


end write file


In [19]:
import _config
import _util
import _codes
import pandas as pd
import math

def create_table(columns):
    columns = list(columns)
    for idx in range(len(columns)):
        columns[idx] = f'"{columns[idx]}"'

        if str(columns[idx]).count('sido_cd') > 0: columns[idx] = columns[idx] + ' varchar(2)'
        elif str(columns[idx]).count('sgg_cd') > 0: columns[idx] = columns[idx] + ' varchar(6)'
        elif str(columns[idx]).count('sgg_nm') > 0: columns[idx] = columns[idx] + ' varchar(20)'
        elif str(columns[idx]).count('year') > 0: columns[idx] = columns[idx] + ' varchar(4)'
        else: columns[idx] = columns[idx] + ' int4'

    sql = f'''
        do $$
        begin
            if exists (select 1 from pg_tables where tablename = 'hospital') then
            drop table hospital cascade;
            end if;
            if not exists (select 1 from pg_tables where tablename = 'hospital') then
            create table if not exists hospital(
            {', '.join(columns)}
            );
            end if;
        end $$;
    '''

    _util.execute_sql(sql)

def insert_data(df, columns):
    columns = list(columns)
    for idx in range(len(columns)):
        columns[idx] = f'"{columns[idx]}"'

    df = pd.DataFrame(df)
    data = df.values.tolist()

    for chunk in _util.chunker(data, 1000):
        values = []
        for row in chunk:
            for idx in range(len(columns)):
                if str(row[idx]).count('`') > 0:
                    val = str(row[idx]).replace('`', '')
                    row[idx] = f'\'{val}\''
                elif isinstance(row[idx], (int, float)): row[idx] = _util.objToInt(row[idx])
                else: row[idx] = f'\'{str(row[idx])}\''
                    
                if row[idx] == 'None': row[idx] = '\'\''

            row = [str(item) for item in row]
            values.append(f"({', '.join(row)})")

        sql = f'''
            insert into hospital(
                {','.join(columns)})
            values {', '.join(values)}
        '''
        _util.execute_sql(sql)

file_name = 'hospital' + '.csv'
file_path = _config.hospital_write_path + '\\' + file_name

df = pd.read_csv(file_path, encoding=_config.euc_kr)
create_table(df.columns)
insert_data(df, df.columns)



        do $$
        begin
            if exists (select 1 from pg_tables where tablename = 'hospital') then
            drop table hospital cascade;
            end if;
            if not exists (select 1 from pg_tables where tablename = 'hospital') then
            create table if not exists hospital(
            "sido_cd" varchar(2), "sgg_cd" varchar(6), "sgg_nm" varchar(20), "year" varchar(4), "병원" int4, "보건소" int4, "보건의료원" int4, "보건지소" int4, "보건진료소" int4, "상급종합병원" int4, "약국" int4, "요양병원" int4, "의원" int4, "정신병원" int4, "조산원" int4, "종합병원" int4, "치과병원" int4, "치과의원" int4, "한방병원" int4, "한의원" int4
            );
            end if;
        end $$;
    


AttributeError: module '_util' has no attribute 'objToInt'