In [20]:
##make a sgg_cd setting csv file

import pandas as pd
import _config
import _codes
import _util

file_nm = '05_22_school'
df = pd.read_csv(_config.school_read_path + '\\' + file_nm + '.csv', encoding=_config.euc_kr)
drop_value = '면적'
indexes_to_drop = df[df['항목'].str.contains('면적')].index
df = df.drop(indexes_to_drop)

#row to column
df = pd.melt(
    df
    , id_vars=['소재지(시군구)별', '교육과정별', '항목', '단위']
    , value_vars=[f'{year} 년' for year in range(2005, 2023)]
    , var_name='year'
    , value_name='값'
    , ignore_index=False
)
# df = df.dropna(subset=['year'])
new_data_list = []
for idx, row in df.iterrows():
    if row['값'] == 0: continue
    if pd.isna(row['값']): continue

    new_data_list.append(row)
df = pd.DataFrame(new_data_list)

# df = df.dropna(subset=['값'])
df.reset_index(drop=True, inplace=True)

sido_cd = None
for idx, row in df.iterrows():
    sgg = row['소재지(시군구)별']

    #sido_cd setting
    sido_cd = _util.sido_cd_map.get(sgg, sido_cd)
    df.loc[idx, 'sido_cd'] = sido_cd

    #year
    year = str(row['year']).replace('년', '')
    year = int(year.strip())

    if year < 2005: sido_sgg_cd_map = _util.process_sgg_codes(_codes.sgg_codes_2000, 1)
    elif year < 2010: sido_sgg_cd_map = _util.process_sgg_codes(_codes.sgg_codes_2005, 1)
    elif year < 2015: sido_sgg_cd_map = _util.process_sgg_codes(_codes.sgg_codes_2010, 1)
    elif year < 2020: sido_sgg_cd_map = _util.process_sgg_codes(_codes.sgg_codes_2015, 1)
    elif year < 2023: sido_sgg_cd_map = _util.process_sgg_codes(_codes.sgg_codes_2020, 1)
    elif year >= 2023: sido_sgg_cd_map = _util.process_sgg_codes(_codes.sgg_codes_2023, 1)
    else: 
        # print(year)
        raise Exception
    
    # year 4words setting
    df.loc[idx, 'year'] = f'`{year}'

    # 시도 코드 매핑
    sido_cd = _util.sido_cd_map.get(sgg, sido_cd)
    df.loc[idx, 'sido_cd'] = f'`{str(sido_cd)}'  # 시도 코드 할당

    # 시군구 코드 매핑2
    sido = sido_sgg_cd_map.get(sido_cd, {})
    sido = dict(sido)
    sgg_cd = sido.get(sgg, None)

    if sgg == '전라북도': sgg_cd = '35'
    df.loc[idx, 'sgg_cd'] = f'`{str(sgg_cd)}'

#row to column
df = df.pivot_table(index=['sido_cd', 'sgg_cd', '소재지(시군구)별', 'year', '항목', '단위'], 
    columns='교육과정별', 
    values='값',
    aggfunc='sum',
    fill_value=0
)  # 여기서 aggfunc는 데이터 집계 방법을 정의합니다.
df = df.reset_index()

df = df.drop(columns='각종학교', errors='raise')
column_list = [
    'sido_cd', 'sgg_cd', '소재지(시군구)별', 'year', '항목'
    , '유치원', '초등학교', '중학교', '고등학교', '특수학교', '계'
]
#column sort
df = df[column_list]

df.to_csv(_config.school_write_path + '\\' + file_nm + '.csv', encoding=_config.euc_kr, index=False)

In [3]:
## db create table && data insert

import pandas as pd
import _util
import _config
import _codes

files = _util.get_files(_config.school_write_path)
files = [file for file in files if file.count('school_')]

df_list = []
for file in files:
    df_list.append(pd.read_csv(_config.school_write_path + '\\' + file, encoding=_config.euc_kr))

df = pd.concat(df_list, ignore_index=False)
cols = list(df.columns)

if len(cols) == 0: exit()

_cols = []
for col in cols:
    col = f'"{col}"'

    if col.count('sido_cd') > 0: val = f'{col} varchar(2)'
    elif col.count('sgg_cd'): val = f'{col} varchar(5)'
    elif col.count('sgg_nm'): val = f'{col} varchar(20)'
    elif col.count('year'): val = f'{col} varchar(4)'
    elif col.count('항목'): val = f'{col} varchar(10)'
    else: val = f'{col} float8'
    _cols.append(val)
sql = f'''
    do $$
        begin
        if exists (select 1 from pg_tables where tablename = 'school') then
        drop table school cascade;
        end if;
        if not exists (select 1 from pg_tables where tablename = 'school') then
        create table school(
            {', '.join(_cols)}
        );
        end if;
    end $$;
'''

_util.execute_sql(sql)

def insert_data(df, columns):
    #column setting
    columns = list(columns)
    for idx in range(len(columns)):
        columns[idx] = f'"{columns[idx]}"'

        # if str(columns[idx]).count('세') > 0: columns[idx] = f'"{columns[idx]}"'
        # elif str(columns[idx]).count('year') > 0: columns[idx] = f'"{columns[idx]}"'
        # elif str(columns[idx]).count('행정구역') > 0: columns[idx] = f'"{columns[idx]}"'

    #to use dataframe for clause
    for idx, row in df.iterrows():
        row = row.tolist()
            
        for idx in range(len(columns)):
            if str(row[idx]).count('`') > 0: row[idx] = str(row[idx]).replace('`', '')
            row[idx] = f'\'{str(row[idx])}\''

            if row[idx] == 'None': row[idx] = '\'\''

        sql = f'''
            insert into school(
                {','.join(columns)})
            values(
                {', '.join(row)}
            )
        '''
        _util.execute_sql(sql)

    
insert_data(df, df.columns)





    do $$
        begin
        if exists (select 1 from pg_tables where tablename = 'school') then
        drop table school cascade;
        end if;
        if not exists (select 1 from pg_tables where tablename = 'school') then
        create table school(
            "sido_cd" varchar(2), "sgg_cd" varchar(5), "sgg_nm" varchar(20), "year" varchar(4), "항목" varchar(10), "유치원" float8, "초등학교" float8, "중학교" float8, "고등학교" float8, "특수학교" float8, "계" float8
        );
        end if;
    end $$;



In [1]:
import _config
import pandas as pd
import _util

df = pd.read_csv(_config.school_write_path + '\\' + file_nm + '.csv', encoding=_config.euc_kr)

year = 2022
file_nm = f'school_{year}'
sql = f'''
    select *
    from school
    where "year" = {year}::text
'''
result = _util.execute_sql(sql)
if result is not None:
    _cols = {}
    _col_list = [
        'sido_cd'
        , 'sgg_cd'
        , 'sgg_nm'
        , 'year'
        , '항목'
        , '유치원'
        , '초등학교'
        , '중학교'
        , '고등학교'
        , '특수학교'
        , '계'
    ]
    cols = [col for col in df.columns if col != 'Unnamed: 0']
    for idx in range(len(cols)): _cols[idx] = _col_list[idx]
    df = pd.DataFrame(list(result))
    df.rename(columns=_cols, inplace=True)
    df.to_csv(_config.school_write_path + '\\' + file_nm + '.csv', encoding=_config.euc_kr, index=False)

NameError: name 'file_nm' is not defined