## 공시지가 데이터 전처리

In [None]:
import pandas as pd

In [None]:
# Load the standard land price (표준지공시지가) CSV into a DataFrame.
csv_path = './data/공시지가 관련/국토교통부_표준지공시지가_20240101.csv'
estate = pd.read_csv(csv_path)

# Preview the first rows to sanity-check the load.
estate.head()

In [None]:
# Inspect the column names of the loaded table.
estate.columns

Index(['일련번호', '토지대장번호(PNU)', '시군구', '읍면동리', '지번구분', '본번지', '부번지', '시도명',
       '시군구명', '소재지', '지목', '면적', '용도지역1', '용도지역2', '용도지구1', '계획시설저촉여부',
       '계획시설', '기타제한1', '농지구분', '경지정리', '임야구분', '이용상황', '이용상황_기타', '주위환경',
       '주위환경_기타', '지세', '형상', '방위', '도로거리', '도로교통', '철도거리', '하수거리', '전년지가',
       '공시지가', '실제용도지대', '지리적위치1', '지리적위치2', '특수제한1', '특수제한2', '대규모_사업_방식',
       '대규모_사업_단계', '도로구분', '위험시설접근성', '묘지소재불리여건', '중복지정용도지역'],
      dtype='object')

In [None]:
# Keep only the columns used below: province name, district name, land price.
keep_cols = ['시도명', '시군구명', '공시지가']
estate = estate[keep_cols]

In [None]:
# List the distinct 시도명 (province-level) values present in the data.
estate['시도명'].unique()

array(['서울특별시', '부산광역시', '대구광역시', '인천광역시', '광주광역시', '대전광역시', '울산광역시',
       '세종특별자치시', '경기도', '충청북도', '충청남도', '전라남도', '경상북도', '경상남도',
       '제주특별자치도', '강원특별자치도', '전북특별자치도'], dtype=object)

**지역 필터링**

In [None]:
# Drop every row outside Seoul / Gyeonggi / Incheon.
capital_area = ['서울특별시', '경기도', '인천광역시']
city = estate['시도명'].isin(capital_area)  # boolean mask: True for rows to keep
estate = estate[city]
estate

Unnamed: 0,시도명,시군구명,공시지가
0,서울특별시,종로구,5150000
1,서울특별시,종로구,460200
2,서울특별시,종로구,669100
3,서울특별시,종로구,3987000
4,서울특별시,종로구,3842000
...,...,...,...
181303,경기도,양평군,173800
181304,경기도,양평군,56200
181305,경기도,양평군,209300
181306,경기도,양평군,12200


In [None]:
# List the distinct 시군구명 (district-level) values currently in the table.
estate['시군구명'].unique()

array(['종로구', '중구', '용산구', '성동구', '광진구', '동대문구', '중랑구', '성북구', '강북구',
       '도봉구', '노원구', '은평구', '서대문구', '마포구', '양천구', '강서구', '구로구', '금천구',
       '영등포구', '동작구', '관악구', '서초구', '강남구', '송파구', '강동구', '동구', '미추홀구',
       '연수구', '남동구', '부평구', '계양구', '서구', '강화군', '옹진군', '수원장안구', '수원권선구',
       '수원팔달구', '수원영통구', '성남수정구', '성남중원구', '성남분당구', '의정부시', '안양만안구',
       '안양동안구', '부천원미구', '부천소사구', '부천오정구', '광명시', '평택시', '동두천시', '안산상록구',
       '안산단원구', '고양덕양구', '고양일산동구', '고양일산서구', '과천시', '구리시', '남양주시', '오산시',
       '시흥시', '군포시', '의왕시', '하남시', '용인처인구', '용인기흥구', '용인수지구', '파주시',
       '이천시', '안성시', '김포시', '화성시', '광주시', '양주시', '포천시', '여주시', '연천군',
       '가평군', '양평군'], dtype=object)

**평균 공시지가 계산**

In [None]:
# Average 공시지가 per 시군구명; as_index=False keeps 시군구명 as a regular column.
# NOTE(review): the key is 시군구명 only, so districts that share a name across
# different 시도명 are pooled into a single average. '중구' shows up just once in
# the unique() listing although it looks like both Seoul and Incheon have one —
# TODO confirm whether 시도명 should be part of the grouping key.
by_district = estate.groupby('시군구명', as_index=False)
g_estate = by_district['공시지가'].mean()

In [None]:
# Display the per-시군구명 average land prices.
g_estate

Unnamed: 0,시군구명,공시지가
0,가평군,1.425912e+05
1,강남구,1.680318e+07
2,강동구,5.667941e+06
3,강북구,3.519811e+06
4,강서구,4.261157e+06
...,...,...
73,파주시,3.356315e+05
74,평택시,6.349830e+05
75,포천시,1.834384e+05
76,하남시,2.540962e+06


**시군구 통합**

시군구2 : 경기도의 '구' 단위 행정구역을 '시' 단위로 통합 ('구' 단위 행은 그대로 두고, '시' 단위 평균 행을 추가)

In [None]:
# Append one city-level row per multi-district city to g_estate.
#
# Each city's 공시지가 is the mean of the 공시지가 values of its own district
# rows in g_estate. The district rows themselves are kept; the city rows are
# added at the end of the table.
#
# Fixes relative to the previous version of this cell:
#   * the mean is taken over the city's district rows only (it used to be the
#     mean of the whole table, so every city received the same value);
#   * the 안산 districts are labelled '안산시' (they used to be labelled
#     '안양시'), and '안양시' is now built from the 안양 districts;
#   * re-running the cell no longer appends duplicate city rows.
#
# NOTE(review): this is the unweighted mean of the district averages, not a
# parcel-weighted city average — confirm which one is intended.
city_districts = {
    '부천시': ['부천원미구', '부천소사구', '부천오정구'],
    '성남시': ['성남수정구', '성남중원구', '성남분당구'],
    '수원시': ['수원장안구', '수원권선구', '수원팔달구', '수원영통구'],
    '안산시': ['안산상록구', '안산단원구'],
    '안양시': ['안양만안구', '안양동안구'],
    '고양시': ['고양덕양구', '고양일산동구', '고양일산서구'],
    '용인시': ['용인처인구', '용인기흥구', '용인수지구'],
}

# Remove city rows left over from an earlier run of this cell so the result
# is the same no matter how many times the cell is executed.
g_estate = g_estate[~g_estate['시군구명'].isin(list(city_districts))]

city_rows = []
for city_name, districts in city_districts.items():
    district_rows = g_estate[g_estate['시군구명'].isin(districts)]
    if district_rows.empty:
        # None of this city's districts are present; skip it instead of
        # appending a NaN row.
        continue
    city_rows.append({
        '시군구명': city_name,  # new city-level name
        '공시지가': district_rows['공시지가'].mean(),
    })

if city_rows:
    g_estate = pd.concat(
        [g_estate, pd.DataFrame(city_rows)],
        ignore_index=True,
    )
else:
    # Still normalise the index after the filtering step above.
    g_estate = g_estate.reset_index(drop=True)

In [None]:
# Display the table after the city-level rows have been appended.
g_estate

Unnamed: 0,시군구명,공시지가
0,가평군,1.425912e+05
1,강남구,1.680318e+07
2,강동구,5.667941e+06
3,강북구,3.519811e+06
4,강서구,4.261157e+06
...,...,...
79,성남시,2.956037e+06
80,수원시,2.956037e+06
81,안양시,2.956037e+06
82,고양시,2.956037e+06


In [None]:
# Scale 공시지가 by its maximum so the highest-priced row becomes 1.0;
# the result is stored in a new column, 공시지가2.
g_max = g_estate['공시지가'].max()
g_estate['공시지가2'] = g_estate['공시지가'].div(g_max)
g_estate

Unnamed: 0,시군구명,공시지가,공시지가2
0,가평군,1.425912e+05,0.008486
1,강남구,1.680318e+07,1.000000
2,강동구,5.667941e+06,0.337314
3,강북구,3.519811e+06,0.209473
4,강서구,4.261157e+06,0.253592
...,...,...,...
79,성남시,2.956037e+06,0.175921
80,수원시,2.956037e+06,0.175921
81,안양시,2.956037e+06,0.175921
82,고양시,2.956037e+06,0.175921


**데이터 저장**

In [None]:
# Write the final table to CSV without the DataFrame index.
out_path = '공시지가_찐.csv'
g_estate.to_csv(out_path, index=False)