In [18]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np

In [19]:
train = pd.read_csv('train_df_errno.csv')
test = pd.read_csv('test_df.csv')
sub = pd.read_csv('sample_submission.csv')
age = pd.read_csv('age_gender_info.csv')

train.shape, test.shape, sub.shape, age.shape

((2896, 15), (1008, 14), (150, 2), (16, 23))

In [20]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2896 entries, 0 to 2895
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   단지코드        2896 non-null   object 
 1   총세대수        2896 non-null   int64  
 2   임대건물구분      2896 non-null   object 
 3   지역          2896 non-null   object 
 4   공급유형        2896 non-null   object 
 5   전용면적        2896 non-null   float64
 6   전용면적별세대수    2896 non-null   int64  
 7   공가수         2896 non-null   float64
 8   자격유형        2896 non-null   object 
 9   임대보증금       2327 non-null   object 
 10  임대료         2327 non-null   object 
 11  10분내지하철수    2685 non-null   float64
 12  10분내버스정류장수  2892 non-null   float64
 13  단지내주차면수     2896 non-null   float64
 14  등록차량수       2896 non-null   float64
dtypes: float64(6), int64(2), object(7)
memory usage: 339.5+ KB


In [21]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1008 entries, 0 to 1007
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   단지코드        1008 non-null   object 
 1   총세대수        1008 non-null   int64  
 2   임대건물구분      1008 non-null   object 
 3   지역          1008 non-null   object 
 4   공급유형        1008 non-null   object 
 5   전용면적        1008 non-null   float64
 6   전용면적별세대수    1008 non-null   int64  
 7   공가수         1008 non-null   float64
 8   자격유형        1006 non-null   object 
 9   임대보증금       828 non-null    object 
 10  임대료         828 non-null    object 
 11  10분내지하철수    970 non-null    float64
 12  10분내버스정류장수  1008 non-null   float64
 13  단지내주차면수     1008 non-null   float64
dtypes: float64(5), int64(2), object(7)
memory usage: 110.4+ KB


In [22]:
train.isna().sum()

단지코드            0
총세대수            0
임대건물구분          0
지역              0
공급유형            0
전용면적            0
전용면적별세대수        0
공가수             0
자격유형            0
임대보증금         569
임대료           569
10분내지하철수      211
10분내버스정류장수      4
단지내주차면수         0
등록차량수           0
dtype: int64

In [23]:
test.isna().sum()

단지코드            0
총세대수            0
임대건물구분          0
지역              0
공급유형            0
전용면적            0
전용면적별세대수        0
공가수             0
자격유형            2
임대보증금         180
임대료           180
10분내지하철수       38
10분내버스정류장수      0
단지내주차면수         0
dtype: int64

In [24]:
len(sub)

150

In [25]:
len(test['단지코드'].unique())  # 3개 코드 결측값 채워서 제출해야 함

147

### 결측치 처리

#### 데이터 결합

In [26]:
all_df = pd.concat([train, test], join='inner', ignore_index=True)
all_df

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수
0,C2515,545,아파트,경상남도,국민임대,33.48,276,17.0,A,9216000,82940,0.0,3.0,624.0
1,C2515,545,아파트,경상남도,국민임대,39.60,60,17.0,A,12672000,107130,0.0,3.0,624.0
2,C2515,545,아파트,경상남도,국민임대,39.60,20,17.0,A,12672000,107130,0.0,3.0,624.0
3,C2515,545,아파트,경상남도,국민임대,46.90,38,17.0,A,18433000,149760,0.0,3.0,624.0
4,C2515,545,아파트,경상남도,국민임대,46.90,19,17.0,A,18433000,149760,0.0,3.0,624.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3899,C1267,675,아파트,경상남도,행복주택,36.77,126,38.0,L,-,-,0.0,1.0,467.0
3900,C2189,382,아파트,전라북도,국민임대,29.19,96,45.0,H,6872000,106400,0.0,2.0,300.0
3901,C2189,382,아파트,전라북도,국민임대,29.19,20,45.0,H,6872000,106400,0.0,2.0,300.0
3902,C2189,382,아파트,전라북도,국민임대,39.45,202,45.0,H,13410000,144600,0.0,2.0,300.0


In [27]:
all_df.isna().sum()

단지코드            0
총세대수            0
임대건물구분          0
지역              0
공급유형            0
전용면적            0
전용면적별세대수        0
공가수             0
자격유형            2
임대보증금         749
임대료           749
10분내지하철수      249
10분내버스정류장수      4
단지내주차면수         0
dtype: int64

#### 자격유형(test) 결측치 처리

In [28]:
all_df['지역'].unique()

array(['경상남도', '대전광역시', '경기도', '전라북도', '강원도', '광주광역시', '충청남도', '부산광역시',
       '제주특별자치도', '울산광역시', '충청북도', '전라남도', '경상북도', '대구광역시', '서울특별시',
       '세종특별자치시'], dtype=object)

In [29]:
all_df.loc[all_df['자격유형'].isnull()]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수
3092,C2411,962,아파트,경상남도,국민임대,46.9,240,25.0,,71950000,37470,0.0,2.0,840.0
3154,C2253,1161,아파트,강원도,영구임대,26.37,745,0.0,,2249000,44770,0.0,2.0,173.0


In [30]:
grouped = all_df.groupby(['단지코드', '임대건물구분','지역','공급유형'])  
group1 = grouped.get_group(('C2411', '아파트', '경상남도', '국민임대'))  
group1  # 동일 단지에서 자격유형 결측치를 발견하여 주변 행의 값과 동일한 값으로 채움

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수
3089,C2411,962,아파트,경상남도,국민임대,39.43,56,25.0,A,11992000,100720,0.0,2.0,840.0
3090,C2411,962,아파트,경상남도,국민임대,39.72,336,25.0,A,11992000,100720,0.0,2.0,840.0
3091,C2411,962,아파트,경상남도,국민임대,39.82,179,25.0,A,11992000,100720,0.0,2.0,840.0
3092,C2411,962,아파트,경상남도,국민임대,46.9,240,25.0,,71950000,37470,0.0,2.0,840.0
3093,C2411,962,아파트,경상남도,국민임대,51.93,150,25.0,A,21586000,171480,0.0,2.0,840.0


In [31]:
group2 = grouped.get_group(('C2253', '아파트', '강원도', '영구임대'))
group2  # 동일 단지에서 자격유형 결측치를 발견하여 주변 행의 값과 동일한 값으로 채움

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수
3154,C2253,1161,아파트,강원도,영구임대,26.37,745,0.0,,2249000,44770,0.0,2.0,173.0
3155,C2253,1161,아파트,강원도,영구임대,31.32,239,0.0,C,3731000,83020,0.0,2.0,173.0
3156,C2253,1161,아파트,강원도,영구임대,31.32,149,0.0,C,3731000,83020,0.0,2.0,173.0


In [32]:
all_df.loc[3092, '자격유형'] = 'A'  # group1에서 확인
all_df.loc[3154, '자격유형'] = 'C'  # group2에서 확인

In [33]:
all_df.loc[all_df['자격유형'].isnull()]  # 자격유형 결측치 처리 확인

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수


#### 자격유형 label encoding

In [34]:
print(all_df.자격유형.unique())

['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O']


In [35]:
mapping = {'A':1, 'B':2, 'C':3, 'D':4, 'E':5, 'F':6, 'G':7, 'H':8, 'I':9, 'J':10, 
          'K':11, 'L':12, 'M':13, 'N':14, 'O':15}
all_df['자격유형'] = all_df['자격유형'].map(mapping).astype(int)
print(all_df.자격유형.unique())

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]


#### 10분내버스정류장수(train) 결측치 처리

In [36]:
all_df.isna().sum()

단지코드            0
총세대수            0
임대건물구분          0
지역              0
공급유형            0
전용면적            0
전용면적별세대수        0
공가수             0
자격유형            0
임대보증금         749
임대료           749
10분내지하철수      249
10분내버스정류장수      4
단지내주차면수         0
dtype: int64

In [37]:
all_df.loc[all_df['10분내버스정류장수'].isnull(), :]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수
2293,N2431,1047,아파트,경상남도,공공임대(10년),74.97,80,15.0,1,46000000,456000,,,1066.0
2294,N2431,1047,아파트,경상남도,공공임대(10년),84.95,124,15.0,1,57000000,462000,,,1066.0
2295,N2431,1047,아파트,경상남도,공공임대(10년),84.96,289,15.0,1,57000000,462000,,,1066.0
2296,N2431,1047,아파트,경상남도,공공임대(10년),84.98,82,15.0,1,57000000,462000,,,1066.0


In [38]:
grouped = all_df.groupby(['임대건물구분','지역','공급유형','자격유형'])
group1 = grouped.get_group(('아파트','경상남도','공공임대(10년)',1))
group1

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수
2158,C1788,376,아파트,경상남도,공공임대(10년),51.59,116,28.0,1,29000000,340000,0.0,3.0,380.0
2159,C1788,376,아파트,경상남도,공공임대(10년),59.97,260,28.0,1,36000000,390000,0.0,3.0,380.0
2208,C2405,600,아파트,경상남도,공공임대(10년),75.84,48,22.0,1,36019000,603480,0.0,8.0,728.0
2209,C2405,600,아파트,경상남도,공공임대(10년),75.99,54,22.0,1,36113000,603480,0.0,8.0,728.0
2210,C2405,600,아파트,경상남도,공공임대(10년),84.95,182,22.0,1,44868000,636400,0.0,8.0,728.0
2293,N2431,1047,아파트,경상남도,공공임대(10년),74.97,80,15.0,1,46000000,456000,,,1066.0
2294,N2431,1047,아파트,경상남도,공공임대(10년),84.95,124,15.0,1,57000000,462000,,,1066.0
2295,N2431,1047,아파트,경상남도,공공임대(10년),84.96,289,15.0,1,57000000,462000,,,1066.0
2296,N2431,1047,아파트,경상남도,공공임대(10년),84.98,82,15.0,1,57000000,462000,,,1066.0
2498,C1941,404,아파트,경상남도,공공임대(10년),84.94,64,19.0,1,47288000,627080,0.0,3.0,490.0


In [39]:
group1['10분내버스정류장수'].mean()

4.318181818181818

* 데이터 확인 후 임의 처리 -> 4

In [40]:
all_df.loc[all_df['10분내버스정류장수'].isnull(), '10분내버스정류장수'] = 4
all_df.iloc[2293:2297, :]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수
2293,N2431,1047,아파트,경상남도,공공임대(10년),74.97,80,15.0,1,46000000,456000,,4.0,1066.0
2294,N2431,1047,아파트,경상남도,공공임대(10년),84.95,124,15.0,1,57000000,462000,,4.0,1066.0
2295,N2431,1047,아파트,경상남도,공공임대(10년),84.96,289,15.0,1,57000000,462000,,4.0,1066.0
2296,N2431,1047,아파트,경상남도,공공임대(10년),84.98,82,15.0,1,57000000,462000,,4.0,1066.0


In [41]:
all_df.loc[all_df['10분내버스정류장수'].isnull(), :]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수


In [42]:
all_df.isnull().sum()

단지코드            0
총세대수            0
임대건물구분          0
지역              0
공급유형            0
전용면적            0
전용면적별세대수        0
공가수             0
자격유형            0
임대보증금         749
임대료           749
10분내지하철수      249
10분내버스정류장수      0
단지내주차면수         0
dtype: int64

#### 임대건물구분, 지역, 공급유형 label encoding

In [43]:
all_df.임대건물구분.unique()

array(['아파트', '상가'], dtype=object)

In [44]:
all_df.지역.unique()

array(['경상남도', '대전광역시', '경기도', '전라북도', '강원도', '광주광역시', '충청남도', '부산광역시',
       '제주특별자치도', '울산광역시', '충청북도', '전라남도', '경상북도', '대구광역시', '서울특별시',
       '세종특별자치시'], dtype=object)

In [45]:
all_df.공급유형.unique()

array(['국민임대', '공공임대(50년)', '영구임대', '임대상가', '공공임대(10년)', '공공임대(분납)',
       '장기전세', '공공분양', '행복주택', '공공임대(5년)'], dtype=object)

In [46]:
building = {'아파트':1, '상가':2}
region = {'경상남도':1, '대전광역시':2, '경기도':3, '전라북도':4, '강원도':5, '광주광역시':6, 
         '충청남도':7, '부산광역시':8, '제주특별자치도':9, '울산광역시':10, '충청북도': 11, '전라남도': 12,
         '경상북도':13, '대구광역시':14, '서울특별시':15, '세종특별자치시':16}
rent = {'국민임대':1, '공공임대(50년)':2, '영구임대':3, '임대상가':4, '공공임대(10년)':5,
       '공공임대(분납)':6, '장기전세':7, '공공분양':8, '행복주택':9, '공공임대(5년)':10}

all_df['임대건물구분_num'] = all_df['임대건물구분'].map(building)
all_df['지역_num'] = all_df['지역'].map(region)
all_df['공급유형_num'] = all_df['공급유형'].map(rent)
all_df

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num
0,C2515,545,아파트,경상남도,국민임대,33.48,276,17.0,1,9216000,82940,0.0,3.0,624.0,1,1,1
1,C2515,545,아파트,경상남도,국민임대,39.60,60,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1
2,C2515,545,아파트,경상남도,국민임대,39.60,20,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1
3,C2515,545,아파트,경상남도,국민임대,46.90,38,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1
4,C2515,545,아파트,경상남도,국민임대,46.90,19,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3899,C1267,675,아파트,경상남도,행복주택,36.77,126,38.0,12,-,-,0.0,1.0,467.0,1,1,9
3900,C2189,382,아파트,전라북도,국민임대,29.19,96,45.0,8,6872000,106400,0.0,2.0,300.0,1,4,1
3901,C2189,382,아파트,전라북도,국민임대,29.19,20,45.0,8,6872000,106400,0.0,2.0,300.0,1,4,1
3902,C2189,382,아파트,전라북도,국민임대,39.45,202,45.0,8,13410000,144600,0.0,2.0,300.0,1,4,1


In [47]:
all_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3904 entries, 0 to 3903
Data columns (total 17 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   단지코드        3904 non-null   object 
 1   총세대수        3904 non-null   int64  
 2   임대건물구분      3904 non-null   object 
 3   지역          3904 non-null   object 
 4   공급유형        3904 non-null   object 
 5   전용면적        3904 non-null   float64
 6   전용면적별세대수    3904 non-null   int64  
 7   공가수         3904 non-null   float64
 8   자격유형        3904 non-null   int32  
 9   임대보증금       3155 non-null   object 
 10  임대료         3155 non-null   object 
 11  10분내지하철수    3655 non-null   float64
 12  10분내버스정류장수  3904 non-null   float64
 13  단지내주차면수     3904 non-null   float64
 14  임대건물구분_num  3904 non-null   int64  
 15  지역_num      3904 non-null   int64  
 16  공급유형_num    3904 non-null   int64  
dtypes: float64(5), int32(1), int64(5), object(6)
memory usage: 503.4+ KB


(임대보증금, 임대료는 결측치 처리와 숫자형으로 전환한 뒤에...)

In [48]:
all_df['단지코드'] = all_df['단지코드'].astype('category')
all_df['단지코드_num'] = all_df['단지코드'].cat.codes  # 문자열 코드를 숫자로 바꿔주는 방법 중 하나
all_df

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
0,C2515,545,아파트,경상남도,국민임대,33.48,276,17.0,1,9216000,82940,0.0,3.0,624.0,1,1,1,492
1,C2515,545,아파트,경상남도,국민임대,39.60,60,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492
2,C2515,545,아파트,경상남도,국민임대,39.60,20,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492
3,C2515,545,아파트,경상남도,국민임대,46.90,38,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492
4,C2515,545,아파트,경상남도,국민임대,46.90,19,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3899,C1267,675,아파트,경상남도,행복주택,36.77,126,38.0,12,-,-,0.0,1.0,467.0,1,1,9,86
3900,C2189,382,아파트,전라북도,국민임대,29.19,96,45.0,8,6872000,106400,0.0,2.0,300.0,1,4,1,381
3901,C2189,382,아파트,전라북도,국민임대,29.19,20,45.0,8,6872000,106400,0.0,2.0,300.0,1,4,1,381
3902,C2189,382,아파트,전라북도,국민임대,39.45,202,45.0,8,13410000,144600,0.0,2.0,300.0,1,4,1,381


In [49]:
all_df_last = all_df.drop(['임대건물구분','지역','공급유형'], axis=1)
all_df_last

Unnamed: 0,단지코드,총세대수,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
0,C2515,545,33.48,276,17.0,1,9216000,82940,0.0,3.0,624.0,1,1,1,492
1,C2515,545,39.60,60,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492
2,C2515,545,39.60,20,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492
3,C2515,545,46.90,38,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492
4,C2515,545,46.90,19,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3899,C1267,675,36.77,126,38.0,12,-,-,0.0,1.0,467.0,1,1,9,86
3900,C2189,382,29.19,96,45.0,8,6872000,106400,0.0,2.0,300.0,1,4,1,381
3901,C2189,382,29.19,20,45.0,8,6872000,106400,0.0,2.0,300.0,1,4,1,381
3902,C2189,382,39.45,202,45.0,8,13410000,144600,0.0,2.0,300.0,1,4,1,381


In [50]:
train.shape, test.shape

((2896, 15), (1008, 14))

In [51]:
train_df = all_df_last.iloc[0:2896, : ]
test_df = all_df_last.iloc[2896: , : ]
train_df

Unnamed: 0,단지코드,총세대수,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
0,C2515,545,33.48,276,17.0,1,9216000,82940,0.0,3.0,624.0,1,1,1,492
1,C2515,545,39.60,60,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492
2,C2515,545,39.60,20,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492
3,C2515,545,46.90,38,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492
4,C2515,545,46.90,19,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2891,C2532,239,49.20,19,7.0,1,11346000,116090,0.0,1.0,166.0,1,5,1,501
2892,C2532,239,51.08,34,7.0,1,14005000,142310,0.0,1.0,166.0,1,5,1,501
2893,C2532,239,51.73,34,7.0,1,14005000,142310,0.0,1.0,166.0,1,5,1,501
2894,C2532,239,51.96,114,7.0,1,14005000,142310,0.0,1.0,166.0,1,5,1,501


In [52]:
train_df['등록차량수'] = train['등록차량수']
train_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['등록차량수'] = train['등록차량수']


Unnamed: 0,단지코드,총세대수,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num,등록차량수
0,C2515,545,33.48,276,17.0,1,9216000,82940,0.0,3.0,624.0,1,1,1,492,205.0
1,C2515,545,39.60,60,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492,205.0
2,C2515,545,39.60,20,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492,205.0
3,C2515,545,46.90,38,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492,205.0
4,C2515,545,46.90,19,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492,205.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2891,C2532,239,49.20,19,7.0,1,11346000,116090,0.0,1.0,166.0,1,5,1,501,146.0
2892,C2532,239,51.08,34,7.0,1,14005000,142310,0.0,1.0,166.0,1,5,1,501,146.0
2893,C2532,239,51.73,34,7.0,1,14005000,142310,0.0,1.0,166.0,1,5,1,501,146.0
2894,C2532,239,51.96,114,7.0,1,14005000,142310,0.0,1.0,166.0,1,5,1,501,146.0


In [53]:
train_df.corr()

Unnamed: 0,총세대수,전용면적,전용면적별세대수,공가수,자격유형,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num,등록차량수
총세대수,1.0,0.040105,0.098874,0.066243,0.172231,0.211761,-0.00274,0.522629,0.338464,-0.057314,0.127929,-0.018322,0.333513
전용면적,0.040105,1.0,-0.032453,-0.081549,-0.167612,-0.010291,-0.004884,0.06734,0.012461,0.02749,-0.0335,-0.008477,0.112717
전용면적별세대수,0.098874,-0.032453,1.0,0.162061,-0.07555,-0.057606,0.045425,0.283585,-0.37577,0.013067,-0.152652,-0.028514,0.250513
공가수,0.066243,-0.081549,0.162061,1.0,0.210929,-0.094096,0.043966,0.288062,-0.246412,0.141961,0.077623,-0.01873,0.11891
자격유형,0.172231,-0.167612,-0.07555,0.210929,1.0,0.009012,-0.032335,-0.063684,0.166886,-0.061576,0.673264,0.078305,-0.154482
10분내지하철수,0.211761,-0.010291,-0.057606,-0.094096,0.009012,1.0,0.097503,-0.053185,0.214795,-0.025924,0.03816,-0.075952,-0.107308
10분내버스정류장수,-0.00274,-0.004884,0.045425,0.043966,-0.032335,0.097503,1.0,0.073123,-0.011631,-0.133163,-0.040865,-0.133334,0.104203
단지내주차면수,0.522629,0.06734,0.283585,0.288062,-0.063684,-0.053185,0.073123,1.0,-0.40949,0.007536,-0.141016,-0.071937,0.861338
임대건물구분_num,0.338464,0.012461,-0.37577,-0.246412,0.166886,0.214795,-0.011631,-0.40949,1.0,-0.08666,0.3019,0.007605,-0.44913
지역_num,-0.057314,0.02749,0.013067,0.141961,-0.061576,-0.025924,-0.133163,0.007536,-0.08666,1.0,0.078985,-0.006771,0.060674


### train set model selection

In [54]:
train_df.columns

Index(['단지코드', '총세대수', '전용면적', '전용면적별세대수', '공가수', '자격유형', '임대보증금', '임대료',
       '10분내지하철수', '10분내버스정류장수', '단지내주차면수', '임대건물구분_num', '지역_num', '공급유형_num',
       '단지코드_num', '등록차량수'],
      dtype='object')

In [55]:
from sklearn.model_selection import train_test_split
sel = ['총세대수', '전용면적', '전용면적별세대수', '공가수', '자격유형', '10분내버스정류장수', 
       '단지내주차면수', '임대건물구분_num', '지역_num', '공급유형_num', '단지코드_num']
X = train_df[sel]
y = train_df['등록차량수']
test_X = test_df[sel]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

In [56]:
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

#### Linear Regression

In [57]:
model = LinearRegression()
model.fit(X_train, y_train)
pred = model.predict(X_test)
print('train score : ', model.score(X_train, y_train))
print('test score : ', model.score(X_test, y_test))  # 결정계수

train score :  0.79448922010556
test score :  0.8035633714266129


* test size 찾기

In [58]:
for one in [0.1, 0.2, 0.3, 0.4]:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=one, random_state=0)
    model = LinearRegression()
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    
    print(f'test_size : {one}')
    print('train score : ', model.score(X_train, y_train))
    print('test score : ', model.score(X_test, y_test))
    print()

test_size : 0.1
train score :  0.7942917986012363
test score :  0.8244621707079368

test_size : 0.2
train score :  0.7938317149166443
test score :  0.8099354981036184

test_size : 0.3
train score :  0.7957803529154193
test score :  0.798817806904653

test_size : 0.4
train score :  0.7950689313029522
test score :  0.7981562587395765



In [59]:
mae_val = np.mean(abs(y_test - pred))
print(mae_val)
mse_val = np.mean((y_test - pred)**2)
print(mse_val)
rmse_val = mse_val ** 0.5
print(rmse_val)

139.21238650971497
38312.78335682394
195.73651513405449


#### RandomForest(Regression)

In [60]:
model = RandomForestRegressor(n_jobs=-1)
model.fit(X_train, y_train)
pred = model.predict(X_test)
print('train score : ', model.score(X_train, y_train))
print('test score : ', model.score(X_test, y_test))

train score :  0.9978715039668012
test score :  0.9889502767060081


In [61]:
mae_val = np.mean(abs(y_test - pred))
print(mae_val)
mse_val = np.mean((y_test - pred)**2)
print(mse_val)
rmse_val = mse_val ** 0.5
print(rmse_val)

20.775375323554787
2097.393023295944
45.79730366840327


In [62]:
model = RandomForestRegressor(n_jobs=-1)
model.fit(X_train, y_train)
pred = model.predict(test_X)
pred[0:10]

array([ 675.53,  708.91,  675.29,  675.29,  682.77,  682.77,  685.96,
        692.42, 1434.73, 1434.16])

In [63]:
test_df['등록차량수'] = pred
test_df['단지별차량수평균'] = test_df.groupby('단지코드')['등록차량수'].transform(np.mean)
test_new = test_df.drop_duplicates(['단지코드'], keep='first').reset_index()
test_new

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['등록차량수'] = pred
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['단지별차량수평균'] = test_df.groupby('단지코드')['등록차량수'].transform(np.mean)


Unnamed: 0,index,단지코드,총세대수,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num,등록차량수,단지별차량수평균
0,2896,C1072,754,39.79,116,14.0,8,22830000,189840,0.0,2.0,683.0,1,3,1,32,675.53,684.867500
1,2904,C1128,1354,39.79,368,9.0,8,22830000,189840,0.0,3.0,1216.0,1,3,1,43,1434.73,1426.236667
2,2913,C1456,619,33.40,82,18.0,1,19706000,156200,0.0,16.0,547.0,1,8,1,143,606.66,627.266667
3,2922,C1840,593,39.57,253,7.0,1,14418000,108130,0.0,3.0,543.0,1,4,1,263,577.02,584.757500
4,2926,C1332,1297,39.99,282,11.0,8,28598000,203050,0.0,2.0,1112.0,1,3,1,110,1052.75,1056.036250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142,3878,C2456,349,26.44,24,17.0,8,6992000,117000,0.0,4.0,270.0,1,9,1,471,220.79,221.575000
143,3882,C1266,596,26.94,164,35.0,8,8084000,149910,0.0,1.0,593.0,1,11,1,85,535.21,538.974000
144,3887,C2152,120,24.83,66,9.0,3,-,-,0.0,1.0,40.0,1,5,3,369,45.92,46.120000
145,3889,C1267,675,24.87,28,38.0,8,6882000,104370,0.0,1.0,467.0,1,1,1,86,406.17,406.525455


#### 전처리 과정에서 삭제했던 행 결합(제출 양식에 맞추기 위함)

In [64]:
add_dat = {'code': ['C2675', 'C2335', 'C1327'],
          'num': ['0', '0', '0']}
add_df = pd.DataFrame(add_dat)
add_df

Unnamed: 0,code,num
0,C2675,0
1,C2335,0
2,C1327,0


In [65]:
sub_df = test_new[['단지코드', '단지별차량수평균']]
sub_df.columns = ['code','num']
sub_df = pd.concat([sub_df, add_df]).reset_index()
sub_df = sub_df.drop(['index'], axis=1)
sub_df

Unnamed: 0,code,num
0,C1072,684.867
1,C1128,1426.24
2,C1456,627.267
3,C1840,584.757
4,C1332,1056.04
...,...,...
145,C1267,406.525
146,C2189,291.058
147,C2675,0
148,C2335,0


In [66]:
sub_df.to_csv('20210720_rf.csv', index=False)
sub_df.head()

Unnamed: 0,code,num
0,C1072,684.867
1,C1128,1426.24
2,C1456,627.267
3,C1840,584.757
4,C1332,1056.04


* score : 128.3086113594 (rank 369, 2021-07-20 07:40)

In [67]:
pd.set_option('display.max_rows', 1000)  # 중간 생략 없이 1000개 행은 다 보여준다.

In [68]:
all_df.loc[all_df['10분내지하철수'].isnull()]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
86,C1312,518,아파트,충청남도,국민임대,39.72,60,12.0,1,17460000.0,122210.0,,3.0,527.0,1,7,1,101
87,C1312,518,아파트,충청남도,국민임대,39.98,89,12.0,1,17460000.0,122210.0,,3.0,527.0,1,7,1,101
88,C1312,518,아파트,충청남도,국민임대,41.55,225,12.0,1,19954000.0,130940.0,,3.0,527.0,1,7,1,101
89,C1312,518,아파트,충청남도,국민임대,46.9,143,12.0,1,28687000.0,149660.0,,3.0,527.0,1,7,1,101
90,C1874,619,아파트,충청남도,영구임대,26.37,294,2.0,3,3141000.0,69900.0,,2.0,97.0,1,7,3,278
91,C1874,619,아파트,충청남도,영구임대,26.37,149,2.0,3,3141000.0,69900.0,,2.0,97.0,1,7,3,278
92,C1874,619,아파트,충청남도,영구임대,31.32,149,2.0,3,3731000.0,83020.0,,2.0,97.0,1,7,3,278
93,C1874,619,상가,충청남도,임대상가,12.62,1,2.0,4,,,,2.0,97.0,2,7,4,278
94,C1874,619,상가,충청남도,임대상가,17.4,1,2.0,4,,,,2.0,97.0,2,7,4,278
95,C1874,619,상가,충청남도,임대상가,17.4,1,2.0,4,,,,2.0,97.0,2,7,4,278


In [69]:
grouped = all_df.groupby(['임대건물구분','지역'])
group3 = grouped.get_group(('아파트', '경상남도'))
group3

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
0,C2515,545,아파트,경상남도,국민임대,33.48,276,17.0,1,9216000,82940,0.0,3.0,624.0,1,1,1,492
1,C2515,545,아파트,경상남도,국민임대,39.6,60,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492
2,C2515,545,아파트,경상남도,국민임대,39.6,20,17.0,1,12672000,107130,0.0,3.0,624.0,1,1,1,492
3,C2515,545,아파트,경상남도,국민임대,46.9,38,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492
4,C2515,545,아파트,경상남도,국민임대,46.9,19,17.0,1,18433000,149760,0.0,3.0,624.0,1,1,1,492
5,C2515,545,아파트,경상남도,국민임대,51.97,106,17.0,1,23042000,190090,0.0,3.0,624.0,1,1,1,492
6,C2515,545,아파트,경상남도,국민임대,51.97,26,17.0,1,23042000,190090,0.0,3.0,624.0,1,1,1,492
84,C2576,405,아파트,경상남도,국민임대,46.9,313,19.0,1,19436000,161780,0.0,4.0,296.0,1,1,1,520
85,C2576,405,아파트,경상남도,국민임대,59.88,92,19.0,1,28252000,235420,0.0,4.0,296.0,1,1,1,520
123,C2416,560,아파트,경상남도,영구임대,26.34,360,0.0,3,3138000,69820,0.0,5.0,154.0,1,1,3,458


In [70]:
grouped = all_df.groupby(['임대건물구분','지역'])
group4 = grouped.get_group(('아파트', '경기도'))
group4

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
18,C1945,755,아파트,경기도,국민임대,39.72,120,6.0,2,15607000,130070,1.0,3.0,734.0,1,3,1,301
19,C1945,755,아파트,경기도,국민임대,39.72,120,6.0,2,15607000,130070,1.0,3.0,734.0,1,3,1,301
20,C1945,755,아파트,경기도,국민임대,51.93,207,6.0,2,23139000,193230,1.0,3.0,734.0,1,3,1,301
21,C1945,755,아파트,경기도,국민임대,51.93,96,6.0,2,23139000,193230,1.0,3.0,734.0,1,3,1,301
22,C1945,755,아파트,경기도,국민임대,59.88,160,6.0,2,29209000,244020,1.0,3.0,734.0,1,3,1,301
23,C1945,755,아파트,경기도,국민임대,59.88,52,6.0,2,29209000,244020,1.0,3.0,734.0,1,3,1,301
34,C1244,1722,아파트,경기도,국민임대,39.62,275,10.0,1,29551000,203030,0.0,10.0,1483.0,1,3,1,80
35,C1244,1722,아파트,경기도,국민임대,39.62,126,10.0,1,29551000,203030,0.0,10.0,1483.0,1,3,1,80
36,C1244,1722,아파트,경기도,국민임대,39.62,168,10.0,1,29551000,203030,0.0,10.0,1483.0,1,3,1,80
37,C1244,1722,아파트,경기도,국민임대,39.72,219,10.0,1,29551000,203030,0.0,10.0,1483.0,1,3,1,80


In [71]:
grouped = all_df.groupby(['임대건물구분','지역'])
group5 = grouped.get_group(('아파트', '대전광역시'))
group5

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
7,C1407,1216,아파트,대전광역시,국민임대,30.95,288,13.0,1,15620000.0,127350.0,1.0,1.0,1285.0,1,2,1,131
8,C1407,1216,아파트,대전광역시,국민임대,30.99,68,13.0,1,15620000.0,127350.0,1.0,1.0,1285.0,1,2,1,131
9,C1407,1216,아파트,대전광역시,국민임대,30.99,34,13.0,1,15620000.0,127350.0,1.0,1.0,1285.0,1,2,1,131
10,C1407,1216,아파트,대전광역시,국민임대,41.11,148,13.0,1,27637000.0,164610.0,1.0,1.0,1285.0,1,2,1,131
11,C1407,1216,아파트,대전광역시,국민임대,41.39,74,13.0,1,27637000.0,164610.0,1.0,1.0,1285.0,1,2,1,131
12,C1407,1216,아파트,대전광역시,국민임대,41.58,70,13.0,1,27637000.0,164610.0,1.0,1.0,1285.0,1,2,1,131
13,C1407,1216,아파트,대전광역시,국민임대,41.58,70,13.0,1,27637000.0,164610.0,1.0,1.0,1285.0,1,2,1,131
14,C1407,1216,아파트,대전광역시,국민임대,46.36,170,13.0,1,36048000.0,198250.0,1.0,1.0,1285.0,1,2,1,131
15,C1407,1216,아파트,대전광역시,국민임대,46.36,170,13.0,1,36048000.0,198250.0,1.0,1.0,1285.0,1,2,1,131
16,C1407,1216,아파트,대전광역시,국민임대,51.24,62,13.0,1,42056000.0,253520.0,1.0,1.0,1285.0,1,2,1,131


In [72]:
grouped = all_df.groupby(['임대건물구분','지역'])
group6 = grouped.get_group(('아파트', '충청남도'))
group6

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
86,C1312,518,아파트,충청남도,국민임대,39.72,60,12.0,1,17460000,122210,,3.0,527.0,1,7,1,101
87,C1312,518,아파트,충청남도,국민임대,39.98,89,12.0,1,17460000,122210,,3.0,527.0,1,7,1,101
88,C1312,518,아파트,충청남도,국민임대,41.55,225,12.0,1,19954000,130940,,3.0,527.0,1,7,1,101
89,C1312,518,아파트,충청남도,국민임대,46.9,143,12.0,1,28687000,149660,,3.0,527.0,1,7,1,101
90,C1874,619,아파트,충청남도,영구임대,26.37,294,2.0,3,3141000,69900,,2.0,97.0,1,7,3,278
91,C1874,619,아파트,충청남도,영구임대,26.37,149,2.0,3,3141000,69900,,2.0,97.0,1,7,3,278
92,C1874,619,아파트,충청남도,영구임대,31.32,149,2.0,3,3731000,83020,,2.0,97.0,1,7,3,278
339,C1068,806,아파트,충청남도,국민임대,36.65,200,11.0,1,11234000,106090,,2.0,804.0,1,7,1,30
340,C1068,806,아파트,충청남도,국민임대,36.98,130,11.0,1,11234000,106090,,2.0,804.0,1,7,1,30
341,C1068,806,아파트,충청남도,국민임대,41.55,252,11.0,1,14981000,124830,,2.0,804.0,1,7,1,30


In [73]:
grouped = all_df.groupby(['임대건물구분','지역'])
group7 = grouped.get_group(('아파트', '경상북도'))
group7

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
867,C2371,474,아파트,경상북도,국민임대,36.52,42,20.0,1,9731000,72080,0.0,1.0,374.0,1,13,1,440
868,C2371,474,아파트,경상북도,국민임대,36.52,134,20.0,1,9731000,72080,0.0,1.0,374.0,1,13,1,440
869,C2371,474,아파트,경상북도,국민임대,36.8,88,20.0,1,9731000,72080,0.0,1.0,374.0,1,13,1,440
870,C2371,474,아파트,경상북도,국민임대,46.9,22,20.0,1,17302000,108130,0.0,1.0,374.0,1,13,1,440
871,C2371,474,아파트,경상북도,국민임대,46.9,96,20.0,1,17302000,108130,0.0,1.0,374.0,1,13,1,440
872,C2371,474,아파트,경상북도,국민임대,46.94,46,20.0,1,17302000,108130,0.0,1.0,374.0,1,13,1,440
873,C2371,474,아파트,경상북도,국민임대,51.72,24,20.0,1,21868000,123750,0.0,1.0,374.0,1,13,1,440
874,C2371,474,아파트,경상북도,국민임대,51.93,22,20.0,1,21868000,123750,0.0,1.0,374.0,1,13,1,440
947,C2097,998,아파트,경상북도,국민임대,33.8,184,29.0,1,13158000,90020,0.0,2.0,1240.0,1,13,1,350
948,C2097,998,아파트,경상북도,국민임대,39.9,121,29.0,1,16737000,110790,0.0,2.0,1240.0,1,13,1,350


In [74]:
train[train['단지코드']== 'N2431']

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,등록차량수
2293,N2431,1047,아파트,경상남도,공공임대(10년),74.97,80,15.0,A,46000000,456000,,,1066.0,1214.0
2294,N2431,1047,아파트,경상남도,공공임대(10년),84.95,124,15.0,A,57000000,462000,,,1066.0,1214.0
2295,N2431,1047,아파트,경상남도,공공임대(10년),84.96,289,15.0,A,57000000,462000,,,1066.0,1214.0
2296,N2431,1047,아파트,경상남도,공공임대(10년),84.98,82,15.0,A,57000000,462000,,,1066.0,1214.0
2350,N2431,1047,아파트,경상남도,국민임대,36.77,272,16.0,A,11217000,233330,0.0,2.0,1066.0,1214.0
2351,N2431,1047,아파트,경상남도,국민임대,46.78,200,16.0,A,24389000,303220,0.0,2.0,1066.0,1214.0


In [75]:
all_df[all_df['임대보증금'].isnull()]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
80,C1925,601,상가,강원도,임대상가,32.1,1,9.0,4,,,0.0,4.0,117.0,2,5,4,294
81,C1925,601,상가,강원도,임대상가,32.1,1,9.0,4,,,0.0,4.0,117.0,2,5,4,294
82,C1925,601,상가,강원도,임대상가,32.1,1,9.0,4,,,0.0,4.0,117.0,2,5,4,294
83,C1925,601,상가,강원도,임대상가,72.16,1,9.0,4,,,0.0,4.0,117.0,2,5,4,294
93,C1874,619,상가,충청남도,임대상가,12.62,1,2.0,4,,,,2.0,97.0,2,7,4,278
94,C1874,619,상가,충청남도,임대상가,17.4,1,2.0,4,,,,2.0,97.0,2,7,4,278
95,C1874,619,상가,충청남도,임대상가,17.4,1,2.0,4,,,,2.0,97.0,2,7,4,278
96,C1874,619,상가,충청남도,임대상가,22.89,1,2.0,4,,,,2.0,97.0,2,7,4,278
97,C1874,619,상가,충청남도,임대상가,23.13,1,2.0,4,,,,2.0,97.0,2,7,4,278
98,C1874,619,상가,충청남도,임대상가,23.13,1,2.0,4,,,,2.0,97.0,2,7,4,278


In [84]:
all_df[all_df['임대료'].isnull()]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,10분내지하철수,10분내버스정류장수,단지내주차면수,임대건물구분_num,지역_num,공급유형_num,단지코드_num
80,C1925,601,상가,강원도,임대상가,32.1,1,9.0,4,,,0.0,4.0,117.0,2,5,4,294
81,C1925,601,상가,강원도,임대상가,32.1,1,9.0,4,,,0.0,4.0,117.0,2,5,4,294
82,C1925,601,상가,강원도,임대상가,32.1,1,9.0,4,,,0.0,4.0,117.0,2,5,4,294
83,C1925,601,상가,강원도,임대상가,72.16,1,9.0,4,,,0.0,4.0,117.0,2,5,4,294
93,C1874,619,상가,충청남도,임대상가,12.62,1,2.0,4,,,,2.0,97.0,2,7,4,278
94,C1874,619,상가,충청남도,임대상가,17.4,1,2.0,4,,,,2.0,97.0,2,7,4,278
95,C1874,619,상가,충청남도,임대상가,17.4,1,2.0,4,,,,2.0,97.0,2,7,4,278
96,C1874,619,상가,충청남도,임대상가,22.89,1,2.0,4,,,,2.0,97.0,2,7,4,278
97,C1874,619,상가,충청남도,임대상가,23.13,1,2.0,4,,,,2.0,97.0,2,7,4,278
98,C1874,619,상가,충청남도,임대상가,23.13,1,2.0,4,,,,2.0,97.0,2,7,4,278
