In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import mean_squared_error

In [2]:
# 导入数据
train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')
train.head()

Unnamed: 0,时间,小区名,小区房屋出租数量,楼层,总楼层,房屋面积,房屋朝向,居住状态,卧室数量,厅的数量,卫的数量,出租方式,区,位置,地铁线路,地铁站点,距离,装修情况,月租金
0,1,3072,0.128906,2,0.236364,0.008628,东南,,1,1,1,,11.0,118.0,2.0,40.0,0.764167,,5.602716
1,1,3152,0.132812,1,0.381818,0.017046,东,,1,0,0,,10.0,100.0,4.0,58.0,0.709167,,16.977929
2,1,5575,0.042969,0,0.290909,0.010593,东南,,2,1,2,,12.0,130.0,5.0,37.0,0.5725,,8.998302
3,1,3103,0.085938,2,0.581818,0.019199,南,,3,2,2,,7.0,90.0,2.0,63.0,0.658333,,5.602716
4,1,5182,0.214844,0,0.545455,0.010427,东北,,2,1,1,,3.0,31.0,,,,,7.300509


In [3]:
print(train.shape)

(196539, 19)


In [4]:
train.describe()

Unnamed: 0,时间,小区名,小区房屋出租数量,楼层,总楼层,房屋面积,居住状态,卧室数量,厅的数量,卫的数量,出租方式,区,位置,地铁线路,地铁站点,距离,装修情况,月租金
count,196539.0,196539.0,195538.0,196539.0,196539.0,196539.0,20138.0,196539.0,196539.0,196539.0,24230.0,196508.0,196508.0,91778.0,91778.0,91778.0,18492.0,196539.0
mean,2.115229,3224.116562,0.124151,0.955449,0.408711,0.013139,2.725196,2.236635,1.299625,1.223818,0.900289,7.905139,67.945982,3.28485,57.493735,0.551202,3.589228,7.949313
std,0.78698,2023.073726,0.133299,0.851511,0.1831,0.008104,0.667763,0.896961,0.613169,0.487234,0.299621,4.025696,43.522394,1.477147,35.191414,0.247268,1.996912,6.310609
min,1.0,0.0,0.007812,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.001667,1.0,0.0
25%,1.0,1388.0,0.039062,0.0,0.290909,0.009268,3.0,2.0,1.0,1.0,1.0,4.0,33.0,2.0,23.0,0.356667,2.0,4.923599
50%,2.0,3086.0,0.082031,1.0,0.418182,0.01291,3.0,2.0,1.0,1.0,1.0,9.0,61.0,4.0,59.0,0.554167,2.0,6.621392
75%,3.0,5199.0,0.160156,2.0,0.563636,0.014896,3.0,3.0,2.0,1.0,1.0,11.0,103.0,5.0,87.0,0.745833,6.0,8.998302
max,3.0,6627.0,1.0,2.0,1.0,1.0,3.0,11.0,8.0,8.0,1.0,14.0,152.0,5.0,119.0,1.0,6.0,100.0


In [5]:
# 查看每列的数据类型
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 196539 entries, 0 to 196538
Data columns (total 19 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   时间        196539 non-null  int64  
 1   小区名       196539 non-null  int64  
 2   小区房屋出租数量  195538 non-null  float64
 3   楼层        196539 non-null  int64  
 4   总楼层       196539 non-null  float64
 5   房屋面积      196539 non-null  float64
 6   房屋朝向      196539 non-null  object 
 7   居住状态      20138 non-null   float64
 8   卧室数量      196539 non-null  int64  
 9   厅的数量      196539 non-null  int64  
 10  卫的数量      196539 non-null  int64  
 11  出租方式      24230 non-null   float64
 12  区         196508 non-null  float64
 13  位置        196508 non-null  float64
 14  地铁线路      91778 non-null   float64
 15  地铁站点      91778 non-null   float64
 16  距离        91778 non-null   float64
 17  装修情况      18492 non-null   float64
 18  月租金       196539 non-null  float64
dtypes: float64(12), int64(6), object(1)
memory u

### 将特征值分类了
* 房屋朝向
* 小区名和小区房屋出租数量
* 楼层、总楼层、房屋面积
* 卧室数量、厅的数量、卫的数量
* 区、位置
* 地铁线路、地铁站点、距离
* 装修情况
* 居住状态
* 出租方式

In [6]:
# 将房屋朝向这一列进行编码，将其转换为数字
## 统计房屋朝向的种类
print(train['房屋朝向'].value_counts())
## 将不同的房屋朝向进行编码，整理成字典的形式，健为房屋的具体朝向，值为对应的数字
orientation_dict = {}
for orientation in train['房屋朝向'].unique():
    orientation_dict[orientation] = list(train['房屋朝向'].unique()).index(orientation)
print(len(orientation_dict))
## 根据字典，将房屋朝向这一列进行编码
train['房屋朝向'] = train['房屋朝向'].map(orientation_dict)
## 判断test的房屋朝向是否在train中出现过
for orientation in test['房屋朝向'].unique():
    if orientation not in orientation_dict:
        orientation_dict[orientation] = max(orientation_dict.values()) + 1
## 根据字典，将房屋朝向这一列进行编码
test['房屋朝向'] = test['房屋朝向'].map(orientation_dict)
print(len(orientation_dict))
### 比之前多了两个数字

南            54770
东南           54359
东            31962
西南           17470
北            10428
             ...  
东南 西 北           1
北 西              1
东南 南 西南 西        1
西南 西 东北          1
南 西南 西 西北        1
Name: 房屋朝向, Length: 64, dtype: int64
64
66


In [7]:
# 判断相同的'小区名'对应的'小区房屋出租数量'是否相同
train_1 = train[['小区名','小区房屋出租数量']]
train_1.dropna(inplace=True)
train_1['test'] = train_1.groupby('小区名')['小区房屋出租数量'].transform('sum').duplicated(keep=False)
print(train_1['test'].unique())
test_1 = test[['小区名','小区房屋出租数量']]
test_1.dropna(inplace=True)
test_1['test'] = test_1.groupby('小区名')['小区房屋出租数量'].transform('sum').duplicated(keep=False)
print(test_1['test'].unique())

[ True]
[ True]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['test'] = train_1.groupby('小区名')['小区房屋出租数量'].transform('sum').duplicated(keep=False)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_1.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https:/

In [8]:
# 进行部分缺失值的填充
## 将时间这一列删除
train.drop('时间', axis=1, inplace=True)
test.drop('时间', axis=1, inplace=True)
## 填充小区房屋出租数量这一列
train['小区房屋出租数量'] = train.groupby('小区名')['小区房屋出租数量'].transform(lambda x: x.fillna(x.median()))
test['小区房屋出租数量'] = test.groupby('小区名')['小区房屋出租数量'].transform(lambda x: x.fillna(x.median()))

In [9]:
## 判断小区名一致，区是否一致
train_1 = train[['小区名','区']]
train_1.dropna(inplace=True)
train_1['test'] = train_1.groupby('小区名')['区'].transform('sum').duplicated(keep=False)
print(train_1['test'].unique())
test_1 = test[['小区名','区']]
test_1.dropna(inplace=True)
test_1['test'] = test_1.groupby('小区名')['区'].transform('sum').duplicated(keep=False)
print(test_1['test'].unique())

[ True]
[ True]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['test'] = train_1.groupby('小区名')['区'].transform('sum').duplicated(keep=False)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_1.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas

In [10]:
## 用每个区的月租金的平均数的rank值来填充缺失值
train_1 = train[['区','月租金']]
# 使用-1代替缺失值
train_1.fillna(-1, inplace=True)
train_1['区均值'] = train_1.groupby('区')['月租金'].transform('mean')
train_1['区rank'] = train_1['区均值'].rank(method='dense').astype(int)
district_dict = dict(zip(train_1['区'], train_1['区rank']))
# 使用-1代替tran区列中的缺失值
train['区'] = train['区'].fillna(-1)
train['区'] = train['区'].map(district_dict)
# 判断区这一列是否还有缺失值
print(train['区'].isnull().sum())
# 使用-1代替test区列中的缺失值
test['区'] = test['区'].fillna(-1)
test['区'] = test['区'].map(district_dict)

0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['区均值'] = train_1.groupby('区')['月租金'].transform('mean')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['区rank'] = train_1['区均值'].rank(method='dense').astype(int)


In [11]:
## 用每个位置的月租金的平均数的rank值来填充缺失值
train_1 = train[['位置','月租金']]
# 使用-1000000代替缺失值
train_1.fillna(-1000000, inplace=True)
train_1['位置均值'] = train_1.groupby('位置')['月租金'].transform('mean')
train_1['位置rank'] = train_1['位置均值'].rank(method='dense').astype(int)
location_dict = dict(zip(train_1['位置'], train_1['位置rank']))
# 使用-1000000代替tran位置列中的缺失值
train['位置'] = train['位置'].fillna(-1000000)
train['位置'] = train['位置'].map(location_dict)
# 判断位置这一列是否还有缺失值
print(train['位置'].isnull().sum())
# 使用-1000000代替test位置列中的缺失值
test['位置'] = test['位置'].fillna(-1000000)
test['位置'] = test['位置'].map(location_dict)

0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['位置均值'] = train_1.groupby('位置')['月租金'].transform('mean')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['位置rank'] = train_1['位置均值'].rank(method='dense').astype(int)


In [14]:
## 地铁线路、地铁站点、距离
### 相同方法
train_1 = train[['地铁线路','月租金']]
train_1.fillna(-1, inplace=True)
train_1['地铁线路均值'] = train_1.groupby('地铁线路')['月租金'].transform('mean')
train_1['地铁线路rank'] = train_1['地铁线路均值'].rank(method='dense').astype(int)
subway_line_dict = dict(zip(train_1['地铁线路'], train_1['地铁线路rank']))
train['地铁线路'] = train['地铁线路'].fillna(-1)
train['地铁线路'] = train['地铁线路'].map(subway_line_dict)
print(train['地铁线路'].isnull().sum())
test['地铁线路'] = test['地铁线路'].fillna(-1)
test['地铁线路'] = test['地铁线路'].map(subway_line_dict)

0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['地铁线路均值'] = train_1.groupby('地铁线路')['月租金'].transform('mean')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['地铁线路rank'] = train_1['地铁线路均值'].rank(method='dense').astype(int)


In [15]:
# 地铁站点
train_1 = train[['地铁站点','月租金']]
train_1.fillna(-100000, inplace=True)
train_1['地铁站点均值'] = train_1.groupby('地铁站点')['月租金'].transform('mean')
train_1['地铁站点rank'] = train_1['地铁站点均值'].rank(method='dense').astype(int)
subway_station_dict = dict(zip(train_1['地铁站点'], train_1['地铁站点rank']))
train['地铁站点'] = train['地铁站点'].fillna(-100000)
train['地铁站点'] = train['地铁站点'].map(subway_station_dict)
print(train['地铁站点'].isnull().sum())
test['地铁站点'] = test['地铁站点'].fillna(-100000)
test['地铁站点'] = test['地铁站点'].map(subway_station_dict)

0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['地铁站点均值'] = train_1.groupby('地铁站点')['月租金'].transform('mean')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_1['地铁站点rank'] = train_1['地铁站点均值'].rank(method='dense').astype(int)


In [22]:
# 距离
## 使用距离的均值来填充缺失值
train['距离'].fillna(train['距离'].mean(), inplace=True)
# 将小区名和距离整理成字典的形式，健为小区名，值为距离的均值
distance_dict = dict(train.groupby('小区名')['距离'].mean())
# 判断test的小区名是否在train中出现过
for community in test['小区名'].unique():
    if community not in distance_dict:
        distance_dict[community] = train['距离'].mean()
# 根据字典，将距离这一列进行填充
test['距离'] = test['距离'].fillna(test['小区名'].map(distance_dict))
train.head()

Unnamed: 0,小区名,小区房屋出租数量,楼层,总楼层,房屋面积,房屋朝向,居住状态,卧室数量,厅的数量,卫的数量,出租方式,区,位置,地铁线路,地铁站点,距离,装修情况,月租金
0,3072,0.128906,2,0.236364,0.008628,0,,1,1,1,,11,83,2,103,0.764167,,5.602716
1,3152,0.132812,1,0.381818,0.017046,1,,1,0,0,,13,125,5,117,0.709167,,16.977929
2,5575,0.042969,0,0.290909,0.010593,0,,2,1,2,,15,95,6,99,0.5725,,8.998302
3,3103,0.085938,2,0.581818,0.019199,2,,3,2,2,,5,17,2,10,0.658333,,5.602716
4,5182,0.214844,0,0.545455,0.010427,3,,2,1,1,,9,71,1,57,0.551202,,7.300509


In [23]:
## 居住状态
### 查看居住状态的种类
print(train['居住状态'].value_counts())
### 将缺失值替换成0
train['居住状态'].fillna(0, inplace=True)
test['居住状态'].fillna(0, inplace=True)
print(train['居住状态'].value_counts())

3.0    17087
1.0     2483
2.0      568
Name: 居住状态, dtype: int64
0.0    176401
3.0     17087
1.0      2483
2.0       568
Name: 居住状态, dtype: int64


In [24]:
## 出租方式
### 查看出租方式的种类
print(train['出租方式'].value_counts())
### 将缺失值替换成2
train['出租方式'].fillna(2, inplace=True)
test['出租方式'].fillna(2, inplace=True)
print(train['出租方式'].value_counts())

1.0    21814
0.0     2416
Name: 出租方式, dtype: int64
2.0    172309
1.0     21814
0.0      2416
Name: 出租方式, dtype: int64


In [25]:
## 装修情况
### 查看装修情况的种类
print(train['装修情况'].value_counts())
### 将缺失值替换成0
train['装修情况'].fillna(0, inplace=True)
test['装修情况'].fillna(0, inplace=True)
print(train['装修情况'].value_counts())

2.0    9265
6.0    7293
1.0    1154
4.0     542
3.0     214
5.0      24
Name: 装修情况, dtype: int64
0.0    178047
2.0      9265
6.0      7293
1.0      1154
4.0       542
3.0       214
5.0        24
Name: 装修情况, dtype: int64


In [26]:
train.head()

Unnamed: 0,小区名,小区房屋出租数量,楼层,总楼层,房屋面积,房屋朝向,居住状态,卧室数量,厅的数量,卫的数量,出租方式,区,位置,地铁线路,地铁站点,距离,装修情况,月租金
0,3072,0.128906,2,0.236364,0.008628,0,0.0,1,1,1,2.0,11,83,2,103,0.764167,0.0,5.602716
1,3152,0.132812,1,0.381818,0.017046,1,0.0,1,0,0,2.0,13,125,5,117,0.709167,0.0,16.977929
2,5575,0.042969,0,0.290909,0.010593,0,0.0,2,1,2,2.0,15,95,6,99,0.5725,0.0,8.998302
3,3103,0.085938,2,0.581818,0.019199,2,0.0,3,2,2,2.0,5,17,2,10,0.658333,0.0,5.602716
4,5182,0.214844,0,0.545455,0.010427,3,0.0,2,1,1,2.0,9,71,1,57,0.551202,0.0,7.300509


In [27]:
test.head()

Unnamed: 0,id,小区名,小区房屋出租数量,楼层,总楼层,房屋面积,房屋朝向,居住状态,卧室数量,厅的数量,卫的数量,出租方式,区,位置,地铁线路,地铁站点,距离,装修情况
0,1,6011,0.382812,1,0.6,0.007117,1,3.0,2,1,1,1.0,13,43,1,57,0.551202,6.0
1,2,1697,0.152344,1,0.472727,0.007448,1,0.0,2,1,1,2.0,9,99,1,57,0.551202,0.0
2,3,754,0.207031,2,0.709091,0.014068,0,0.0,3,2,2,2.0,13,150,5,108,0.400833,0.0
3,4,1285,0.011719,0,0.090909,0.008937,2,0.0,2,1,1,2.0,14,128,6,107,0.384167,0.0
4,5,4984,0.035156,1,0.218182,0.008606,0,0.0,2,1,1,2.0,14,84,3,75,0.598333,0.0


In [29]:
# 整理
test = test.sort_values(by=['id'], ascending=(True))
# 将id列作为索引
test.set_index('id', inplace=True)
test.head()

Unnamed: 0_level_0,小区名,小区房屋出租数量,楼层,总楼层,房屋面积,房屋朝向,居住状态,卧室数量,厅的数量,卫的数量,出租方式,区,位置,地铁线路,地铁站点,距离,装修情况
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,6011,0.382812,1,0.6,0.007117,1,3.0,2,1,1,1.0,13,43,1,57,0.551202,6.0
2,1697,0.152344,1,0.472727,0.007448,1,0.0,2,1,1,2.0,9,99,1,57,0.551202,0.0
3,754,0.207031,2,0.709091,0.014068,0,0.0,3,2,2,2.0,13,150,5,108,0.400833,0.0
4,1285,0.011719,0,0.090909,0.008937,2,0.0,2,1,1,2.0,14,128,6,107,0.384167,0.0
5,4984,0.035156,1,0.218182,0.008606,0,0.0,2,1,1,2.0,14,84,3,75,0.598333,0.0


In [31]:
train_label = train['月租金'].to_list()
# 删除月租金列
train.drop('月租金', axis=1, inplace=True)
train.head()

Unnamed: 0,小区名,小区房屋出租数量,楼层,总楼层,房屋面积,房屋朝向,居住状态,卧室数量,厅的数量,卫的数量,出租方式,区,位置,地铁线路,地铁站点,距离,装修情况
0,3072,0.128906,2,0.236364,0.008628,0,0.0,1,1,1,2.0,11,83,2,103,0.764167,0.0
1,3152,0.132812,1,0.381818,0.017046,1,0.0,1,0,0,2.0,13,125,5,117,0.709167,0.0
2,5575,0.042969,0,0.290909,0.010593,0,0.0,2,1,2,2.0,15,95,6,99,0.5725,0.0
3,3103,0.085938,2,0.581818,0.019199,2,0.0,3,2,2,2.0,5,17,2,10,0.658333,0.0
4,5182,0.214844,0,0.545455,0.010427,3,0.0,2,1,1,2.0,9,71,1,57,0.551202,0.0


In [32]:
# 判断train和test的列是否一致
print(train.columns == test.columns)

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True]


### 超参过程省略

In [34]:
kf = KFold(n_splits=10, shuffle=True, random_state=2023)
# 建模
model=xgb.XGBRegressor(max_depth=8,
                         learning_rate=0.1,
                         objective="reg:linear",
                         n_estimators=3115,
                         colsample_bytree=0.6, 
                         reg_alpha=3, 
                         reg_lambda=2, 
                         gamma=0.6,
                         subsample=0.7, 
                         silent=1, 
                         n_jobs=-1)
# 交叉验证
score = cross_val_score(model, train, train_label, cv=kf, scoring='neg_mean_squared_error')
print(score.mean())

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

-1.894544167950364


In [35]:
model.fit(train,train_label)
predict = model.predict(test)
test['月租金_预测'] = predict
test.head()

Parameters: { "silent" } are not used.



Unnamed: 0_level_0,小区名,小区房屋出租数量,楼层,总楼层,房屋面积,房屋朝向,居住状态,卧室数量,厅的数量,卫的数量,出租方式,区,位置,地铁线路,地铁站点,距离,装修情况,月租金_预测
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,6011,0.382812,1,0.6,0.007117,1,3.0,2,1,1,1.0,13,43,1,57,0.551202,6.0,4.287949
2,1697,0.152344,1,0.472727,0.007448,1,0.0,2,1,1,2.0,9,99,1,57,0.551202,0.0,6.14272
3,754,0.207031,2,0.709091,0.014068,0,0.0,3,2,2,2.0,13,150,5,108,0.400833,0.0,13.849586
4,1285,0.011719,0,0.090909,0.008937,2,0.0,2,1,1,2.0,14,128,6,107,0.384167,0.0,5.437343
5,4984,0.035156,1,0.218182,0.008606,0,0.0,2,1,1,2.0,14,84,3,75,0.598333,0.0,5.675562
