In [1]:
# 导入包
import numpy as np
import pandas as pd

In [2]:
# 读取csv
data = pd.read_csv(r"data/lianjia.csv")

## 1. 观察结构，调整列索引顺序

（Region, Garden, Layout, Floor, ID, Year, Size, Elevator, Direction, Renovation, Price）

In [3]:
# 查看结构
data.head()

Unnamed: 0,Direction,Elevator,Floor,Garden,Id,Layout,Price,Region,Renovation,Size,Year
0,南北,无电梯,低楼层(共6层),翠竹苑,0,3室1厅,365.0,浦东,简装,77.84,1995.0
1,南,有电梯,低楼层(共36层),百汇园,1,3室2厅,1449.0,徐汇,精装,145.2,1995.0
2,南北,有电梯,中楼层(共26层),仁恒河滨城(二期),2,3室2厅,1630.0,浦东,精装,161.94,1995.0
3,南,有电梯,高楼层(共30层),财富海景花园,3,3室2厅,2000.0,浦东,精装,185.0,1995.0
4,东,有电梯,中楼层(共26层),仁恒滨江园,4,3室2厅,1360.0,浦东,精装,130.41,1995.0


In [4]:
# 调整列索引顺序
data = data.reindex(
    columns=[
        "Region",
        "Garden",
        "Layout",
        "Floor",
        "Id",
        "Year",
        "Size",
        "Elevator",
        "Direction",
        "Renovation",
        "Price",
    ]
)
data

Unnamed: 0,Region,Garden,Layout,Floor,Id,Year,Size,Elevator,Direction,Renovation,Price
0,浦东,翠竹苑,3室1厅,低楼层(共6层),0,1995.0,77.84,无电梯,南北,简装,365.0
1,徐汇,百汇园,3室2厅,低楼层(共36层),1,1995.0,145.2,有电梯,南,精装,1449.0
2,浦东,仁恒河滨城(二期),3室2厅,中楼层(共26层),2,1995.0,161.94,有电梯,南北,精装,1630.0
3,浦东,财富海景花园,3室2厅,高楼层(共30层),3,1995.0,185,有电梯,南,精装,2000.0
4,浦东,仁恒滨江园,3室2厅,中楼层(共26层),4,1995.0,130.41,有电梯,东,精装,1360.0
...,...,...,...,...,...,...,...,...,...,...,...
1705,杨浦,水电新村,1室2厅,高楼层(共6层),1705,1995.0,57.62,无电梯,南,精装,260.0
1706,浦东,祥安菊苑,3室2厅,中楼层(共11层),1706,1995.0,126.58,,南,其他,250.0
1707,徐汇,梅陇四村,2室1厅,低楼层(共6层),1707,1995.0,64.58,无电梯,南北,其他,320.0
1708,嘉定,翔和雅苑,2室1厅,高楼层(共18层),1708,1995.0,69.6,,南,毛坯,260.0


## 2. 增加一列关于目前状况
（状况：state，可以使用1代表售出，0代表未售出）

In [5]:
# 获取data行数
rowNumber = data.shape[0]
print("There is {} rows in data.".format(rowNumber))

# 生成0和1的1维数组
states = np.random.randint(0, 2, size=(rowNumber,))

# 给data增加State列
data["State"] = states
data.head()

There is 1710 rows in data.


Unnamed: 0,Region,Garden,Layout,Floor,Id,Year,Size,Elevator,Direction,Renovation,Price,State
0,浦东,翠竹苑,3室1厅,低楼层(共6层),0,1995.0,77.84,无电梯,南北,简装,365.0,1
1,徐汇,百汇园,3室2厅,低楼层(共36层),1,1995.0,145.2,有电梯,南,精装,1449.0,1
2,浦东,仁恒河滨城(二期),3室2厅,中楼层(共26层),2,1995.0,161.94,有电梯,南北,精装,1630.0,0
3,浦东,财富海景花园,3室2厅,高楼层(共30层),3,1995.0,185.0,有电梯,南,精装,2000.0,0
4,浦东,仁恒滨江园,3室2厅,中楼层(共26层),4,1995.0,130.41,有电梯,东,精装,1360.0,1


## 3. 删除ID这一列无用的数据

In [6]:
# 删除ID列，直接操作原数据
data.drop("Id", axis=1, inplace=True)  # 也可以写成 data.drop("Id", axis="columns")
data.head()

Unnamed: 0,Region,Garden,Layout,Floor,Year,Size,Elevator,Direction,Renovation,Price,State
0,浦东,翠竹苑,3室1厅,低楼层(共6层),1995.0,77.84,无电梯,南北,简装,365.0,1
1,徐汇,百汇园,3室2厅,低楼层(共36层),1995.0,145.2,有电梯,南,精装,1449.0,1
2,浦东,仁恒河滨城(二期),3室2厅,中楼层(共26层),1995.0,161.94,有电梯,南北,精装,1630.0,0
3,浦东,财富海景花园,3室2厅,高楼层(共30层),1995.0,185.0,有电梯,南,精装,2000.0,0
4,浦东,仁恒滨江园,3室2厅,中楼层(共26层),1995.0,130.41,有电梯,东,精装,1360.0,1


## 4. 查找楼层（floor）小于7的所有房屋

In [8]:
# 写字符串处理函数
def getFloorNumber(floorInf):
    start = floorInf.find("共") + 1
    stop = floorInf.rfind("层")
    return int(floorInf[start:stop])


# 写个判断函数，生成布尔值
def isLowFloor(floor):
    isLow = []
    for i in floor:
        floorNumbers = getFloorNumber(i)
        if floorNumbers < 6:
            isLow.append(True)
        else:
            isLow.append(False)
    return isLow


lowFloor = data[isLowFloor(data["Floor"])]
lowFloor.head()

Unnamed: 0,Region,Garden,Layout,Floor,Year,Size,Elevator,Direction,Renovation,Price,State
11,闵行,绿野香洲(公寓),3室2厅,中楼层(共5层),1995.0,107.53,无电梯,南,精装,660.0,1
27,杨浦,安图新村,2室1厅,中楼层(共5层),1995.0,58.05,无电梯,南,精装,320.0,0
64,松江,万科白马花园(公寓),3室2厅,中楼层(共5层),1995.0,88.29,无电梯,南,精装,312.0,0
78,浦东,万科蓝山,联排别墅,3层2005年建板楼),,3室2厅,,223.79平米,南,1180.0,0
89,徐汇,谨斜小区,1室1厅,中楼层(共5层),1995.0,34.62,无电梯,南,精装,220.0,0


## 5. 修改楼层小于7的房源的电梯状态，统一改成无电梯，反之就设为有电梯

In [9]:
# 用标签索引修改
data.loc[lowFloor.index, "Elevator"] = "无电梯"
data

Unnamed: 0,Region,Garden,Layout,Floor,Year,Size,Elevator,Direction,Renovation,Price,State
0,浦东,翠竹苑,3室1厅,低楼层(共6层),1995.0,77.84,无电梯,南北,简装,365.0,1
1,徐汇,百汇园,3室2厅,低楼层(共36层),1995.0,145.2,有电梯,南,精装,1449.0,1
2,浦东,仁恒河滨城(二期),3室2厅,中楼层(共26层),1995.0,161.94,有电梯,南北,精装,1630.0,0
3,浦东,财富海景花园,3室2厅,高楼层(共30层),1995.0,185,有电梯,南,精装,2000.0,0
4,浦东,仁恒滨江园,3室2厅,中楼层(共26层),1995.0,130.41,有电梯,东,精装,1360.0,1
...,...,...,...,...,...,...,...,...,...,...,...
1705,杨浦,水电新村,1室2厅,高楼层(共6层),1995.0,57.62,无电梯,南,精装,260.0,1
1706,浦东,祥安菊苑,3室2厅,中楼层(共11层),1995.0,126.58,,南,其他,250.0,1
1707,徐汇,梅陇四村,2室1厅,低楼层(共6层),1995.0,64.58,无电梯,南北,其他,320.0,0
1708,嘉定,翔和雅苑,2室1厅,高楼层(共18层),1995.0,69.6,,南,毛坯,260.0,0
