## 单变量房价预测

In [None]:
# Install the required libraries from inside Jupyter Notebook (uncomment the lines you need)
#!pip install pandas
#!pip install matplotlib --upgrade
#!pip install -i https://pypi.tuna.tsinghua.edu.cn/simple seaborn
#!pip install -i https://pypi.tuna.tsinghua.edu.cn/simple numpy

In [None]:
# pandas: reads CSV files into a DataFrame (the recommended tabular data container)
import pandas as pd
# seaborn: a high-level plotting interface built on top of the 2D library matplotlib
import seaborn as sns
# Global seaborn style for all later plots: notebook context, white-grid background, dark palette
sns.set(context="notebook", style="whitegrid", palette="dark")

In [None]:
# Read the two-column CSV into DataFrame df0. Because `names` is given,
# pandas treats the file as having no header row and uses these column names.
df0 = pd.read_csv('data0.csv', names=['square', 'price'])
# seaborn.lmplot draws a scatter plot together with a fitted linear-regression line.
# x, y and data are passed by keyword: seaborn 0.11 deprecated passing them
# positionally and seaborn >= 0.12 rejects it with a TypeError.
sns.lmplot(x='square', y='price', data=df0, height=6, fit_reg=True)

In [None]:
# Print a summary of df0: number of rows, column names, non-null counts and dtypes
df0.info()

## 多变量房价预测

In [None]:
# matplotlib's pyplot interface (2D plotting library)
import matplotlib.pyplot as plt
# matplotlib's 3D plotting toolkit
# NOTE(review): mplot3d is not referenced by name in the visible cells; presumably it is
# imported so that the '3d' projection is registered on older matplotlib — confirm.
from mpl_toolkits import mplot3d
# Read the three-column CSV; column names are supplied explicitly via `names`
df1 = pd.read_csv('data1.csv', names=['square', 'bedrooms', 'price'])
# Preview the first five rows (head() only displays rows; it does not skip a header)
df1.head()

In [None]:
# Open a new figure and put a 3D axes (Axes3D) on it
fig = plt.figure()
ax = plt.axes(projection='3d')
# Name the x, y and z axes in a single call
ax.set(xlabel='square', ylabel='bedrooms', zlabel='price')
# 3D scatter plot of the raw data; marker color follows price on the 'Greens' colormap
ax.scatter3D(
    df1['square'],
    df1['bedrooms'],
    df1['price'],
    c=df1['price'],
    cmap='Greens',
)

## 数据规范化

In [None]:
# Helper used to standardize (z-score) the features
def normalize_feature(df):
    """Standardize every column of ``df`` to zero mean and unit standard deviation.

    Each column is transformed independently as ``(x - mean) / std``, where
    ``std`` is the sample standard deviation returned by ``Series.std()``
    (pandas default, ``ddof=1``).

    Args:
        df: DataFrame whose columns are all numeric.

    Returns:
        A new DataFrame with the same index and columns as ``df``; the input
        is not modified. A constant column (standard deviation exactly 0) is
        returned as all zeros instead of all NaN.
    """
    def _standardize(column):
        std = column.std()
        # A constant column has std == 0; dividing by it would turn every
        # value into NaN (0 / 0) and poison later computations, so divide by
        # 1 instead, which yields zeros for that column.
        if std == 0:
            std = 1.0
        return (column - column.mean()) / std

    return df.apply(_standardize)
# Normalize every column of df1 as "(x - mean) / standard deviation"
df = normalize_feature(df1)
# Preview the first five rows of the normalized frame (head() does not skip a header)
df.head()

In [None]:
# Start a fresh figure, as the first 3D plot cell does. Without it, plt.axes()
# attaches to whatever figure is still current, so on backends that keep the
# previous figure open the new axes would be drawn on top of the earlier plot.
fig = plt.figure()
# Create an Axes3D object on the new figure
ax = plt.axes(projection='3d')
# Name the three axes
ax.set_xlabel('square')
ax.set_ylabel('bedrooms')
ax.set_zlabel('price')
# 3D scatter plot of the columns of df; marker color follows df['price'] on the 'Reds' colormap
ax.scatter3D(df['square'], df['bedrooms'], df['price'], c=df['price'], cmap='Reds')

In [None]:
# Print a summary of df: number of rows, column names, non-null counts and dtypes
df.info()

## 数据处理：添加 ones 列（x0）

In [None]:
import numpy as np
# Bias term x0: a one-column DataFrame named 'ones' with one row per row of df, every value 1.0
ones = pd.DataFrame(np.ones((len(df), 1)), columns=['ones'])

In [None]:
# Print a summary of the ones frame: number of rows, column name, non-null count and dtype
ones.info()

In [None]:
# Prepend the constant x0 column by concatenating column-wise (axis=1).
# Any 'ones' column already present in df (left by an earlier run of this
# cell) is dropped first, so re-running the cell does not stack duplicate
# 'ones' columns onto df.
df = pd.concat([ones, df.drop(columns=['ones'], errors='ignore')], axis=1)
# Preview the first five rows of the combined frame
df.head()

In [None]:
# Print a summary of df after the 'ones' column was concatenated: rows, columns, non-null counts and dtypes
df.info()