# 线性回归（多变量）
## 房价预测
LinearRegressionData.txt：面积、卧室数、房价

In [60]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

<img src='Img/dataset_img.png'>

In [61]:
# Load the housing data set. The file has no header row (header=None), so
# the three column names are supplied explicitly.
path = 'LinearRegressionData.txt'
data = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])
data.head()  # show the first 5 rows

Unnamed: 0,Size,Bedrooms,Price
0,2104,3,399900
1,1600,3,329900
2,2400,3,369000
3,1416,2,232000
4,3000,4,539900


In [62]:
# Add a constant column of ones (x0) at position 0 so that theta[0] acts as
# the intercept term.
# NOTE: re-running this cell on the same frame raises ValueError, because a
# column named 'Ones' already exists.
data.insert(0, 'Ones', 1)
data.head()

Unnamed: 0,Ones,Size,Bedrooms,Price
0,1,2104,3,399900
1,1,1600,3,329900
2,1,2400,3,369000
3,1,1416,2,232000
4,1,3000,4,539900


In [63]:
# set X (training data) and Y (target variable)
# data.shape is (examples, columns); the last column is the target
cols = data.shape[1]
X = data.iloc[:, :cols-1]  # every column except the last; still a DataFrame with column labels and row index
Y = data.iloc[:, cols-1:cols]  # last column only; slicing keeps it two-dimensional
X.head()

Unnamed: 0,Ones,Size,Bedrooms
0,1,2104,3
1,1,1600,3
2,1,2400,3
3,1,1416,2
4,1,3000,4


In [64]:
# convert to matrices and initialize theta
X = np.matrix(X.values)  # .values keeps only the raw data, without column labels or row index
Y = np.matrix(Y.values)
# One zero per column of X (Ones, Size, Bedrooms), stored as a 1x3 row.
# NOTE(review): the length 3 is hard-coded rather than derived from X.
theta = np.matrix(np.array([0, 0, 0]))

### 正规方程的解析解
<img src='Img/analytic_solutions.png'>

In [72]:
# Solve the normal equation (X^T X) theta = X^T Y for the optimal parameters.
# np.linalg.solve is used instead of forming the explicit inverse of X^T X:
# it gives the same solution with less round-off error and less work.
theta = np.linalg.solve(X.T @ X, X.T @ Y)

In [73]:
# Show the fitted parameters as a 3x1 column, in the column order of X: Ones, Size, Bedrooms
print(theta)

[[89597.9095428 ]
 [  139.21067402]
 [-8738.01911233]]


In [83]:
# Predict a price from the two input features
def predictPrice(x1, x2, theta):
    """Evaluate the linear model for a single example.

    theta is indexed as a 1x3 row: theta[0, 0] is the intercept and
    theta[0, 1], theta[0, 2] are the weights applied to x1 and x2.
    """
    intercept = theta[0, 0]
    weight_1 = theta[0, 1]
    weight_2 = theta[0, 2]
    return intercept + weight_1*x1 + weight_2*x2

# Sanity check on the first training example: Size=2104, Bedrooms=3 gives about 356283.
# theta is a 3x1 column here, so it is transposed into the 1x3 row that predictPrice indexes.
price = predictPrice(2104, 3, theta.T)
price

356283.1103388978

## 梯度下降法
### 代价函数
<img src='Img/cost.png'>

In [91]:
# Cost function
def computeCost(X, Y, theta):
    """Return the halved mean squared error of the linear model.

    Computes sum((X @ theta.T - Y) ** 2) / (2 * m), where m = len(X).

    Args:
        X: (m, n) design matrix, np.matrix or 2-D ndarray.
        Y: (m, 1) target column.
        theta: (1, n) parameter row.

    Returns:
        The cost as a scalar.
    """
    # `@` is always a matrix product, whereas `*` is one only for np.matrix;
    # using it keeps the result correct for plain ndarrays as well.
    residual = X @ theta.T - Y
    return np.sum(np.power(residual, 2)) / (2 * len(X))

### 梯度更新公式
<img src='Img/gradient.png'>

In [93]:
# Gradient descent
def gradientDescent(X, Y, theta, alpha, iters):
    """Fit linear-regression parameters by batch gradient descent.

    Args:
        X: (m, n) design matrix (np.matrix or 2-D ndarray).
        Y: (m, 1) target column.
        theta: (1, n) starting parameters; left unmodified.
        alpha: learning rate.
        iters: number of update steps.

    Returns:
        (theta, cost): the final parameters as a 1 x n np.matrix, and an
        ndarray of length `iters` holding the cost after each step.
    """
    # Work on a float copy: the caller's theta stays untouched and an
    # integer-typed start vector cannot truncate the updates.
    theta = np.matrix(theta, dtype=float)
    m = len(X)
    cost = np.zeros(iters)

    for i in range(iters):
        error = X @ theta.T - Y
        # All partial derivatives at once: entry j is sum(error * X[:, j]) / m.
        # Every step starts from the current theta, so a non-zero starting
        # point is honoured (previously the descent always restarted at 0).
        gradient = (X.T @ error).T / m
        theta = theta - alpha * gradient
        cost[i] = computeCost(X, Y, theta)

    return theta, cost

In [100]:
# Reload the raw data (the frame used earlier had an 'Ones' column inserted)
path = 'LinearRegressionData.txt'
data = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])

# Keep the per-column mean, std, min, max and the raw values of the unscaled data;
# means and stds are used later to map the learned parameters back to the original scale
means = data.mean().values
stds = data.std().values
mins = data.min().values
maxs = data.max().values
data_ = data.values
data.describe()

Unnamed: 0,Size,Bedrooms,Price
count,47.0,47.0,47.0
mean,2000.680851,3.170213,340412.659574
std,794.702354,0.760982,125039.899586
min,852.0,1.0,169900.0
25%,1432.0,3.0,249900.0
50%,1888.0,3.0,299900.0
75%,2269.0,4.0,384450.0
max,4478.0,5.0,699900.0


### 特征缩放
与用正规方程求解析解不同，梯度下降法需要先对特征进行缩放，这有助于梯度下降更快收敛。
如下左图所示，未缩放的特征会使代价函数的等高线呈扁平的椭圆形，梯度需要更新很多次才能慢慢到达最低点；而经过特征缩放后（右图），所需的梯度更新次数会少很多。
<img src='Img/feature_scaling.png'>

In [None]:

# Feature scaling: standardize every column (Size, Bedrooms and the Price target)
# by subtracting its mean and dividing by its standard deviation
data = (data - data.mean()) / data.std()


In [None]:
# Add a constant column of ones (x0) at position 0 so that theta[0] acts as
# the intercept term; it is inserted after scaling, so it stays exactly 1.
data.insert(0, 'Ones', 1)
data.head()

In [95]:
# set X (training data) and Y (target variable)
cols = data.shape[1]
X = data.iloc[:, :cols-1]
Y = data.iloc[:, cols-1:cols]

# convert to matrices and initialize theta
X = np.matrix(X.values)
Y = np.matrix(Y.values)
# One float zero per column of X, so the start vector follows the number of
# features instead of a hard-coded, integer-typed length of 3.
theta = np.matrix(np.zeros((1, X.shape[1])))

# perform linear regression on the data set
alpha = 0.01  # learning rate
iters = 1000  # number of gradient-descent steps
g, cost = gradientDescent(X, Y, theta, alpha, iters)

### 参数转为缩放前
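这里要注意：参数是在标准化后的数据上学到的，需要换算回对应未缩放数据的参数，才能直接用原始的面积、卧室数来预测房价。记 $\mu_j,\sigma_j$ 为第 $j$ 个特征的均值和标准差，$\mu_y,\sigma_y$ 为房价的均值和标准差，则 $\theta_0' = \sigma_y\left(\theta_0 - \sum_j \theta_j \mu_j / \sigma_j\right) + \mu_y$，$\theta_j' = \theta_j \sigma_y / \sigma_j$。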

In [97]:
def theta_transform(theta, means, stds):
    """Map parameters learned on standardized data back to the original scale.

    With every column standardized as (v - mean) / std, the fitted model
        (y - mean_y) / std_y = theta[0] + sum_j theta[j] * (x_j - mean_j) / std_j
    rearranges to
        y = [(theta[0] - sum_j theta[j] * mean_j / std_j) * std_y + mean_y]
            + sum_j (theta[j] * std_y / std_j) * x_j

    Args:
        theta: parameters for the scaled data, intercept first; an ndarray
            of shape (n,) or (n, 1).
        means: per-column means of the unscaled data, one entry per feature
            followed by the target; same layout as theta.
        stds: per-column standard deviations, same layout as means.

    Returns:
        The rescaled parameters as a new float array of shape (1, n).
        The input theta is not modified.
    """
    # Copy as float: the caller's array is left intact, and an integer-typed
    # theta cannot truncate the rescaled values.
    theta = np.array(theta, dtype=float)
    feature_means, target_mean = means[:-1], means[-1]
    feature_stds, target_std = stds[:-1], stds[-1]

    shift = np.sum(feature_means * theta[1:] / feature_stds)
    theta[0] = (theta[0] - shift) * target_std + target_mean
    theta[1:] = theta[1:] * target_std / feature_stds
    return theta.reshape(1, -1)

In [98]:
# g is the 1x3 parameter row returned by gradientDescent; turn it into a plain
# ndarray column (np.array copies, so g itself is left untouched)
g_ = np.array(g.reshape(-1, 1))
# Reshape the saved statistics into columns so they line up with g_ element by element
means = means.reshape(-1, 1)
stds = stds.reshape(-1, 1)
transform_g = theta_transform(g_, means, stds)
transform_g

array([[88307.21151185,   138.22534685, -7709.05876589]])

In [99]:
# Predict a price
def predictPrice(x, y, theta):
    """Return theta[0, 0] + theta[0, 1]*x + theta[0, 2]*y for one example.

    theta is indexed as a 1x3 row holding the intercept followed by the
    weights for x and y.
    """
    prediction = theta[0, 0]
    prediction = prediction + theta[0, 1]*x
    prediction = prediction + theta[0, 2]*y
    return prediction

# Size=2104, Bedrooms=3 (the first training example) gives about 356006
# with the rescaled gradient-descent parameters
price = predictPrice(2104, 3, transform_g)
price

356006.16498795623

## 梯度下降法与正规方程解的结果不同
这是因为梯度下降法是迭代逼近：在有限的迭代次数内（此处 alpha=0.01、iters=1000），它只能到达最低点的邻近位置，而非精确的最低点；正规方程给出的解析解则正好位于最低点，所以两者结果略有差异。增加迭代次数后，梯度下降的结果会进一步接近解析解。