In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
#使用三个变量预测价格，w1,w2,b
path = 'ex1data2.txt'
data = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])
data.head()

Unnamed: 0,Size,Bedrooms,Price
0,2104,3,399900
1,1600,3,329900
2,2400,3,369000
3,1416,2,232000
4,3000,4,539900


In [4]:
data.mean()

Size          2000.680851
Bedrooms         3.170213
Price       340412.659574
dtype: float64

In [5]:
data = (data - data.mean()) / data.std()
data.head()

Unnamed: 0,Size,Bedrooms,Price
0,0.13001,-0.223675,0.475747
1,-0.50419,-0.223675,-0.084074
2,0.502476,-0.223675,0.228626
3,-0.735723,-1.537767,-0.867025
4,1.257476,1.090417,1.595389


In [23]:
data.insert(0, 'Ones', 1)#添加一列方便向量运算

In [24]:
#从数据集中提取出X和y
cols = data.shape[1]
X = data.iloc[:,0:cols-1]#去掉最后一列得到X
y = data.iloc[:,cols-1:cols]#保留最后一列得到y

In [25]:
X.head()

Unnamed: 0,Ones,Size,Bedrooms
0,1,0.13001,-0.223675
1,1,-0.50419,-0.223675
2,1,0.502476,-0.223675
3,1,-0.735723,-1.537767
4,1,1.257476,1.090417


In [26]:
y.head()

Unnamed: 0,Price
0,0.475747
1,-0.084074
2,0.228626
3,-0.867025
4,1.595389


In [27]:
theta = np.random.rand(3,1)#随机生成theta，w1,w2和b

In [28]:
theta

array([[0.95437318],
       [0.73001461],
       [0.87233299]])

In [29]:
#X，y转为矩阵
X = np.matrix(X.values)
y = np.matrix(y.values)

In [30]:
def compute_predict_y(X,theta):
    predict_y = np.dot(X,theta)
    return predict_y

In [31]:
def computeCost(y,predict_y):
    m = y.shape[0]
    cost = np.sum(np.power((y-predict_y),2))/(2*m)
    return cost

In [32]:
def compute_gradient(X,y,predict_y):
    m = X.shape[0]
    gradients = np.dot(X.T,(predict_y - y))/m
    return gradients

In [33]:
def update_parm(theta,gradients,alpha):
    theta = theta - np.dot(alpha,gradients)
    return theta

In [34]:
def gradientDescent(X,y,theta,alpha,iters):
    cost_history = []
    theta_history = []
    theta_history.append(theta)
    for i in range(iters):
        predict_y = compute_predict_y(X,theta)
        cost = computeCost(y,predict_y)
        gradients = compute_gradient(X,y,predict_y)
        theta = update_parm(theta,gradients,alpha)
        cost_history.append(cost)
        theta_history.append(theta)
    return cost_history, theta_history, theta

In [35]:
theta = np.random.rand(3,1)#随机生成theta，w1,w2和b

In [36]:
theta

array([[0.39227581],
       [0.51703846],
       [0.981607  ]])

In [41]:
#存在数据太大计算cost的时候会溢出的错误，取决于theta的参数大小
#参考别人的方法，添加了数据预处理步骤 - 特征归一化
cost_history, theta_history, theta = gradientDescent(X,y,theta,0.1,1000)

In [42]:
cost_history

[0.23402803843002498,
 0.22310728925416667,
 0.21363956370555187,
 0.2053694711287668,
 0.19809735145088808,
 0.19166555484204842,
 0.18594830409600419,
 0.18084417597262106,
 0.17627050134646893,
 0.17215917476681705,
 0.16845350221716862,
 0.16510581606803637,
 0.16207565897081808,
 0.1593283913354066,
 0.15683411554857207,
 0.15456683818082362,
 0.15250381195569396,
 0.15062501428512093,
 0.14891273020477122,
 0.14735121565842463,
 0.1459264230677228,
 0.1446257755541249,
 0.143437979469395,
 0.14235286734205882,
 0.14136126518084638,
 0.1404548794535788,
 0.13962620009946783,
 0.1388684167211174,
 0.138175345703481,
 0.13754136646770437,
 0.13696136542299214,
 0.13643068645521472,
 0.13594508700614405,
 0.13550069896633823,
 0.135093993738552,
 0.13472175093523844,
 0.13438103025935122,
 0.13406914618690752,
 0.1337836451261744,
 0.1335222847746149,
 0.13328301543296403,
 0.13306396306762347,
 0.1328634139392189,
 0.1326798006376537,
 0.13251168938307703,
 0.13235776846849218,
 0.1

In [43]:
theta_history

[matrix([[0.13677812],
         [0.49685416],
         [0.46198086]]),
 matrix([[0.12310031],
         [0.50658652],
         [0.4328206 ]]),
 matrix([[0.11079028],
         [0.51696448],
         [0.40598094]]),
 matrix([[0.09971125],
         [0.52779769],
         [0.38119937]]),
 matrix([[0.08974012],
         [0.53892878],
         [0.35824952]]),
 matrix([[0.08076611],
         [0.55022823],
         [0.33693577]]),
 matrix([[0.0726895 ],
         [0.56158987],
         [0.31708879]]),
 matrix([[0.06542055],
         [0.57292725],
         [0.2985616 ]]),
 matrix([[0.0588785 ],
         [0.58417039],
         [0.28122635]]),
 matrix([[0.05299065],
         [0.59526321],
         [0.26497157]]),
 matrix([[0.04769158],
         [0.6061612 ],
         [0.24969973]]),
 matrix([[0.04292242],
         [0.61682955],
         [0.23532532]]),
 matrix([[0.03863018],
         [0.62724156],
         [0.22177308]]),
 matrix([[0.03476716],
         [0.63737726],
         [0.2089766 ]]),
 matri

In [44]:
theta

matrix([[-1.09276275e-16],
        [ 8.84765988e-01],
        [-5.31788197e-02]])