# In [None]:
# 根据《机器学习实战》编写
import numpy as np
import matplotlib.pyplot as plt

def loadDataSet(filename):
    '''Load a tab-separated numeric data file into feature rows and labels.

    Every non-blank line is split on tabs. The first ``numFeat`` fields are
    converted to float and stored as one feature row; the last field is
    converted to float and stored as that row's label. ``numFeat`` is the
    field count of the first non-blank line minus one.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)``: a list of feature rows (lists of
        float) and a list of float labels, in file order. Both are empty
        when the file has no non-blank lines.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to float.
        IndexError: If a line has fewer fields than the first data line.
    '''
    dataMat = []
    labelMat = []
    numFeat = None
    # The context manager guarantees the handle is closed, including when a
    # conversion error is raised part-way through the file.
    with open(filename) as fr:
        for line in fr:
            # Blank lines (typically a trailing newline at the end of the
            # file) carry no sample and would make float('') fail.
            if not line.strip():
                continue
            if numFeat is None:
                numFeat = len(line.split('\t')) - 1
            curLine = line.strip().split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat,labelMat

def standRegress(xArr,yArr):
    '''Ordinary least squares via the normal equations.

    Computes ``ws = (X^T X)^-1 X^T y``.

    Args:
        xArr: 2-D array-like of samples, one row per sample.
        yArr: 1-D array-like of targets, one per sample.

    Returns:
        The weights as a column ``numpy.matrix`` with one row per column of
        ``xArr``, or ``None`` (after printing a message) when the
        determinant of ``X^T X`` is exactly zero and it cannot be inverted.
    '''
    # np.asmatrix is what the np.mat alias pointed to; the alias itself was
    # removed in NumPy 2.0, so the explicit name works on every version.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)
    xTx = xMat.T*xMat
    if np.linalg.det(xTx) == 0.0:
        print('行列式为0，不能求逆')
        return None
    return np.linalg.inv(xTx)*xMat.T*yMat.T

def plotRegress(xArr,yArr):
    '''Fit a least-squares line and plot it over the raw samples.

    The samples are drawn as a red scatter plot and the fitted line in blue,
    both against column 1 of ``xArr``. The correlation-coefficient matrix
    between the predictions and ``yArr`` is printed before the figure is
    shown.

    Args:
        xArr: 2-D array-like of samples with at least two columns; column 1
            is used as the horizontal axis. (Column 0 is presumably a
            constant bias term -- confirm against the data file.)
        yArr: 1-D array-like of targets, one per sample.

    Returns:
        None. Nothing is drawn when ``standRegress`` reports a singular
        system and returns ``None``.
    '''
    w = standRegress(xArr,yArr)
    if w is None:
        # standRegress has already printed why no weights are available.
        return
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)
    xVals = np.asarray(xMat[:,1]).ravel()
    yVals = np.asarray(yMat).ravel()

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xVals,yVals,c='red')

    # Predict in the original sample order so each prediction is paired with
    # its own target when the correlation is computed.
    yHat = xMat*w
    corr = np.corrcoef(yHat.T,yMat)
    print('相关系数为：',corr)

    # Draw the line left to right by reordering through an index array; this
    # leaves xMat (and any caller-owned array behind it) untouched.
    order = xVals.argsort()
    ax.plot(xVals[order],np.asarray(yHat).ravel()[order],c='blue')
    plt.show()

def lwlr(testPoint,xArr,yArr,k=1.0):
    '''Locally weighted linear regression prediction for one query point.

    Each training sample ``x_i`` gets the Gaussian kernel weight
    ``exp(-|testPoint - x_i|^2 / (2 k^2))``. The weighted normal equations
    ``ws = (X^T W X)^-1 X^T W y`` are solved and ``testPoint * ws`` is
    returned.

    Args:
        testPoint: 1-D array-like query point with one value per column of
            ``xArr``.
        xArr: 2-D array-like of training samples, one row per sample.
        yArr: 1-D array-like of training targets, one per sample.
        k: Kernel bandwidth; smaller values give nearby samples more weight.

    Returns:
        The prediction as a 1x1 ``numpy.matrix``, or ``None`` (after
        printing a message) when the determinant of ``X^T W X`` is exactly
        zero and it cannot be inverted.
    '''
    # np.asmatrix is what the np.mat alias pointed to; the alias itself was
    # removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)
    point = np.asmatrix(testPoint)
    # Squared distance from the query point to every sample at once. This
    # also avoids storing a 1x1 matrix into a scalar slot, a conversion that
    # NumPy has deprecated.
    diff = np.asarray(xMat - point)
    sqDist = (diff**2).sum(axis=1)
    weights = np.asmatrix(np.diag(np.exp(sqDist/(-2.0*k**2))))
    xTx = xMat.T*weights*xMat
    if np.linalg.det(xTx) == 0:
        print('行列式为0，不能求逆')
        return None
    ws = np.linalg.inv(xTx) * xMat.T * weights * yMat.T
    return point*ws

def lwlrTest(testArr,xArr,yArr,k=1.0):
    '''Run locally weighted linear regression for every row of ``testArr``.

    Args:
        testArr: 2-D array-like of query points, one row per point.
        xArr: 2-D array-like of training samples.
        yArr: 1-D array-like of training targets.
        k: Kernel bandwidth forwarded to ``lwlr``.

    Returns:
        A 1-D float ``numpy.ndarray`` with one prediction per query point.
        A point for which ``lwlr`` returns ``None`` (singular system) is
        stored as NaN.
    '''
    m = np.shape(testArr)[0]
    yHat = np.zeros(m)
    for i in range(m):
        pred = lwlr(testArr[i],xArr,yArr,k)
        if pred is None:
            # lwlr has already reported the singular matrix; mark the point
            # as missing instead of aborting the whole batch.
            yHat[i] = np.nan
        else:
            # lwlr returns a 1x1 matrix; take the scalar out explicitly
            # because implicit size-1 array conversion is deprecated.
            yHat[i] = np.asarray(pred).item()
    return yHat

def plotlwlrRegress(xArr,yArr,k=0.01):
    '''Plot the locally weighted regression curve over the raw samples.

    Predictions are computed with ``lwlrTest`` at every training sample, the
    curve is drawn against column 1 of ``xArr`` in ascending order, and the
    samples are overlaid as a red scatter plot. The correlation-coefficient
    matrix between the predictions and ``yArr`` is printed before the figure
    is shown.

    Args:
        xArr: 2-D array-like of samples with at least two columns; column 1
            is used as the horizontal axis.
        yArr: 1-D array-like of targets, one per sample.
        k: Kernel bandwidth forwarded to ``lwlrTest``. Defaults to 0.01.

    Returns:
        None.
    '''
    yHat = lwlrTest(xArr,xArr,yArr,k=k)
    # np.asmatrix is what the np.mat alias pointed to; the alias itself was
    # removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)
    xVals = np.asarray(xMat[:,1]).ravel()
    # Index order that sorts the samples by the plotted feature, so the
    # curve is drawn left to right.
    order = xVals.argsort()
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(xVals[order],np.asarray(yHat).ravel()[order])
    ax.scatter(xVals,np.asarray(yMat).ravel(),c='red')
    corr = np.corrcoef(yHat.T,yMat)
    print('相关系数为：',corr)
    plt.show()

if __name__ == '__main__':
    # Script entry point: read the samples from data.txt, then show the
    # ordinary least-squares plot followed by the locally weighted plot.
    features,targets = loadDataSet('data.txt')
    for draw in (plotRegress,plotlwlrRegress):
        draw(features,targets)
