In [None]:
from sklearn import model_selection
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
def ReadExcel(pathname):
    """Load the data sheet and return a two-feature matrix plus binary labels.

    Parameters
    ----------
    pathname : str or path-like
        Excel workbook to read; the sheet named ``"Sheet1"`` is used.

    Returns
    -------
    dataArr : numpy.ndarray, shape (rows, 2), dtype float
        The first two columns of the sheet.
    labelArr : numpy.ndarray, shape (rows, 1), dtype int
        1 where the last column equals ``'Iris-setosa'``, otherwise 0.
    """
    df = pd.read_excel(pathname, sheet_name="Sheet1")

    # Only the first two columns are used as features. Converting to float
    # here avoids handing object-dtype arrays to the numeric code downstream.
    dataArr = df.iloc[:, 0:2].to_numpy(dtype=float)

    # Encode the class column in one vectorised comparison. The label is
    # always taken from the last column, so encoding and slicing agree.
    is_setosa = df.iloc[:, -1] == 'Iris-setosa'
    labelArr = is_setosa.to_numpy(dtype=int).reshape(-1, 1)

    return dataArr, labelArr

In [None]:
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array-like
        Input values; object-dtype arrays holding numbers are accepted.

    Returns
    -------
    numpy.matrix
        A float matrix (always 2-D) with the logistic function applied to
        every element.
    """
    # The explicit float conversion is required: on an object-dtype input
    # np.exp fails with "'float' object has no attribute 'exp'".
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0; it is the
    # same function and exists in NumPy 1.x as well.
    z = np.asmatrix(z, dtype=float)

    return 1.0 / (1 + np.exp(-z))

In [None]:
# Training: full-batch gradient descent on the logistic-regression loss.
def fit(X,y,eta=0.01,n_iters=5000,eps=1e-8):
    """Fit a binary logistic-regression model by full-batch gradient descent.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature rows. Any number of feature columns is accepted.
    y : array-like, shape (n_samples,) or (n_samples, 1)
        Binary targets (0 or 1).
    eta : float
        Step size applied to the summed (not averaged) gradient.
    n_iters : int
        Number of gradient steps; all of them are always executed.
    eps : float
        Kept for backward compatibility. It is not used: no convergence
        check or early stopping is performed.

    Returns
    -------
    weights : 2-D array, shape (n_features + 1, 1)
        Learned parameters; row 0 is the bias term. After at least one
        iteration the container type follows whatever ``sigmoid`` returns.
    data : numpy.ndarray, shape (n_samples, n_features + 1)
        The design matrix that was trained on (a column of ones followed by
        the features).
    """
    # Work in float so every step is a vectorised numeric operation rather
    # than element-by-element arithmetic on Python objects.
    X = np.asarray(X, dtype=float)
    # Force a column vector; a 1-D y would otherwise broadcast the residual
    # to an (n, n) matrix.
    y = np.asarray(y, dtype=float).reshape(-1, 1)

    # The model is w.x + b: prepend a column of ones so b is learned as
    # weights[0].
    bias_column = np.ones((X.shape[0], 1))
    data = np.c_[bias_column, X]

    # One weight per design-matrix column instead of a hard-coded 3.
    weights = np.ones((data.shape[1], 1))

    for _ in range(n_iters):
        y_sig = sigmoid(data.dot(weights))
        residual = y_sig - y  # prediction error per sample
        # Gradient of the log-loss is data^T (sigmoid(data w) - y).
        weights = weights - data.transpose().dot(residual) * eta

    return weights, data

In [None]:
def pre(test_X,w,par=1):
    """Predict 0/1 labels for test samples from trained weights.

    Parameters
    ----------
    test_X : numpy.ndarray
        With ``par == 1`` a single sample (it is flattened and a 1 is put in
        front of it); otherwise a 2-D batch with one sample per row.
    w : array-like
        Weight column as returned by ``fit`` (bias first).
    par : int
        1 selects single-sample mode, any other value selects batch mode.

    Returns
    -------
    The result of ``np.dot(design, w)`` with its first ``n`` entries
    overwritten by 1 (score > 0) or 0, where ``n`` is 1 in single-sample
    mode and the number of rows of ``test_X`` in batch mode.
    """
    n_scored = test_X.shape[0]

    if par == 1:
        # Single sample: prepend the bias input; only one score is produced.
        design = np.insert(test_X, 0, [1])
        n_scored = 1
    else:
        # Batch: prepend a column of ones as the bias input.
        design = np.c_[np.ones(np.shape(test_X)[0]), test_X]

    # sigmoid(z) > 0.5 exactly when z > 0, so thresholding the linear score
    # at zero gives the predicted class without evaluating the sigmoid.
    scores = np.dot(design, w)
    for idx in range(n_scored):
        scores[idx] = 1 if scores[idx] > 0 else 0

    return scores

# Accuracy: compare predicted labels with true labels position by position
# and count the positions where they agree.
def accuracy(y_pre,y,par):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_pre : indexable
        Predicted labels.
    y : indexable
        True labels; must be 2-D (``y.shape`` is unpacked into two values)
        unless ``par == 1``.
    par : int
        1 means a single sample is scored (only index 0 is compared); any
        other value scores ``y.shape[0]`` samples.

    Returns
    -------
    float
        Number of matching positions divided by the number scored.
    """
    if par == 1:
        n_samples = 1
    else:
        n_samples, _ = y.shape

    hits = sum(1 for idx in range(n_samples) if y_pre[idx] == y[idx])
    return hits / n_samples

In [None]:
def only_one():
    """Train once on the whole sheet and hand the result to ``plot_pre``.

    Reads ``./data.xlsx`` through ``ReadExcel``, fits on every row, then
    calls ``plot_pre(design_matrix, labels, weights)``.
    NOTE(review): ``plot_pre`` is not defined in the visible part of this
    notebook; presumably it draws the fitted boundary. Confirm it exists.
    """
    features, labels = ReadExcel('./data.xlsx')
    weights, design = fit(features, labels)
    plot_pre(design, labels, weights)

In [None]:
# Leave-one-out evaluation
def leave_one():
    """Run leave-one-out validation on ``./data.xlsx`` and print the accuracy.

    Each row is held out in turn: the model is fitted on all other rows,
    the held-out row is predicted in single-sample mode, and the per-row
    scores are averaged over the number of rows.
    """
    X, y = ReadExcel('./data.xlsx')
    n_samples = X.shape[0]
    score_total = 0

    for held_out in range(n_samples):
        # Train on everything except the held-out row.
        weights, _ = fit(
            np.delete(X, held_out, axis=0),
            np.delete(y, held_out, axis=0),
        )
        # par=1 selects the single-sample path of pre/accuracy.
        prediction = pre(X[held_out], weights, 1)
        score_total += accuracy(prediction, y[held_out], 1)

    print('''LeaveOneOut's Accuracy: ''', score_total / n_samples)

In [None]:
# k-fold cross-validation (10 folds by default)
def cross_val(n_splits=10):
    """Run k-fold cross-validation on ``./data.xlsx`` and print the accuracy.

    The rows are cut into ``n_splits`` contiguous folds in file order (no
    shuffling). Each fold is used once as the test set while the model is
    fitted on the remaining rows. When the row count is not a multiple of
    ``n_splits`` the leftover rows are spread over the first folds, so every
    row is tested exactly once.

    Parameters
    ----------
    n_splits : int
        Number of folds; must be at least 2 and at most the number of rows.

    Returns
    -------
    float
        Mean of the per-fold accuracies (also printed).

    Raises
    ------
    ValueError
        If ``n_splits`` is below 2 or exceeds the number of rows.
    """
    X, y = ReadExcel('./data.xlsx')
    total = X.shape[0]

    if n_splits < 2:
        raise ValueError("n_splits must be at least 2, got {}".format(n_splits))
    if total < n_splits:
        # An empty test fold would otherwise fail later with an obscure error.
        raise ValueError(
            "cannot split {} samples into {} folds".format(total, n_splits)
        )

    fold_scores = []
    # array_split yields equal folds when total divides evenly and otherwise
    # distributes the remainder, instead of silently dropping trailing rows.
    for test_index in np.array_split(np.arange(total), n_splits):
        test_X = X[test_index]
        test_y = y[test_index]

        train_X = np.delete(X, test_index, axis=0)
        train_y = np.delete(y, test_index, axis=0)

        weights, _ = fit(train_X, train_y)

        # par=0 selects the batch path of pre/accuracy.
        y_pre = pre(test_X, weights, 0)
        fold_scores.append(accuracy(y_pre, test_y, 0))

    mean_accuracy = sum(fold_scores) / n_splits
    print("{}-foldCrossValidation's Accuracy: ".format(n_splits), mean_accuracy)
    return mean_accuracy

In [None]:
# Entry point: run both evaluation protocols, cross-validation first.
if __name__ == '__main__':
    for evaluate in (cross_val, leave_one):
        evaluate()