In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import linalg
import csv
%matplotlib inline

In [4]:
# Lower bound used when clipping the Newton weights yhat * (1 - yhat) in
# LogisticRegression.fit; it keeps them strictly positive before the 1 / r division.
THRESHMIN = 1e-10

In [5]:
# Definition of the sigmoid (logistic) function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    For large negative ``x`` the intermediate ``exp(-x)`` overflows to ``inf``.
    That overflow is harmless here because ``1 / (1 + inf)`` evaluates to 0.0,
    which is the correct limit, so the overflow floating-point error is
    explicitly ignored for this expression instead of surfacing as a
    ``RuntimeWarning`` (or an exception under stricter error settings).

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Input value(s); must support unary negation.

    Returns
    -------
    NumPy scalar or ndarray
        Sigmoid of ``x``, with the same shape as ``x``.
    """
    # Only the overflow category is silenced; other floating-point error
    # settings of the caller stay in effect.
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-x))

In [8]:
# Logistic regression classifier fitted with Newton's method
class LogisticRegression:
    """Binary logistic regression trained with Newton's method.

    Parameters
    ----------
    tol : float
        Iteration stops once the mean absolute change of the weights between
        two consecutive Newton steps is no larger than this value.
    max_iter : int
        Upper bound on the number of Newton steps.
    random_seed : int
        Seed of the ``numpy.random.RandomState`` used to draw the initial
        weights.

    Attributes
    ----------
    w_ : numpy.ndarray of shape (n_features + 1,) or None
        Weights with the intercept term first; ``None`` until ``fit`` is
        called.
    """

    def __init__(self, tol=0.001, max_iter=3, random_seed=0):
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = np.random.RandomState(random_seed)
        self.w_ = None

    def fit(self, X, y):
        """Estimate the weights from training data and store them in ``w_``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features (lists are accepted and converted to float64).
        y : array-like of shape (n_samples,)
            Training targets; the prediction rule in ``predict`` maps to the
            labels 0 and 1.

        Returns
        -------
        None

        Raises
        ------
        Whatever ``scipy.linalg.solve`` raises when the Newton system cannot
        be solved (e.g. a singular matrix).
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y, dtype=np.float64)
        # Draw the initial weights (one per feature plus the intercept).
        self.w_ = self.random_state.randn(X.shape[1] + 1)
        # x0 is always 1, so prepend a column of ones to X; w_[0] then acts
        # as the intercept.
        Xtil = np.c_[np.ones(X.shape[0]), X]
        # Start with an infinite difference so the loop body runs at least
        # once (provided max_iter > 0).
        diff = np.inf
        n_iter = 0
        # Solve by Newton's method.
        while diff > self.tol and n_iter < self.max_iter:
            # Linear predictor and predicted probabilities for current weights.
            z = np.dot(Xtil, self.w_)
            yhat = sigmoid(z)
            # Diagonal of R, clipped from below so that 1 / r stays finite.
            r = np.clip(yhat * (1 - yhat), THRESHMIN, np.inf)
            # X^T R and X^T R X (the product X^T R is computed once and reused).
            XR = Xtil.T * r
            XRX = np.dot(XR, Xtil)
            # Keep the weights of the previous step for the convergence check.
            w_prev = self.w_
            # Right-hand side: X^T R (X w - R^{-1} (yhat - y)).
            b = np.dot(XR, z - 1 / r * (yhat - y))
            # New weights are the solution of (X^T R X) w = b.
            self.w_ = linalg.solve(XRX, b)
            # Mean absolute change between the previous and the new weights.
            diff = abs(w_prev - self.w_).mean()
            n_iter += 1

    def predict(self, X):
        """Predict a 0/1 label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            1 where the predicted probability is strictly greater than 0.5,
            otherwise 0.

        Raises
        ------
        RuntimeError
            If called before ``fit`` (``w_`` is still ``None``).
        """
        if self.w_ is None:
            raise RuntimeError(
                "LogisticRegression.predict() was called before fit()")
        X = np.asarray(X, dtype=np.float64)
        Xtil = np.c_[np.ones(X.shape[0]), X]
        yhat = sigmoid(np.dot(Xtil, self.w_))
        return np.where(yhat > .5, 1, 0)

In [11]:
# Load the data: row[1] is the class label, row[2:] are the feature values
n_test = 100  # number of trailing samples held out for testing
X = []
y = []
# newline="" lets the csv module handle line endings itself, as its
# documentation asks for file objects passed to csv.reader.
with open("wdbc.data", newline="") as fp:
    for row in csv.reader(fp):
        if not row:
            # A blank line yields an empty row; skip it instead of failing
            # on row[1].
            continue
        # Label "B" is encoded as 0, every other label as 1.
        y.append(0 if row[1] == "B" else 1)
        # Features are kept as strings here; they are converted to float
        # when the arrays are built.
        X.append(row[2:])

In [12]:
# Prediction with logistic regression: convert the data to float arrays,
# hold out the last n_test samples and fit on the remaining ones
X = np.array(X, dtype=np.float64)
y = np.array(y, dtype=np.float64)
head, tail = slice(None, -n_test), slice(-n_test, None)
X_train, X_test = X[head], X[tail]
y_train, y_test = y[head], y[tail]
model = LogisticRegression(tol=0.01)
model.fit(X_train, y_train)

  


In [13]:
# Classify the held-out samples and report how many labels were matched
y_predict = model.predict(X_test)
is_hit = np.equal(y_test, y_predict)
n_hits = is_hit.sum()
accuracy = n_hits / n_test
print("Accuracy: {}/{} = {}".format(n_hits, n_test, accuracy))

Accuracy: 97/100 = 0.97


  
