In [1]:
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

In [2]:
def map_feature(x1, x2, degree):

    x1.shape = (x1.size, 1)
    x2.shape = (x2.size, 1)
    out = np.ones(shape=(x1[:, 0].size, 1))

    m, n = out.shape

    for i in range(1, degree + 1):
        for j in range(i + 1):
            r = (x1 ** (i - j)) * (x2 ** j)
            out = np.append(out, r, axis=1)

    return out

def sigmoid(X):

    den = 1.0 + np.e ** (-1.0 * X)

    d = 1.0 / den

    return d

def logistic_sthocastic_gradient_descent(theta, epoc, alpha, lambda_, X, y):  

    parameters = X.shape[1]
    
    for j in np.arange(epoc):
                
        for i in np.arange(parameters):
            
            erro = sigmoid(np.dot(X[:,i].reshape(X[:,i].shape[0], 1), theta[i])) - y.reshape(y.shape[0],1)
            term = np.dot(erro, X[:,i])

            if (i == 0):
                theta[i] = np.sum(term)*alpha / parameters
                
            else:
                theta[i] = (np.sum(term)*alpha / parameters) + ((lambda_ / parameters) * theta[i])
        
        X, y = shuffle(X, y, random_state=np.random.RandomState(random.randint(1, 10000)))
            

    return theta


In [3]:
data = np.loadtxt('ex2data2.txt', delimiter=',')

X = data[:, 0:2]
y = data[:, 2]

alpha = 0.001
epoc = 1000
lambdasi = [0, 0.01, 0.25]
lambdasf = []

mapped = map_feature(X[:,0], X[:,1], 30)

mapped_with_ones = np.ones(shape=(mapped.shape[0], mapped.shape[1]+1))
mapped_with_ones[:, 1:] = mapped

initial_theta = np.zeros(shape=(mapped_with_ones.shape[1], 1))

for i in lambdasi:
    lambdasf.append(logistic_sthocastic_gradient_descent(initial_theta, epoc, alpha, i, mapped_with_ones, y))

In [9]:
np.savetxt("pesos.txt", lambdasf[2])

In [8]:
lambdasf[2]

array([[  2.39099204e-04],
       [  2.39220385e-04],
       [  1.30350174e-05],
       [  4.35867326e-05],
       [  5.88605876e-05],
       [ -6.05282288e-06],
       [  7.16744907e-05],
       [  1.42208604e-05],
       [  7.28918241e-06],
       [  3.67836066e-06],
       [  3.38442143e-05],
       [  2.91201981e-05],
       [ -1.24743414e-06],
       [  1.19806294e-05],
       [ -2.62468198e-06],
       [  4.06741787e-05],
       [  1.23481856e-05],
       [  2.80592719e-06],
       [  2.24058552e-06],
       [  4.34189786e-06],
       [  9.71405070e-07],
       [  2.75058102e-05],
       [  1.86222714e-05],
       [ -1.66945771e-07],
       [  4.49756511e-06],
       [ -4.05006377e-07],
       [  5.36661287e-06],
       [ -1.49704269e-06],
       [  2.98869261e-05],
       [  1.08924859e-05],
       [  1.50471758e-06],
       [  1.42134828e-06],
       [  1.31160642e-06],
       [  7.14126332e-07],
       [  2.96745060e-06],
       [ -3.65865014e-08],
       [  2.44699935e-05],
 

In [10]:
'''
u = np.linspace(-1, 1.5, 100)
v = np.linspace(-1, 1.5, 100)
z = np.zeros(shape=(len(u), len(v)))

for i in range(len(u)):
    for j in range(len(v)):
        aux = np.ones(shape=(lambdasf[2].shape[1], lambdasf[2].shape[0]))
        aux[:, 1:] = map_feature(np.array(u[i]), np.array(v[j]), 30)
        z[i, j] = aux.dot(np.array(lambdasf[2]))

z = z.T
'''
admitted = np.where(y == 1)
nadmitted = np.where(y == 0)

plt.scatter(X[admitted, 0], X[admitted, 1], marker='h', c='g')
plt.scatter(X[nadmitted, 0], X[nadmitted, 1], marker='x', c='r')

plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
plt.legend(['y = 1', 'y = 0', 'Decision boundary'])
plt.show()

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model

# mapFeature(x1, x2)
# feature mapping（特徴量マッピング）をする
# 引数：特徴量ベクトル x1, x2 （同じ次元nでなければならない）
# 返り値：特徴量行列 X (nx28行列)
# 6次までの場合 1, x1, x2, x1^2, x1*x2, x2, x1^3, .... x1*x2^5, x2^6 のように28列になる
def mapFeature(x1, x2):
    degree = 6
    out = np.ones(x1.shape) # 最初の列は 1
    for i in range(1, degree+1):  # 1 から degree までループ
        for j in range(0, i+1):   # 0 から i までループ
            out = np.c_[out, (x1**(i-j) * x2**j)] # 列を増やしていく
    return out

# ここから本文
data = pd.read_csv("C:\\Users\\darle\\Desktop\\ex2data2.txt", header=None)

x1 = np.array(data[0])
x2 = np.array(data[1])
y = np.array(data[2])

# サンプルデータをプロットする
pos = (y==1) # numpy bool index
neg = (y==0) # numpy bool index
plt.scatter(x1[pos], x2[pos], marker='+', c='b') # 正例は'+'
plt.scatter(x1[neg], x2[neg], marker='o', c='y') # 負例は'o'
plt.legend(['y = 0', 'y = 1'], scatterpoints=1)
plt.xlabel("Microchip Test 1")
plt.ylabel("Microchip Test 2")

# 特徴量マッピングをする Xはnx28行列
X = mapFeature(x1, x2)

# Logistic regression model 正則化あり
model = linear_model.LogisticRegression(penalty='l2', C=0.00001)
model.fit(X, y)

# Decision Boundary(決定境界)をプロットする
px = np.arange(-1.0, 1.5, 0.1)
py = np.arange(-1.0, 1.5, 0.1)
PX, PY = np.meshgrid(px, py) # PX,PYはそれぞれ 25x25 行列
XX = mapFeature(PX.ravel(), PY.ravel()) # 特徴量マッピング。引数はravel()で625次元ベクトルに変換して渡す。XXは 625x28行列
Z = model.predict_proba(XX)[:,1] # ロジスティック回帰モデルで予測。y=1の確率は結果の2列目に入っているので取り出す。Zは625次元ベクトル
Z = Z.reshape(PX.shape) # Zを25x25行列に変換
plt.contour(PX, PY, Z, levels=[0.5], linewidths=3) # Z=0.5の等高線が決定境界となる
plt.show()