In [1]:
import sklearn
import numpy as np

In [2]:
from sklearn.datasets import load_iris
datasets = load_iris()

In [3]:
x_data = datasets["data"]
x_data[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [4]:
y_data = datasets["target"]
y_data

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [5]:
y_data = y_data.reshape([-1,1])
y_data[:3]

array([[0],
       [0],
       [0]])

In [6]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()
enc.fit(y_data)  
y_data = enc.transform(y_data).toarray()
y_data[0]

array([1., 0., 0.])

In [7]:
from sklearn.preprocessing import MinMaxScaler

min_max_scaler = MinMaxScaler()
x_data_minmax = min_max_scaler.fit_transform(x_data)
x_data_minmax[:3]

array([[0.22222222, 0.625     , 0.06779661, 0.04166667],
       [0.16666667, 0.41666667, 0.06779661, 0.04166667],
       [0.11111111, 0.5       , 0.05084746, 0.04166667]])

In [8]:
x_0 =np.ones(x_data_minmax.shape[0])
x_data_minmax = np.column_stack((x_0, x_data_minmax))

x_data_minmax[:3]

array([[1.        , 0.22222222, 0.625     , 0.06779661, 0.04166667],
       [1.        , 0.16666667, 0.41666667, 0.06779661, 0.04166667],
       [1.        , 0.11111111, 0.5       , 0.05084746, 0.04166667]])

In [9]:
weights = np.random.uniform(size=(3,5)) # size=(class 수, feature 수)
weights

array([[0.05139105, 0.51737664, 0.97052329, 0.43422522, 0.26210936],
       [0.80122544, 0.9191734 , 0.20996817, 0.80428481, 0.96430556],
       [0.51203339, 0.74027678, 0.44031419, 0.18891026, 0.06584746]])

In [10]:
z = x_data_minmax.dot(weights.T) # (150, 5) dot (5, 3)
z.shape

(150, 3)

In [11]:
def softmax(z):
    e = np.exp(z) # (150, 3)
    p = e / np.sum(np.exp(z), axis=1).reshape([-1,1]) # 분모 : (150, 1), reshape을 안 하면 (1, 150)
    return p

In [12]:
softmax(z[:5]) # 한 행의 총합이 1이 되고, 각 클래스로 분류될 확률을 담은 리스트

array([[0.27132762, 0.41217565, 0.31649674],
       [0.24830464, 0.43220754, 0.31948783],
       [0.26255938, 0.41686105, 0.32057957],
       [0.2583081 , 0.42390184, 0.31779005],
       [0.27860343, 0.40547577, 0.31592081]])

In [13]:
np.sum(y_data[:2] * softmax(z[:2]), axis=1)

array([0.27132762, 0.24830464])

In [14]:
def cross_entropy_function(y, x, weights):
    z = x_data_minmax.dot(weights.T)
    result = - np.sum(
                np.sum(
                    (y * np.log(softmax(z))), axis=1).reshape((-1,1)) # (150, 3) * (150, 3), sum: (1, 150), reshape : (150, 1)
                ) # 스칼라값
    return result 

In [15]:
cross_entropy_function(y_data,x_data_minmax,weights)

183.56011882051672

In [26]:
def minimize_grdient(y, x, initial_weights, iterations = 500000, alpha=0.001):
    cost_history= []
    theta_history = []
    m = y.shape[0]
    theta = np.copy(initial_weights)
    
    number_of_classes = theta.shape[0]
    number_of_weights = theta.shape[1]
    
    for _ in range(iterations):
        original_theta = np.copy(theta)
        partial_entropy = y - softmax(x.dot(original_theta.T)) # (150, 3) - (150, 3)
        for k in range(number_of_classes):        
            for j in range(number_of_weights):
                partial_x = x[:, j] # (150,)
                theta[k][j]  = original_theta[k][j] + (
                    alpha* partial_entropy[:,k].dot(partial_x.T) ) /150
        if (_ % 10000) == 0:
            print(cross_entropy_function(y,x,theta)/150)
            cost_history.append(cross_entropy_function(y,x,theta))
    return theta, cost_history

            

In [27]:
# weights = minimize_grdient(y_data, x_data_minmax,weights)
theta, cost_history = minimize_grdient(y_data, x_data_minmax,weights)

1.2234822290326652
0.7435197336332294
0.6099931823850818
0.5375976817382121
0.4904309182921204
0.45620703933636586
0.42964060754196953
0.4080637144430257
0.389968098423936
0.37442722171272164
0.3608349876950891
0.34877505617466514
0.3379500678369514
0.32814078531992324
0.3191812420781497
0.3109429600531564
0.30332455663696084
0.29624468657645375
0.289637120203968
0.2834472316307154
0.27762944199344786
0.2721453245321274
0.26696217764510194
0.2620519348515897
0.25739032124789046
0.25295619295755306
0.24873101424807628
0.24469843947805459
0.24084397576535377
0.23715470845450018
0.2336190759085918
0.23022668338745603
0.22696814815639746
0.22383496974284237
0.22081942059069226
0.21791445337300103
0.21511362199716103
0.21241101393354797
0.2098011919626046
0.20727914379875367
0.2048402383360891
0.20248018748830077
0.20019501277691787
0.19798101596794965
0.19583475317489485
0.1937530119418696
0.19173279089879494
0.18977128164471704
0.18786585256821137
0.18601403435756347


In [28]:
rand_index= np.random.randint(0,150,30)
rand_index

array([142, 143, 122,  45, 110,  53,  86,  93,  59, 124,   5, 139,  17,
        54,   4,  67,  49,   4,   7,  89,  54,  73,  13,  72,  75,  55,
        26, 137,  92,  78])

In [29]:
y_pred = np.argmax(softmax(x_data_minmax[rand_index].dot(theta.T)),axis=1) 
y_pred

array([2, 2, 2, 0, 2, 1, 1, 1, 1, 2, 0, 2, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 2, 1, 1])

In [30]:
y_true = np.argmax(y_data[rand_index],axis=1) # argmax : 확률값 중 제일 큰 값의 인덱스를 반환
y_true

array([2, 2, 2, 0, 2, 1, 1, 1, 1, 2, 0, 2, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 2, 1, 1])

In [31]:
y_pred == y_true

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [32]:
sum(y_pred == y_true) / len(rand_index)

1.0