# Softmax  
$ h = XW + b $  
$ p_i = {\exp(h_i)\over\sum_j{\exp(h_j)}} $  
$ L = -\sum{T_i\log(p_i)} $  
$ {\partial L\over\partial h_i} = p_i - T_i $  
$ {\partial h_i\over\partial W_i} = X $

$ {\partial L\over\partial h_i}$ 설명    
<img src="img/fig a-5.png">

In [113]:
from load_cifar_10 import *
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [124]:
class Softmax:
    """Linear softmax classifier trained with cross-entropy and gradient descent.

    The model computes ``h = X @ W + b`` and turns each row of ``h`` into class
    probabilities with a softmax. Parameters live in ``self.params`` under the
    keys ``'W'`` (shape ``(input_dim, num_classes)``) and ``'b'`` (shape
    ``(num_classes,)``).

    Args:
        input_dim: Number of features per sample. Defaults to 3072.
        num_classes: Number of output classes. Defaults to 10.
    """

    def __init__(self, input_dim=3072, num_classes=10):
        self.params = {}
        # Small random weights; the bias starts at one for every class.
        self.params['W'] = 0.0001 * np.random.randn(input_dim, num_classes)
        self.params['b'] = np.ones(num_classes)

    def _shifted_logits(self, X):
        """Return the logits with each row's maximum subtracted.

        Subtracting the row maximum does not change the softmax output but
        keeps ``np.exp`` from overflowing.
        """
        h = np.dot(X, self.params['W']) + self.params['b']
        return h - np.max(h, axis=1, keepdims=True)

    def forward(self, X):
        """Return the class probabilities for every row of ``X``.

        Args:
            X: 2-D array with one sample per row.

        Returns:
            Array with one row per sample and one column per class; each row
            sums to one.
        """
        a = np.exp(self._shifted_logits(X))
        return a / np.sum(a, axis=1, keepdims=True)

    def loss(self, X, T):
        """Return the mean cross-entropy of ``X`` against integer labels ``T``.

        The log-probabilities are computed directly from the logits
        (log-sum-exp) instead of taking ``np.log`` of the probabilities, so a
        probability that underflows to zero yields a large finite loss rather
        than ``inf``.

        Args:
            X: 2-D array with one sample per row.
            T: 1-D integer array of class indices, one per sample.

        Returns:
            The cross-entropy averaged over the samples. Averaging (rather
            than summing) keeps the value independent of the batch size.
        """
        T = np.asarray(T)
        n = T.shape[0]

        shifted = self._shifted_logits(X)
        log_norm = np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
        log_p = shifted - log_norm

        return -np.sum(log_p[np.arange(n), T]) / n

    def accuracy(self, X, T):
        """Return the fraction of rows of ``X`` whose arg-max class equals ``T``.

        Args:
            X: 2-D array with one sample per row.
            T: 1-D integer array of class indices, one per sample.

        Returns:
            A float between 0 and 1.
        """
        predict = np.argmax(self.forward(X), axis=1)
        # Compare with == instead of subtracting, so the label dtype
        # (e.g. an unsigned integer type) cannot affect the result.
        return float(np.mean(predict == np.asarray(T)))

    def gradient(self, X, T, learning_rate=0.0001):
        """Take one gradient-descent step on the mean cross-entropy.

        Updates ``self.params['W']`` and ``self.params['b']`` in place and
        returns nothing.

        Args:
            X: 2-D array with one sample per row.
            T: 1-D integer array of class indices, one per sample.
            learning_rate: Step size applied to both gradients.
        """
        T = np.asarray(T)
        n = T.shape[0]

        # dL/dh = p - onehot(T). forward() returns a fresh array, so the
        # one-hot target can be subtracted in place. The number of columns
        # comes from W rather than from max(T), because a batch does not
        # necessarily contain the largest class index.
        dh = self.forward(X)
        dh[np.arange(n), T] -= 1.0

        # Average the per-sample gradients over the batch.
        grad_W = np.dot(X.T, dh) / n
        grad_b = np.sum(dh, axis=0) / n

        self.params['W'] -= learning_rate * grad_W
        self.params['b'] -= learning_rate * grad_b

In [115]:
def Processing_data(train, test):
    """Flatten both image sets to 2-D float64 arrays and centre them.

    Each input is copied into a float64 array and reshaped so that the first
    axis is kept and all remaining axes are merged into one. The per-column
    mean of the flattened training set is then subtracted from both sets.

    Args:
        train: Array-like of training samples, first axis indexing samples.
        test: Array-like of test samples, first axis indexing samples.

    Returns:
        A ``(train, test)`` tuple of the centred 2-D float64 arrays.
    """
    def _as_flat_float(samples):
        # float64 copy first, then collapse everything after the sample axis.
        arr = np.array(samples, dtype=np.float64)
        return np.reshape(arr, (arr.shape[0], -1))

    flat_train = _as_flat_float(train)
    flat_test = _as_flat_float(test)

    # The mean comes from the training set only and is applied to both sets.
    train_mean = np.mean(flat_train, axis=0)
    flat_train -= train_mean
    flat_test -= train_mean

    return flat_train, flat_test

In [116]:
# Relative path of the directory handed to load_cifar_10_data.
cifar_10_dir = 'cifar-10-batches-py'

In [117]:
# Load the dataset; the loader returns seven values for the train and test
# splits (data, filenames, labels) plus the label names.
(
    train_data,
    train_filenames,
    train_labels,
    test_data,
    test_filenames,
    test_labels,
    label_names,
) = load_cifar_10_data(cifar_10_dir)

In [118]:
# Flatten both splits to 2-D float64 arrays and subtract the training mean.
train_data, test_data = Processing_data(train_data, test_data)

In [119]:
# Sanity check: display the shapes of the preprocessed arrays and labels.
# Every bare expression is shown because ast_node_interactivity is "all".
train_data.shape
train_labels.shape
test_data.shape
test_labels.shape

(50000, 3072)

(50000,)

(10000, 3072)

(10000,)

In [7]:
# train_data = train_data[:20]
# train_labels = train_labels[:20]
# test_data = test_data[:10]
# test_labels = test_labels[:10]

In [125]:
# Fresh classifier with newly initialised parameters.
softmax = Softmax()

In [126]:
# Training schedule.
train_size = train_data.shape[0]  # number of training samples
batch_size = 64                   # samples drawn per gradient step
iters_num = 1000                  # outer-loop repetitions

In [127]:
# Each outer repetition runs train_size // batch_size mini-batch updates.
steps_per_epoch = train_size // batch_size
for epoch in range(iters_num):
    for _ in range(steps_per_epoch):
        # Random mini-batch; np.random.choice draws indices with replacement
        # by default, so a sample may appear more than once in a batch.
        batch_mask = np.random.choice(train_size, batch_size)
        softmax.gradient(train_data[batch_mask], train_labels[batch_mask])

    # Report only on every 40th repetition.
    if epoch % 40 != 0:
        continue

    print(epoch,"번째 반복중입니다.")
    # Accuracy and loss are measured on the complete training set.
    # NOTE(review): the recorded run shows the loss hovering around 20-30
    # without a downward trend; the step size may be too large for inputs
    # that are only mean-centred, not rescaled. Confirm before reuse.
    print("Accuracy : " , softmax.accuracy(train_data, train_labels))
    print("Loss     : " , softmax.loss(train_data, train_labels))

0 번째 반복중입니다.
Accuracy :  0.31942000000000004
Loss     :  24.550373434471144
40 번째 반복중입니다.
Accuracy :  0.31588000000000005
Loss     :  26.362522091842845
80 번째 반복중입니다.
Accuracy :  0.3345
Loss     :  23.17246412954829
120 번째 반복중입니다.
Accuracy :  0.30800000000000005
Loss     :  27.512185051746727
160 번째 반복중입니다.
Accuracy :  0.37778
Loss     :  18.664059915569055
200 번째 반복중입니다.
Accuracy :  0.34968
Loss     :  27.567814797099274
240 번째 반복중입니다.
Accuracy :  0.34097999999999995
Loss     :  25.306544815382185
280 번째 반복중입니다.
Accuracy :  0.33220000000000005
Loss     :  27.214612810485136
320 번째 반복중입니다.
Accuracy :  0.32202
Loss     :  22.210010248294807
360 번째 반복중입니다.
Accuracy :  0.36973999999999996
Loss     :  23.98068845767166
400 번째 반복중입니다.
Accuracy :  0.33792
Loss     :  29.673246869081197
440 번째 반복중입니다.
Accuracy :  0.38593999999999995
Loss     :  21.789254451318826
480 번째 반복중입니다.
Accuracy :  0.34109999999999996
Loss     :  24.32039561828143
520 번째 반복중입니다.
Accuracy :  0.3698
Loss     :  22.13377

KeyboardInterrupt: 

In [111]:
# for i in range(50):
#     softmax.gradient(train_data, train_labels)
#     if i % 5 ==0:
#         print("Accuracy : " , softmax.accuracy(train_data, train_labels))
#         print("Loss     : " , softmax.loss(train_data, train_labels))

0.15976
28.213080503499956
0.22799999999999998
32.43722659612448
0.23197999999999996
31.042650906750453
0.23141999999999996
27.68210914175905
0.22162000000000004
36.63419428172896
0.21955999999999998
41.70288534450684
0.26961999999999997
26.68141851596112
0.2388
34.00342800448953
0.23462000000000005
31.040791766268374
0.23997999999999997
33.321209963051665
