### Linear classifier
    - with SVM loss
    - with Softmax loss

### required methods

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm

In [2]:
def label_encode(data):
    name_dict = {name: val  for val, name in enumerate(np.unique(data))}
    encoded_data = list(map(lambda x: name_dict[x], data))
    return np.array(encoded_data)

def data_split(data, label, frac=0.7):
    
    num_of_label = len(np.unique(label))
    count = [ np.sum(y_data == i) for i in range(num_of_label)]  # 각 label의 갯수
    prop = [int(each * frac) for each in count]  # 각 label의 비율
    
    train_idx = [np.where(label == each)[0][:prop[each]] for each in range(num_of_label)]  # 비율에 맞게 모은 인덱스
    test_idx = [np.where(label == each)[0][prop[each]:] for each in range(num_of_label)]  # 비율에 맞게 모은 인덱스

    X_train = np.concatenate([data[each] for each in train_idx])
    y_train = np.concatenate([label[each] for each in train_idx])
    
    X_test = np.concatenate([data[each] for each in test_idx])
    y_test = np.concatenate([label[each] for each in test_idx])
    
    return X_train, y_train, X_test, y_test

### Data load: Mnist

In [3]:
path = 'C:/Users/anjae/Documents/00_Dataset/mnist/train.csv'
DF = pd.read_csv(path)
\
X_data, y_data = DF.drop(['label'], axis=1).values, DF.label.values
X_train, y_train, X_test, y_test = data_split(X_data, y_data)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((29394, 784), (29394,), (12606, 784), (12606,))

### Data load: iris

In [4]:
path = 'C:/Users/anjae/Documents/00_Dataset/iris/iris.csv'
DF = pd.read_csv(path)

# X_data, y_data = DF.drop(['Species','caseno', 'PetalWidth'], axis=1).values, label_encode(DF.Species.values)
X_data, y_data = DF.drop(['Species','caseno'], axis=1).values, label_encode(DF.Species.values)
X_train, y_train, X_test, y_test = data_split(X_data, y_data)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((105, 4), (105,), (45, 4), (45,))

In [5]:
class LinearModel(object):
    def __init__(self, shape, num_classes, model='svm', delta = 1.0, gamma = 1, lr=0.01, epoch = 1000):
        
        self.w = np.random.uniform(0, 1, [num_classes, shape]) # weight matrix 생성
        self.b = np.zeros(num_classes)
        self.gamma = gamma
        self.lr = lr
        self.epoch = epoch
        self.model = model
        self.epoch = epoch
        
        if model == 'svm':
            self.delta = delta
    
    def fit(self, X_train, y_train):
        
        for _ in range(self.epoch):
            dw = np.zeros_like(self.w)
            db = np.zeros_like(range(len(np.unique(y_train))))  # 나중에 고쳐
            
            if self.model == 'softmax':
                # forward
                scores = X_train.dot(self.w.T) + self.b
                exp_scores = np.exp(scores - np.max(scores, axis=1, keepdims=True))
                probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
                correct_logprobs = -np.log(probs[np.arange(len(X_train)), y_train])
                              
                data_loss = np.sum(correct_logprobs) / len(X_train)
                reg_loss = 0.5 * self.gamma * np.sum(self.w*self.w)  # 1/2 * gamma * l2Norm(w)
                loss = data_loss + reg_loss
                                
                # backward
                dscores = probs.copy()    
                dscores[np.arange(len(X_train)), y_train] -= 1  # 전체 데이터셋에 prob -1 취하기
                
                #print(dscores)
                
                
                dscores /= len(X_train)  # 미리 나누어준다
                
                #print(dscores)
                
                dw = dscores.T.dot(X_train)
                #dw = X_train.dot(dscores.T)   # x * (q_i - 1) for all dataset
                dw += self.gamma * self.w     # reg gradient
                db = np.sum(dscores, axis = 0, keepdims=True)
                               
                self.w = self.w - (self.lr*dw)
                self.b = self.b - (self.lr*db)
                
            else:
                # SVM
                # forward
                scores = X_train.dot(self.w.T)
                correct_scores = np.array(scores[np.arange(len(X_train)), y_train]).reshape(len(X_train),-1)
            
                margins = scores - correct_scores + self.delta
                margins[np.arange(len(X_train)), y_train] = 0
                
                
                data_loss = np.sum(np.maximum(margins, 0)) / len(X_train)
                reg_loss = 0.5 * self.gamma * np.sum(self.w * self.w)
                
                loss = data_loss + reg_loss
                
                # backward
                for i, each in enumerate(margins):
                    
                    idx = np.where(each > 0)[0]
                    
                    # margin이 있으면 실행, 없으면 실행 안함
                    for j in idx:
                        dw[y_train[i],:] += -X_train[i, :]
                        dw[j, :] += X_train[i, :]
                        
                dw += self.gamma * self.w
                self.w = self.w - (self.lr * dw)
                
    def predict(self, X_test, y_test):
        if self.model == 'softmax':
            scores = X_test.dot(self.w.T) + self.b
            normalized_scores = scores - np.max(scores, axis=1, keepdims=True)
            exp_scores = np.exp(normalized_scores)
            probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
            predict = np.argmax(probs, axis = 1)
        else:
            total = len(y_test)
            count = 0
            scores = X_test.dot(self.w.T)
            predict = np.argmax(scores, axis=1)

        print(np.sum(predict == y_test) / len(y_test))

## SVM

In [6]:
n_classes = len(np.unique(y_train))
n_smaples = X_train.shape[0]
n_features = X_train.shape[1]

clf = LinearModel(n_features, n_classes, model='svm', epoch=1000, delta= 10, gamma=0.5)
clf.fit(X_train, y_train)

In [7]:
print('Acc')
clf.predict(X_test, y_test)

Acc
0.7555555555555555


## Softmax

In [8]:
n_classes = len(np.unique(y_train))
n_smaples = X_train.shape[0]
n_features = X_train.shape[1]

clf = LinearModel(n_features, n_classes, model='softmax', epoch=2000)
clf.fit(X_train, y_train)

In [9]:
print('Acc')
clf.predict(X_test, y_test)

Acc
0.7111111111111111


### reference - SVM / Hinge loss / GD
- https://stats.stackexchange.com/questions/155088/gradient-for-hinge-loss-multiclass
- https://cs231n.github.io/linear-classify/#svm
- http://cs231n.stanford.edu/slides/2017/cs231n_2017_lecture4.pdf
- https://zhuanlan.zhihu.com/p/30965514
- https://bruceoutdoors.wordpress.com/2016/05/06/cs231n-assignment-1-tutorial-q2-training-a-support-vector-machine/
- http://cs231n.stanford.edu/slides/2017/cs231n_2017_lecture3.pdf

### reference - softmax / cross_entropy
- http://machinelearningmechanic.com/deep_learning/2019/09/04/cross-entropy-loss-derivative.html
- https://madalinabuzau.github.io/2016/11/29/gradient-descent-on-a-softmax-cross-entropy-cost-function.html