In [6]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from matplotlib.colors import ListedColormap
import matplotlib.patches as mpatches

In [7]:
def _gradient_descent(self, X, y):
     """
     X : ndarray of the following form, shape (n_samples, n_features)
       learning data
    
     y : ndarray of the form, shape (n_samples)
       training data results
      
     self.lam : float
       regularization parameter
      
     self.lr : float
       learning rate
    
     self.coef_ : ndarray of the following form, shape (n_features)
       parameters for X
      
     coef_reg : ndarray of the following form, shape (n_features)
       parameters used for regularization
    
     self.h0 : ndarray of the following form, shape (n_samples)
       Predicted value of train by sigmoid function (process function)
    
     Returns
     -------
     self.theta_ : ndarray of the following form, shape (n_samples, 1)
       Estimation result by linear assumption function
      
     """
     # configure kernel
     if self.kernel=='linear':
         K = np.dot(X, X.T)
     elif self.kernel=='poly':
         K = self.gamma*((np.dot(X, X.T) + self.theta)**self.d)
        
     # Tilt
     delta = 1 - y* np.dot(K.T, self.coef_*y)

# for version
# delta = np.zeros(X.shape[0])
# for i in range(X.shape[0]):
# # Columns of K are multiplied by vectors of y and coef_: delta[i] = np.sum(self.coef_[i]*y[i]*K[:, i])
# delta[i] = np.sum(self.coef_*y*K[:, i])
# delta = 1 - y * delta
        
     # update formula
     self.coef_ = self.coef_ + self.lr*delta
    
     # Since self.coef_ is greater than or equal to 0, set elements less than or equal to 0 to 0
     self.coef_ = np.where(self.coef_<0, 0, self.coef_)
    
     return

In [8]:
import numpy as np
class ScratchSVMClassifier():
    """
    Scratch implementation of SVM classifier

     Parameters
     ----------
     num_iter : int
       number of iterations
     lr : float
       learning rate
     kernel : str
       kernel type. Linear kernel (linear) or polynomial kernel (poly)
     threshold : float
       threshold for choosing support vectors
     verbose : bool
       True to print the learning process

     Attributes
     ----------
     self.n_support_vectors : int
       number of support vectors
     self.index_support_vectors : ndarray of the following form, shape (n_support_vectors,)
       support vector index
     self.X_sv : ndarray of the form, shape(n_support_vectors, n_features)
       Support vector features
     self.lam_sv : ndarray of the form, shape(n_support_vectors, 1)
       undetermined multiplier of the support vector
     self.y_sv : ndarray of the form, shape(n_support_vectors, 1)
       support vector labels

     """

    def __init__(self, num_iter, lr, kernel='linear',  gamma=1,
                 theta=0, d=1, threshold=1e-5, verbose=False):
        # Record hyperparameters as attributes
        self.iter = num_iter
        self.lr = lr
        self.kernel = kernel
        self.threshold = threshold
        self.verbose = verbose
        # support vectors
        self.s_X = 0
        self.s_y = 0
        # parameters of the polynomial kernel
        self.gamma = gamma
        self.theta = theta
        self.d = d
        # Initialize parameters and predictors
        self. coef_ = 0
        # Initialize category max and min of y
        self. y_max = 0
        self. y_min = 0

    def fit(self, X, y, X_val=None, y_val=None):
        """
        SVM分類器を学習する。検証データが入力された場合はそれに対する精度もイテレーションごとに計算する。

        Parameters
        ----------
        X : 次の形のndarray, shape (n_samples, n_features)
            訓練データの特徴量
        y : 次の形のndarray, shape (n_samples, )
            訓練データの正解値
        X_val : 次の形のndarray, shape (n_samples, n_features)
            検証データの特徴量
        y_val : 次の形のndarray, shape (n_samples, )
            検証データの正解値
        """
        # make y one-dimensional
        y = y.ravel()
        y_val = y_val.ravel()
    
        # get the category maximum and minimum of y
        self.y_max = y.max()
        self.y_min = y.min()
    
        # -1, 1 of y
        # "-1, 1" instead of "0, 1" for support vectors
        if self.y_max == 2:
            y = np.where(y==2, 1, -1)
            y_val = np.where(y_val==2, 1, -1)
        else:
            y = np.where(y==1, 1, -1)
            y_val = np.where(y_val==1, 1, -1)
    
        # Set parameters (normal distribution with mean 0 and variance 1)
        # "Normal distribution ✖️0.001" because it is easy to converge when it is small
        self.coef_ = np.random.normal(0, 1, X.shape[0])*0.01
    
        for iter_count in range(self.iter):
            if self.verbose:
                #Output learning process when #verbose is set to True
                print("{}th learning".format(iter_count))
        
            # steepest descent
            # update self.coef_
            _gradient_descent(self, X, y)
    
        # Remove 0 elements (so that they can be used when estimating)
        coef_index = np.where(self.coef_>self.threshold)
        # Determination of support vectors
        # Since the above index is a tuple, it is not necessary to specify the second dimension of X)
        self.coef_ = self.coef_[coef_index]
        self.s_X = X[coef_index]
        self.s_y = y[coef_index]

    def predict(self, X):
        """
         Estimate labels using SVM classifier.

         Parameters
         ----------
         X : ndarray of the following form, shape (n_samples, n_features)
             sample

         Returns
         -------
             An ndarray of the form, shape (n_samples, 1)
             Estimation result by SVM classifier
         """
         # configure kernel
        if self.kernel=='linear':
            K = np.dot(X, self.s_X.T)
        elif self.kernel=='poly':
            K = self.gamma*((np.dot(X, self.s_X.T) + self.theta)**self.d)
        
        # branch to correspond to category max, min
        if self.y_max == 2:
            if self.y_min == 1:
                return np.where(np.dot(K, self.coef_*self.s_y)<0, 1, 2)
            else:
                return np.where(np.dot(K, self.coef_*self.s_y)<0, 0, 2)
        else:
            return np.where(np.dot(K, self.coef_*self.s_y)<0, 0, 1)

[Problem 2] Determination of support vectors
Treat samples whose computed Lagrangian multiplier λ is greater than a set threshold as support vectors. Support vectors are required when estimating. Write code to determine the support vectors and store them as instance variables.

Threshold is a hyperparameter, but a good starting point is around 1e-5. If you can output the number of support vectors, you can check if the learning is going well.

[Question 3] Estimation
At the time of estimation, the kernel function calculates the features of the data to be estimated and the features of the support vectors. The sign of f(x) obtained is the classification result.

[Question 4] Learning and Estimation
Please learn and estimate the scratch implementation for binary classification of simple dataset 1 prepared in Sprint of Introduction to Machine Learning Scratch.

Compare it with the scikit-learn implementation to see if it works.

Please use scikit-learn for index values such as Accuracy, Precision, and Recall.

In [12]:
from sklearn.datasets import load_iris
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = pd.DataFrame(data=iris.target, columns=['species'])
df = pd.concat([X, y], axis=1)
# extraction
df_2ex =df.query('species == 1 | species == 2')[["sepal length (cm)", "petal length (cm)", 'species']]
X = df_2ex[["sepal length (cm)", "petal length (cm)",]]
y = df_2ex[["species"]]
# ndarray conversion
X_array = X.values
# One-dimensionalization with respect to y (required for graphing)
y_array = np.ravel(y.values)
# Split
X_train, X_valid, y_train, y_valid = train_test_split(
     X_array, y_array, test_size=0.25, random_state=0)
# Standardization
scaler = StandardScaler()
scaler.fit(X_train)
X_train_std = scaler.transform(X_train)
X_valid_std = scaler.transform(X_valid)

In [13]:
# learn

# _init__(self, num_iter, lr, kernel='linear', gamma=1,
# theta=0, d=1, threshold=1e-5, verbose=False):
ssvc = ScratchSVMClassifier(5000, 0.01, threshold=1e-5)
ssvc.fit(X_train_std, y_train, X_valid_std, y_valid)
y_valid_predict = ssvc.predict(X_valid_std)

NameError: name 'd' is not defined