# In [19]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_covtype
from sklearn import linear_model 

import torch
import torch.optim as optim

class LogisticRegression:
    '''
    Multiclass (softmax) logistic regression trained with full-batch
    gradient descent.

    The parameters are optimised with ``torch.optim.SGD`` and stored as NumPy
    arrays once training has finished.

    Attributes (set by ``fit``)
    ---------------------------
    classes_: sorted array of the distinct labels seen during training
    class2int: dict mapping each label to its row index in ``coef_``
    coef_: weight matrix of shape (n_classes, n_features)
    intercept_: bias vector of shape (n_classes,)
    '''

    def __init__(self):
        pass

    def fit(self, X, y, lr=0.1, niter=100, momentum=0, random_state=None, verbose=False):
        '''
        Train a multiclass logistic regression model on the given training set.

        Parameters
        ----------
        X: training examples, represented as an input array of shape (n_sample,
           n_features). Any numeric dtype is accepted; the data is converted
           to float64 internally.
        y: labels of training examples, represented as an array of shape
           (n_sample,) containing the classes for the input examples
        lr: learning rate for gradient descent
        niter: number of gradient descent updates
        momentum: the momentum constant passed to ``torch.optim.SGD``
        random_state: accepted for interface compatibility but not used;
           the parameters start at zero and every update uses the whole
           training set, so no random numbers are drawn
        verbose: if True, print the mean negative log-likelihood before each
           update

        Returns
        -------
        self: fitted model
        '''
        # torch.from_numpy keeps the array's dtype and needs a regular memory
        # layout, so coerce to contiguous float64 to match the weights.
        X = np.ascontiguousarray(X, dtype=np.float64)

        # np.unique returns the sorted labels together with each sample's
        # index into them, which is exactly the integer encoding we need.
        self.classes_, y = np.unique(np.asarray(y), return_inverse=True)
        y = y.reshape(-1)
        self.class2int = {c: i for i, c in enumerate(self.classes_)}

        n_features = X.shape[1]
        n_classes = len(self.classes_)

        # Both parameters start at zero.
        coef = torch.zeros((n_classes, n_features), dtype=torch.float64, requires_grad=True)
        intercept = torch.zeros(n_classes, dtype=torch.float64, requires_grad=True)

        optimizer = optim.SGD([coef, intercept], lr=lr, momentum=momentum)

        inputs = torch.from_numpy(X)
        targets = torch.as_tensor(y, dtype=torch.long)
        rows = torch.arange(targets.shape[0])

        for i in range(niter):
            scores = inputs @ coef.T + intercept
            # log_softmax works in log space, so a true-class probability
            # that underflows to zero gives a large finite loss instead of
            # log(0) = -inf and NaN gradients.
            log_probs = torch.log_softmax(scores, dim=1)
            nll = -log_probs[rows, targets].mean()

            if verbose:
                print('[%4d]: %.3f' % (i, nll.item()))

            optimizer.zero_grad()
            nll.backward()
            optimizer.step()

        self.coef_ = coef.detach().numpy()
        self.intercept_ = intercept.detach().numpy()

        return self

    def _decision_scores(self, X):
        '''Return the raw linear class scores, shape (n_sample, n_classes).'''
        return np.asarray(X, dtype=np.float64) @ self.coef_.T + self.intercept_

    def predict_proba(self, X):
        '''
        Predict the class distributions for given input examples.

        Parameters
        ----------
        X: input examples, represented as an input array of shape (n_sample,
           n_features).

        Returns
        -------
        y: predicted class probabilities, represented as an array of shape
           (n_sample, n_classes); column j corresponds to ``classes_[j]`` and
           every row sums to 1
        '''
        scores = self._decision_scores(X)
        # Subtract the row-wise maximum before exponentiating so that exp
        # cannot overflow; softmax is unchanged by a per-row shift.
        scores = np.exp(scores - scores.max(axis=1, keepdims=True))
        return scores / scores.sum(axis=1, keepdims=True)

    def predict(self, X):
        '''
        Predict the classes for given input examples.

        Parameters
        ----------
        X: input examples, represented as an input array of shape (n_sample,
           n_features).

        Returns
        -------
        y: predicted class labels, represented as an array of shape (n_sample,)
        '''
        # Softmax is monotonic, so the arg-max of the raw scores is also the
        # most probable class.
        indices = np.argmax(self._decision_scores(X), axis=1)
        return self.classes_[indices]


if __name__ == '__main__':
    # Fetch the covtype dataset and hold out 30% of it for evaluation.
    X, y = fetch_covtype(return_X_y=True)
    X_tr, X_ts, y_tr, y_ts = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # fit() returns the fitted model, so construction and training can chain.
    clf = LogisticRegression().fit(X_tr, y_tr, lr=2e-7, niter=200)

    # Report the training accuracy first, then the held-out accuracy.
    train_accuracy = accuracy_score(y_tr, clf.predict(X_tr))
    print(train_accuracy)
    test_accuracy = accuracy_score(y_ts, clf.predict(X_ts))
    print(test_accuracy)

# Output (training accuracy, then test accuracy):
# 0.502343204461186
# 0.5032242518817698
