In [59]:
import numpy as np

class LinearRegression:
    def __init__(self, num_iters, lr):
        self.w = None # (p+1, 1)
        self.num_iters = num_iters
        self.lr = lr
        
    def loss(self, Y, Y_pred):
        return (np.sum(np.square(Y_pred - Y))) / Y.shape[0]
    
    def loss_gradient(self, Y, X):
        # loss = (1/n) sum (xi^Tw - yi)**2 
        # gradient of loss: (2/n) sum (xi^Tw - yi) * xi
        Y_pred = np.dot(X, self.w) # (n, 1)
        gradients = np.zeros((self.w.shape[0], 1))
        for y, y_pred, x in zip(Y, Y_pred, X):
            # y_pred : (1, 1). y : (1,1). x: (1, p)
            curr_grad = (2/Y.shape[0]) * np.dot((y_pred[0] - y[0]), x.T.reshape(self.w.shape))
            gradients += curr_grad
        return (1/Y.shape[0]) * gradients
    
    def fit(self, X, Y):
        # X: (n, p) matrix
        # Y: (n, 1) matrix
        n, p = X.shape
        
        # adds bias term to X by adding feat with value 1. X goes from (n,p) to (n,p+1)
        X = np.hstack((X, np.ones((n, 1))))
        
        # adds bias term to w by adding a feature. w goes from (p,1) to (p+1,1)
        self.w = np.zeros((p+1, 1))
        
        for _ in range(self.num_iters):            
            # here, depends on if you want to do SGD or GD or batch GD.
            # we will do GD
            Y_pred = np.dot(X, self.w) # (n, 1) matrix
            self.w -= self.lr * self.loss_gradient(Y, X)
            print(f"Current iteration loss: {self.loss(Y, Y_pred)}")
    
    def predict(self, X, Y):
        n, p = X.shape
        X = np.hstack((X, np.ones((n, 1))))
        Y_pred = np.dot(X, self.w) # (n, 1) matrix
        print(f"Loss = {self.loss(Y, Y_pred)}")
        print(f"True labels: {Y}")
        print(f"Pred labels: {Y_pred}")

In [60]:
lr = LinearRegression(10, 0.2)
X = np.array([[1], [2], [3], [4], [5], [6]])
y = np.array([[1], [2], [3], [4], [5], [6]])
lr.fit(X, y)
lr.predict(X, y)

Current iteration loss: 15.166666666666666
Current iteration loss: 0.07446502057613165
Current iteration loss: 0.009270350048265005
Current iteration loss: 0.008773262945776973
Current iteration loss: 0.008559936433970235
Current iteration loss: 0.008352938358936397
Current iteration loss: 0.008150950866275182
Current iteration loss: 0.007953847777418458
Current iteration loss: 0.007761510958045657
Current iteration loss: 0.007573825152009422
Loss = 0.007390677890333659
True labels: [[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
Pred labels: [[1.15020201]
 [2.10442544]
 [3.05864887]
 [4.0128723 ]
 [4.96709573]
 [5.92131916]]


In [245]:
class LogisticRegression:
    # https://ml-cheatsheet.readthedocs.io/en/latest/logistic_regression.html
    def __init__(self, num_iters, lr):
        self.w = None # (p+1, 1)
        self.num_iters = num_iters
        self.lr = lr
        
    def sigmoid(self, x):
        return 1/(1 + np.exp(-x))
    
    def loss(self, Y, Y_pred):
        # binary cross entropy loss
        return -1/Y.shape[0] * (np.sum(Y * np.log(Y_pred) + (1-Y) * np.log(1-Y_pred)))
    
    def loss_gradient(self, Y, Y_pred, X):
        # Y-pred: (n,1)
        # Y: (n,1)
        # X: (n,p)
        # output: (p, 1)
        #transform to: (n, 1)
        gradient_per_feat = (2/Y.shape[0]) * np.dot(X.T, (Y_pred - Y))
        return gradient_per_feat.reshape(self.w.shape)
        
    def fit(self, X, Y):
        # X: (n, p)
        # Y: (n, 1)
        # w: (p, 1)
        n, p = X.shape
        X = np.hstack((X, np.ones((n, 1))))
        self.w = np.zeros((p+1, 1))
        
        for _ in range(self.num_iters):
            Y_pred = self.sigmoid(np.dot(X, self.w))
            print(f"curr iteration loss: {self.loss(Y, Y_pred)}")
            lg = self.loss_gradient(Y, Y_pred, X)
            self.w -= self.lr * lg
            
    def decision_boundary(self, prob):
        return 1 if prob >= .5 else 0
    
    def predict(self, X, Y):
        n, p = X.shape
        X = np.hstack((X, np.ones((n, 1))))
        Y_pred = self.sigmoid(np.dot(X, self.w))
        
        decision_boundary = np.vectorize(self.decision_boundary)
        print(Y)
        print(decision_boundary(Y_pred))
        print(f"curr iteration loss: {self.loss(Y, Y_pred)}")

In [251]:
lr = LogisticRegression(100, 0.3)
X = np.array([[1], [2], [3], [4], [5], [6]])
y = np.array([[1], [1], [1], [0], [0], [0]])
lr.fit(X, y)
lr.predict(X, y)

curr iteration loss: 0.6931471805599452
curr iteration loss: 0.680523187218805
curr iteration loss: 0.6030037231000347
curr iteration loss: 0.5861778505153081
curr iteration loss: 0.5635648277944661
curr iteration loss: 0.5480646829293379
curr iteration loss: 0.5318260845147702
curr iteration loss: 0.5181209899550075
curr iteration loss: 0.5048929195256662
curr iteration loss: 0.4928291128584051
curr iteration loss: 0.4813995056792133
curr iteration loss: 0.4706687683890909
curr iteration loss: 0.46049073457446604
curr iteration loss: 0.450830078503481
curr iteration loss: 0.4416325264008363
curr iteration loss: 0.4328639714630258
curr iteration loss: 0.4244939403228932
curr iteration loss: 0.4164967437535292
curr iteration loss: 0.40884963853248407
curr iteration loss: 0.4015318190099877
curr iteration loss: 0.39452418209298423
curr iteration loss: 0.38780891681628404
curr iteration loss: 0.38136943474424645
curr iteration loss: 0.37519022175097716
curr iteration loss: 0.3692567790584

In [None]:
from collections import defaultdict

class NaiveBayes:
    
    def __init__(self):
        feat_idx_to_Y_to_X = defaultdict(lambda: defaultdict(int))
        count_y = defaultdict(int)
        
    def predict(self, X, Y):
        for y in count_y.keys():
            for feat_idx in X.shape[1]:
                
        
    def fit(self, X, Y):
        # Calculate P(X|Y) and P(Y) given X and Y
        count_by_class = np.bincount(Y.flatten())
        for y in np.unique(Y.flatten()):
            y_idx = (Y == y)
            count_y[y] = y_idx.shape[0]
            for feat_idx in X.shape[1]:
                relevant_X = X[y_idx, feat_idx]
                feat_idx_to_Y_to_X[feat_idx][y] = relevant_X.shape[0]
        

In [252]:
a = np.array([[1],[2], [3]])

In [254]:
np.bincount(a.flatten())

array([0, 1, 1, 1])