In [633]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

In [255]:
class LogisticRegression:
    def __init__(self, n_inputs, sgd_thresh = 10e-8, sgd_max_iters = 2500, alpha = 0.0001):
        self.n_inputs = n_inputs
        self.weights = np.random.rand(self.n_inputs, 1).astype('f')
        #print(self.weights)
        self.bias = 1.0
        self.alpha = alpha
        self.sgd_thresh = sgd_thresh
        self.sgd_max_iters = sgd_max_iters
        
    def sigmoid(self, x):
        return 1/(1+np.exp(-1*x))
    
    def predict(self, x):
        x = np.array(x).reshape(-1,1)
        return int(np.round(self.sigmoid(np.dot(self.weights.T, x) + self.bias)))
    
    def log_loss(self, X, Y):
        #dot = [np.dot(self.weights.T, x) for x in X]
        
        y_hats = [self.sigmoid(np.dot(self.weights.T, x) + self.bias) for x in X]
        self.yhats = y_hats
        #print(np.sum(y_hats))
        J_w_b = (-1/len(Y))*np.sum([y*np.log(y_hat)+(1-y)*np.log(1-y_hat) for y,y_hat in zip(Y,y_hats)])
        return J_w_b

    def stochastic_gradient_descent(self,X,Y):
        X = np.array(X)
        Y = np.array(Y)
        J = self.log_loss(X,Y)
        for i in range(self.sgd_max_iters):
            idx = np.random.randint(len(X))
            x_sample, y_sample = X[idx], Y[idx]
            diff = self.predict(x_sample) - y_sample

            self.weights = self.weights - (self.alpha*diff*x_sample).reshape(-1,1)

            self.bias = self.bias - self.alpha*diff

            
            if self.log_loss(X,Y) - J < self.sgd_thresh:
                break
            #print(self.log_loss(X,Y), J)
            J = self.log_loss(X,Y)
            
    def score(self, x_test, y_test):
        x_test = np.array(x_test)
        y_test = np.array(y_test)
        correct = 0
        total = len(x_test)
        for x,y in zip(x_test, y_test):
            if self.predict(x) ==  y:
                correct += 1
        return correct/total

In [427]:
path = "/home/oisin/MAI_work/ongoing_assignments/DeepLearning/data/"

In [512]:
data = pd.read_csv(path+"blobs250.csv")

In [429]:
x_train, x_test, y_train, y_test = train_test_split(data.drop(['Class'], axis = 1), data['Class']) 

In [259]:
x_train.shape[1]

2

In [510]:
lr = LogisticRegression(x_train.shape[1],sgd_max_iters=200,sgd_thresh=-np.inf,alpha=10e-2)

lr.stochastic_gradient_descent(x_train,y_train)

In [430]:
x_test = np.array(x_test)
y_test = np.array(y_test)

In [511]:
lr.score(x_test, y_test)

0.87

In [188]:
np.dot(lr.weights.T, x_[0].reshape(-1,1)) + -1.0

array([[-8.13353062]])

In [198]:
correct = 0
total = len(x_test)
for x,y in zip(x_test, y_test):
    if lr.predict(x) ==  y:
        correct += 1
print(correct/total)

1.0


In [239]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [268]:
points = []
for x in np.linspace(-1,1):
    for y in np.linspace(-3,3):
        if lr.predict((x,y)) < 1:
            points.append((x,y))

In [317]:
count = 0
total = len(y_test)
for x,y in zip(x_test,y_test):
    if lr.predict(x) == y:
        count += 1
count/total, count, total

(0.976, 244, 250)

In [520]:
bool(0.022)

True

In [3]:
import numpy as np

In [630]:
class Layer:
    
    def __init__(self, prev_no_nodes, no_nodes, activation_function, is_input=False):
        if not(is_input):
            self.weights = np.random.normal(0.01,1,size = (prev_no_nodes,no_nodes))
            self.bias = np.random.normal(0.01,1, size = (no_nodes,1))
            self.act_f = activation_function
            
        self.no_nodes = no_nodes
        self.is_input = is_input

    def fwdprop_output(self, X):
        if self.is_input:
            self.a = X
            self.z = self.a
            return X
        X = X.reshape(-1,1)
        self.z = np.dot(self.weights.T,X) + self.bias
        self.a = self.act_f(self.z)
        return self.a

    
class Network:
    
    def __init__(self, no_nodes_layer, activation_function, loss_function = "log_loss", lamb = 0):
        '''
        TODO: Sort out the fact that the first layer doesn't have an activation function (DOESNT FUCKIN NEED ONE AHAHHA)
        TODO: Add the loss_functions
        TODO: Finish train and predict/score
        '''
        self.no_nodes_layer = no_nodes_layer
        self.activation_function = activation_function
        self.input_size = no_nodes_layer[0]
        self.no_layers = len(self.no_nodes_layer)
        self.lamb = lamb
        
        if isinstance(no_nodes_layer, list):
            '''
            TODO: add functionality so that differnet layers can have different activation functions 
            '''
            assert(self.no_layers == len(no_nodes_layer))
            assert(self.no_nodes_layer[-1] == 1 or self.no_nodes_layer[-1] == 2)
            self.layers = [Layer(self.no_nodes_layer[i-1],self.no_nodes_layer[i],activation_function) for i in range(1,len(self.no_nodes_layer))]
            input_layer = Layer(0,no_nodes=self.no_nodes_layer[0],activation_function=activation_function, is_input=True)
            self.layers = [input_layer] + self.layers

        else:
            #Come up with a better default
            self.layers = [Layer(no_nodes_layer,1)]

        self.W = np.zeros(0)
        #turn all the weight matrices into one long weight vector so one can find the l_p norm of it.
        self.W = np.concatenate([self.W] +[layer.weights.flatten() for layer in self.layers[1:]])
        #print(np.linalg.norm(self.W, 2))
    
    def sigmoid(self, x):
        return 1/(1+np.exp(-x))
    
    def log_loss(self,X,Y):
        y_hats = [self.fwdpropagate(x) for x in X]
        J_w_b = (-1)*sum([y*np.log(y_hat)+(1-y)*np.log(1-y_hat) for y,y_hat in zip(Y,y_hats)])
        J_w_b += self.lamb*np.linalg.norm(self.W, 2)
        return J_w_b
        
        
    def fwdpropagate(self, _input):
        
        if len(_input) != self.layers[0].no_nodes:
            print(f"Input must be of length {self.layers[0].no_nodes}, it is of length {len(_input)}")
        
        self.a_s = []   
        a = _input

        
        for layer in self.layers:
            a = layer.fwdprop_output(a)
            self.a_s.append(a)
        
        #self.layer_outputs = []
        y_hat = a
        #self.layer_outputs.append(y_hat)
        
        self.prediction = int(np.round(y_hat))

        
    def d_dout_sig(self, x):
        return self.sigmoid(x)*(1.-self.sigmoid(x))
    
    
    def d_dout_loss(self,X):
        pass
    
    def train(self, x_train, y_train):
        raise NotImplementedError

    def predict(self, x_test):
        self.fwdpropagate(x_test)
        return self.prediction

    def stochastic_gradient_descent(self, x_train, y_train, alpha = 0.001, n_iter = 10):
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        comb = np.c_[x_train.reshape(len(x_train), -1), y_train.reshape(len(y_train), -1)]
        
        for _ in range(n_iter):
            x_train_c = comb[:, :x_train.size//len(x_train)].reshape(x_train.shape)
            y_train_c = comb[:, x_train.size//len(x_train):].reshape(y_train.shape)
            np.random.shuffle(comb)
        
            for x,y in tqdm(zip(x_train_c, y_train_c)):
                self.backward_propagate_error(x,y)
                for i,layer in enumerate(self.layers):
                    if layer.is_input:
                        continue
                    else:
                        layer.weights -= (alpha*np.array(self.delta_w[i]))
                        layer.bias -= alpha*np.array(self.delta_b[i])
    
    
    def score(self, x_test, y_test):
        x_test = np.array(x_test)
        y_test = np.array(y_test)
        correct = 0
        total = len(x_test)
        for x,y in zip(x_test, y_test):
            self.fwdpropagate(x)
            if self.predict(x) ==  y:
                correct += 1
        return correct/total
    
    
    
    def backward_propagate_error(self, x, y):
        ''' tick  '''
        self.fwdpropagate(x)
        delta = [None]*self.no_layers  
        delta_w = [None]*self.no_layers 
        delta_b = [None]*self.no_layers
   
        for i in range(self.no_layers - 1, -1, -1):
            if i == self.no_layers - 1:
                if y == 1:
                    q = (y/self.layers[i].a)
                else:
                    q = -1*((1-y)/(1-self.layers[i].a))
                delta[i] = -1*q*self.d_dout_sig(self.layers[i].z)
            else:
                delta[i] = ((self.layers[i+1].weights)@(delta[i+1]))*self.d_dout_sig(self.layers[i].z).reshape(-1,1)
                delta_w[i+1] = ((delta[i+1])@(self.layers[i].a.reshape(1,-1))).T + 2*self.lamb*self.layers[i+1].weights
                delta_b[i+1] = delta[i+1]

        self.delta_w = delta_w
        self.delta_b = delta_b
        self.delta = delta

In [555]:
nn = Network([2,4,4,1],activation_function=lambda x:1/(1+np.exp(-x)), lamb = 0.001)

In [556]:
nn.stochastic_gradient_descent(x_train, y_train, alpha=0.01, n_iter=1000)

In [326]:
x_test = np.array(x_test)
y_test = np.array(y_test)

In [557]:
nn.score(x_test, y_test)

0.94

In [493]:
nn.delta[-1]

array([[0.]])

In [568]:
nn.score(x_test,y_test)

0.94

In [566]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dick = pickle.load(fo, encoding='bytes')
    return dick

In [651]:
path_to_images = "/home/oisin/MAI_work/ongoing_assignments/DeepLearning/cifar-10-batches-py/"
datas = [unpickle(path_to_images+"data_batch_"+str(i)) for i in range(1,6)]
x = [d[b'data'] for d in datas]
y = [d[b'labels'] for d in datas]



In [660]:
x_ = np.concatenate(x)
y_ = np.concatenate(y)

In [661]:
data =  [[imput_x, imput_y - 2] for imput_x, imput_y in zip(x_,y_) if imput_y == 2 or imput_y == 3]

In [662]:
data

[[array([164, 105, 118, ...,  29,  26,  44], dtype=uint8), 0],
 [array([125, 110, 102, ...,  82,  84,  86], dtype=uint8), 1],
 [array([17, 17, 17, ..., 20, 18, 16], dtype=uint8), 0],
 [array([110, 142, 151, ...,  60,  60,  62], dtype=uint8), 1],
 [array([197, 198, 201, ...,  61,  63,  57], dtype=uint8), 0],
 [array([252, 249, 250, ..., 113, 109,  88], dtype=uint8), 1],
 [array([73, 71, 77, ..., 55, 38, 29], dtype=uint8), 0],
 [array([131, 124, 116, ...,  69,  73,  73], dtype=uint8), 1],
 [array([  7,   7,   5, ...,  76,  81, 119], dtype=uint8), 1],
 [array([169, 131, 193, ..., 223, 221, 220], dtype=uint8), 1],
 [array([110, 223, 243, ...,   5,   5,   4], dtype=uint8), 1],
 [array([ 98, 119, 109, ...,  75,  58,  72], dtype=uint8), 1],
 [array([145, 145, 148, ..., 160, 150, 167], dtype=uint8), 0],
 [array([127, 148, 192, ..., 103, 107, 117], dtype=uint8), 0],
 [array([ 77,  76,  78, ..., 127, 134, 135], dtype=uint8), 0],
 [array([191, 190, 190, ..., 197, 196, 195], dtype=uint8), 0],
 [ar

In [663]:
_all_data = np.array(data)

  _all_data = np.array(data)


In [664]:
x_train, x_test, y_train, y_test = train_test_split(_all_data[:,0],_all_data[:,1])

In [665]:
len(x_train)

7500

In [666]:
imp_shape = len(x_train[0])

In [668]:
net = Network([imp_shape,200,200,1],activation_function=lambda x:1/(1+np.exp(-x)), lamb = 0.005)

In [672]:
net.stochastic_gradient_descent(x_train,y_train, n_iter=1)

  net = Network([imp_shape,200,200,1],activation_function=lambda x:1/(1+np.exp(-x)), lamb = 0.005)
  return 1/(1+np.exp(-x))
7500it [01:53, 66.16it/s]


In [673]:
net.score(x_test, y_test)

  net = Network([imp_shape,200,200,1],activation_function=lambda x:1/(1+np.exp(-x)), lamb = 0.005)


0.5808

In [686]:
net.delta[-3]


array([[-0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
       [-0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
       [-0.00000000e+000],
       [-0.00000000e+000],
       [-0.00000000e+000],
       [-0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
       [-0.00000000e+000],
       [ 5.23300628e-075],
       [ 0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
       [-0.00000000e+000],
       [-0.00000000e+000],
       [ 0.00000000e+000],
       [ 0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
       [ 0.00000000e+000],
       [ 0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
       [ 0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
       [ 0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
       [ 0.00000000e+000],
       [-0.00000000e+000],
 