# In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np 
from sklearn.linear_model import SGDClassifier
# Load the breast cancer dataset via scikit-learn's load_breast_cancer().
cancer = load_breast_cancer()


class SingleLayer:
    """Single-layer logistic classifier trained with full-batch gradient descent.

    The model computes ``sigmoid(x @ w + b)`` and is optimised on the binary
    cross-entropy loss with optional L1 / L2 penalties on the weights.

    Attributes:
        w: Weight column vector of shape ``(n_features, 1)``; set by ``fit``.
        b: Scalar bias; set by ``fit``.
        losses: Per-epoch training loss.
        val_losses: Per-epoch validation loss (empty if no validation data).
        w_history: Copy of ``w`` before training and after every epoch.
        lr: Learning rate applied to both weight and bias updates.
        l1: L1 regularisation strength.
        l2: L2 regularisation strength.
    """

    def __init__(self, learning_rate=0.1, l1=0, l2=0):
        self.w = None
        self.b = None
        self.losses = []
        self.val_losses = []
        self.w_history = []
        self.lr = learning_rate
        self.l1 = l1
        self.l2 = l2

    def forpass(self, x):
        """Return the linear output ``x @ w + b`` (before the sigmoid)."""
        return np.dot(x, self.w) + self.b

    def backpropa(self, x, err):
        """Return the batch-averaged gradients ``(w_grad, b_grad)``.

        Args:
            x: Input batch.
            err: Column vector ``a - y`` of prediction errors.
        """
        m = len(x)
        w_grad = np.dot(x.T, err) / m
        b_grad = np.sum(err) / m
        return w_grad, b_grad

    def fit(self, x, y, epochs=100, x_val=None, y_val=None):
        """Train on ``(x, y)`` for ``epochs`` full-batch iterations.

        Args:
            x: Training inputs, one sample per row.
            y: Binary training targets; reshaped to a column vector.
            epochs: Number of gradient-descent steps.
            x_val: Optional validation inputs.
            y_val: Optional validation targets. The validation loss is only
                tracked when both ``x_val`` and ``y_val`` are given.
        """
        y = y.reshape(-1, 1)
        has_val = x_val is not None and y_val is not None
        if has_val:
            y_val = y_val.reshape(-1, 1)
        m = len(x)
        self.w = np.ones((x.shape[1], 1))
        self.b = 0
        self.w_history.append(self.w.copy())
        for _ in range(epochs):
            z = self.forpass(x)
            a = self.activation(z)
            err = -(y - a)
            w_grad, b_grad = self.backpropa(x, err)
            # Add the gradient of the L1 / L2 penalty terms.
            w_grad += (self.l1 * np.sign(self.w) + self.l2 * self.w) / m
            self.w -= self.lr * w_grad
            # The bias step is scaled by the learning rate, like the weights.
            self.b -= self.lr * b_grad
            self.w_history.append(self.w.copy())
            # Clip so that log() never receives exactly 0 or 1.
            a = np.clip(a, 1e-10, 1 - 1e-10)
            loss = np.sum(-(y * np.log(a) + (1 - y) * np.log(1 - a)))
            self.losses.append((loss + self.reg_loss()) / m)
            if has_val:
                self.update_val_loss(x_val, y_val)

    def activation(self, z):
        """Return the element-wise sigmoid of ``z``."""
        # Lower-bound z so that exp(-z) cannot overflow.
        z = np.clip(z, -100, None)
        return 1 / (1 + np.exp(-z))

    def predict(self, x):
        """Return a boolean array: True where the linear output is positive."""
        return self.forpass(x) > 0

    def score(self, x, y):
        """Return the fraction of samples in ``x`` predicted as ``y``."""
        return np.mean(self.predict(x) == y.reshape(-1, 1))

    def update_val_loss(self, x_val, y_val):
        """Append the current loss on ``(x_val, y_val)`` to ``val_losses``.

        ``y_val`` is expected as a column vector (``fit`` reshapes it).
        """
        a = self.activation(self.forpass(x_val))
        a = np.clip(a, 1e-10, 1 - 1e-10)
        val_loss = np.sum(-(y_val * np.log(a) + (1 - y_val) * np.log(1 - a)))
        self.val_losses.append((val_loss + self.reg_loss()) / len(y_val))

    def reg_loss(self):
        """Return the penalty ``l1 * sum|w| + (l2 / 2) * sum(w**2)``."""
        return self.l1 * np.sum(np.abs(self.w)) + self.l2 / 2 * np.sum(self.w ** 2)


class DualLayer(SingleLayer):
    """Two-layer network: one sigmoid hidden layer and one sigmoid output unit.

    Extends ``SingleLayer`` with a hidden layer of ``units`` neurons and is
    trained with full-batch gradient descent on the binary cross-entropy loss.

    Attributes:
        units: Number of hidden neurons.
        w1, b1: Hidden-layer weights ``(n_features, units)`` and biases ``(units,)``.
        w2, b2: Output-layer weights ``(units, 1)`` and scalar bias.
        a1: Hidden activations cached by the last ``forpass`` call.
    """

    def __init__(self, units=10, learning_rate=0.1, l1=0, l2=0):
        super().__init__(learning_rate, l1, l2)
        self.units = units
        self.w1 = None
        self.w2 = None
        self.b1 = None
        self.b2 = None
        self.a1 = None

    def forpass(self, x):
        """Return the output-layer linear value; caches hidden activations in ``a1``."""
        z1 = np.dot(x, self.w1) + self.b1
        self.a1 = self.activation(z1)
        z2 = np.dot(self.a1, self.w2) + self.b2
        return z2

    def backpropa(self, x, err):
        """Return ``(w1_grad, b1_grad, w2_grad, b2_grad)`` averaged over the batch.

        Must be called after ``forpass`` on the same ``x`` because it reads
        the cached hidden activations ``a1``.
        """
        m = len(x)
        w2_grad = np.dot(self.a1.T, err) / m
        b2_grad = np.sum(err) / m
        # Propagate the error through w2 and the sigmoid derivative a1*(1-a1).
        err_to_hidden = np.dot(err, self.w2.T) * self.a1 * (1 - self.a1)
        w1_grad = np.dot(x.T, err_to_hidden) / m
        # One bias gradient per hidden unit: sum over samples only (axis=0).
        b1_grad = np.sum(err_to_hidden, axis=0) / m
        return w1_grad, b1_grad, w2_grad, b2_grad

    def init_weights(self, n_features):
        """Initialise all weights to one and all biases to zero."""
        self.w1 = np.ones((n_features, self.units))
        self.b1 = np.zeros(self.units)
        self.w2 = np.ones((self.units, 1))
        self.b2 = 0

    def fit(self, x, y, epochs=100, x_val=None, y_val=None):
        """Train on ``(x, y)`` for ``epochs`` full-batch iterations.

        The validation loss is tracked only when both ``x_val`` and ``y_val``
        are supplied.
        """
        y = y.reshape(-1, 1)
        has_val = x_val is not None and y_val is not None
        if has_val:
            y_val = y_val.reshape(-1, 1)
        m = len(x)
        self.init_weights(x.shape[1])
        for _ in range(epochs):
            a = self.training(x, y, m)
            # Clip so that log() never receives exactly 0 or 1.
            a = np.clip(a, 1e-10, 1 - 1e-10)
            loss = np.sum(-(y * np.log(a) + (1 - y) * np.log(1 - a)))
            self.losses.append((loss + self.reg_loss()) / m)
            if has_val:
                self.update_val_loss(x_val, y_val)

    def training(self, x, y, m):
        """Run one forward/backward pass, update the parameters, and return
        the output activations computed before the update."""
        z = self.forpass(x)
        a = self.activation(z)
        err = -(y - a)
        w1_grad, b1_grad, w2_grad, b2_grad = self.backpropa(x, err)
        # Add the gradient of the L1 / L2 penalty terms.
        w1_grad += (self.l1 * np.sign(self.w1) + self.l2 * self.w1) / m
        w2_grad += (self.l1 * np.sign(self.w2) + self.l2 * self.w2) / m
        self.w1 -= self.lr * w1_grad
        self.w2 -= self.lr * w2_grad
        self.b1 -= self.lr * b1_grad
        self.b2 -= self.lr * b2_grad
        return a

    def reg_loss(self):
        """Return the L1 + L2 penalty summed over both weight matrices."""
        l1_term = np.sum(np.abs(self.w1)) + np.sum(np.abs(self.w2))
        l2_term = np.sum(self.w1 ** 2) + np.sum(self.w2 ** 2)
        return self.l1 * l1_term + self.l2 / 2 * l2_term


class RandomInitNetwork(DualLayer):
    """DualLayer whose weight matrices start from normally distributed values."""

    def init_weights(self, n_features):
        """Draw both weight matrices from N(0, 1) and zero the biases.

        The global NumPy seed is reset to 42 first, so every call produces
        the same initial weights.
        """
        np.random.seed(42)
        hidden_shape = (n_features, self.units)
        output_shape = (self.units, 1)
        # Draw order (w1 first, then w2) determines the sampled values.
        self.w1 = np.random.normal(loc=0, scale=1, size=hidden_shape)
        self.b1 = np.zeros(self.units)
        self.w2 = np.random.normal(loc=0, scale=1, size=output_shape)
        self.b2 = 0


class MiniBatchNetwork(RandomInitNetwork):
    """RandomInitNetwork trained with shuffled mini-batches.

    Attributes:
        batch_size: Maximum number of samples per mini-batch.
    """

    def __init__(self, units=10, batch_size=32, learning_rate=0.1, l1=0, l2=0):
        super().__init__(units, learning_rate, l1, l2)
        self.batch_size = batch_size

    def fit(self, x, y, epochs=100, x_val=None, y_val=None):
        """Train for ``epochs`` passes over ``(x, y)`` in mini-batches.

        The per-epoch training loss is the summed batch loss plus the
        regularisation penalty, divided by the number of samples. The
        validation loss is tracked only when both ``x_val`` and ``y_val``
        are supplied.
        """
        self.init_weights(x.shape[1])
        has_val = x_val is not None and y_val is not None
        if has_val:
            y_val = y_val.reshape(-1, 1)
        # Fixed seed so the batch shuffling is reproducible.
        np.random.seed(42)
        for _ in range(epochs):
            loss = 0
            for x_batch, y_batch in self.gen_betch(x, y):
                y_batch = y_batch.reshape(-1, 1)
                m = len(x_batch)
                a = self.training(x_batch, y_batch, m)
                # Clip so that log() never receives exactly 0 or 1.
                a = np.clip(a, 1e-10, 1 - 1e-10)
                loss += np.sum(-(y_batch * np.log(a) + (1 - y_batch) * np.log(1 - a)))
            self.losses.append((loss + self.reg_loss()) / len(x))
            if has_val:
                self.update_val_loss(x_val, y_val)

    def gen_betch(self, x, y):
        """Yield ``(x_batch, y_batch)`` pairs from a fresh random shuffle.

        Every sample is yielded exactly once; the last batch is shorter when
        ``len(x)`` is not a multiple of ``batch_size``.
        """
        length = len(x)
        indexes = np.random.permutation(np.arange(length))
        x = x[indexes]
        y = y[indexes]
        for start in range(0, length, self.batch_size):
            end = start + self.batch_size
            yield x[start:end], y[start:end]


class MultiClassNetwork:
    """Two-layer softmax classifier trained with mini-batch gradient descent.

    A sigmoid hidden layer of ``units`` neurons feeds a softmax output layer.
    Targets are expected one-hot encoded (``fit`` reads the class count from
    ``y.shape[1]``).

    Attributes:
        units: Number of hidden neurons.
        batch_size: Maximum number of samples per mini-batch.
        w1, b1: Hidden-layer weights ``(n_features, units)`` and biases ``(units,)``.
        w2, b2: Output-layer weights ``(units, n_classes)`` and biases ``(n_classes,)``.
        a1: Hidden activations cached by the last ``forpass`` call.
        losses: Per-epoch training loss.
        val_losses: Per-epoch validation loss (empty if no validation data).
        lr: Learning rate.
        l1, l2: L1 / L2 regularisation strengths.
    """

    def __init__(self, units=10, batch_size=32, learning_rate=0.1, l1=0, l2=0):
        self.units = units
        self.batch_size = batch_size
        self.w1 = None
        self.w2 = None
        self.b1 = None
        self.b2 = None
        self.a1 = None
        self.losses = []
        self.val_losses = []
        self.lr = learning_rate
        self.l1 = l1
        self.l2 = l2

    def forpass(self, x):
        """Return the output logits; caches hidden activations in ``a1``."""
        z1 = np.dot(x, self.w1) + self.b1
        self.a1 = self.sigmoid(z1)
        z2 = np.dot(self.a1, self.w2) + self.b2
        return z2

    def backpropa(self, x, err):
        """Return ``(w1_grad, b1_grad, w2_grad, b2_grad)`` averaged over the batch.

        Must be called after ``forpass`` on the same ``x`` because it reads
        the cached hidden activations ``a1``.
        """
        m = len(x)
        w2_grad = np.dot(self.a1.T, err) / m
        # One bias gradient per output class: sum over samples only (axis=0).
        b2_grad = np.sum(err, axis=0) / m
        # Propagate the error through w2 and the sigmoid derivative a1*(1-a1).
        err_to_hidden = np.dot(err, self.w2.T) * self.a1 * (1 - self.a1)
        w1_grad = np.dot(x.T, err_to_hidden) / m
        # One bias gradient per hidden unit.
        b1_grad = np.sum(err_to_hidden, axis=0) / m
        return w1_grad, b1_grad, w2_grad, b2_grad

    def sigmoid(self, z):
        """Return the element-wise sigmoid of ``z``."""
        # Lower-bound z so that exp(-z) cannot overflow.
        z = np.clip(z, -100, None)
        return 1 / (1 + np.exp(-z))

    def softmax(self, z):
        """Return the row-wise softmax of ``z``."""
        # Upper-bound z so that exp(z) cannot overflow.
        z = np.clip(z, None, 100)
        exp_z = np.exp(z)
        return exp_z / np.sum(exp_z, axis=1).reshape(-1, 1)

    def init_weights(self, n_features, n_classes):
        """Draw both weight matrices from N(0, 1) and zero the biases."""
        self.w1 = np.random.normal(0, 1, (n_features, self.units))
        self.b1 = np.zeros(self.units)
        self.w2 = np.random.normal(0, 1, (self.units, n_classes))
        self.b2 = np.zeros(n_classes)

    def fit(self, x, y, epochs=100, x_val=None, y_val=None):
        """Train for ``epochs`` passes over ``(x, y)`` in mini-batches.

        Prints one ``.`` per epoch as a progress indicator. The validation
        loss is tracked only when both ``x_val`` and ``y_val`` are supplied.

        Args:
            x: Training inputs, one sample per row.
            y: One-hot training targets of shape ``(n_samples, n_classes)``.
            epochs: Number of passes over the training data.
            x_val: Optional validation inputs.
            y_val: Optional one-hot validation targets.
        """
        self.init_weights(x.shape[1], y.shape[1])
        has_val = x_val is not None and y_val is not None
        # Seeded after weight initialisation, so only the batch shuffling
        # (not the initial weights) is made reproducible here.
        np.random.seed(42)
        for _ in range(epochs):
            loss = 0
            print('.', end='')
            for x_batch, y_batch in self.gen_betch(x, y):
                a = self.training(x_batch, y_batch)
                # Clip so that log() never receives exactly 0.
                a = np.clip(a, 1e-10, 1 - 1e-10)
                # Cross-entropy is the *negative* log-likelihood.
                loss += np.sum(-y_batch * np.log(a))
            self.losses.append((loss + self.reg_loss()) / len(x))
            if has_val:
                self.update_val_loss(x_val, y_val)

    def gen_betch(self, x, y):
        """Yield ``(x_batch, y_batch)`` pairs from a fresh random shuffle.

        Every sample is yielded exactly once; the last batch is shorter when
        ``len(x)`` is not a multiple of ``batch_size``.
        """
        length = len(x)
        indexes = np.random.permutation(np.arange(length))
        x = x[indexes]
        y = y[indexes]
        for start in range(0, length, self.batch_size):
            end = start + self.batch_size
            yield x[start:end], y[start:end]

    def training(self, x, y):
        """Run one forward/backward pass, update the parameters, and return
        the softmax outputs computed before the update."""
        m = len(x)
        z = self.forpass(x)
        a = self.softmax(z)
        err = -(y - a)
        w1_grad, b1_grad, w2_grad, b2_grad = self.backpropa(x, err)
        # Add the gradient of the L1 / L2 penalty terms.
        w1_grad += (self.l1 * np.sign(self.w1) + self.l2 * self.w1) / m
        w2_grad += (self.l1 * np.sign(self.w2) + self.l2 * self.w2) / m
        self.w1 -= self.lr * w1_grad
        self.w2 -= self.lr * w2_grad
        self.b1 -= self.lr * b1_grad
        self.b2 -= self.lr * b2_grad
        return a

    def predict(self, x):
        """Return the index of the largest logit for every row of ``x``."""
        z = self.forpass(x)
        return np.argmax(z, axis=1)

    def score(self, x, y):
        """Return the accuracy of ``predict(x)`` against one-hot targets ``y``."""
        return np.mean(self.predict(x) == np.argmax(y, axis=1))

    def reg_loss(self):
        """Return the L1 + L2 penalty summed over both weight matrices."""
        l1_term = np.sum(np.abs(self.w1)) + np.sum(np.abs(self.w2))
        l2_term = np.sum(self.w1 ** 2) + np.sum(self.w2 ** 2)
        return self.l1 * l1_term + self.l2 / 2 * l2_term

    def update_val_loss(self, x_val, y_val):
        """Append the current cross-entropy loss on ``(x_val, y_val)`` to ``val_losses``."""
        z = self.forpass(x_val)
        a = self.softmax(z)
        a = np.clip(a, 1e-10, 1 - 1e-10)
        val_loss = np.sum(-y_val * np.log(a))
        self.val_losses.append((val_loss + self.reg_loss()) / len(y_val))


class ConvolutionNetwork:
    """Small CNN classifier built from TensorFlow primitives.

    Architecture (see ``forpass``): one ``kernel_size`` x ``kernel_size``
    convolution with ``n_kernels`` filters and 'SAME' padding, ReLU, 2x2
    max-pooling, a dense ReLU layer of ``units`` neurons, and a linear output
    layer producing logits.

    Attributes:
        n_kernels: Number of convolution filters.
        kernel_size: Side length of the square convolution kernel (3).
        optimizer: SGD optimizer, created in ``fit``.
        conv_w, conv_b: Convolution kernel and bias variables.
        w1, b1: Dense hidden-layer weights and biases.
        w2, b2: Output-layer weights and biases.
        units: Number of neurons in the dense hidden layer.
        batch_size: Maximum number of samples per mini-batch.
        losses: Per-epoch mean training batch loss.
        val_losses: Per-epoch validation loss (empty if no validation data).
        lr: Learning rate.
    """

    def __init__(self, n_kernels=10, units=10, batch_size=32, learning_rate=0.1):
        self.n_kernels = n_kernels
        self.kernel_size = 3
        self.optimizer = None
        self.conv_w = None
        self.conv_b = None
        self.w1 = None
        self.b1 = None
        self.w2 = None
        self.b2 = None
        self.units = units
        self.batch_size = batch_size
        self.a1 = None
        self.losses = []
        self.val_losses = []
        self.lr = learning_rate

    def forpass(self, x):
        """Return the output logits for the image batch ``x``."""
        c_out = tf.nn.conv2d(x, self.conv_w, strides=1, padding='SAME') + self.conv_b
        r_out = tf.nn.relu(c_out)
        p_out = tf.nn.max_pool2d(r_out, ksize=2, strides=2, padding='VALID')
        # Flatten every sample's feature maps into one row.
        f_out = tf.reshape(p_out, [x.shape[0], -1])
        z1 = tf.matmul(f_out, self.w1) + self.b1
        a1 = tf.nn.relu(z1)
        z2 = tf.matmul(a1, self.w2) + self.b2
        return z2

    def init_weights(self, input_shape, n_classes):
        """Create all trainable variables (Glorot-uniform weights, zero biases).

        Args:
            input_shape: Shape of the training inputs, read as
                ``(n_samples, height, width, channels)``.
            n_classes: Number of output classes.
        """
        _, height, width, channels = input_shape
        g = tf.initializers.glorot_uniform()
        k = self.kernel_size
        self.conv_w = tf.Variable(g((k, k, channels, self.n_kernels)))
        self.conv_b = tf.Variable(np.zeros(self.n_kernels), dtype=float)
        # 'SAME' convolution keeps height/width; the 2x2 stride-2 'VALID'
        # pooling halves each (rounding down), e.g. 28x28 -> 14x14.
        n_features = (height // 2) * (width // 2) * self.n_kernels
        self.w1 = tf.Variable(g((n_features, self.units)))
        self.b1 = tf.Variable(np.zeros(self.units), dtype=float)
        self.w2 = tf.Variable(g((self.units, n_classes)))
        self.b2 = tf.Variable(np.zeros(n_classes), dtype=float)

    def fit(self, x, y, x_val=None, y_val=None, epochs=100):
        """Train for ``epochs`` passes over ``(x, y)`` in mini-batches.

        Prints the epoch number and one ``.`` per batch as progress output.
        The validation loss is tracked only when both ``x_val`` and ``y_val``
        are supplied.

        Args:
            x: Training images, ``(n_samples, height, width, channels)``.
            y: One-hot training targets.
            x_val: Optional validation images.
            y_val: Optional one-hot validation targets.
            epochs: Number of passes over the training data.
        """
        self.init_weights(x.shape, y.shape[1])
        self.optimizer = tf.optimizers.SGD(learning_rate=self.lr)
        has_val = x_val is not None and y_val is not None
        for i in range(epochs):
            print('에포크', i, end=' ')
            batch_losses = []
            for x_batch, y_batch in self.gen_batch(x, y):
                print('.', end='')
                self.training(x_batch, y_batch)
                batch_losses.append(self.get_loss(x_batch, y_batch))
            self.losses.append(np.mean(batch_losses))
            if has_val:
                self.val_losses.append(self.get_loss(x_val, y_val))

    def gen_batch(self, x, y):
        """Yield ``(x_batch, y_batch)`` pairs from a fresh random shuffle.

        Every sample is yielded exactly once; the last batch is shorter when
        ``len(x)`` is not a multiple of ``batch_size``.
        """
        length = len(x)
        indexes = np.random.permutation(np.arange(length))
        x = x[indexes]
        y = y[indexes]
        for start in range(0, length, self.batch_size):
            end = start + self.batch_size
            yield x[start:end], y[start:end]

    def training(self, x, y):
        """Apply one SGD step on the mean softmax cross-entropy of the batch."""
        with tf.GradientTape() as tape:
            z = self.forpass(x)
            loss = tf.nn.softmax_cross_entropy_with_logits(y, z)
            loss = tf.reduce_mean(loss)
        weights_list = [self.conv_w, self.conv_b, self.w1, self.b1, self.w2, self.b2]
        grads = tape.gradient(loss, weights_list)
        self.optimizer.apply_gradients(zip(grads, weights_list))

    def predict(self, x):
        """Return the index of the largest logit for every sample in ``x``."""
        z = self.forpass(x)
        return np.argmax(z.numpy(), axis=1)

    def score(self, x, y):
        """Return the accuracy of ``predict(x)`` against one-hot targets ``y``."""
        return np.mean(self.predict(x) == np.argmax(y, axis=1))

    def get_loss(self, x, y):
        """Return the mean softmax cross-entropy on ``(x, y)`` as a NumPy scalar."""
        z = self.forpass(x)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, z))
        return loss.numpy()



import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, SimpleRNN
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Vocabulary size passed to the IMDB loader. It is also the one-hot depth of
# every token and therefore the feature dimension of the RNN input.
NUM_WORDS = 100
(x_train_all, y_train_all), (x_test, y_test) = imdb.load_data(skip_top=20, num_words=NUM_WORDS)

# Drop word indices 0-2 from every training review.
# NOTE(review): presumably these are the loader's reserved padding / start /
# out-of-vocabulary markers -- confirm against the Keras IMDB documentation.
for i, review in enumerate(x_train_all):
    x_train_all[i] = [w for w in review if w > 2]

# Reverse lookup table: word index -> word.
word_to_index = imdb.get_word_index()
index_to_word = {index: word for word, index in word_to_index.items()}

# Shuffle reproducibly and split off validation data: the first 20000
# shuffled reviews are used for training, the remainder for validation.
np.random.seed(42)
random_index = np.random.permutation(len(x_train_all))
n_train = 20000
x_train = x_train_all[random_index[:n_train]]
y_train = y_train_all[random_index[:n_train]]
x_val = x_train_all[random_index[n_train:]]
y_val = y_train_all[random_index[n_train:]]

# Pad / truncate every review to exactly `maxlen` tokens.
maxlen = 100
x_train_seq = pad_sequences(x_train, maxlen=maxlen)
x_val_seq = pad_sequences(x_val, maxlen=maxlen)

# One-hot encode each token. The depth is passed explicitly so that both
# arrays always match the model's input shape, whatever indices occur.
x_train_onehot = to_categorical(x_train_seq, num_classes=NUM_WORDS)
x_val_onehot = to_categorical(x_val_seq, num_classes=NUM_WORDS)

# Binary classifier: a 32-unit SimpleRNN followed by one sigmoid output.
model = Sequential()
model.add(SimpleRNN(32, input_shape=(maxlen, NUM_WORDS)))
model.add(Dense(1, activation='sigmoid'))
model.summary()

# Output of model.summary():
#
# Model: "sequential"
# _________________________________________________________________
# Layer (type)                 Output Shape              Param #
# =================================================================
# simple_rnn (SimpleRNN)       (None, 32)                4256
# _________________________________________________________________
# dense (Dense)                (None, 1)                 33
# =================================================================
# Total params: 4,289
# Trainable params: 4,289
# Non-trainable params: 0
# _________________________________________________________________
