<figure>
  <IMG SRC="https://upload.wikimedia.org/wikipedia/commons/thumb/d/d5/Fachhochschule_Südwestfalen_20xx_logo.svg/320px-Fachhochschule_Südwestfalen_20xx_logo.svg.png" WIDTH=250 ALIGN="right">
</figure>

# Machine Learning
### Sommersemester 2023
Prof. Dr. Heiner Giefers

In [None]:
import numpy as np
import matplotlib.pyplot as plt


def make_spirals(N=100, dim=2, classes=2, random_state=0):
    """Generate a 2-D toy data set of noisy spiral arms, one arm per class.

    Parameters
    ----------
    N : int
        Number of points per class.
    dim : int
        Number of feature columns. Only ``dim == 2`` is supported because every
        point is built from a (sin, cos) pair.
    classes : int
        Number of spiral arms, i.e. distinct class labels ``0 .. classes-1``.
    random_state : int
        Seed handed to ``np.random.seed``. This reseeds NumPy's *global*
        generator, so later ``np.random`` calls are affected as well.

    Returns
    -------
    X : ndarray of shape (N*classes, 2)
        Point coordinates; the points of class ``j`` occupy rows
        ``N*j .. N*(j+1)-1``.
    y : ndarray of shape (N*classes,), dtype uint8
        Class label of every row of ``X``.

    Raises
    ------
    ValueError
        If ``dim`` is not 2.
    """
    if dim != 2:
        # Previously this surfaced as an opaque broadcasting error further down.
        raise ValueError("make_spirals only supports dim=2, got dim=%r" % (dim,))

    np.random.seed(random_state)
    X = np.zeros((N*classes, dim))
    y = np.zeros(N*classes, dtype='uint8')

    # Radius and angular span per arm do not depend on the class index.
    r = np.linspace(0.0, 1, N)
    k = classes + 1
    for j in range(classes):
        rows = slice(N*j, N*(j+1))
        # Angle grows along the arm; Gaussian jitter makes the arms fuzzy.
        t = np.linspace(j*k, (j+1)*k, N) + np.random.randn(N)*0.2
        X[rows] = np.c_[r*np.sin(t), r*np.cos(t)]
        y[rows] = j
    return X, y

# Data-set configuration. `dim` and `classes` are reused further down when the
# network weight matrices are created.
N, dim, classes, random_state = 60, 2, 3, 0

# Generate the spiral data and show it, coloured by class label.
X, y = make_spirals(N, dim, classes, random_state)
fig = plt.figure()
plt.scatter(X[:, 0], X[:, 1], s=40, c=y, cmap=plt.cm.Spectral)
plt.xlim(-1, 1)
plt.ylim(-1, 1)



In [None]:
# Baseline: a linear one-vs-rest logistic regression on the raw 2-D features.
# The `multi_class='ovr'` argument of LogisticRegression is deprecated in recent
# scikit-learn releases; wrapping the estimator in OneVsRestClassifier is the
# documented replacement and fits one binary classifier per class.
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

clf = OneVsRestClassifier(LogisticRegression(random_state=0)).fit(X, y)
clf.score(X, y)  # mean accuracy on the training data

In [None]:
# Decision regions of the logistic-regression baseline: classify every point of
# a dense grid around the data and draw the predicted class as a filled contour.
n_classes = len(np.unique(y))           # one colour per class present in y
cmap = plt.get_cmap('Set1', n_classes)
# Mesh resolution. Not called `h`, because `h` is the hidden-layer width used
# when the network models are initialised.
grid_step = 0.02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, grid_step),
                     np.arange(y_min, y_max, grid_step))
grid_points = np.c_[xx.ravel(), yy.ravel()]
Z = clf.predict(grid_points).reshape(xx.shape)
fig = plt.figure()
plt.contourf(xx, yy, Z, cmap=cmap, alpha=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, ec='black', cmap=cmap)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max());


In [None]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
    return 1/(1+np.exp(-x))

def sigmoid_grad(x):
    """Return ``x * (1 - x)`` element-wise.

    This is the derivative of the logistic function expressed in terms of its
    *output*: the training loop in this notebook passes the already activated
    hidden layers, not the pre-activations.
    """
    complement = 1 - x
    return x * complement

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_grad(x):
    """Return the ReLU derivative mask: 1 where ``x > 0``, otherwise 0.

    The argument is left untouched (the previous implementation overwrote the
    caller's array in place) and a new array with the input's dtype is
    returned. Scalars and sequences are accepted as well; they are converted
    with ``np.asarray`` first.
    """
    x = np.asarray(x)
    return (x > 0).astype(x.dtype)

In [None]:

#function to train a three layer neural net with either RELU or sigmoid nonlinearity via vanilla grad descent

def three_layer_net(NONLINEARITY,X,y, model, step_size, reg, epochs=1000):
    """Train a three-layer softmax classifier with full-batch gradient descent.

    Both hidden layers use the same nonlinearity. Every epoch performs one
    forward pass over all rows of ``X``, evaluates the L2-regularised average
    cross-entropy loss, back-propagates it and takes a single gradient step.

    Parameters
    ----------
    NONLINEARITY : str
        ``'RELU'`` or ``'SIGM'``; selects the hidden-layer activation.
    X : ndarray
        Inputs, one example per row.
    y : integer array
        Class index of every row of ``X`` (used to index the score columns).
    model : dict
        Must provide the arrays ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``,
        ``'W3'`` and ``'b3'``. These arrays are updated **in place**, so the
        dict holds the trained parameters afterwards.
    step_size : float
        Learning rate of the gradient step.
    reg : float
        L2 regularisation strength applied to the three weight matrices.
    epochs : int
        Number of gradient steps.

    Returns
    -------
    tuple
        ``(plot_array_1, plot_array_2, plot_array_3, W1, W2, W3, b1, b2, b3)``.
        ``plot_array_1`` / ``plot_array_2`` hold, per epoch, the sum of the
        absolute entries of the W1 / W2 data gradient divided by the sum of
        that matrix's two dimensions (measured before the regularisation term
        is added). ``plot_array_3`` holds the per-epoch training accuracy.

    Raises
    ------
    ValueError
        If ``NONLINEARITY`` is neither ``'RELU'`` nor ``'SIGM'``.
    """
    # Fail early with a clear message instead of an UnboundLocalError on
    # `scores` inside the loop.
    if NONLINEARITY == 'RELU':
        activation = relu
    elif NONLINEARITY == 'SIGM':
        activation = sigmoid
    else:
        raise ValueError("NONLINEARITY must be 'RELU' or 'SIGM', got %r" % (NONLINEARITY,))

    W1, W2, W3 = model['W1'], model['W2'], model['W3']
    b1, b2, b3 = model['b1'], model['b2'], model['b3']

    def forward():
        # Forward pass with the current parameters.
        hidden_layer = activation(np.dot(X, W1) + b1)
        hidden_layer2 = activation(np.dot(hidden_layer, W2) + b2)
        scores = np.dot(hidden_layer2, W3) + b3
        return hidden_layer, hidden_layer2, scores

    def backprop_activation(upstream, activated):
        # Push the upstream gradient back through the hidden nonlinearity,
        # given that layer's activated output.
        if NONLINEARITY == 'RELU':
            upstream[activated <= 0] = 0
            return upstream
        return upstream * sigmoid_grad(activated)

    def grad_magnitude(dW):
        # Monitoring statistic: sum of |dW| divided by (rows + columns).
        return np.sum(np.abs(dW)) / sum(dW.shape)

    num_examples = X.shape[0]
    example_idx = np.arange(num_examples)
    plot_array_1 = []
    plot_array_2 = []
    plot_array_3 = []

    # gradient descent loop
    for i in range(epochs):
        hidden_layer, hidden_layer2, scores = forward()

        # Softmax; subtracting the row maximum leaves the probabilities
        # unchanged mathematically but keeps np.exp from overflowing.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True) # [N x K]

        # average cross-entropy loss plus L2 regularisation
        correct_logprobs = -np.log(probs[example_idx, y])
        data_loss = np.sum(correct_logprobs)/num_examples
        reg_loss = 0.5*reg*np.sum(W1*W1) + 0.5*reg*np.sum(W2*W2)+ 0.5*reg*np.sum(W3*W3)
        loss = data_loss + reg_loss

        if i % 500 == 0:
            print("iteration %d: loss %f" % (i, loss))

        # training accuracy of the current parameters
        predicted_class = np.argmax(scores, axis=1)
        plot_array_3.append(np.mean(predicted_class == y))

        # gradient of the loss w.r.t. the scores (reuses the probs buffer)
        dscores = probs
        dscores[example_idx, y] -= 1
        dscores /= num_examples

        # output layer
        dW3 = np.dot(hidden_layer2.T, dscores)
        db3 = np.sum(dscores, axis=0, keepdims=True)

        # second hidden layer
        dhidden2 = backprop_activation(np.dot(dscores, W3.T), hidden_layer2)
        dW2 = np.dot(hidden_layer.T, dhidden2)
        db2 = np.sum(dhidden2, axis=0)

        # first hidden layer
        dhidden = backprop_activation(np.dot(dhidden2, W2.T), hidden_layer)
        dW1 = np.dot(X.T, dhidden)
        db1 = np.sum(dhidden, axis=0)

        # record gradient magnitudes before the regularisation term is added
        plot_array_2.append(grad_magnitude(dW2))
        plot_array_1.append(grad_magnitude(dW1))

        # add regularization
        dW3 += reg * W3
        dW2 += reg * W2
        dW1 += reg * W1

        # parameter update (in place, so `model` sees the new values too)
        W1 += -step_size * dW1
        b1 += -step_size * db1
        W2 += -step_size * dW2
        b2 += -step_size * db2
        W3 += -step_size * dW3
        b3 += -step_size * db3

    # evaluate training set accuracy with the final parameters
    _, _, scores = forward()
    predicted_class = np.argmax(scores, axis=1)
    print ('training accuracy: %.2f' % (np.mean(predicted_class == y)))
    return plot_array_1, plot_array_2, plot_array_3, W1, W2, W3, b1, b2, b3


In [None]:
# Build a fresh toy model and train the sigmoid variant of the network.

h = 50    # width of hidden layer 1
h2 = 50   # width of hidden layer 2
num_train_examples = X.shape[0]

# Dict entries are evaluated top to bottom, so the random draws still happen
# in the order W1, W2, W3.
model = {
    'h': h,
    'h2': h2,
    'W1': 0.1 * np.random.randn(dim, h),
    'b1': np.zeros((1, h)),
    'W2': 0.1 * np.random.randn(h, h2),
    'b2': np.zeros((1, h2)),
    'W3': 0.1 * np.random.randn(h2, classes),
    'b3': np.zeros((1, classes)),
}

(sigm_array_1, sigm_array_2, sigm_array_3,
 s_W1, s_W2, s_W3,
 s_b1, s_b2, s_b3) = three_layer_net('SIGM', X, y, model,
                                     step_size=1e-1, reg=1e-3, epochs=15000)

In [None]:
# Start again from newly drawn weights and train the ReLU variant.
# `h` and `h2` are the hidden-layer widths set in the sigmoid training cell.

# Dict entries are evaluated top to bottom, so the random draws still happen
# in the order W1, W2, W3.
model = {
    'h': h,
    'h2': h2,
    'W1': 0.1 * np.random.randn(dim, h),
    'b1': np.zeros((1, h)),
    'W2': 0.1 * np.random.randn(h, h2),
    'b2': np.zeros((1, h2)),
    'W3': 0.1 * np.random.randn(h2, classes),
    'b3': np.zeros((1, classes)),
}

(relu_array_1, relu_array_2, relu_array_3,
 r_W1, r_W2, r_W3,
 r_b1, r_b2, r_b3) = three_layer_net('RELU', X, y, model,
                                     step_size=1e-1, reg=1e-3, epochs=5000)

In [None]:
# Compare both networks: per-epoch gradient magnitude of the first two weight
# matrices (left axis) and training accuracy (right axis).
fig, ax1 = plt.subplots(figsize=(15,8))
ax2 = ax1.twinx()  # second y-axis that shares the same x-axis

p0 = ax1.plot(np.array(sigm_array_1), ":", c='r', label="Sigmoid 1. Schicht")
p1 = ax1.plot(np.array(sigm_array_2), ":", c='g', label="Sigmoid 2. Schicht")
p2 = ax2.plot(np.array(sigm_array_3), ":", c='b', label="Sigmoid Accuracy")
p3 = ax1.plot(np.array(relu_array_1), "-", c='r', label="ReLU 1. Schicht")
p4 = ax1.plot(np.array(relu_array_2), "-", c='g', label="ReLU 2. Schicht")
p5 = ax2.plot(np.array(relu_array_3), "-", c='b', label="ReLU Accuracy")

# The curves show gradient magnitudes and accuracy, not weights, so the title
# and axis labels say so.
ax1.set_title('Normierte Summe der absoluten Gradienten und Trainingsgenauigkeit')
ax1.set_xlabel('Epoche')
ax1.set_ylabel('Normierte Summe der absoluten Gradienten')
ax2.set_ylabel('Accuracy')

# The lines live on two different axes; collect all handles to get one legend.
handles = p0+p1+p2+p3+p4+p5
labels = [line.get_label() for line in handles]
ax1.legend(handles, labels, loc=5);

In [None]:

# Decision regions of the trained sigmoid network: run its forward pass on a
# dense grid around the data and draw the arg-max class as a filled contour.
n_classes = len(np.unique(y))           # one colour per class present in y
cmap = plt.get_cmap('Set1', n_classes)
# Mesh resolution. Not called `h`: that name holds the hidden-layer width, and
# overwriting it here would break a later re-run of the model-initialisation cells.
grid_step = 0.02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, grid_step),
                     np.arange(y_min, y_max, grid_step))

# Forward pass with the sigmoid network's parameters, layer by layer.
grid_points = np.c_[xx.ravel(), yy.ravel()]
grid_hidden1 = sigmoid(np.dot(grid_points, s_W1) + s_b1)
grid_hidden2 = sigmoid(np.dot(grid_hidden1, s_W2) + s_b2)
grid_scores = np.dot(grid_hidden2, s_W3) + s_b3
Z = np.argmax(grid_scores, axis=1).reshape(xx.shape)

fig = plt.figure()
plt.contourf(xx, yy, Z, cmap=cmap, alpha=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, ec='black', cmap=cmap)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max());


### Referenzen

[1] Andrej Karpathy, ["*Neural Networks Case Study*"](https://cs231n.github.io/neural-networks-case-study/), Kurs CS231n: "Convolutional Neural Networks for Visual Recognition", Stanford University