<center><h1>1-ab: Introduction to Neural Networks</h1></center>
<b>Students:
<pre>
VU Anh Thu            
LE Thi Minh Nguyet    


In [None]:
!wget https://github.com/rdfia/rdfia.github.io/raw/master/data/2-ab.zip
!unzip -j 2-ab.zip
!wget https://github.com/rdfia/rdfia.github.io/raw/master/code/2-ab/utils-data.py

In [None]:
import math
import torch
from torch.autograd import Variable
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%run 'utils-data.py'

# Part 1: Forward and backward passes "by hand"

In [None]:
def init_params(nx, nh, ny):
    """
    Draw the initial parameters of the one-hidden-layer network.

    nx, nh, ny: integers (input, hidden and output sizes)
    out params: dictionary with the weights "Wh" (nh, nx) and "Wy" (ny, nh)
                and the biases "bh" (1, nh) and "by" (1, ny), all sampled
                from a normal distribution scaled by 0.3
    """
    scale = 0.3

    # The dictionary order is also the order of the random draws.
    shapes = {
        "Wh": (nh, nx),
        "Wy": (ny, nh),
        "bh": (1, nh),
        "by": (1, ny),
    }

    return {name: scale*torch.randn(shape) for name, shape in shapes.items()}

In [None]:
def forward(params, X):
    """
    Forward pass of the network: affine -> tanh -> affine -> softmax.

    params: dictionary holding the weights "Wh", "Wy" and the biases "bh", "by"
    X: (n_batch, dimension)
    returns: (yhat, outputs) where yhat contains the softmax probabilities
             (one row per sample) and outputs is a dictionary caching "X",
             "htilde", "h", "ytilde" and "yhat" for the backward pass
    """
    outputs = {}

    # TODO
    outputs["X"] = X
    outputs["htilde"] = torch.mm(X, params['Wh'].t()) + params["bh"]
    outputs["h"] = torch.tanh(outputs["htilde"])
    outputs["ytilde"] = torch.mm(outputs["h"], params['Wy'].t()) + params["by"]
    # Softmax does not change when a constant is subtracted from every entry of
    # a row. Subtracting the row maximum keeps every exponent <= 0, so exp()
    # cannot overflow to inf (which would otherwise produce inf/inf = NaN).
    # The shift is detached: it is a constant and must not affect autograd.
    row_max, _ = torch.max(outputs["ytilde"], dim=1, keepdim=True)
    exp_ytilde = torch.exp(outputs["ytilde"] - row_max.detach())
    outputs["yhat"] = exp_ytilde/torch.sum(exp_ytilde, dim=1, keepdim=True)
    # END TODO

    return outputs['yhat'], outputs

In [None]:
def loss_accuracy(Yhat, Y):
    """
    Mean cross-entropy loss and accuracy of a batch of predictions.

    Yhat: (n_batch, n_classes) predicted probabilities
    Y: (n_batch, n_classes) targets, compared to Yhat through their row-wise argmax
    returns: (L, acc) as 0-dimensional tensors
    """
    # TODO
    n_batch = Y.shape[0]

    # A probability that underflows to exactly 0 would give log(0) = -inf and
    # 0 * -inf = NaN for the classes where Y is 0. Clamping to the smallest
    # positive normal number keeps the loss finite and leaves every other
    # value untouched.
    safe_Yhat = torch.clamp(Yhat, min=torch.finfo(Yhat.dtype).tiny)
    L = -torch.sum(Y*torch.log(safe_Yhat))/n_batch

    _, indsY = torch.max(Y, 1)
    _, indsYhat = torch.max(Yhat, 1)
    acc = torch.sum(indsY == indsYhat).float()/n_batch
    # END TODO

    return L, acc

In [None]:
def backward(params, outputs, Y):
    """
    Manual backward pass: gradients of the mean cross-entropy loss.

    params: dictionary of parameters (only "Wy" is read here)
    outputs: dictionary produced by the forward pass; "X", "h" and "yhat" are read
    Y: (n_batch, n_classes) targets
    returns: dictionary with the gradients for "Wy", "Wh", "by" and "bh",
             each with the same shape as the corresponding parameter
    """
    grads = {}

    # TODO
    # The loss is averaged over the batch, so the gradient is averaged too.
    # Without the division the step would grow with the batch size and would
    # not match what autograd computes for the same mean loss.
    n_batch = Y.shape[0]

    # Softmax + cross-entropy: dL/dytilde = (yhat - Y) / n_batch
    grad_ytilde = (outputs['yhat'] - Y) / n_batch
    grads["Wy"] = torch.mm(grad_ytilde.t(), outputs['h'])
    # Back through the tanh layer: tanh'(htilde) = 1 - h**2
    grad_htilde = grad_ytilde.mm(params["Wy"]) * (1-outputs["h"]**2)
    grads["Wh"] = torch.mm(grad_htilde.t(), outputs["X"])
    # Biases are shared by all samples: sum the per-sample gradients.
    grads["by"] = torch.sum(grad_ytilde, dim=0, keepdim=True)
    grads["bh"] = torch.sum(grad_htilde, dim=0, keepdim=True)
    # END TODO

    return grads

In [None]:
def sgd(params, grads, eta):
    """
    One step of gradient descent on the four network parameters.

    params: dictionary with the entries "Wh", "Wy", "bh", "by"
    grads: dictionary with the matching gradients under the same keys
    eta: learning rate
    returns: the same dictionary, whose entries now hold the updated tensors
    """
    for name in ("Wh", "Wy", "bh", "by"):
        # Out-of-place update: the entry is rebound to a new tensor.
        step = eta*grads[name]
        params[name] = params[name] - step

    return params

## Global learning procedure "by hand"

In [None]:
# init: load the dataset and fix the hyperparameters
# NOTE(review): CirclesData is not defined in this notebook; presumably it is
# provided by the `%run 'utils-data.py'` cell -- confirm.
data = CirclesData()
data.plot_data()
N = data.Xtrain.shape[0]     # number of training samples
Nbatch = 10                  # mini-batch size
nx = data.Xtrain.shape[1]    # input dimension
nh = 10                      # number of hidden units
ny = data.Ytrain.shape[1]    # number of label columns
eta = 0.03                   # learning rate

params = init_params(nx, nh, ny)

# per-epoch history: [train accuracy, test accuracy, train loss, test loss]
curves = [[],[], [], []]

# epoch
for iteration in range(150):

    # permute: reshuffle the training set at every epoch
    perm = np.random.permutation(N)
    Xtrain = data.Xtrain[perm, :]
    Ytrain = data.Ytrain[perm, :]

    # batches: only the N // Nbatch full batches are visited; if N is not a
    # multiple of Nbatch the remaining samples are skipped for this epoch
    for j in range(N // Nbatch):

        indsBatch = range(j * Nbatch, (j+1) * Nbatch)
        X = Xtrain[indsBatch, :]
        Y = Ytrain[indsBatch, :]

        # Optimization algorithm on the batch (X,Y):
        # forward pass, manual gradients, then one gradient-descent step
        # TODO
        _, outputs = forward(params, X)
        grads = backward(params, outputs, Y)
        params = sgd(params, grads, eta)
        # END TODO


    # end of epoch: evaluate on the full (unshuffled) train and test sets
    Yhat_train, _ = forward(params, data.Xtrain)
    Yhat_test, _ = forward(params, data.Xtest)
    Ltrain, acctrain = loss_accuracy(Yhat_train, data.Ytrain)
    Ltest, acctest = loss_accuracy(Yhat_test, data.Ytest)
    # predictions on data.Xgrid, used only for the plot below
    Ygrid, _ = forward(params, data.Xgrid)

    title = 'Iter {}: Acc train {:.1f}% ({:.2f}), acc test {:.1f}% ({:.2f})'.format(iteration, acctrain*100, Ltrain, acctest*100, Ltest)
    data.plot_data_with_grid(Ygrid, title)

    curves[0].append(acctrain)
    curves[1].append(acctest)
    curves[2].append(Ltrain)
    curves[3].append(Ltest)

# learning curves: accuracies and losses share the same axes
fig = plt.figure()
plt.plot(curves[0], label="acc. train")
plt.plot(curves[1], label="acc. test")
plt.plot(curves[2], label="loss train")
plt.plot(curves[3], label="loss test")
plt.legend()
plt.show()

# Part 2 : Simplification of the backward pass with `torch.autograd`



In [None]:
def init_params(nx, nh, ny):
    """
    Draw the initial network parameters and register them with autograd.

    nx, nh, ny: integers (input, hidden and output sizes)
    out params: dictionary with the leaf tensors "Wh" (nh, nx), "Wy" (ny, nh),
                "bh" (1, nh) and "by" (1, ny), each with requires_grad=True
    """
    params = {}

    # Entries are listed in the order in which the random draws are made.
    layout = (
        ("Wh", (nh, nx)),
        ("Wy", (ny, nh)),
        ("bh", (1, nh)),
        ("by", (1, ny)),
    )
    for name, shape in layout:
        tensor = 0.3*torch.randn(shape)
        # activate autograd on the freshly created tensor
        tensor.requires_grad = True
        params[name] = tensor

    return params

The function `forward` remains unchanged from the previous part.

The function `backward` is no longer used because of "autograd".

In [None]:
def sgd(params, eta):
    """
    Gradient-descent step using the gradients accumulated by autograd.

    params: dictionary with the entries "Wh", "Wy", "bh", "by"; each tensor
            must already carry a `.grad`
    eta: learning rate
    returns: the same dictionary; tensors are updated in place and their
             gradient accumulators are reset to zero
    """
    # The update itself must not be recorded in the computational graph.
    with torch.no_grad():
        for name in ("Wh", "Wy", "bh", "by"):
            weights = params[name]
            weights.sub_(eta*weights.grad)
            # clear the accumulator so the next backward() starts from zero
            weights.grad.zero_()

    return params

## Global learning procedure with autograd

In [None]:
# init: load the dataset and fix the hyperparameters
# NOTE(review): CirclesData is not defined in this notebook; presumably it is
# provided by the `%run 'utils-data.py'` cell -- confirm.
data = CirclesData()
data.plot_data()
N = data.Xtrain.shape[0]     # number of training samples
Nbatch = 10                  # mini-batch size
nx = data.Xtrain.shape[1]    # input dimension
nh = 10                      # number of hidden units
ny = data.Ytrain.shape[1]    # number of label columns
eta = 0.03                   # learning rate

params = init_params(nx, nh, ny)

# per-epoch history: [train accuracy, test accuracy, train loss, test loss]
curves = [[],[], [], []]

# epoch
for iteration in range(150):

    # permute: reshuffle the training set at every epoch
    perm = np.random.permutation(N)
    Xtrain = data.Xtrain[perm, :]
    Ytrain = data.Ytrain[perm, :]

    # batches: only the N // Nbatch full batches are visited
    for j in range(N // Nbatch):

        indsBatch = range(j * Nbatch, (j+1) * Nbatch)
        X = Xtrain[indsBatch, :]
        Y = Ytrain[indsBatch, :]

        # Optimization algorithm on the batch (X,Y):
        # L.backward() fills the .grad of every parameter, then sgd() applies
        # the step and resets the accumulators
        # TODO
        Yhat, _ = forward(params, X)
        L, _ = loss_accuracy(Yhat, Y)
        L.backward()
        params = sgd(params, eta)
        # END TODO

    # end of epoch: evaluate on the full (unshuffled) train and test sets
    Yhat_train, _ = forward(params, data.Xtrain)
    Yhat_test, _ = forward(params, data.Xtest)
    Ltrain, acctrain = loss_accuracy(Yhat_train, data.Ytrain)
    Ltest, acctest = loss_accuracy(Yhat_test, data.Ytest)
    # predictions on data.Xgrid, used only for the plot below
    Ygrid, _ = forward(params, data.Xgrid)

    title = 'Iter {}: Acc train {:.1f}% ({:.2f}), acc test {:.1f}% ({:.2f})'.format(iteration, acctrain*100, Ltrain, acctest*100, Ltest)
    # detach() is used to remove the predictions from the computational graph in autograd
    data.plot_data_with_grid(Ygrid.detach(), title)

    # store detached values so the history does not keep the graphs alive
    curves[0].append(acctrain.detach())
    curves[1].append(acctest.detach())
    curves[2].append(Ltrain.detach())
    curves[3].append(Ltest.detach())

# learning curves: accuracies and losses share the same axes
fig = plt.figure()
plt.plot(curves[0], label="acc. train")
plt.plot(curves[1], label="acc. test")
plt.plot(curves[2], label="loss train")
plt.plot(curves[3], label="loss test")
plt.legend()
plt.show()

# Part 3 : Simplification of the forward pass with `torch.nn`

`init_params` and `forward` are replaced by the `init_model` function which defines the network architecture and the loss.

In [None]:
def init_model(nx, nh, ny):
    """
    Build the network (Linear -> Tanh -> Linear) and its loss.

    nx, nh, ny: integers (input, hidden and output sizes)
    returns: (model, loss) where model is a torch.nn.Sequential that outputs
             raw scores (no softmax layer) and loss is a CrossEntropyLoss
    """
    # The hidden layer is created first, then the output layer.
    hidden_layer = torch.nn.Linear(nx, nh)
    activation = torch.nn.Tanh()
    output_layer = torch.nn.Linear(nh, ny)

    model = torch.nn.Sequential(hidden_layer, activation, output_layer)
    loss = torch.nn.CrossEntropyLoss()

    return model, loss

In [None]:
def loss_accuracy(loss, Ytilde, Y):
    """
    Loss and accuracy computed from the raw network scores.

    loss: callable applied as loss(Ytilde, class_indices)
    Ytilde: (n_batch, n_classes) scores before softmax
    Y: (n_batch, n_classes) targets; the row-wise argmax is used as class index
    returns: (L, acc) as 0-dimensional tensors
    """
    target_classes = torch.max(Y, 1)[1]

    # Predicted class = most probable class after softmax.
    probabilities = torch.nn.Softmax(dim=1)(Ytilde)
    predicted_classes = torch.max(probabilities, 1)[1]

    n_correct = torch.sum(target_classes == predicted_classes)
    acc = n_correct.float()/Y.shape[0]
    L = loss(Ytilde, target_classes)

    return L, acc

In [None]:
def sgd(model, eta):
    """
    Gradient-descent step on every parameter of a torch.nn model.

    model: module whose parameters already carry a `.grad`
    eta: learning rate
    returns: the same model, updated in place, with its gradients reset
    """
    # The update itself must not be recorded in the computational graph.
    with torch.no_grad():
        for weights in model.parameters():
            weights.sub_(eta*weights.grad)

    # reset the gradient accumulators before the next backward pass
    model.zero_grad()

    return model

## Global learning procedure with autograd and `torch.nn`

In [None]:
# init: load the dataset and fix the hyperparameters
# NOTE(review): CirclesData is not defined in this notebook; presumably it is
# provided by the `%run 'utils-data.py'` cell -- confirm.
data = CirclesData()
data.plot_data()
N = data.Xtrain.shape[0]     # number of training samples
Nbatch = 10                  # mini-batch size
nx = data.Xtrain.shape[1]    # input dimension
nh = 10                      # number of hidden units
ny = data.Ytrain.shape[1]    # number of label columns
eta = 0.03                   # learning rate

model, loss = init_model(nx, nh, ny)

# per-epoch history: [train accuracy, test accuracy, train loss, test loss]
curves = [[],[], [], []]

# epoch
for iteration in range(150):

    # permute: reshuffle the training set at every epoch
    perm = np.random.permutation(N)
    Xtrain = data.Xtrain[perm, :]
    Ytrain = data.Ytrain[perm, :]

    # batches: only the N // Nbatch full batches are visited
    for j in range(N // Nbatch):

        indsBatch = range(j * Nbatch, (j+1) * Nbatch)
        X = Xtrain[indsBatch, :]
        Y = Ytrain[indsBatch, :]

        # optimization algorithm on the batch (X,Y):
        # the model returns raw scores; sgd() applies the step and resets the gradients
        # TODO
        Ytilde = model(X)
        L, _ = loss_accuracy(loss, Ytilde, Y)
        L.backward()
        model = sgd(model, eta)
        # END TODO

    # end of epoch: evaluate on the full (unshuffled) train and test sets
    Ytilde_train = model(data.Xtrain)
    Ytilde_test = model(data.Xtest)
    Ltrain, acctrain = loss_accuracy(loss, Ytilde_train, data.Ytrain)
    Ltest, acctest = loss_accuracy(loss, Ytilde_test, data.Ytest)
    # raw scores on data.Xgrid, used only for the plot below
    Ygrid = model(data.Xgrid)

    title = 'Iter {}: Acc train {:.1f}% ({:.2f}), acc test {:.1f}% ({:.2f})'.format(iteration, acctrain*100, Ltrain, acctest*100, Ltest)
    # the model has no softmax layer, so scores are turned into probabilities for the plot
    data.plot_data_with_grid(torch.nn.Softmax(dim=1)(Ygrid.detach()), title)

    # store detached values so the history does not keep the graphs alive
    curves[0].append(acctrain.detach())
    curves[1].append(acctest.detach())
    curves[2].append(Ltrain.detach())
    curves[3].append(Ltest.detach())

# learning curves: accuracies and losses share the same axes
fig = plt.figure()
plt.plot(curves[0], label="acc. train")
plt.plot(curves[1], label="acc. test")
plt.plot(curves[2], label="loss train")
plt.plot(curves[3], label="loss test")
plt.legend()
plt.show()

# Part 4 : Simplification of the SGD with `torch.optim`

In [None]:
def init_model(nx, nh, ny, eta):
    """
    Build the network, its loss and its optimizer.

    nx, nh, ny: integers (input, hidden and output sizes)
    eta: learning rate handed to the SGD optimizer
    returns: (model, loss, optim) where model is Linear -> Tanh -> Linear
             (raw scores, no softmax layer), loss is a CrossEntropyLoss and
             optim is a torch.optim.SGD over the model parameters
    """
    # Layers are instantiated in network order: hidden layer first.
    layers = [
        torch.nn.Linear(nx, nh),
        torch.nn.Tanh(),
        torch.nn.Linear(nh, ny),
    ]
    model = torch.nn.Sequential(*layers)

    loss = torch.nn.CrossEntropyLoss()
    optim = torch.optim.SGD(model.parameters(), lr=eta)

    return model, loss, optim

The `sgd` function is replaced by calls to `optim.zero_grad()` before the backward pass and `optim.step()` after it.

## Global learning procedure with autograd, `torch.nn` layers and `torch.optim`

In [None]:
# init: load the dataset and fix the hyperparameters
# NOTE(review): CirclesData is not defined in this notebook; presumably it is
# provided by the `%run 'utils-data.py'` cell -- confirm.
data = CirclesData()
data.plot_data()
N = data.Xtrain.shape[0]     # number of training samples
Nbatch = 10                  # mini-batch size
nx = data.Xtrain.shape[1]    # input dimension
nh = 10                      # number of hidden units
ny = data.Ytrain.shape[1]    # number of label columns
eta = 0.03                   # learning rate

model, loss, optim = init_model(nx, nh, ny, eta)

# per-epoch history: [train accuracy, test accuracy, train loss, test loss]
curves = [[],[], [], []]

# epoch
for iteration in range(150):

    # permute: reshuffle the training set at every epoch
    perm = np.random.permutation(N)
    Xtrain = data.Xtrain[perm, :]
    Ytrain = data.Ytrain[perm, :]

    # batches: only the N // Nbatch full batches are visited
    for j in range(N // Nbatch):

        indsBatch = range(j * Nbatch, (j+1) * Nbatch)
        X = Xtrain[indsBatch, :]
        Y = Ytrain[indsBatch, :]

        # optimization algorithm on the batch (X,Y):
        # clear old gradients, forward, backward, then let the optimizer step
        # TODO
        optim.zero_grad()
        Ytilde = model(X)
        L, _ = loss_accuracy(loss, Ytilde, Y)
        L.backward()
        optim.step()
        # END TODO

    # end of epoch: evaluate on the full (unshuffled) train and test sets
    Ytilde_train = model(data.Xtrain)
    Ytilde_test = model(data.Xtest)
    Ltrain, acctrain = loss_accuracy(loss, Ytilde_train, data.Ytrain)
    Ltest, acctest = loss_accuracy(loss, Ytilde_test, data.Ytest)
    # raw scores on data.Xgrid, used only for the plot below
    Ygrid = model(data.Xgrid)

    title = 'Iter {}: Acc train {:.1f}% ({:.2f}), acc test {:.1f}% ({:.2f})'.format(iteration, acctrain*100, Ltrain, acctest*100, Ltest)
    # the model has no softmax layer, so scores are turned into probabilities for the plot
    data.plot_data_with_grid(torch.nn.Softmax(dim=1)(Ygrid.detach()), title)

    # store detached values so the history does not keep the graphs alive
    curves[0].append(acctrain.detach())
    curves[1].append(acctest.detach())
    curves[2].append(Ltrain.detach())
    curves[3].append(Ltest.detach())

# learning curves: accuracies and losses share the same axes
fig = plt.figure()
plt.plot(curves[0], label="acc. train")
plt.plot(curves[1], label="acc. test")
plt.plot(curves[2], label="loss train")
plt.plot(curves[3], label="loss test")
plt.legend()
plt.show()

## **Extension: Experiments on different values of different hyperparameters**

In [None]:
# Different learning rates
# Trains one fresh model per learning rate (batch size and hidden size fixed)
# and compares the learning curves.

data = CirclesData()
N = data.Xtrain.shape[0]     # number of training samples
nx = data.Xtrain.shape[1]    # input dimension
ny = data.Ytrain.shape[1]    # number of label columns

batch_size = 10
hidden_units = 10
learning_rates = [0.01, 0.1, 1]

# learning rate -> [train accuracy, test accuracy, train loss, test loss] histories
results_lr = {}

for eta in learning_rates:
    # new model and optimizer for every learning rate
    model, loss, optim = init_model(nx, hidden_units, ny, eta)
    curves = [[], [], [], []]

    for iteration in range(150):
        # reshuffle the training set at every epoch
        perm = np.random.permutation(N)
        Xtrain = data.Xtrain[perm, :]
        Ytrain = data.Ytrain[perm, :]

        # only the N // batch_size full batches are visited
        for j in range(N // batch_size):
            indsBatch = range(j * batch_size, (j + 1) * batch_size)
            X = Xtrain[indsBatch, :]
            Y = Ytrain[indsBatch, :]

            optim.zero_grad()
            Ytilde = model(X)
            L, _ = loss_accuracy(loss, Ytilde, Y)
            L.backward()
            optim.step()

        # Evaluate on train and test data
        Ytilde_train = model(data.Xtrain)
        Ytilde_test = model(data.Xtest)
        Ltrain, acctrain = loss_accuracy(loss, Ytilde_train, data.Ytrain)
        Ltest, acctest = loss_accuracy(loss, Ytilde_test, data.Ytest)

        # Save accuracy and loss
        curves[0].append(acctrain.detach())  # Train accuracy
        curves[1].append(acctest.detach())   # Test accuracy
        curves[2].append(Ltrain.detach())  # Train loss
        curves[3].append(Ltest.detach())   # Test loss

    results_lr[eta] = curves

# Plot learning curves for different learning rates
# left panel: accuracies, right panel: losses
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

for eta, curves in results_lr.items():
    axs[0].plot(curves[0], label=f"Train Acc, LR={eta}")
    axs[0].plot(curves[1], label=f"Test Acc, LR={eta}")
    axs[1].plot(curves[2], label=f"Train Loss, LR={eta}")
    axs[1].plot(curves[3], label=f"Test Loss, LR={eta}")

axs[0].set_title("Accuracy vs. Epochs (Learning Rate)")
axs[0].legend()
axs[0].set_xlabel("Epochs")
axs[0].set_ylabel("Accuracy")

axs[1].set_title("Loss vs. Epochs (Learning Rate)")
axs[1].legend()
axs[1].set_xlabel("Epochs")
axs[1].set_ylabel("Loss")

plt.show()

In [None]:
# Different batch sizes
# Trains one fresh model per batch size (learning rate and hidden size fixed)
# and compares the learning curves.

data = CirclesData()
N = data.Xtrain.shape[0]     # number of training samples
nx = data.Xtrain.shape[1]    # input dimension
ny = data.Ytrain.shape[1]    # number of label columns

learning_rate = 0.03
hidden_units = 10
batch_sizes = [1, 30, 100]

# batch size -> [train accuracy, test accuracy, train loss, test loss] histories
results_batch = {}

for Nbatch in batch_sizes:
    # new model and optimizer for every batch size
    model, loss, optim = init_model(nx, hidden_units, ny, learning_rate)
    curves = [[], [], [], []]

    for iteration in range(150):
        # reshuffle the training set at every epoch
        perm = np.random.permutation(N)
        Xtrain = data.Xtrain[perm, :]
        Ytrain = data.Ytrain[perm, :]

        # only the N // Nbatch full batches are visited; when N is not a
        # multiple of Nbatch the remaining samples are skipped for this epoch
        for j in range(N // Nbatch):
            indsBatch = range(j * Nbatch, (j + 1) * Nbatch)
            X = Xtrain[indsBatch, :]
            Y = Ytrain[indsBatch, :]

            Ytilde = model(X)
            L, _ = loss_accuracy(loss, Ytilde, Y)
            L.backward()
            optim.step()
            # gradients are cleared after the step, ready for the next batch
            optim.zero_grad()

        # Evaluate on train and test data
        Ytilde_train = model(data.Xtrain)
        Ytilde_test = model(data.Xtest)
        Ltrain, acctrain = loss_accuracy(loss, Ytilde_train, data.Ytrain)
        Ltest, acctest = loss_accuracy(loss, Ytilde_test, data.Ytest)

        # Save accuracy and loss
        curves[0].append(acctrain.detach())  # Train accuracy
        curves[1].append(acctest.detach())   # Test accuracy
        curves[2].append(Ltrain.detach())  # Train loss
        curves[3].append(Ltest.detach())   # Test loss

    results_batch[Nbatch] = curves

# Plot learning curves for different batch sizes
# left panel: accuracies, right panel: losses
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

for Nbatch, curves in results_batch.items():
    axs[0].plot(curves[0], label=f"Train Acc, Batch={Nbatch}")
    axs[0].plot(curves[1], label=f"Test Acc, Batch={Nbatch}")
    axs[1].plot(curves[2], label=f"Train Loss, Batch={Nbatch}")
    axs[1].plot(curves[3], label=f"Test Loss, Batch={Nbatch}")

axs[0].set_title("Accuracy vs. Epochs (Batch Size)")
axs[0].legend()
axs[0].set_xlabel("Epochs")
axs[0].set_ylabel("Accuracy")

axs[1].set_title("Loss vs. Epochs (Batch Size)")
axs[1].legend()
axs[1].set_xlabel("Epochs")
axs[1].set_ylabel("Loss")

plt.show()

In [None]:
# Different hidden units
# Trains one fresh model per hidden-layer size (learning rate and batch size
# fixed) and compares the learning curves.

data = CirclesData()
N = data.Xtrain.shape[0]     # number of training samples
nx = data.Xtrain.shape[1]    # input dimension
ny = data.Ytrain.shape[1]    # number of label columns

learning_rate = 0.03
batch_size = 10
hidden_units_list = [1, 30, 100]

# hidden size -> [train accuracy, test accuracy, train loss, test loss] histories
results_hidden = {}

for nh in hidden_units_list:
    # new model and optimizer for every hidden-layer size
    model, loss, optim = init_model(nx, nh, ny, learning_rate)
    curves = [[], [], [], []]

    for iteration in range(150):
        # reshuffle the training set at every epoch
        perm = np.random.permutation(N)
        Xtrain = data.Xtrain[perm, :]
        Ytrain = data.Ytrain[perm, :]

        # only the N // batch_size full batches are visited
        for j in range(N // batch_size):
            indsBatch = range(j * batch_size, (j + 1) * batch_size)
            X = Xtrain[indsBatch, :]
            Y = Ytrain[indsBatch, :]

            Ytilde = model(X)
            L, _ = loss_accuracy(loss, Ytilde, Y)
            L.backward()
            optim.step()
            # gradients are cleared after the step, ready for the next batch
            optim.zero_grad()

        # Evaluate on train and test data
        Ytilde_train = model(data.Xtrain)
        Ytilde_test = model(data.Xtest)
        Ltrain, acctrain = loss_accuracy(loss, Ytilde_train, data.Ytrain)
        Ltest, acctest = loss_accuracy(loss, Ytilde_test, data.Ytest)

        # Save accuracy and loss
        curves[0].append(acctrain.detach())  # Train accuracy
        curves[1].append(acctest.detach())   # Test accuracy
        curves[2].append(Ltrain.detach())  # Train loss
        curves[3].append(Ltest.detach())   # Test loss

    results_hidden[nh] = curves

# Plot learning curves for different hidden-layer sizes
# left panel: accuracies, right panel: losses
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

for nh, curves in results_hidden.items():
    axs[0].plot(curves[0], label=f"Train Acc, Hidden={nh}")
    axs[0].plot(curves[1], label=f"Test Acc, Hidden={nh}")
    axs[1].plot(curves[2], label=f"Train Loss, Hidden={nh}")
    axs[1].plot(curves[3], label=f"Test Loss, Hidden={nh}")

axs[0].set_title("Accuracy vs. Epochs (Hidden Units)")
axs[0].legend()
axs[0].set_xlabel("Epochs")
axs[0].set_ylabel("Accuracy")

axs[1].set_title("Loss vs. Epochs (Hidden Units)")
axs[1].legend()
axs[1].set_xlabel("Epochs")
axs[1].set_ylabel("Loss")

plt.show()

# Part 5 : MNIST

Apply the code from the previous part to the MNIST dataset.

In [None]:
# init: load MNIST and fix the hyperparameters
# NOTE(review): MNISTData is not defined in this notebook; presumably it is
# provided by the `%run 'utils-data.py'` cell -- confirm.
data = MNISTData()
N = data.Xtrain.shape[0]     # number of training samples
Nbatch = 100                 # mini-batch size
nx = data.Xtrain.shape[1]    # input dimension
nh = 100                     # number of hidden units
ny = data.Ytrain.shape[1]    # number of label columns
eta = 0.03                   # learning rate

# TODO
model, loss, optim = init_model(nx, nh, ny, eta)

# per-epoch history: [train accuracy, test accuracy, train loss, test loss]
curves = [[],[], [], []]

# epoch
for iteration in range(150):

    # permute: reshuffle the training set at every epoch
    perm = np.random.permutation(N)
    Xtrain = data.Xtrain[perm, :]
    Ytrain = data.Ytrain[perm, :]

    # batches: only the N // Nbatch full batches are visited
    for j in range(N // Nbatch):

        indsBatch = range(j * Nbatch, (j+1) * Nbatch)
        X = Xtrain[indsBatch, :]
        Y = Ytrain[indsBatch, :]

        # clear old gradients, forward, backward, then let the optimizer step
        optim.zero_grad()
        Ytilde = model(X)
        L, _ = loss_accuracy(loss, Ytilde, Y)
        L.backward()
        optim.step()

    # end of epoch: evaluate on the full train and test sets
    Ytilde_train = model(data.Xtrain)
    Ytilde_test = model(data.Xtest)
    Ltrain, acctrain = loss_accuracy(loss, Ytilde_train, data.Ytrain)
    Ltest, acctest = loss_accuracy(loss, Ytilde_test, data.Ytest)

    # no grid plot here: progress is reported as one text line per epoch
    title = 'Iter {}: Acc train {:.1f}% ({:.2f}), acc test {:.1f}% ({:.2f})'.format(iteration, acctrain*100, Ltrain, acctest*100, Ltest)
    print (title)

    # store detached values so the history does not keep the graphs alive
    curves[0].append(acctrain.detach())
    curves[1].append(acctest.detach())
    curves[2].append(Ltrain.detach())
    curves[3].append(Ltest.detach())

# learning curves: accuracies and losses share the same axes
fig = plt.figure()
plt.plot(curves[0], label="acc. train")
plt.plot(curves[1], label="acc. test")
plt.plot(curves[2], label="loss train")
plt.plot(curves[3], label="loss test")
plt.legend()
plt.show()
# END TODO

# Part 6: Bonus: SVM


Train a SVM model on the Circles dataset.

Ideas :
- First try a linear SVM (`sklearn.svm.LinearSVC` in scikit-learn). Does it work well? Why?
- Then try more complex kernels (`sklearn.svm.SVC`). Which one is the best? Why?
- Does the regularization parameter C have an impact? Why?

In [None]:
# data
# data: convert the torch tensors of the dataset to NumPy arrays for scikit-learn
data = CirclesData()
Xtrain = data.Xtrain.numpy()
# Only column 0 of the label matrix is kept as the 1-D target vector.
# NOTE(review): presumably this is the indicator of the first class -- confirm
# against CirclesData.
Ytrain = data.Ytrain[:, 0].numpy()

# grid points on which the decision map is evaluated
Xgrid = data.Xgrid.numpy()

Xtest = data.Xtest.numpy()
Ytest = data.Ytest[:, 0].numpy()

def plot_svm_predictions(data, predictions):
    """
    Draw the predictions on the grid as an image with the data points on top.

    data: dataset object exposing `_Xtrain`, `_Ytrain`, `_Xtest` and `_Ytest`
    predictions: one value per grid point; reshaped to a 40x40 image
    """
    plt.figure(2)
    plt.clf()
    plt.imshow(np.reshape(predictions, (40,40)))

    # (points, labels, marker, legend label) for each split, train first
    splits = (
        (data._Xtrain, data._Ytrain, 'o', "Train"),
        (data._Xtest, data._Ytest, '+', "Test"),
    )
    for points, labels, marker, split_name in splits:
        # label column 0 is drawn in blue, label column 1 in red
        for column, colour in ((0, 'b'), (1, 'r')):
            members = labels[:,column] == 1
            # only the first series of each split carries the legend label
            legend = {"label": split_name} if column == 0 else {}
            # x*10+20 presumably maps data coordinates onto the pixel grid
            # of the image -- TODO confirm against the grid definition
            plt.plot(points[members,0]*10+20, points[members,1]*10+20, colour + marker, **legend)

    plt.xlim(0,39)
    plt.ylim(0,39)
    plt.clim(0.3,0.7)
    plt.draw()
    plt.pause(1e-3)

## **Extension: Experiments on different kernels and different values of $C$**

#### **Linear SVM**

In [None]:
import sklearn.svm

# Linear SVM with default hyperparameters, fitted on the training set.
# `svm` is a notebook-level name that the following cells read and rebind.
svm = sklearn.svm.LinearSVC()
svm.fit(Xtrain, Ytrain)

In [None]:
# Print results
# Evaluates whichever model is currently bound to `svm`.

Ytest_pred = svm.predict(Xtest)
# fraction of test samples whose predicted label equals the true one
accuracy = np.sum(Ytest == Ytest_pred) / len(Ytest)
print(f"Accuracy : {100 * accuracy:.2f}%")
# predictions on the grid, shown as the background of the plot
Ygrid_pred = svm.predict(Xgrid)
plot_svm_predictions(data, Ygrid_pred)

#### **More complex kernels**

In [None]:
# Gaussian kernel (rbf)
# All other SVC hyperparameters are left at their defaults; rebinds `svm`.
svm = sklearn.svm.SVC(kernel='rbf')
svm.fit(Xtrain, Ytrain)

In [None]:
# Print results
# Evaluates whichever model is currently bound to `svm`.
Ytest_pred = svm.predict(Xtest)
# fraction of test samples whose predicted label equals the true one
accuracy = np.sum(Ytest == Ytest_pred) / len(Ytest)
print(f"Accuracy : {100 * accuracy:.2f}%")
# predictions on the grid, shown as the background of the plot
Ygrid_pred = svm.predict(Xgrid)
plot_svm_predictions(data, Ygrid_pred)

In [None]:
# Polynomial kernel
# All other SVC hyperparameters are left at their defaults; rebinds `svm`.
svm = sklearn.svm.SVC(kernel='poly')
svm.fit(Xtrain, Ytrain)

In [None]:
# Print results
# Evaluates whichever model is currently bound to `svm`.
Ytest_pred = svm.predict(Xtest)
# fraction of test samples whose predicted label equals the true one
accuracy = np.sum(Ytest == Ytest_pred) / len(Ytest)
print(f"Accuracy : {100 * accuracy:.2f}%")
# predictions on the grid, shown as the background of the plot
Ygrid_pred = svm.predict(Xgrid)
plot_svm_predictions(data, Ygrid_pred)

In [None]:
# Sigmoid kernel
# All other SVC hyperparameters are left at their defaults; rebinds `svm`.
svm = sklearn.svm.SVC(kernel='sigmoid')
svm.fit(Xtrain, Ytrain)

In [None]:
# Print results
# Evaluates whichever model is currently bound to `svm`.
Ytest_pred = svm.predict(Xtest)
# fraction of test samples whose predicted label equals the true one
accuracy = np.sum(Ytest == Ytest_pred) / len(Ytest)
print(f"Accuracy : {100 * accuracy:.2f}%")
# predictions on the grid, shown as the background of the plot
Ygrid_pred = svm.predict(Xgrid)
plot_svm_predictions(data, Ygrid_pred)

#### **Impacts of C**

In [None]:
# Sweep the regularization parameter C of an RBF-kernel SVM and record the
# accuracy (in percent) on the training and test sets for every value.
C = np.linspace(0.01, 2., 200)
acc_train = []
acc_test = []

for c in C:
  svm = sklearn.svm.SVC(kernel='rbf', C=c)
  svm.fit(Xtrain, Ytrain)
  Ytrain_pred = svm.predict(Xtrain)
  Ytest_pred = svm.predict(Xtest)
  acc_train.append(100*np.sum(Ytrain == Ytrain_pred) / len(Ytrain))
  # Test predictions must be scored against the test labels, not the
  # training labels.
  acc_test.append(100*np.sum(Ytest == Ytest_pred) / len(Ytest))

In [None]:
# Plot the accuracy curves
# acc_train / acc_test hold one value per entry of C and are expressed in
# percent (they were multiplied by 100 when recorded).
plt.plot(C, acc_train, label="Train")
plt.plot(C, acc_test, label="Test")
plt.xlabel("C")
plt.ylabel("Accuracy")
plt.legend()
plt.show()