<a href="https://colab.research.google.com/github/abeebyekeen/DLforBeginners/blob/main/day1_session1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# day 1 session 1: introduction to binary classification

**scenario**: binary classification ($y \in \{1,0\}$) <br>
**dataset**: almost linearly separable dataset #1 in 2-d ($\mathbf{x^i} = [x_0^i,x_1^i]$) <br>
**model**: linear classifier (1-layer perceptron):
   $$ y=   \left\{
\begin{array}{ll}
      1 & w_0 x_0 + w_1 x_1 + b > 0 \\
      0 & w_0 x_0 + w_1 x_1 + b \leq 0 \\
\end{array}
\right.  $$<br>


**tasks** <br>
1. draw (on paper) the plot of $h = w_0 x_0 + w_1 x_1 + b$, where $x_0$ and $x_1$ are variables and $w_0$, $w_1$, $b$ are parameters

2. visualize dataset #1; guess the best values of the parameters of the linear classifier; guess the rates of TN, FP, FN, TP and ACC <br>

3. implement the linear classifier (without bias (why?)) to maximize ACC, train and infer<br>

4. rewrite dataset #1 (shift all points up and to the right, i.e. add 0.25 to both $x_0$ and $x_1$); repeat task 3; if performance is poor come up with a way to improve it

5. rewrite dataset #1 so that class 0 has 7 times as many data points as class 1; repeat task 3; if performance is poor come up with a way to improve it

6. rewrite the objective function to improve Precision ( $\frac{TP}{TP+FP}$ ), Recall or Sensitivity ($\frac{TP}{TP+FN}$), and Specificity or Selectivity ($\frac{TN}{TN+FP}$); repeat task 3 and see what happens to accuracy




## part 0: dependencies and functions
utility code that you don't have to read

In [None]:
# dependencies
import numpy as np
import math
import scipy
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from IPython.display import Javascript

In [None]:
# function for visualizing dataset
def visualize_data(X,Y,figuretitle):
    """Scatter-plot a 2-d dataset on a new figure, coloured by class label.

    Rows whose label equals 0 are drawn in blue and rows whose label equals 1
    in red, on equally scaled axes with a major tick every 0.2.

    Args:
        X: array indexed as ``X[mask, column]``; column 0 goes on the
            horizontal axis and column 1 on the vertical axis.
        Y: label array, compared element-wise against 0 and 1 to build the
            row masks applied to ``X``.
        figuretitle: text used as the plot title.
    """
    _, axes = plt.subplots()
    is_class0 = Y == 0
    is_class1 = Y == 1
    class0_points = axes.scatter(X[is_class0, 0], X[is_class0, 1], c='b')
    class1_points = axes.scatter(X[is_class1, 0], X[is_class1, 1], c='r')
    axes.set_aspect('equal')
    # each axis needs its own locator instance
    for axis in (axes.xaxis, axes.yaxis):
        axis.set_major_locator(ticker.MultipleLocator(base=0.2))
    axes.legend((class0_points, class1_points), ('y=0', 'y=1'), loc='upper left')
    axes.set_xlabel('x0')
    axes.set_ylabel('x1')
    axes.set_title(figuretitle)
    # argument-less call kept from the original; it adds no artists
    plt.plot()

In [None]:
# function for visualizing loss function
def visualize_loss(Jout, w0_start, w0_end, w1_start, w1_end, step):
    """Draw a heat map of objective values sampled on a regular (w0, w1) grid.

    w0 is shown on the horizontal axis and w1 on the vertical axis, with one
    coloured cell centred on every grid sample.

    Args:
        Jout: 2-d array of objective values laid out as
            ``Jout[i, j] = J(w0_i, w1_j)`` (first axis w0, second axis w1),
            which is the layout returned by ``scipy.optimize.brute`` with
            ``full_output=True``.
        w0_start: w0 value of the first grid sample.
        w0_end: nominal w0 value of the last grid sample. Accepted for
            interface compatibility; the number of cells is taken from
            ``Jout.shape`` so the mesh always matches the data.
        w1_start: w1 value of the first grid sample.
        w1_end: nominal w1 value of the last grid sample (see ``w0_end``).
        step: spacing between neighbouring samples, shared by both axes.
    """
    values = np.asarray(Jout)
    n_w0, n_w1 = values.shape
    # Cell edges sit half a step either side of each sample. Building them
    # from the sample count avoids the off-by-one that a float-step
    # np.arange can produce.
    w0_edges = w0_start + step * (np.arange(n_w0 + 1) - 0.5)
    w1_edges = w1_start + step * (np.arange(n_w1 + 1) - 0.5)

    fig, ax = plt.subplots()
    # pcolormesh(x, y, C) reads C[row, col] with rows along y and columns
    # along x, so the [w0, w1]-ordered values must be transposed.
    mesh = ax.pcolormesh(w0_edges, w1_edges, values.T)
    ax.set_aspect('equal')
    # put the ticks on the cell centres, i.e. on the sampled parameter values
    ax.set_xticks(w0_edges[:-1] + 0.5 * step)
    ax.set_yticks(w1_edges[:-1] + 0.5 * step)
    ax.set_xlabel('w0')
    ax.set_ylabel('w1')
    fig.colorbar(mesh, ax=ax)

In [None]:
#modified from a piece of code from erwan-simon: "https://gist.github.com/erwan-simon/e3baef06a00bb9a39a6968acf78121ee"
#from torch.autograd import Variable
def plot_decision_boundary(X,Y,w,model):
    """Shade the model's predicted class over the data region and overlay the points.

    The model is evaluated on a 1000 x 1000 grid covering the bounding box of
    ``X`` padded by 1 on every side. The predictions are drawn as filled
    contours, and the points of ``X`` are scattered on top, coloured by ``Y``.

    Args:
        X: 2-d array of points; column 0 is x0 (horizontal axis) and
            column 1 is x1 (vertical axis).
        Y: one label per row of ``X``; rows labelled 0 are drawn in blue and
            rows labelled 1 in red.
        w: model parameters, forwarded unchanged as the first argument of
            ``model``.
        model: callable ``model(w, points)`` that returns one prediction per
            row of the ``(n, 2)`` array ``points``.
    """
    color_map = plt.get_cmap('Paired')

    # Define region of interest by data limits
    x0min, x0max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x1min, x1max = X[:, 1].min() - 1, X[:, 1].max() + 1
    steps = 1000
    x0s, x1s = np.meshgrid(
        np.linspace(x0min, x0max, steps),
        np.linspace(x1min, x1max, steps),
    )

    # Make predictions across region of interest
    grid_points = np.c_[x0s.ravel(), x1s.ravel()]
    # cast to float so that boolean or integer predictions contour the same way
    z = np.asarray(model(w, grid_points), dtype=float).reshape(x0s.shape)

    # Plot decision regions in region of interest
    fig, ax = plt.subplots()
    regions = ax.contourf(x0s, x1s, z, cmap=color_map, alpha=0.5)
    fig.colorbar(regions)

    # Overlay the data points, coloured by the labels that were passed in
    ax.scatter(X[Y == 0, 0], X[Y == 0, 1], c='b')
    ax.scatter(X[Y == 1, 0], X[Y == 1, 1], c='r')
    ax.set_aspect('equal')
    ax.set_xlabel('x0')
    ax.set_ylabel('x1')
    ax.set_title("inferring data, true labels, decision boundary")

In [None]:
# print the rates of TN, FP, FN, TP, and ACC
def evaluate(Y,Y_pred):
    """Print the TN, FP, FN and TP rates and the accuracy of binary predictions.

    Every rate is a count divided by ``len(Y)``, i.e. a fraction of all
    samples rather than of one class. Accuracy is reported as TN + TP.

    Args:
        Y: array of true labels, compared element-wise against 0.0 and 1.0.
        Y_pred: array of predicted labels, compared the same way.

    Returns:
        None; the results are written to standard output.
    """
    n_samples = len(Y)

    def rate(actual, predicted):
        # fraction of all samples with this (true label, predicted label) pair
        matches = np.logical_and(Y == actual, Y_pred == predicted)
        return np.count_nonzero(matches) / n_samples

    TN = rate(0.0, 0.0)
    FP = rate(0.0, 1.0)
    FN = rate(1.0, 0.0)
    TP = rate(1.0, 1.0)
    print('TN rate:',TN,', FP rate: ',FP,', FN rate: ',FN,', TP rate: ',TP)
    print('ACC=',TN+TP)

## part 1: preparations

In [None]:
# the function that creates dataset #1
def create_data_1(N):
    """Create dataset #1: two overlapping square clouds of N points each.

    Class 0 is drawn uniformly from the square spanning -0.75 to 0.25 on both
    axes, and class 1 from the square spanning -0.25 to 0.75. The 2*N points
    are shuffled before being returned. Randomness comes from NumPy's global
    ``np.random`` state, so seed it beforehand for reproducible data.

    Args:
        N: number of points generated per class.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a float32 array of shape (2*N, 2)
        and ``Y`` is the matching float32 label array of shape (2*N,) holding
        0.0 for class 0 and 1.0 for class 1.
    """
    # draw order (class 0, class 1, permutation) fixes the data for a given seed
    class0 = np.random.rand(N, 2).astype(np.float32) - 0.5
    class1 = np.random.rand(N, 2).astype(np.float32) - 0.5
    # push the clouds apart along the diagonal: class 0 down-left, class 1 up-right
    class0 -= 0.25
    class1 += 0.25

    points = np.concatenate((class0, class1), axis=0)
    labels = np.concatenate(
        (np.zeros(N, dtype=np.float32), np.ones(N, dtype=np.float32)),
        axis=0,
    )

    # one shared permutation keeps every point paired with its label
    shuffle = np.random.permutation(2 * N)
    return points[shuffle], labels[shuffle]

In [None]:
# call the function above to actually create dataset #1
SEED = 2023
N = 5000
# seed NumPy's global generator so the dataset is the same on every run
np.random.seed(SEED)
X, Y = create_data_1(N)

# create_data_1 returns 2*N shuffled points: the first N form the training
# split and the remaining N are held out for inference
X_train, X_infer = X[:N], X[N:]
Y_train, Y_infer = Y[:N], Y[N:]

# asks the Colab front end to resize this cell's output frame (max height 9999)
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 9999})'''))
visualize_data(X, Y, figuretitle="dataset")
visualize_data(X_train, Y_train, figuretitle="dataset train")
visualize_data(X_infer, Y_infer, figuretitle="dataset infer")

In [None]:
# build a linear classifier model
def MyLinearClassifier(w, x):
    """Linear classifier to be completed by the learner.

    As written this is a placeholder that returns ``None``; replace the line
    marked "edit here" with the actual decision rule.

    Args:
        w: model parameters. NOTE(review): the training cell searches a
            two-dimensional grid, so presumably ``w = [w0, w1]`` with no bias
            term -- confirm against the task description.
        x: points to classify; callers in this file pass an array with one
            row per point and two columns.

    Returns:
        The predicted labels ``y`` (currently ``None``). Callers in this file
        compare the result element-wise against 0 and 1, so one label per row
        of ``x`` is expected.
    """
    y = None # edit here
    return y

In [None]:
# formulate performance evaluation function
def MyErrorRate(w,x,y):
    """Return the fraction of points that MyLinearClassifier gets wrong.

    This is the objective minimised during training: 0 means every
    prediction agrees with its label.

    Args:
        w: model parameters, forwarded to ``MyLinearClassifier``.
        x: points to classify, forwarded to ``MyLinearClassifier``.
        y: true labels; compared with the predictions by truth value, so any
            non-zero label counts as class 1.

    Returns:
        Number of mismatching entries divided by the number of entries.
    """
    predictions = MyLinearClassifier(w, x)
    # XOR is true exactly where label and prediction disagree
    mismatches = np.logical_xor(y, predictions)
    return mismatches.sum() / len(mismatches)

## part 2: train

In [None]:
# here we look for the best parameters for our model: an exhaustive grid
# search over (w0, w1) that minimises the error rate on the training split

# `import scipy` alone is not guaranteed to load the optimize submodule on
# every SciPy version, so import it explicitly before using it
import scipy.optimize

w0_start, w0_end, w1_start, w1_end, step = -4, 4, -4, 4, 1.0
# slice stops are exclusive, hence the extra `step` so that w0_end and w1_end
# are part of the grid
rranges = (slice(w0_start, w0_end+step, step), slice(w1_start, w1_end+step, step))
# finish=None keeps the raw grid minimum (no polishing step); full_output=True
# additionally returns the evaluation grid and the objective value at each point
w_opt, fval, grid, Jout = scipy.optimize.brute(MyErrorRate, ranges=rranges, args=(X_train, Y_train), full_output=True, finish=None, disp=False, workers=1)
print(w_opt)  # best (w0, w1) found on the grid
print(fval)   # error rate at w_opt
print(grid)   # coordinates of every grid point that was evaluated
print(Jout)   # error rate at every grid point

In [None]:
# here we plot the training error rate found at every (w0, w1) grid point
visualize_loss(
    Jout,
    w0_start=w0_start,
    w0_end=w0_end,
    w1_start=w1_start,
    w1_end=w1_end,
    step=step,
)

## part 3: infer

In [None]:
# classify the held-out points with the parameters found during training
Y_infer_pred = MyLinearClassifier(w=w_opt, x=X_infer)

In [None]:
# report the basic metrics (TN, FP, FN, TP rates and accuracy) on the held-out data
evaluate(Y=Y_infer, Y_pred=Y_infer_pred)

In [None]:
# visualize the decision boundary together with the infer dataset.
# The points are coloured by their TRUE labels (as the plot title states), so
# a misclassified point shows up as a dot lying in the other class's region;
# colouring by the predictions would make every point match its region.
plot_decision_boundary(X_infer, Y_infer, w_opt, MyLinearClassifier)