In [1]:
import numpy as np 
import random
import matplotlib.pyplot as plt

In [2]:
def gen_xn(N, d=2, n_out=1000):
    """Draw sample points uniformly from the cube [-1, 1)^d.

    Parameters
    ----------
    N : int
        Number of in-sample points requested.
    d : int, optional
        Dimension of each point.
    n_out : int, optional
        Number of additional rows generated after the first ``N``.  The
        default of 1000 is the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N + n_out, d)``.
    """
    return np.random.uniform(-1, 1, (N + n_out, d))


In [3]:
def gen_f(xn, d=2, N=None):
    """Label points with a random linear target and split them in two.

    Two points are drawn uniformly from [-1, 1)^d; the line through their
    first two coordinates is the target boundary.  A row of ``xn`` gets
    label ``+1`` when its second coordinate lies strictly above the line
    and ``-1`` otherwise.

    Parameters
    ----------
    xn : array-like, shape (n_points, n_features)
        Points to label; only columns 0 and 1 are used for labelling.
    d : int, optional
        Dimension of the two random points that define the line.
    N : int, optional
        Number of leading rows returned as the first (in-sample) part.
        When omitted it is taken as ``len(xn) - 1000``, matching the 1000
        extra rows that ``gen_xn`` appends, so the result no longer
        depends on a notebook-level global named ``N``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(out[:N], out[N:])`` where ``out`` is ``xn`` with the label
        appended as the last column.

    Raises
    ------
    ValueError
        If the split index falls outside ``0..len(xn)``.
    """
    xn = np.asarray(xn)
    if N is None:
        N = xn.shape[0] - 1000
    if not 0 <= N <= xn.shape[0]:
        raise ValueError(
            "split index N=%d is outside 0..%d" % (N, xn.shape[0])
        )

    # Two random points define the target line y = m*x + c.
    x1 = np.random.uniform(-1, 1, d)
    x2 = np.random.uniform(-1, 1, d)

    m = (x1[1] - x2[1]) / (x1[0] - x2[0])
    c = x1[1] - m * x1[0]

    # Points exactly on the line are labelled -1 (strict comparison).
    y_f = np.where(xn[:, 1] > xn[:, 0] * m + c, 1, -1)

    out = np.concatenate((xn, y_f.reshape(-1, 1)), axis=-1)

    return out[:N], out[N:]
    

In [4]:
def sign(X):
    """Return the element-wise sign of ``X`` as a Python list.

    Each element maps to ``1`` (positive), ``-1`` (negative) or ``0``
    (equal to zero).  An element that is neither greater than, less than,
    nor equal to zero (e.g. NaN) contributes nothing, so the result can be
    shorter than the input.
    """
    # `v == v` is False only for unordered values such as NaN, which are
    # skipped rather than mapped to a sign.
    return [
        0 if v == 0 else (1 if v > 0 else -1)
        for v in X
        if v == v
    ]

In [5]:
def add_noise(data, percentage=0.1):
    """Flip the sign of the last column for a random subset of rows.

    ``int(percentage * n_rows)`` distinct rows are chosen with
    ``random.sample`` and their last-column value is multiplied by -1.

    Note: ``data`` is modified in place and the same array object is
    returned, so the caller's original array carries the flipped labels
    as well.
    """
    n_rows = data.shape[0]
    n_flips = int(percentage * n_rows)
    chosen = random.sample(range(n_rows), n_flips)
    data[chosen, -1] = -1 * data[chosen, -1]
    return data

In [6]:
def linear_regression(data):
    """Fit least-squares weights via the pseudo-inverse.

    ``data`` holds the features in every column but the last and the
    target in the last column.  A column of ones is prepended to the
    features, so the first returned weight is the bias term.

    Returns the weight vector ``pinv(X) . y``.
    """
    features = data[:, :-1]
    targets = data[:, -1]
    bias_column = np.ones((features.shape[0], 1))
    design = np.hstack((bias_column, features))
    pseudo_inverse = np.linalg.pinv(design)
    return np.dot(pseudo_inverse, targets)

In [7]:
def linear_regression_calc(x, w):
    """Classify the rows of ``x`` with the linear weights ``w``.

    A column of ones is prepended to ``x`` (so ``w[0]`` acts as the bias)
    and the sign of each row's dot product with ``w`` is returned as
    produced by ``sign``.
    """
    bias_column = np.ones((x.shape[0], 1))
    augmented = np.hstack((bias_column, x))
    scores = augmented.dot(w)
    return sign(scores)

In [8]:
def PLA(i_p, w, max_iter=None):
    """Run the perceptron learning algorithm starting from weights ``w``.

    Parameters
    ----------
    i_p : numpy.ndarray, shape (n_points, n_features + 1)
        Features in every column but the last, label in the last column.
        (Previously the features were hard-coded to the first two
        columns; they are now taken as ``[:, :-1]``, which is identical
        for 2-D inputs and also works for other dimensions.)
    w : array-like, shape (n_features + 1,)
        Initial weights, bias first.
    max_iter : int, optional
        Upper bound on the number of passes.  ``None`` (the default)
        keeps the original behaviour of looping until every point is
        classified correctly, which never terminates on data that is not
        linearly separable.

    Returns
    -------
    (numpy.ndarray, int)
        Final weights and the number of passes performed.  The count
        includes the final pass that found no misclassified point, so a
        starting ``w`` that already separates the data yields 1.
    """
    x, y = i_p[:, :-1], i_p[:, -1]
    x = np.concatenate((np.ones((x.shape[0], 1)), x), axis=-1)
    w = np.asarray(w)
    i = 0
    while max_iter is None or i < max_iter:
        i += 1
        h = np.sign(np.sum(w * x, axis=1))
        misclassified = np.flatnonzero(h != y)
        if misclassified.size == 0:
            break
        # Update on one misclassified point chosen uniformly at random.
        pick = misclassified[np.random.randint(misclassified.size)]
        w = w + y[pick] * x[pick].reshape(w.shape)

    return w, i

In [9]:
def non_linear_transoformation(data):
    """Expand ``(x1, x2, y)`` rows into second-order features.

    The returned float array has six columns, in this order:
    ``x1, x2, x1*x2, x1**2, x2**2, y`` where ``x1``, ``x2`` and ``y`` are
    columns 0, 1 and 2 of ``data``.
    """
    first, second, label = data[:, 0], data[:, 1], data[:, 2]
    columns = (
        first,
        second,
        first * second,
        first * first,
        second * second,
        label,
    )
    # The result is always float64, regardless of the input dtype.
    return np.column_stack(columns).astype(np.float64)

In [10]:
def gen_f_lr(xn, d=2, N=None):
    """Label points with the target ``sign(x1**2 + x2**2 - 0.6)`` and split.

    Parameters
    ----------
    xn : array-like, shape (n_points, n_features)
        Points to label; only columns 0 and 1 enter the target.
    d : int, optional
        Unused; kept so existing calls keep working.
    N : int, optional
        Number of leading rows returned as the first (in-sample) part.
        When omitted it is taken as ``len(xn) - 1000``, matching the 1000
        extra rows that ``gen_xn`` appends, so the result no longer
        depends on a notebook-level global named ``N``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(out[:N], out[N:])`` where ``out`` is ``xn`` with the label
        appended as the last column.

    Raises
    ------
    ValueError
        If the split index falls outside ``0..len(xn)``.
    """
    xn = np.asarray(xn)
    if N is None:
        N = xn.shape[0] - 1000
    if not 0 <= N <= xn.shape[0]:
        raise ValueError(
            "split index N=%d is outside 0..%d" % (N, xn.shape[0])
        )

    x1, x2 = xn[:, 0], xn[:, 1]
    y_f = np.sign(x1**2 + x2**2 - 0.6)
    out = np.concatenate((xn, y_f.reshape(-1, 1)), axis=-1)
    return (out[:N], out[N:])

## Questions 5,6

In [11]:
# Linear regression used as a classifier: average the in-sample and
# out-of-sample misclassification rates over N_exp independent runs.
N = 100
N_exp = 1000
Ein, Eout = [], []

for _ in range(N_exp):
    din, dout = gen_f(gen_xn(N), d=2)
    w = linear_regression(din)
    # Same error measure for both splits: fraction of rows whose predicted
    # sign differs from the label stored in the last column.
    for split, errors in ((din, Ein), (dout, Eout)):
        predicted = linear_regression_calc(split[:, :-1], w)
        errors.append(np.mean(np.not_equal(predicted, split[:, -1])))

print("Average Ein  = ", np.mean(Ein ))
print("Average Eout = ", np.mean(Eout))

Average Ein  =  0.04012
Average Eout =  0.04810300000000001


## Question 7

In [12]:
# Seed PLA with the linear-regression weights and average the pass count
# it reports over N_exp independent runs.
N = 10
N_exp = 1000
it = []

for _ in range(N_exp):
    din, dout = gen_f(gen_xn(N), d=2)
    w_start = linear_regression(din)
    # PLA returns (weights, pass count); only the count is recorded.
    it.append(PLA(din, w_start)[1])

print('Converged after:', np.mean(it))

Converged after: 5.114


## Question 8 

In [13]:
# Linear regression without a feature transform on the noisy circular
# target: average in-sample misclassification rate over N_exp runs.
N        = 1000
N_exp    = 1000
Ein      = []
for _ in range(N_exp):
    xn        = gen_xn(N)
    din, _dout = gen_f_lr(xn, d=2)
    din_noisy = add_noise(din)
    w         = linear_regression(din_noisy)
    # Evaluate against the noisy set explicitly.  Reading `din` here only
    # gave the same numbers because add_noise flips labels in place; using
    # `din_noisy` keeps the result right even if that ever changes.
    y_in_w    = linear_regression_calc(din_noisy[:, :-1], w)
    y_in      = din_noisy[:, -1]
    Ein.append(np.mean(np.not_equal(y_in_w, y_in)))


print("Average Ein  = ", np.mean(Ein ))

Average Ein  =  0.503481


In [14]:
# Linear regression on the second-order feature transform of the noisy
# circular target.  Reports which candidate hypothesis in `g` agrees most
# with the fitted weights, and the average out-of-sample error.
N        = 1000
N_exp    = 1000
ws_      = []

# Candidate hypotheses; weights are ordered to match the transformed
# features with the bias first: [1, x1, x2, x1*x2, x1**2, x2**2].
g = [
    [-1, -0.05, 0.08, 0.13, 1.5 , 1.5 ],
    [-1, -0.05, 0.08, 0.13, 1.5 , 15  ],
    [-1, -0.05, 0.08, 0.13, 15  , 1.5 ],
    [-1, -1.5 , 0.08, 0.13, 0.05, 0.05],
    [-1, -0.05, 0.08, 1.5 , 0.15, 0.15],
]

# Ein[e, i] holds the disagreement rate between the fitted hypothesis and
# g[i] on the in-sample points of experiment e.
Ein, Eout = np.zeros((N_exp, len(g))), []

for e in range(N_exp):
    xn          = gen_xn(N)
    din, dout   = gen_f_lr(xn, d=2)

    din_noisy   = add_noise(din)
    d_out_noisy = add_noise(dout)

    din_trans   = non_linear_transoformation(din_noisy)
    dout_trans  = non_linear_transoformation(d_out_noisy)
    w           = linear_regression(din_trans)
    y_in_w      = linear_regression_calc(din_trans[:, :-1], w)
    for i, g_i in enumerate(g):
        y_g = linear_regression_calc(din_trans[:, :-1], g_i)
        Ein[e, i] = np.mean(np.not_equal(y_in_w, y_g))
    y_out_w     = linear_regression_calc(dout_trans[:, :-1], w)
    # Take the noisy labels from the noisy array itself.  Reading `dout`
    # only matched because add_noise flips labels in place.
    y_out       = d_out_noisy[:, -1]
    Eout.append(np.mean(np.not_equal(y_out_w, y_out)))
Ein = np.mean(Ein, axis=0)
print("The hypothesis is clossest to g[", np.argmin(Ein), ']')
print("Average Eout = ", np.mean(Eout))

The hypothesis is clossest to g[ 0 ]
Average Eout =  0.125926
