In [1]:
import numpy as np
import math
import sklearn.datasets
from sklearn.svm import SVC
import sys
import copy
import random

In [2]:
# Fix every source of randomness so that Restart & Run All reproduces the results.
SEED = 0
N_TRAIN = 8000  # the first N_TRAIN samples train the models, the rest is held out

random.seed(SEED)  # the stdlib RNG is the one RandomLinearModel samples its weights from
X, y = sklearn.datasets.make_hastie_10_2(random_state=SEED)
X_train, y_train = X[:N_TRAIN, :], y[:N_TRAIN]
X_test, y_test = X[N_TRAIN:, :], y[N_TRAIN:]

# Raise the summarization threshold so NumPy prints arrays in full.
np.set_printoptions(threshold=sys.maxsize)

# Exercise 1

1. Implement the AdaBoost ensemble algorithm by completing the following code:

In [3]:
class AdaBoost:
    """AdaBoost ensemble for binary classification with labels in {-1, +1}.

    The weak model must expose ``fit(X, y, sample_weight=w)`` and
    ``predict(X)``. A deep copy of it is trained at every boosting round, so
    the object passed to the constructor is never fitted itself.
    """

    # Weighted errors are clipped to [_EPS, 1 - _EPS] before computing alfa.
    # Without this, a weak model that is perfect (error 0) or perfectly wrong
    # (error 1) gets alfa = +/-inf, which turns the sample weights into NaN.
    _EPS = 1e-10

    def __init__(self, weakModel, T):
        """Store the weak-model template and the number of boosting rounds.

        Args:
            weakModel: unfitted estimator used as a template for each round.
            T: number of boosting rounds, i.e. weak models to train.
        """
        self.weakModel = weakModel
        self.T = T
        self.models = []  # [alfa, fitted_model] pairs, in training order

    def fit(self, X, y, K=0):
        """Train ``T`` weak models on progressively re-weighted samples.

        Args:
            X: training examples, one per row.
            y: labels in {-1, +1}, one per example.
            K: print the progress every ``K`` rounds; 0 (or None) disables
                the progress report.

        Returns:
            self, to allow call chaining.

        Raises:
            ValueError: if ``X`` contains no examples.
        """
        y = np.asarray(y)
        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("AdaBoost.fit requires at least one training example")

        # Start from scratch so that calling fit twice does not stack ensembles.
        self.models = []

        sample_weight = np.ones(n_samples) / n_samples
        # Running weighted vote of the ensemble on X. Kept incrementally so the
        # progress report does not have to re-predict with every stored model.
        ensemble_scores = np.zeros(n_samples)

        for t in range(self.T):
            # Train a fresh copy of the weak model on the current weights.
            current_model = copy.deepcopy(self.weakModel)
            current_model.fit(X, y, sample_weight=sample_weight)
            y_pred = np.asarray(current_model.predict(X))

            # Weighted error: total weight of the misclassified examples.
            error = np.dot(sample_weight, y_pred != y)

            # Reliability (vote) of this model.
            clipped_error = np.clip(error, self._EPS, 1 - self._EPS)
            alfa = np.log((1 - clipped_error) / clipped_error) / 2

            # Increase the weight of the mistakes, decrease the others, then
            # renormalize so the weights sum to one again.
            sample_weight = sample_weight * np.exp(-alfa * y_pred * y)
            sample_weight = sample_weight / np.sum(sample_weight)

            self.models.append([alfa, current_model])
            ensemble_scores = ensemble_scores + alfa * y_pred

            if K and t % K == 0:
                print("Iterazione ", t)
                print("Loss corrente: " + str(error))
                print("Alfa del modello corrente: " + str(alfa))

                print("Score di Adaboost ")
                self.print_score(y, self._to_labels(ensemble_scores))

        return self

    def _decision_scores(self, X):
        """Return the alfa-weighted sum of the weak models' predictions on X."""
        scores = np.zeros(len(X))
        for alfa, model in self.models:
            scores = scores + alfa * np.asarray(model.predict(X))
        return scores

    @staticmethod
    def _to_labels(scores):
        """Map each score to +1 when it is >= 0 and to -1 otherwise (list of ints)."""
        return np.where(np.asarray(scores) >= 0, 1, -1).tolist()

    def predict(self, X):
        """Predict a label for every row of X.

        Returns:
            A list with one int per example: 1 if the weighted vote of the
            ensemble is >= 0, -1 otherwise.
        """
        return self._to_labels(self._decision_scores(X))

    def print_score(self, y_actual, y_predicted):
        """Print the percentage of misclassified examples.

        With labels in {-1, +1} the dot product of the two label vectors is
        (#correct - #wrong), hence error = (1 - dot / N) / 2.
        """
        n_samples = len(y_predicted)

        errors = (1 - np.dot(y_actual, y_predicted) / n_samples) / 2

        print("% di errori: " + str(errors * 100) + " \n")

In the implementation you are free to assume:
- that the problem is a binary classification problem with labels in $\{-1, +1\}$.
- that the weakModel can fit a weighted sample set by means of the call `weakModel.fit(X,y,sample_weight=w)` where `w` is a vector of length $|y|$.

2. Test your implementation on the dataset loaded above and using an SVC with a polynomial kernel. 

3. Evaluate the AdaBoost performance as usual by calculating the classification error, and compare it with the classification error of the weak model.

**Note 1**:  
Since the labels are bound to be in $\{+1, -1\}$, the classification error (i.e., the number of incorrectly classified examples over the total number of examples) can be easily computed as:
$$
   error(y,y') = \frac{N - y \cdot y'}{2N} = \frac{1}{2} - \frac{y \cdot y'}{2N},
$$
where $N$ is the total number of examples. The formula can be derived noticing that $y \cdot y'$ calculates the number $N_c$ of examples correctly classified  minus the number $N_{\bar c}$ of examples incorrectly classified. We have then $y \cdot y' = N_c - N_{\bar c}$ and by noticing that $N = N_c + N_{\bar c}$:
$$
   N - y \cdot y' = N_c + N_{\bar c} - N_c + N_{\bar c} = 2 N_{\bar c} \Rightarrow \frac{N - y \cdot y'}{2 N} = \frac{N_{\bar c}}{N}
$$

**Note 2**:
do not forget to deepcopy your base model before fitting it to the new data

**Note 3**:
The SVC model allows specifying weights, but it *does not* work well when weights are normalized (it works well when the weights are larger). The following class takes normalized weights and denormalizes them before passing them to the SVC classifier:

```python
    class SVC_:
        def __init__(self, kernel="rbf", degree="3"):
            self.svc = SVC(kernel=kernel, degree=degree)

        def fit(self, X,y,sample_weight=None):
            if sample_weight is not None:
                sample_weight = sample_weight * len(X)

            self.svc.fit(X,y,sample_weight=sample_weight)
            return self

        def predict(self, X):
            return self.svc.predict(X)
```

In [4]:
class SVC_:
    """Thin wrapper around ``SVC`` that accepts normalized sample weights.

    The weights received by ``fit`` are multiplied by the number of examples
    before being handed to the underlying ``SVC``.
    """

    def __init__(self, kernel="rbf", degree=3, coef0=0):
        """Build the wrapped classifier.

        Args:
            kernel: kernel name forwarded to ``SVC``.
            degree: integer degree forwarded to ``SVC`` (the previous default
                was the string "3", which is not a valid degree).
            coef0: independent kernel term forwarded to ``SVC``.
        """
        self.svc = SVC(kernel=kernel, degree=degree, coef0=coef0, gamma="scale")

    def fit(self, X, y, sample_weight=None):
        """Fit the wrapped SVC, rescaling ``sample_weight`` by ``len(X)``.

        Returns:
            self, to allow call chaining.
        """
        if sample_weight is not None:
            # Go through an array: a plain list times an int would be repeated
            # len(X) times instead of being scaled element-wise.
            sample_weight = np.asarray(sample_weight) * len(X)

        self.svc.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Return the predictions of the wrapped SVC for X."""
        return self.svc.predict(X)

In [5]:
# Boost 100 cubic-polynomial SVCs, reporting progress every 10 rounds.
weakModel = SVC_(kernel="poly", degree=3, coef0=0)
adaboost = AdaBoost(weakModel, 100)
adaboost.fit(X_train, y_train, 10)

# Ensemble predictions on both splits.
y_train_predicted = adaboost.predict(X_train)
y_test_predicted = adaboost.predict(X_test)

# Misclassification rate on each split.
print("Score sul training set")
adaboost.print_score(y_train, y_train_predicted)

print("Score sul test set")
adaboost.print_score(y_test, y_test_predicted)

Iterazione  0
Loss corrente: 0.3509999999999752
Alfa del modello corrente: 0.3073232466191665
Score di Adaboost 
% di errori: 35.099999999999994 

Iterazione  10
Loss corrente: 0.38451750215342967
Alfa del modello corrente: 0.23520860119407447
Score di Adaboost 
% di errori: 20.85 

Iterazione  20
Loss corrente: 0.3938554075842484
Alfa del modello corrente: 0.215567351419645
Score di Adaboost 
% di errori: 16.4125 

Iterazione  30
Loss corrente: 0.4101405652505821
Alfa del modello corrente: 0.18169215965558866
Score di Adaboost 
% di errori: 14.687499999999998 

Iterazione  40
Loss corrente: 0.44025400809773974
Alfa del modello corrente: 0.1200656217320331
Score di Adaboost 
% di errori: 14.075 

Iterazione  50
Loss corrente: 0.45221816543799837
Alfa del modello corrente: 0.09585618264147742
Score di Adaboost 
% di errori: 13.424999999999997 

Iterazione  60
Loss corrente: 0.4465898878164908
Alfa del modello corrente: 0.1072293217306829
Score di Adaboost 
% di errori: 12.8 

Iterazione

# Exercise 2

1. Write a weak learner to be used with the AdaBoost algorithm you just wrote. The weak learner that you will implement is the most inaccurate weak learner possible: it basically works by extracting a linear model at random and trying to use that model to classify the examples. Being extracted at random the models it generates do not guarantee that the weighted error $\epsilon_t$ is smaller than $0.5$. The algorithm solves this problem by flipping the decisions whenever it finds out that $\epsilon_t > 0.5$ (i.e., if the weighted error is larger than $0.5$ it reverses the sign of all the weights so that the decision surface stays the same, but the regions where it predicts $+1$ and $-1$ are reversed).

    It shall work as follows:

    - it creates a random linear model by generating the needed weight vector $\mathbf{w}$ at random (**note**: these are the weights of the linear model, they are *NOT* related in any way to the weights of the examples); each weight shall be sampled from U(-1,1);
    - it evaluates the weighted loss $\epsilon_t$ on the given dataset and flips the linear model if $\epsilon_t > 0.5$;
    - at prediction time it predicts +1 if $\mathbf{x} \cdot \mathbf{w} > 0$; it predicts -1 otherwise.

In [6]:
class RandomLinearModel:
    """Weak learner that draws a linear separator at random.

    ``fit`` samples the weights ``w`` and the bias ``t`` from U(-1, 1) and
    negates both when the weighted error on the training data exceeds 0.5.
    ``predict`` returns -1 where ``X @ w + t`` is negative and +1 elsewhere.
    """

    def __init__(self):
        self.w = []  # weight vector; replaced by an ndarray in fit
        self.t = 0   # bias term; drawn in fit

    def loss(self, y, y_predicted, example_weights):
        """Return the total weight of the examples where y_predicted != y."""
        mistakes = np.asarray(y_predicted) != np.asarray(y)
        return np.dot(np.asarray(example_weights, dtype=float), mistakes)

    def fit(self, X, y, sample_weight=None):
        """Draw a random linear model and flip it if it is worse than chance.

        Args:
            X: training examples, one per row (2-D).
            y: labels in {-1, +1}.
            sample_weight: weight of each example; uniform when omitted.

        Returns:
            self, to allow call chaining.
        """
        n_samples, n_features = np.shape(X)[0], np.shape(X)[1]

        if sample_weight is None:
            sample_weight = np.ones(n_samples) / max(n_samples, 1)

        # Build the parameters from scratch on every call, so refitting works.
        # The stdlib RNG is consumed in the same order as before (all the
        # weights, then the bias), so a given random.seed() yields the same model.
        self.w = np.array([random.uniform(-1, 1) for _ in range(n_features)])
        self.t = random.uniform(-1, 1)

        # Negating w and t keeps the decision surface and swaps the two sides.
        if self.loss(y, self.predict(X), sample_weight) > 0.5:
            self.w = -self.w
            self.t = -self.t

        return self

    def predict(self, X):
        """Return an array with -1 where the score is negative and 1 elsewhere."""
        scores = np.dot(X, self.w) + self.t
        return np.where(scores < 0, -1, 1)

2. Learn an AdaBoost model using the RandomLinearModel weak learner, printing every $K$ iterations the weighted error and the current error of the ensemble (you are free to choose $K$ so as to make your output just frequent enough to let you know what is happening, but without flooding the console with messages). Evaluate the training and test error of the final ensemble model.

In [7]:
# Boost 10000 random linear separators, reporting progress every 2000 rounds.
rs = RandomLinearModel()
a = AdaBoost(rs, 10000).fit(X_train, y_train, 2000)

# Ensemble predictions on both splits.
y_train_ = a.predict(X_train)
y_test_ = a.predict(X_test)

# Misclassification rate on each split.
print("SCORES ON TRAINING SET")
a.print_score(y_train, y_train_)

print("SCORES ON TEST SET")
a.print_score(y_test, y_test_)

Iterazione  0
Loss corrente: 0.4976249999999591
Alfa del modello corrente: 0.004750035724523781
Score di Adaboost 
% di errori: 49.762499999999996 

Iterazione  2000
Loss corrente: 0.4992914484514749
Alfa del modello corrente: 0.0014171040456512653
Score di Adaboost 
% di errori: 35.699999999999996 

Iterazione  4000
Loss corrente: 0.49604734387983856
Alfa del modello corrente: 0.007905476924591225
Score di Adaboost 
% di errori: 30.65 

Iterazione  6000
Loss corrente: 0.4982383583631855
Alfa del modello corrente: 0.0035232978525259603
Score di Adaboost 
% di errori: 25.362499999999997 

Iterazione  8000
Loss corrente: 0.49932831019559626
Alfa del modello corrente: 0.0013433804169267166
Score di Adaboost 
% di errori: 22.325 

SCORES ON TRAINING SET
% di errori: 18.862499999999997 

SCORES ON TEST SET
% di errori: 43.5 



3. Write a few paragraphs about what you think about the experiment and about the results you obtained.