In [7]:
# imports 
import random
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

Generate a random dataset with 20 samples. Each sample should have two input
features and 1 output label (-1 or 1). 

In [42]:
# Fix the legacy global NumPy seed so the draws below repeat on every run.
np.random.seed(120)

# Draw the two input features first, then one label from {-1, 1} per sample.
# The order of these two draws determines the values produced.
data = np.random.rand(20, 2)
target = np.random.choice([-1, 1], size=(20,))

# Append the labels as a third column (features in columns 0-1, label last).
data_with_y = np.column_stack((data, target))

print(data_with_y)

[[ 0.67795555  0.5129588   1.        ]
 [ 0.62370571  0.47714247  1.        ]
 [ 0.46025981  0.95086487 -1.        ]
 [ 0.9845756   0.85442255  1.        ]
 [ 0.59109035  0.20319637 -1.        ]
 [ 0.26236016  0.06641525 -1.        ]
 [ 0.64693501  0.71873295  1.        ]
 [ 0.35104926  0.95254348 -1.        ]
 [ 0.59547743  0.99450621 -1.        ]
 [ 0.54367354  0.07171263 -1.        ]
 [ 0.14068668  0.20727832 -1.        ]
 [ 0.50552741  0.53539293  1.        ]
 [ 0.53145508  0.23291118 -1.        ]
 [ 0.60550138  0.70289106 -1.        ]
 [ 0.06662988  0.2366768   1.        ]
 [ 0.15895731  0.97330243 -1.        ]
 [ 0.71025347  0.08285516  1.        ]
 [ 0.54836989  0.1424483  -1.        ]
 [ 0.10721273  0.61757862 -1.        ]
 [ 0.91187275  0.90530837 -1.        ]]


Here we train a weak-learner decision tree (a stump) with a maximum depth of 1, then output the feature and threshold of its split.

In [45]:
# Minimal stump: a decision tree limited to a single split. fit() returns the
# estimator itself, so construction and training are chained.
clf = DecisionTreeClassifier(max_depth=1).fit(data, target)

# Read the split stored at node 0 of the fitted tree: the feature column it
# tests and the cut-off value. Note that `threshold_index` holds the threshold
# value itself, not an index.
feature_index, threshold_index = clf.tree_.feature[0], clf.tree_.threshold[0]

print(f"Feature: {feature_index}, Threshold: {threshold_index}")


Feature: 0, Threshold: 0.6146035194396973


Based on the prediction errors, calculate the coefficient $\alpha_j$ and the updated weights.
Output the updated weights. (Follow the AdaBoost algorithm.)

In [54]:
# m is the number of boosting rounds (num_itr)
def adaBoost_train(data, m, eps=1e-10):
    """Run AdaBoost with depth-1 decision trees and return the ensemble's predictions.

    Each round fits a stump on the weighted samples, computes its weighted
    error and coefficient alpha, adds ``alpha * prediction`` to a running
    score, then re-weights and re-normalizes the samples. The per-round
    error, alpha and updated weights are printed.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array whose last column is the label and whose preceding columns
        are the input features. The weight update multiplies label by
        prediction, so labels are expected to be -1 / 1.
    m : int
        Number of boosting rounds.
    eps : float, optional
        The weighted error is clipped to ``[eps, 1 - eps]`` before alpha is
        computed. Without this, a stump with error 0 (or 1) yields an
        infinite alpha, the normalized weights become NaN, and every later
        round and the final prediction are NaN as well.

    Returns
    -------
    numpy.ndarray
        ``np.sign`` of the alpha-weighted sum of the stumps' predictions for
        the rows of ``data``.
    """
    features, labels = data[:, :-1], data[:, -1]

    # start from uniform weights that sum to 1
    n = data.shape[0]
    weights = np.ones(n) / n

    # running alpha-weighted vote of all stumps
    y_preds = np.zeros(n)

    for iteration in range(m):
        # weak learner: a stump trained on the currently weighted samples
        classifier = DecisionTreeClassifier(max_depth=1)
        classifier.fit(features, labels, sample_weight=weights)
        y_pred = classifier.predict(features)

        # weighted error; the weights sum to 1, so no extra normalization
        error = np.sum(weights * (y_pred != labels))

        # keep alpha finite when the stump is perfect (error 0) or always
        # wrong (error 1); for any other error the value is unchanged
        safe_error = np.clip(error, eps, 1 - eps)
        alpha = 0.5 * np.log((1 - safe_error) / safe_error)

        y_preds += alpha * y_pred

        # shrink weights of correctly classified samples and grow the rest
        # (label * prediction is +1 when they agree and -1 otherwise)
        weights = weights * np.exp(-alpha * labels * y_pred)

        # normalize the weights so they sum to 1
        weights = weights / np.sum(weights)

        print(f"Iteration: {iteration}, Error: {error}, Alpha: {alpha}, Weights: {weights}")

    # final prediction is the sign of the weighted vote
    return np.sign(y_preds)
    
# Run 10 boosting rounds on the generated dataset (features + label column);
# the returned array of predictions is this cell's displayed result.
adaBoost_train(data_with_y, 10)

Iteration: 0, Error: 0.15000000000000002, Alpha: 0.8673005276940532, Weights: [0.02941176 0.02941176 0.02941176 0.02941176 0.02941176 0.02941176
 0.02941176 0.02941176 0.02941176 0.02941176 0.02941176 0.16666667
 0.02941176 0.02941176 0.16666667 0.02941176 0.02941176 0.02941176
 0.02941176 0.16666667]
Iteration: 1, Error: 0.23529411764705885, Alpha: 0.589327498170823, Weights: [0.01923077 0.01923077 0.01923077 0.01923077 0.0625     0.0625
 0.01923077 0.01923077 0.01923077 0.0625     0.0625     0.10897436
 0.0625     0.0625     0.10897436 0.01923077 0.01923077 0.0625
 0.0625     0.10897436]
Iteration: 2, Error: 0.20512820512820512, Alpha: 0.6772728314026553, Weights: [0.046875   0.046875   0.01209677 0.046875   0.03931452 0.03931452
 0.046875   0.01209677 0.01209677 0.03931452 0.03931452 0.265625
 0.03931452 0.03931452 0.06854839 0.01209677 0.046875   0.03931452
 0.03931452 0.06854839]
Iteration: 3, Error: 0.2424395161290322, Alpha: 0.5696755598236067, Weights: [0.03093812 0.03093812 0.

array([ 1.,  1., -1.,  1., -1., -1.,  1., -1., -1., -1., -1.,  1., -1.,
       -1.,  1., -1.,  1., -1., -1., -1.])