In [24]:
import torch
import numpy as np
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch import nn
from torch import optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from sklearn import metrics
from sklearn import ensemble
from sklearn import tree

from adaboost import MyAdaBoostClassifier


## 1. MLP Classification on CIFAR10 Dataset

### Predefine super-parameters

In [2]:
lr = 5e-4  # learning rate
l2_lambda = 1e-2  # L2 regularization penalty factor
batch_szie = 64
width, height = 32, 32  # for CIFAR10 images, with size 3*32*32

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("PyTorch Version:", torch.__version__)
print("Torch device:", device)


PyTorch Version: 1.10.0+cu111
Torch device: cuda


### Load data

In [3]:
cifar10_data = datasets.CIFAR10(root='./data/', train=True, download=True,
                                transform=transforms.Compose([transforms.ToTensor()]))
data = [d[0].data.cpu().numpy() for d in cifar10_data]
data_mean = np.mean(data)
data_std = np.std(data)
print(data_mean, data_std)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data/
0.4733649 0.25156906


In [4]:
trainset = datasets.CIFAR10(root='./data/', train=True, download=True,transform=transforms.Compose([
    # transforms.ToTensor(), transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))
    transforms.ToTensor(), transforms.Normalize(mean=data_mean, std=data_std)
]))
trainloader = DataLoader(trainset, batch_size=batch_szie, shuffle=True, num_workers=4)
testset = datasets.CIFAR10(root='./data/', train=False, download=True,transform=transforms.Compose([
    transforms.ToTensor(), transforms.Normalize(mean=data_mean, std=data_std)
]))
testloader = DataLoader(testset, batch_size=batch_szie, shuffle=True, num_workers=4)

Files already downloaded and verified
Files already downloaded and verified


### Define Neural Network modelm

In [18]:
class MLP(nn.Module):
    def __init__(self, n_channel, width, height):
        super(MLP, self).__init__()
        self.flatter = nn.Flatten()
        self.fc1 = nn.Linear(n_channel * width * height, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 10)

    def forward(self, x):
        h = self.flatter(x)
        h = F.relu(self.fc1(h))
        h = F.relu(self.fc2(h))
        h = F.relu(self.fc3(h))
        h = F.relu(self.fc4(h))
        y = F.softmax(self.fc5(h), dim=1)
        return y


In [19]:
def train(model: nn.Module, dataloader: DataLoader, optimizer: optim.Optimizer, loss_func: nn.Module, device=torch.device('cpu')):
    """
    Training process for one epoch
    :param model: Neural Network instance, in type of nn.Module
    :param dataloader: DataLoader instance, for training
    :param optimizer: optimizer for updating parameters of the GNN model
    :param loss_func: loss function in type of nn.Module
    :param device: default is CPU, requires to be set to use GPU
    """    
    correct = 0
    total_loss = 0
    for i, (data, labels) in enumerate(dataloader):
        data, labels = data.to(device), labels.to(device)
        output = model(data)
        loss = loss_func(output, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print('Train batch: {}, Train loss: {:.4f}'.format(i + 1, loss.item()))

        total_loss += loss.item()
        predicts = output.argmax(dim=1)
        correct += predicts.eq(labels.view_as(predicts)).sum().item()

    total_loss /= len(dataloader)
    accuracy = correct / len(dataloader.dataset)
    print('Train loss: {:.4f}, Train acccuracy: {:.4f}'.format(total_loss, accuracy))
    return total_loss, accuracy

def test(model: nn.Module, dataloader: DataLoader, loss_func: nn.Module, device=torch.device('cpu')):
    """
    Testing process for one epoch
    :param model: Neural Network instance, in type of nn.Module
    :param dataloader: DataLoader instance, for testing
    :param loss_func: loss function in type of nn.Module
    :param device: default is CPU, requires to be set to use GPU
    """    
    correct = 0
    total_loss = 0

    with torch.no_grad():
        for i, (data, labels) in enumerate(dataloader):
            data, labels = data.to(device), labels.to(device)
            output = model(data)
            total_loss += loss_func(output, labels).item()
            predicts = output.argmax(dim=1)
            correct += predicts.eq(labels.view_as(predicts)).sum().item()

    total_loss /= len(dataloader)
    accuracy = correct / len(dataloader.dataset)
    print('Test loss: {:.4f}, Test acccuracy: {:.4f}'.format(total_loss, accuracy))
    return total_loss, accuracy



### Training and Evaluating

1. `num_epoch` is 50 (without L2 regularization)

In [20]:
mlp50 = MLP(3, width, height).to(device)
optimizer = optim.Adam(mlp50.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()
num_epoch = 50

In [21]:
train_loss, test_loss = [], []
train_accu, test_accu = [], []

for epoch in range(1, num_epoch + 1):
    print('--- Epoch {} --- (No L2 regularization)'.format(epoch))
    trloss, traccu = train(mlp50, trainloader, optimizer, loss_func, device)
    teloss, teaccu = test(mlp50, trainloader, loss_func, device)
    train_loss.append(trloss)
    train_accu.append(traccu)
    test_loss.append(teloss)
    test_accu.append(teaccu)

np.savetxt('train-50.txt', np.vstack([train_loss, train_accu]))
np.savetxt('test-50.txt', np.vstack([test_loss, test_accu]))

--- Epoch 1 --- (No L2 regularization)
Train batch: 100, Train loss: 2.0628
Train batch: 200, Train loss: 2.1209
Train batch: 300, Train loss: 2.0395
Train batch: 400, Train loss: 2.0498
Train batch: 500, Train loss: 2.0787
Train batch: 600, Train loss: 2.1073
Train batch: 700, Train loss: 2.0036
Train loss: 2.1094, Train acccuracy: 0.3442
Test loss: 2.0628, Test acccuracy: 0.3921
--- Epoch 2 --- (No L2 regularization)
Train batch: 100, Train loss: 1.9963
Train batch: 200, Train loss: 2.0831
Train batch: 300, Train loss: 2.0820
Train batch: 400, Train loss: 2.1244
Train batch: 500, Train loss: 2.0432
Train batch: 600, Train loss: 2.0024
Train batch: 700, Train loss: 2.0769
Train loss: 2.0497, Train acccuracy: 0.4047
Test loss: 2.0236, Test acccuracy: 0.4330
--- Epoch 3 --- (No L2 regularization)
Train batch: 100, Train loss: 2.0864
Train batch: 200, Train loss: 1.8947
Train batch: 300, Train loss: 2.0788
Train batch: 400, Train loss: 2.0070
Train batch: 500, Train loss: 2.0327
Train ba

2. `num_epoch` is 250 (without L2 regularization)

In [22]:
mlp250 = MLP(3, width, height).to(device)
optimizer = optim.Adam(mlp250.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()
num_epoch = 250

In [23]:
train_loss, test_loss = [], []
train_accu, test_accu = [], []

for epoch in range(1, num_epoch + 1):
    print('--- Epoch {} --- (No L2 regularization)'.format(epoch))
    trloss, traccu = train(mlp250, trainloader, optimizer, loss_func, device)
    teloss, teaccu = test(mlp250, testloader, loss_func, device)

    train_loss.append(trloss)
    train_accu.append(traccu)
    test_loss.append(teloss)
    test_accu.append(teaccu)

np.savetxt('train-250.txt', np.vstack([train_loss, train_accu]))
np.savetxt('test-250.txt', np.vstack([test_loss, test_accu]))

--- Epoch 1 --- (No L2 regularization)
Train batch: 100, Train loss: 2.1694
Train batch: 200, Train loss: 2.0751
Train batch: 300, Train loss: 2.0717
Train batch: 400, Train loss: 2.0465
Train batch: 500, Train loss: 2.1582
Train batch: 600, Train loss: 2.0783
Train batch: 700, Train loss: 2.0229
Train loss: 2.1129, Train acccuracy: 0.3401
Test loss: 2.0701, Test acccuracy: 0.3861
--- Epoch 2 --- (No L2 regularization)
Train batch: 100, Train loss: 2.0278
Train batch: 200, Train loss: 2.0994
Train batch: 300, Train loss: 2.0962
Train batch: 400, Train loss: 2.0318
Train batch: 500, Train loss: 2.0306
Train batch: 600, Train loss: 1.9750
Train batch: 700, Train loss: 1.9456
Train loss: 2.0499, Train acccuracy: 0.4063
Test loss: 2.0418, Test acccuracy: 0.4120
--- Epoch 3 --- (No L2 regularization)
Train batch: 100, Train loss: 2.0853
Train batch: 200, Train loss: 1.8514
Train batch: 300, Train loss: 2.0537
Train batch: 400, Train loss: 2.0546
Train batch: 500, Train loss: 1.8954
Train ba

1. `num_epoch` is 50 (with L2 regularization)

In [24]:
mlp50 = MLP(3, width, height).to(device)
optimizer = optim.Adam(mlp50.parameters(), lr=lr, weight_decay=l2_lambda)
loss_func = nn.CrossEntropyLoss()
num_epoch = 50

In [25]:
train_loss, test_loss = [], []
train_accu, test_accu = [], []

for epoch in range(1, num_epoch + 1):
    print('--- Epoch {} --- (With L2 regularization)'.format(epoch))
    trloss, traccu = train(mlp50, trainloader, optimizer, loss_func, device)
    teloss, teaccu = test(mlp50, testloader, loss_func, device)

    train_loss.append(trloss)
    train_accu.append(traccu)
    test_loss.append(teloss)
    test_accu.append(teaccu)

np.savetxt('train-50-l2.txt', np.vstack([train_loss, train_accu]))
np.savetxt('test-50-l2.txt', np.vstack([test_loss, test_accu]))

--- Epoch 1 --- (With L2 regularization)
Train batch: 100, Train loss: 2.2884
Train batch: 200, Train loss: 2.2751
Train batch: 300, Train loss: 2.2817
Train batch: 400, Train loss: 2.2316
Train batch: 500, Train loss: 2.2530
Train batch: 600, Train loss: 2.2458
Train batch: 700, Train loss: 2.2627
Train loss: 2.2729, Train acccuracy: 0.1583
Test loss: 2.2639, Test acccuracy: 0.1648
--- Epoch 2 --- (With L2 regularization)
Train batch: 100, Train loss: 2.2943
Train batch: 200, Train loss: 2.2411
Train batch: 300, Train loss: 2.2649
Train batch: 400, Train loss: 2.2604
Train batch: 500, Train loss: 2.2514
Train batch: 600, Train loss: 2.2615
Train batch: 700, Train loss: 2.2605
Train loss: 2.2623, Train acccuracy: 0.1717
Test loss: 2.2622, Test acccuracy: 0.1758
--- Epoch 3 --- (With L2 regularization)
Train batch: 100, Train loss: 2.2661
Train batch: 200, Train loss: 2.2941
Train batch: 300, Train loss: 2.2745
Train batch: 400, Train loss: 2.2691
Train batch: 500, Train loss: 2.2646
Tr

1. `num_epoch` is 250 (with L2 regularization)

In [26]:
mlp250 = MLP(3, width, height).to(device)
optimizer = optim.Adam(mlp250.parameters(), lr=lr, weight_decay=l2_lambda)
loss_func = nn.CrossEntropyLoss()
num_epoch = 250

In [27]:
train_loss, test_loss = [], []
train_accu, test_accu = [], []

for epoch in range(1, num_epoch + 1):
    print('--- Epoch {} --- (With L2 regularization)'.format(epoch))
    trloss, traccu = train(mlp250, trainloader, optimizer, loss_func, device)
    teloss, teaccu = test(mlp250, testloader, loss_func, device)

    train_loss.append(trloss)
    train_accu.append(traccu)
    test_loss.append(teloss)
    test_accu.append(teaccu)

np.savetxt('train-250-l2.txt', np.vstack([train_loss, train_accu]))
np.savetxt('test-250-l2.txt', np.vstack([test_loss, test_accu]))

--- Epoch 1 --- (With L2 regularization)
Train batch: 100, Train loss: 2.2806
Train batch: 200, Train loss: 2.2553
Train batch: 300, Train loss: 2.2992
Train batch: 400, Train loss: 2.2296
Train batch: 500, Train loss: 2.2518
Train batch: 600, Train loss: 2.2614
Train batch: 700, Train loss: 2.2833
Train loss: 2.2669, Train acccuracy: 0.1648
Test loss: 2.2648, Test acccuracy: 0.1779
--- Epoch 2 --- (With L2 regularization)
Train batch: 100, Train loss: 2.2473
Train batch: 200, Train loss: 2.2713
Train batch: 300, Train loss: 2.2885
Train batch: 400, Train loss: 2.2753
Train batch: 500, Train loss: 2.2510
Train batch: 600, Train loss: 2.2709
Train batch: 700, Train loss: 2.2473
Train loss: 2.2620, Train acccuracy: 0.1779
Test loss: 2.2634, Test acccuracy: 0.1774
--- Epoch 3 --- (With L2 regularization)
Train batch: 100, Train loss: 2.2799
Train batch: 200, Train loss: 2.2767
Train batch: 300, Train loss: 2.2599
Train batch: 400, Train loss: 2.2490
Train batch: 500, Train loss: 2.2733
Tr

### Results comparison

In [33]:
train_50 = np.loadtxt('output/train-50.txt')
test_50 = np.loadtxt('output/test-50.txt')
train_50_l2 = np.loadtxt('output/train-50-l2.txt')
test_50_l2 = np.loadtxt('output/test-50-l2.txt')

## 2. Adaboost 

### load data
`adult_train.csv`, `adult_test.csv`

In [None]:
train_data = np.loadtxt('data/adult_train.csv', delimiter=',')
test_data = np.loadtxt('data/adult_test.csv', delimiter=',')
idx_train, idx_test = np.random.choice(range(len(train_data)), 20000), np.random.choice(range(len(test_data)), 10000)
train_data, test_data = train_data[idx_train], test_data[idx_test]
X_train, y_train = train_data[:,:-1], train_data[:,-1]
X_test, y_test = test_data[:,:-1], test_data[:,-1]
# convert {0,1} into {-1,1}
y_train = np.sign(y_train - 0.5)
y_test = np.sign(y_test - 0.5)
print(X_train.shape, X_test.shape)

(20000, 14) (10000, 14)


### Compare effetc of self-implemented adaboost and built-in adaboost in sklearn

In [None]:
booster = MyAdaBoostClassifier(n_estimators=10, max_depth=3, rand_seed=1234)
booster.fit(X_train, y_train)

In [None]:
t = tree.DecisionTreeClassifier(max_depth=3)
t.fit(X_train, y_train)

ada = ensemble.AdaBoostClassifier(base_estimator=tree.DecisionTreeClassifier(max_depth=3), n_estimators=10)
ada.fit(X_train, y_train)


AdaBoostClassifier(algorithm='SAMME.R',
                   base_estimator=DecisionTreeClassifier(ccp_alpha=0.0,
                                                         class_weight=None,
                                                         criterion='gini',
                                                         max_depth=3,
                                                         max_features=None,
                                                         max_leaf_nodes=None,
                                                         min_impurity_decrease=0.0,
                                                         min_impurity_split=None,
                                                         min_samples_leaf=1,
                                                         min_samples_split=2,
                                                         min_weight_fraction_leaf=0.0,
                                                         presort='deprecated',
                          

In [None]:
# ensemble classifier accuracy v.s. single decision tree
print('my self-implemented adaboost: ', metrics.accuracy_score(booster.predict(X_test), y_test))
print('single decision tree:', metrics.accuracy_score(t.predict(X_test), y_test))
print('built-in adaboost in sklearn:', metrics.accuracy_score(ada.predict(X_test), y_test))

my self-implemented adaboost:  0.8429
single decision tree: 0.8417
built-in adaboost in sklearn: 0.8579
