In [1]:

import os
import os.path as osp
import numpy as np
import tqdm
from tqdm.contrib import tenumerate
from utils import load_data, ModelTorch
import pickle as pkl

import torch.utils.data
import torch
import torch.nn as nn

# Main Train

In [2]:
eval_every = 0.1
def train(batch_size, lr, loss_lambda, num_epochs, save_dir):    
    os.makedirs(save_dir, exist_ok=True)
    # data
    train_samples, test_samples = load_data('data1.mat')
    train_data = np.stack([sample['x'] for sample in train_samples])
    generator = torch.Generator('cpu')
    generator.manual_seed(42)
    train_loader = torch.utils.data.DataLoader(
        train_samples, batch_size=batch_size, shuffle=True, generator=generator
    )

    test_loader = torch.utils.data.DataLoader(
        test_samples, batch_size=1, shuffle=True
    )


    # model
    model = ModelTorch(1, train_data=train_data, loss_lambda=loss_lambda)
    optim = torch.optim.SGD(model.parameters(), lr=lr)
    accs = []
    epoch_losses = []

    def eval():
        num_correct, num_samples = 0, 0
        for i, batch in tenumerate(test_loader):
            x = batch['x']
            y = batch['y']
            pred = model(x)
            pred = pred > 0
            gt = y > 0
            num_correct += torch.sum(gt == pred)
            num_samples += y.shape[0]
        acc = num_correct / num_samples
        return acc

    for epoch in range(num_epochs):    
        epoch_loss, num_epoch_sample = 0, 0
        for i, batch in tenumerate(train_loader):
            x = batch['x']
            y = batch['y']
            pred = model(x)
            loss = - torch.mean(torch.log(torch.sigmoid(y * pred)))
            loss.backward()
            optim.step()
            optim.zero_grad()
            epoch_loss += loss.item()
            num_epoch_sample += x.shape[0]
            if i != 0 and i % int(len(train_loader) * eval_every) == 0:
                print(f'evaluating on {epoch}, step {i} ')
                acc = eval()
                print(f'step loss: {loss.item()}')
                print(f'acc: {100 * acc: .02f}% on epoch {epoch}, step {i}')

            # pred = model.predict(x)
            # pred = pred > 0
            # print(pred.sum() / pred.shape[0])
            # gt = y > 0
            # print(f'acc: {100 * np.sum(gt == pred) / x.shape[0]: .02f}%')    
        # print(epoch_loss)
        epoch_losses.append(epoch_loss)

        with open(osp.join(save_dir, f'weight_epoch{epoch}.pkl'), 'wb') as f:
            pkl.dump(model.weights, f)
        acc = eval()
        accs.append(acc)
        print(f'acc: {100 * acc: .02f}% on epoch {epoch}')

    return epoch_losses, accs
            

In [3]:
experiment_result_dict = {}
loss_lambda=0
num_epochs=1
lr=0.01

# Batch Size 1 Setting

In [4]:
epoch_losses, accs = train(
    batch_size = 1,
    save_dir = 'sgd_bsz1',
    lr = lr,
    loss_lambda = loss_lambda,
    num_epochs=num_epochs,
)

experiment_result_dict['batch_size=1'] = {
    'accs': accs,
    'epoch_losses': epoch_losses
}


10.077141124806595


  0%|          | 0/10000 [00:00<?, ?it/s]

evaluating on 0, step 1000 


  0%|          | 0/1000 [00:00<?, ?it/s]

step loss: 0.009443746879696846
acc:  71.40% on epoch 0, step 1000
evaluating on 0, step 2000 


  0%|          | 0/1000 [00:00<?, ?it/s]

step loss: 3.3790829181671143
acc:  51.40% on epoch 0, step 2000
evaluating on 0, step 3000 


  0%|          | 0/1000 [00:00<?, ?it/s]

step loss: -0.0
acc:  50.20% on epoch 0, step 3000
evaluating on 0, step 4000 


  0%|          | 0/1000 [00:00<?, ?it/s]

step loss: 1.1251670122146606
acc:  62.80% on epoch 0, step 4000
evaluating on 0, step 5000 


  0%|          | 0/1000 [00:00<?, ?it/s]

step loss: -0.0
acc:  86.90% on epoch 0, step 5000
evaluating on 0, step 6000 


  0%|          | 0/1000 [00:00<?, ?it/s]

step loss: -0.0
acc:  68.90% on epoch 0, step 6000
evaluating on 0, step 7000 


  0%|          | 0/1000 [00:00<?, ?it/s]

step loss: -0.0
acc:  52.10% on epoch 0, step 7000
evaluating on 0, step 8000 


  0%|          | 0/1000 [00:00<?, ?it/s]

step loss: -0.0
acc:  88.40% on epoch 0, step 8000
evaluating on 0, step 9000 


  0%|          | 0/1000 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Batch Size 10

In [None]:
epoch_losses, accs = train(
    batch_size = 10,
    save_dir = 'sgd_bsz10',
    lr = lr, # seems like need to down scale the learning rate for batch size 100???
    loss_lambda = 0,
    num_epochs = num_epochs,
)
# converges when bsz = 15
# doesn't converges  when bsz >= 20
experiment_result_dict['batch_size=100'] = {
    'accs': accs,
    'epoch_losses': epoch_losses
}


# Batch Size 100

In [None]:
epoch_losses, accs = train(
    batch_size = 100,
    save_dir = 'sgd_bsz100',
    lr = lr, # seems like need to down scale the learning rate for batch size 100???
    loss_lambda = 0,
    num_epochs = num_epochs,
)
# converges when bsz = 15
# doesn't converges  when bsz >= 20
experiment_result_dict['batch_size=100'] = {
    'accs': accs,
    'epoch_losses': epoch_losses
}


# Gradient Descent

In [None]:
epoch_losses, accs = train(
    batch_size = 10000,
    save_dir = 'gd',
    lr = lr ,
    loss_lambda = loss_lambda,
    num_epochs=num_epochs,
)

experiment_result_dict['gradient_descent'] = {
    'accs': accs,
    'epoch_losses': epoch_losses
}


# Plotting

In [None]:
experiment_result_dict

In [None]:
# accs
from matplotlib import pyplot as plt

for experiment_name, experiment_result in experiment_result_dict.items():
    plt.plot(experiment_result['accs'], label=experiment_name)

plt.title('sgd accuracy curve')
plt.show()


# epoch losses

for experiment_name, experiment_result in experiment_result_dict.items():
    plt.plot(np.array(experiment_result['epoch_losses']), label=experiment_name)

plt.title('epoch losses')
plt.legend()
plt.show()

# Train BFGS