In [175]:
import sys
import copy
from datetime import datetime

import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
from torch.optim.optimizer import Optimizer, required

from logger import Logger

from solver import Solver
from crossngover import CrossN

torch.manual_seed(0)

<torch._C.Generator at 0x7fe110fc0dd0>

In [176]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,
                                          shuffle=False, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=256,
                                         shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [184]:
len(trainloader.dataset.data)

50000

In [190]:
trainset.data.shape

(50000, 32, 32, 3)

In [186]:
train_one, train_two = torch.utils.data.random_split(trainset, [40000,10000])

In [194]:
train_one

<torch.utils.data.dataset.Subset at 0x7fe0ec06a6a0>

In [192]:
train_one_one = torch.utils.data.DataLoader(train_one, batch_size=256,
                                         shuffle=False, num_workers=2)

In [193]:
train_one_one

<torch.utils.data.dataloader.DataLoader at 0x7fe19fd2c278>

In [3]:
def train_models(params, net, device=0):
    
    mode = params['mode']
    evo_step = int(params['evo_step'])
    
    experiment_note = ''
    path = ''
    for key in params:
        experiment_note += key +'_'+ params[key]+'\n'
        path +=  '_'+ params[key]
    

    logger = Logger(path, experiment_note)
    
    print(logger.path)
    print(experiment_note)

    lr = 0.001

    optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=0.0001)
    criterion = nn.CrossEntropyLoss()
    evo_optim = CrossN()

    def validation(net, dataloader, device=0):
        correct = 0
        total = 0
        with torch.no_grad():
            for data in dataloader:
                images, labels = data
                images = images.cuda(device)
                labels = labels.cuda(device)
                outputs = net(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        return 100.0 * correct / total

    
    solver = Solver(
        net,
        optimizer,
        logger,
        criterion,
        validation,
        evo_optim, 
        trainloader,
        testloader,
        testloader,  
        epochs=30,
        evo_step=evo_step,
        child_count=20,
        best_child_count=3,
        mode=mode,
        debug=True,
        lr=lr,
        device=device)

    logger.add_post_result(f'start: {datetime.now()}')
    solver.start()
    logger.add_post_result(f'finish: {datetime.now()}')
    torch.save(net.state_dict(), logger.path + '/model_last.chk')
    logger.close()
    
    

def train_three_types(model, TF, name):
    modes = ['evo_only', 'gradient', 'evo_cross']
    
    evo_step = 10

    for mode in modes: 
#         orig_stdout = sys.stdout
#         f = open(f'outputs/{name}_{mode}.txt', 'w')
#         sys.stdout = f

        
        params = {'net_name':name,
             'preptrained':TF,
             'mode':mode,
             'evo_step':str(evo_step)}
        temp_model = copy.deepcopy(model)
        train_models(params, temp_model)
#         sys.stdout = orig_stdout
#         f.close()
        torch.cuda.empty_cache()
    print('Finished')

In [195]:
net = torchvision.models.resnet18(pretrained=False)

num_ftrs = net.fc.in_features

classes = ('plane', 'car', 'bird', 'cat',
        'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

net.classifier = nn.Linear(num_ftrs, len(classes))
net.cuda()
print()




In [207]:
state = net.state_dict()
len(state)

124

In [208]:
params = list(net.parameters())

In [227]:
net.layer1

Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [203]:
state['conv1.weight'].grad

In [170]:
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

In [171]:
optim_d = optimizer.state_dict()

In [172]:
optim_d

{'state': {},
 'param_groups': [{'lr': 0.001,
   'betas': (0.9, 0.999),
   'eps': 1e-08,
   'weight_decay': 0,
   'amsgrad': False,
   'params': [140604486520696,
    140604483471616,
    140604483472624,
    140604483472552,
    140604483472696,
    140604483470464,
    140604483470320,
    140604483469960,
    140604483473344,
    140604483729592,
    140604483729952,
    140604483729808,
    140604486269400,
    140604484021488,
    140604484019904,
    140604481841120,
    140604484685112,
    140604486250288,
    140604481663192,
    140604481663048,
    140604481663552,
    140604484265520,
    140604483976864,
    140604486539160,
    140604481663984,
    140604481664056,
    140604481664200,
    140604481664488,
    140604481664416,
    140604481664632,
    140604481665568,
    140604481665496,
    140604481665712,
    140604481666072,
    140604481666000,
    140604481666216,
    140604481664992,
    140604481664920,
    140604481665136,
    140604481666432,
    14060448166542

In [168]:
for l in state:
    if 'conv' in l.lower():
        print(l)
        print(state[l].size())

conv1.weight
torch.Size([64, 3, 7, 7])
layer1.0.conv1.weight
torch.Size([64, 64, 3, 3])
layer1.0.conv2.weight
torch.Size([64, 64, 3, 3])
layer1.1.conv1.weight
torch.Size([64, 64, 3, 3])
layer1.1.conv2.weight
torch.Size([64, 64, 3, 3])
layer2.0.conv1.weight
torch.Size([128, 64, 3, 3])
layer2.0.conv2.weight
torch.Size([128, 128, 3, 3])
layer2.1.conv1.weight
torch.Size([128, 128, 3, 3])
layer2.1.conv2.weight
torch.Size([128, 128, 3, 3])
layer3.0.conv1.weight
torch.Size([256, 128, 3, 3])
layer3.0.conv2.weight
torch.Size([256, 256, 3, 3])
layer3.1.conv1.weight
torch.Size([256, 256, 3, 3])
layer3.1.conv2.weight
torch.Size([256, 256, 3, 3])
layer4.0.conv1.weight
torch.Size([512, 256, 3, 3])
layer4.0.conv2.weight
torch.Size([512, 512, 3, 3])
layer4.1.conv1.weight
torch.Size([512, 512, 3, 3])
layer4.1.conv2.weight
torch.Size([512, 512, 3, 3])


In [157]:
net.conv1

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [5]:
train_three_types(net, 'F', 'resnet18_256_test_lr')

./experiments/2020-01-31/_resnet18_256_test_lr_F_evo_only_10_23:48
net_name_resnet18_256_test_lr
preptrained_F
mode_evo_only
evo_step_10

Start training

first test
started score - 0.0
Epoch: 0	 Iterations: 0


TypeError: cuda(): argument 'device' (position 1) must be torch.device, not CrossEntropyLoss

mobilenet from F:
- Evo + Cross - 
- Evo Only - 
- Standard - 

resnet from F:
- Evo + Cross  - 
- Evo Only - 
- Standard - 

mobilenet pretrained T:
- Evo + Cross - 
- Evo Only  - 
- Standard - 

resnet pretrained T:
- Evo + Cross  
- Evo Only 
- Standard 

In [205]:
grad_of_params = {}
for name, parameter in net.named_parameters():
    grad_of_param[name] = parameter.grad

NameError: name 'grad_of_param' is not defined

In [2]:
torch.distributions.Uniform()

NameError: name 'loc' is not defined

In [2]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
#from torchviz import make_dot
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data.dataset import random_split

device = 'cuda' if torch.cuda.is_available() else 'cpu'

np.random.seed(42)
x = np.random.rand(100, 1)
true_a, true_b = 1, 2
y = true_a + true_b*x + 0.1*np.random.randn(100, 1)

x_tensor = torch.from_numpy(x).float()
y_tensor = torch.from_numpy(y).float()

class CustomDataset(Dataset):
    def __init__(self, x_tensor, y_tensor):
        self.x = x_tensor
        self.y = y_tensor

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return len(self.x)

dataset = TensorDataset(x_tensor, y_tensor) # dataset = CustomDataset(x_tensor, y_tensor)

train_dataset, val_dataset = random_split(dataset, [80, 20])

train_loader = DataLoader(dataset=train_dataset, batch_size=16)
val_loader = DataLoader(dataset=val_dataset, batch_size=20)


def pr(m):
    if 'Linear' == m.__class__.__name__:
        print(m.weight.grad)

class ManualLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1)
        self.linear_two = nn.Linear(1, 2)
        self.linear_one = nn.Linear(2, 1)

    def forward(self, x):
        x = self.linear(x)
        x = self.linear_two(x)
        x =  self.linear_one(x)
        return self.linear(x)

def make_train_step(model, loss_fn, optimizer):
    def train_step(x, y):
        optimizer.zero_grad()
        model.train()
        yhat = model(x)
        loss = loss_fn(y, yhat)
        loss.backward()
        #model.apply(pr)
        optimizer.step()
        return loss.item()
    return train_step

# Estimate a and b
torch.manual_seed(42)

model = ManualLinearRegression().to(device) # model = nn.Sequential(nn.Linear(1, 1)).to(device)
loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=1e-1)
train_step = make_train_step(model, loss_fn, optimizer)

n_epochs = 100
training_losses = []
validation_losses = []
print(model.state_dict())

for epoch in range(n_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    training_losses.append(training_loss)

    with torch.no_grad():
        val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.to(device)
            y_val = y_val.to(device)
            model.eval()
            yhat = model(x_val)
            val_loss = loss_fn(y_val, yhat).item()
            val_losses.append(val_loss)
        validation_loss = np.mean(val_losses)
        validation_losses.append(validation_loss)

    #print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")

#print(model.state_dict())


OrderedDict([('linear.weight', tensor([[0.7645]], device='cuda:0')), ('linear.bias', tensor([0.8300], device='cuda:0')), ('linear_two.weight', tensor([[-0.2343],
        [ 0.9186]], device='cuda:0')), ('linear_two.bias', tensor([-0.2191,  0.2018], device='cuda:0')), ('linear_one.weight', tensor([[-0.3443,  0.4153]], device='cuda:0')), ('linear_one.bias', tensor([0.6233], device='cuda:0'))])


In [68]:
def pro(m): 
    if 'Linear' == m.__class__.__name__:
        u = torch.distributions.Uniform(0,1)
        uni = u.sample(sample_shape=m.weight.grad.size()).cuda(0)
        print(f'UNI:{uni}, GRAD:{m.weight.grad}')
        print(f'PROD:{uni*m.weight.grad}')
        pass
    

In [69]:
model.apply(pro) #.linear.weight.grad

UNI:tensor([[0.1352]], device='cuda:0'), GRAD:tensor([[-1.5181]], device='cuda:0')
PROD:tensor([[-0.2053]], device='cuda:0')
UNI:tensor([[0.9012],
        [0.8918]], device='cuda:0'), GRAD:tensor([[ 0.2293],
        [-0.7210]], device='cuda:0')
PROD:tensor([[ 0.2067],
        [-0.6430]], device='cuda:0')
UNI:tensor([[0.1182, 0.4613]], device='cuda:0'), GRAD:tensor([[ 0.1151, -0.3553]], device='cuda:0')
PROD:tensor([[ 0.0136, -0.1639]], device='cuda:0')


ManualLinearRegression(
  (linear): Linear(in_features=1, out_features=1, bias=True)
  (linear_two): Linear(in_features=1, out_features=2, bias=True)
  (linear_one): Linear(in_features=2, out_features=1, bias=True)
)

In [13]:
model

ManualLinearRegression(
  (linear): Linear(in_features=1, out_features=1, bias=True)
  (linear_two): Linear(in_features=1, out_features=2, bias=True)
  (linear_one): Linear(in_features=2, out_features=1, bias=True)
)

m = torch.distributions.Uniform(0,1)

In [83]:
import torch

torch.manual_seed(0)
u = torch.distributions.Uniform(-1,1)
u.sample()

tensor(-0.0075)

In [84]:
u.sample()

tensor(0.5364)

In [85]:
u.sample()

tensor(-0.8230)