[unit test] refactor test tensor (#1005)
* polish test_gpt

* update op unit tests

* update test model
ver217 committed May 19, 2022
1 parent ad536e3 commit 8e3d0ad
Showing 8 changed files with 143 additions and 290 deletions.
2 changes: 1 addition & 1 deletion tests/components_to_test/__init__.py
@@ -1 +1 @@
-from . import repeated_computed_layer, resnet, nested_model, bert, no_leaf_module, simple_net
+from . import repeated_computed_layer, resnet, nested_model, bert, no_leaf_module, simple_net, gpt
79 changes: 79 additions & 0 deletions tests/components_to_test/gpt.py
@@ -0,0 +1,79 @@
import torch
import torch.nn as nn
from .registry import non_distributed_component_funcs
from transformers import GPT2Config, GPT2LMHeadModel
from .utils.dummy_data_generator import DummyDataGenerator
from colossalai.utils.cuda import get_current_device


class DummyDataLoader(DummyDataGenerator):
    vocab_size = 50304
    batch_size = 4
    seq_len = 1024

    def generate(self):
        input_ids = torch.randint(0,
                                  DummyDataLoader.vocab_size, (DummyDataLoader.batch_size, DummyDataLoader.seq_len),
                                  device=get_current_device())
        attention_mask = torch.ones_like(input_ids)
        return input_ids, attention_mask


class GPTLMModel(nn.Module):

    def __init__(self,
                 hidden_size=768,
                 num_layers=12,
                 num_attention_heads=12,
                 max_seq_len=1024,
                 vocab_size=50304,
                 checkpoint=False):
        super().__init__()
        self.checkpoint = checkpoint
        self.model = GPT2LMHeadModel(
            GPT2Config(n_embd=hidden_size,
                       n_layer=num_layers,
                       n_head=num_attention_heads,
                       n_positions=max_seq_len,
                       n_ctx=max_seq_len,
                       vocab_size=vocab_size,
                       resid_pdrop=0.0,
                       embd_pdrop=0.0,
                       attn_pdrop=0.0))
        if checkpoint:
            self.model.gradient_checkpointing_enable()

    def forward(self, input_ids, attention_mask):
        # Only return lm_logits
        return self.model(input_ids=input_ids, attention_mask=attention_mask, use_cache=not self.checkpoint)[0]


def gpt2_s(checkpoint=True):
    return GPTLMModel(checkpoint=checkpoint)


def gpt2_m(checkpoint=True):
    return GPTLMModel(hidden_size=1024, num_layers=24, num_attention_heads=16, checkpoint=checkpoint)


class GPTLMLoss(nn.Module):

    def __init__(self):
        super().__init__()
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, logits, labels):
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()
        # Flatten the tokens
        return self.loss_fn(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))


@non_distributed_component_funcs.register(name='gpt2')
def get_training_components():

    trainloader = DummyDataLoader()
    testloader = DummyDataLoader()

    criterion = GPTLMLoss()
    return gpt2_s, trainloader, testloader, torch.optim.Adam, criterion
30 changes: 30 additions & 0 deletions tests/test_tensor/_utils/_util.py
@@ -1,5 +1,19 @@
import os
import random
import numpy as np
import torch
import torch.distributed as dist
from colossalai.core import global_context as gpc
from colossalai.context import ParallelMode


def set_seed(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


def check_equal(A, B):
@@ -25,3 +39,19 @@ def broadcast_tensor_chunk(tensor, chunk_size=1, local_rank=0):

def tensor_equal(A, B):
    return torch.allclose(A, B, rtol=1e-3, atol=1e-1)


def tensor_shard_equal(tensor: torch.Tensor, shard: torch.Tensor):
    assert tensor.ndim == shard.ndim
    if tensor.shape == shard.shape:
        return tensor_equal(tensor, shard)
    else:
        dims_not_eq = torch.nonzero(torch.tensor(tensor.shape) != torch.tensor(shard.shape))
        if dims_not_eq.numel() == 1:
            # 1D shard
            dim = dims_not_eq.item()
            world_size = gpc.get_world_size(ParallelMode.PARALLEL_1D)
            rank = gpc.get_local_rank(ParallelMode.PARALLEL_1D)
            return tensor_equal(tensor.chunk(world_size, dim)[rank], shard)
        else:
            raise NotImplementedError
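
A short single-process illustration (assumed, not part of the diff) of the comparison tensor_shard_equal performs: when shapes match it falls back to tensor_equal; when exactly one dimension differs, it chunks the full tensor along that dimension and compares the slice belonging to the local rank. The hard-coded world_size and rank stand in for the values gpc would return inside a 1D tensor-parallel launch.

# Illustration only: mimics tensor_shard_equal without an initialized gpc.
import torch

world_size, rank = 2, 0                     # stand-ins for gpc.get_world_size / get_local_rank
full = torch.randn(8, 4)                    # the unsharded reference tensor
shard = full.chunk(world_size, 0)[rank]     # a 1D row shard held by this rank

dims_not_eq = torch.nonzero(torch.tensor(full.shape) != torch.tensor(shard.shape))
dim = dims_not_eq.item()                    # the single sharded dimension (0 here)
assert torch.allclose(full.chunk(world_size, dim)[rank], shard, rtol=1e-3, atol=1e-1)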
26 changes: 7 additions & 19 deletions tests/test_tensor/test_addmm_tp.py
@@ -11,6 +11,7 @@
from colossalai.utils import free_port
from functools import partial
from colossalai.core import global_context as gpc
+from _utils import tensor_shard_equal, tensor_equal


class Conv1D(nn.Module):
@@ -45,13 +46,6 @@ def init_1d_row(weight, bias):
    weight.set_spec(spec)


-def check_grad_1d_row(model: torch.nn.Module, weight, bias):
-    rank = gpc.get_local_rank(ParallelMode.PARALLEL_1D)
-    size = gpc.get_world_size(ParallelMode.PARALLEL_1D)
-    assert torch.allclose(model.weight.grad.chunk(size, 0)[rank], weight.grad)
-    assert torch.allclose(model.bias.grad, bias.grad)


def init_1d_col(weight, bias):
    spec = TensorSpec(
        distspec.shard(gpc.get_group(ParallelMode.PARALLEL_1D), [-1], [gpc.get_world_size(ParallelMode.PARALLEL_1D)]),
@@ -61,33 +55,27 @@ def init_1d_col(weight, bias):
    bias.set_spec(spec)


-def check_grad_1d_col(model: torch.nn.Module, weight, bias):
-    rank = gpc.get_local_rank(ParallelMode.PARALLEL_1D)
-    size = gpc.get_world_size(ParallelMode.PARALLEL_1D)
-    assert torch.allclose(model.weight.grad.chunk(size, -1)[rank], weight.grad)
-    assert torch.allclose(model.bias.grad.chunk(size, -1)[rank], bias.grad)


-def run_with_spec(spec_init_func, check_grad_func):
+def run_with_spec(spec_init_func):
    model = Conv1D(4, 16).cuda()
    weight = ColoTensor(torch.nn.Parameter(model.weight.detach()))
    bias = ColoTensor(torch.nn.Parameter(model.bias.detach()))
    spec_init_func(weight, bias)
    x = torch.rand(2, 16).cuda()
    out = model(x)
    colo_out = torch.addmm(bias, x, weight)
-    assert torch.allclose(out, colo_out)
+    assert tensor_equal(out, colo_out)
    grad = torch.rand_like(out)
    out.backward(grad)
    colo_out.backward(grad)
-    check_grad_func(model, weight, bias)
+    tensor_shard_equal(model.weight.grad, weight.grad)
+    tensor_shard_equal(model.bias.grad, bias.grad)


def run_dist(rank, world_size, port):
    config = dict(parallel=dict(tensor=dict(mode="1d", size=world_size),))
    colossalai.launch(config=config, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
-    run_with_spec(init_1d_row, check_grad_1d_row)
-    run_with_spec(init_1d_col, check_grad_1d_col)
+    run_with_spec(init_1d_row)
+    run_with_spec(init_1d_col)


@pytest.mark.dist
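
For reference, a single-process sketch (illustration only, not part of the diff) of the equivalence this test asserts: a GPT-2-style Conv1D forward computes x @ weight + bias with weight laid out as (in_features, out_features), which is exactly torch.addmm(bias, x, weight). The shapes match the test; running on plain CPU tensors without ColoTensor is an assumption.

# Single-process illustration, not part of this commit.
# Assumes a GPT-2-style Conv1D layout: weight is (in_features, out_features), bias is (out_features,).
import torch

x = torch.rand(2, 16)                      # same batch/feature shape as the test
weight = torch.randn(16, 4)                # in_features x out_features
bias = torch.randn(4)

out_matmul = x @ weight + bias             # what a Conv1D-style forward computes
out_addmm = torch.addmm(bias, x, weight)   # the ColoTensor path exercised by the test
assert torch.allclose(out_matmul, out_addmm, rtol=1e-3, atol=1e-1)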
23 changes: 6 additions & 17 deletions tests/test_tensor/test_embedding_tp.py
@@ -12,6 +12,7 @@
from colossalai.utils import free_port
from colossalai.core import global_context as gpc
from colossalai.tensor import TensorSpec, ComputePattern, ParallelAction, DistSpecManager
+from _utils import tensor_equal, tensor_shard_equal


def init_1d_row(weight):
@@ -22,12 +23,6 @@ def init_1d_row(weight):
    weight.set_spec(spec)


-def check_grad_1d_row(model: torch.nn.Module, weight):
-    rank = gpc.get_local_rank(ParallelMode.PARALLEL_1D)
-    size = gpc.get_world_size(ParallelMode.PARALLEL_1D)
-    assert torch.allclose(model.weight.grad.chunk(size, 0)[rank], weight.grad)


def init_1d_col(weight):
    spec = TensorSpec(
        distspec.shard(gpc.get_group(ParallelMode.PARALLEL_1D), [-1], [gpc.get_world_size(ParallelMode.PARALLEL_1D)]),
@@ -36,31 +31,25 @@ def init_1d_col(weight):
    weight.set_spec(spec)


-def check_grad_1d_col(model: torch.nn.Module, weight):
-    rank = gpc.get_local_rank(ParallelMode.PARALLEL_1D)
-    size = gpc.get_world_size(ParallelMode.PARALLEL_1D)
-    assert torch.allclose(model.weight.grad.chunk(size, -1)[rank], weight.grad)


-def run_with_spec(spec_init_func, check_grad_func):
+def run_with_spec(spec_init_func):
    model = torch.nn.Embedding(12, 32).cuda()
    weight = ColoTensor(torch.nn.Parameter(model.weight.detach()))
    spec_init_func(weight)
    x = torch.tensor((0, 3, 6, 9)).cuda()
    out = model(x)
    colo_out = F.embedding(x, weight)
-    assert torch.allclose(out, colo_out)
+    assert tensor_equal(out, colo_out)
    grad = torch.rand_like(out)
    out.backward(grad)
    colo_out.backward(grad)
-    check_grad_func(model, weight)
+    assert tensor_shard_equal(model.weight.grad, weight.grad)


def run_dist(rank, world_size, port):
    config = dict(parallel=dict(tensor=dict(mode="1d", size=world_size),))
    colossalai.launch(config=config, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
-    run_with_spec(init_1d_row, check_grad_1d_row)
-    run_with_spec(init_1d_col, check_grad_1d_col)
+    run_with_spec(init_1d_row)
+    run_with_spec(init_1d_col)


@pytest.mark.dist
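
For reference, an illustration (assumed, not from the diff) of which dimension each spec shards for the Embedding(12, 32) weight: init_1d_row splits dim 0 (the vocabulary rows) while init_1d_col splits dim -1 (the embedding columns); tensor_shard_equal then compares the matching chunk of the full gradient against the sharded gradient. The hard-coded world_size and rank are stand-ins for the 1D tensor-parallel group.

# Illustration only: shows which dimension each spec shards, without a distributed launch.
import torch

world_size, rank = 2, 0                               # stand-ins for the 1D tensor-parallel group
full_weight = torch.randn(12, 32)                     # same shape as torch.nn.Embedding(12, 32).weight

row_shard = full_weight.chunk(world_size, 0)[rank]    # init_1d_row: split the 12 vocab rows -> (6, 32)
col_shard = full_weight.chunk(world_size, -1)[rank]   # init_1d_col: split the 32 embedding dims -> (12, 16)

assert row_shard.shape == (6, 32) and col_shard.shape == (12, 16)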