[chore] move fair_dev into fairscale (#1078)
Co-authored-by: Min Xu <min.xu.public@gmail.com>
min-xu-ai and flying-x committed Sep 24, 2022
1 parent bfd57ff commit 8f8f8ef
Showing 63 changed files with 88 additions and 63 deletions.
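The change is mechanical: the top-level fair_dev package now lives inside the fairscale package, so every benchmark and test switches its import path from fair_dev.testing.testing to fairscale.fair_dev.testing.testing. A representative before/after in Python, taken from the hunks below (the specific helper names vary per file):

# Before this commit (old top-level package, no longer importable):
#   from fair_dev.testing.testing import dist_init, skip_if_no_cuda

# After this commit, the same helpers are reached through the fairscale package:
from fairscale.fair_dev.testing.testing import dist_init, skip_if_no_cuda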
2 changes: 1 addition & 1 deletion benchmarks/experimental/experimental_async_approaches.py
@@ -21,8 +21,8 @@
 import torchtext
 from torchtext.data.utils import get_tokenizer
 
-from fair_dev.testing.testing import dist_init, get_worker_map
 from fairscale.experimental.nn.ampnet_pipe import pipe
+from fairscale.fair_dev.testing.testing import dist_init, get_worker_map
 from fairscale.nn.model_parallel import initialize_model_parallel
 from fairscale.nn.model_parallel.initialize import get_pipeline_parallel_group
 from fairscale.nn.pipe import LazyModule
2 changes: 1 addition & 1 deletion benchmarks/pipe.py
@@ -16,7 +16,7 @@
 import utils
 
 from benchmarks.golden_configs.lm_wikitext2 import Pipe as lm_wikitext2
-from fair_dev.testing.testing import dist_init
+from fairscale.fair_dev.testing.testing import dist_init
 from fairscale.nn import Pipe
 from fairscale.nn.model_parallel import initialize_model_parallel
 
4 changes: 4 additions & 0 deletions fairscale/README.md
@@ -0,0 +1,4 @@
+NOTE:
+
+The experimental and fair_dev submodules are not part of the fairscale public
+API. There can be breaking changes in them at anytime.
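Because fair_dev carries no API guarantee, code outside the repository that borrows these helpers may want to guard the import. A minimal illustrative sketch (the fallback here is not part of fairscale, only a stand-in with the same intent):

import pytest
import torch

try:
    # Not a public API: this path may change or disappear in any release.
    from fairscale.fair_dev.testing.testing import skip_if_no_cuda
except ImportError:
    # Illustrative fallback: skip tests when CUDA is absent.
    skip_if_no_cuda = pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required")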
4 changes: 4 additions & 0 deletions fairscale/__init__.py
@@ -5,6 +5,10 @@
 
 ################################################################################
 # Import most common subpackages
+#
+# NOTE: we don't maintain any public APIs in both experimental and fair_dev
+# sub-modules. Code in them are experimental or for developer only. They
+# can be changed, removed, anytime.
 ################################################################################
 
 from typing import List
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -22,8 +22,8 @@
 from torch.optim.optimizer import Optimizer
 from torch.utils.data import DataLoader, Dataset
 
-from fair_dev.testing.testing import get_worker_map, torch_spawn
 from fairscale.experimental.nn.ampnet_pipe.pipe import AMPnetPipe
+from fairscale.fair_dev.testing.testing import get_worker_map, torch_spawn
 
 
 class MySGD(Optimizer):
2 changes: 1 addition & 1 deletion tests/experimental/nn/data_parallel/test_gossip.py
@@ -15,8 +15,8 @@
 import torch.distributed
 import torch.nn.functional as F
 
-from fair_dev.testing.testing import skip_if_single_gpu, spawn_for_all_world_sizes
 import fairscale.experimental.nn.data_parallel.gossip as gossip
+from fairscale.fair_dev.testing.testing import skip_if_single_gpu, spawn_for_all_world_sizes
 
 # Enfore CUBLAS reproducibility, see https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
 os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
2 changes: 1 addition & 1 deletion tests/experimental/nn/test_mevo.py
@@ -12,9 +12,9 @@
 import pytest
 import torch
 
-from fair_dev.testing.testing import skip_if_no_cuda
 from fairscale.experimental.nn import MEVO
 from fairscale.experimental.nn.mevo import BaselineSoftmaxNllLoss, get_data
+from fairscale.fair_dev.testing.testing import skip_if_no_cuda
 
 
 @pytest.fixture(scope="session", params=[torch.float16, torch.float32])
2 changes: 1 addition & 1 deletion tests/experimental/nn/test_multiprocess_pipe.py
@@ -20,8 +20,8 @@
 import torch.multiprocessing as mp
 import torch.nn as nn
 
-from fair_dev.testing.testing import skip_due_to_flakyness, skip_if_single_gpu
 from fairscale.experimental.nn.distributed_pipeline import DistributedLoss, DistributedPipeline, PipelineModulesGraph
+from fairscale.fair_dev.testing.testing import skip_due_to_flakyness, skip_if_single_gpu
 from fairscale.internal import torch_version
 
 pytestmark = pytest.mark.skipif(
2 changes: 1 addition & 1 deletion tests/experimental/nn/test_offload.py
@@ -14,8 +14,8 @@
 import pytest
 import torch
 
-from fair_dev.testing.testing import skip_if_no_cuda
 from fairscale.experimental.nn.offload import OffloadModel
+from fairscale.fair_dev.testing.testing import skip_if_no_cuda
 from fairscale.internal import torch_version
 
 if torch_version() >= (1, 8, 0):
2 changes: 1 addition & 1 deletion tests/experimental/tooling/test_layer_memory_tracker.py
@@ -10,12 +10,12 @@
 import torch.nn as nn
 from torch.nn.parallel import DistributedDataParallel
 
-from fair_dev.testing.testing import GPT2, dist_init, skip_if_no_cuda, skip_if_single_gpu, temp_files_ctx
 from fairscale.experimental.tooling.layer_memory_tracker import (
     LayerwiseMemoryTracker,
     ProcessGroupTracker,
     find_best_reset_points,
 )
+from fairscale.fair_dev.testing.testing import GPT2, dist_init, skip_if_no_cuda, skip_if_single_gpu, temp_files_ctx
 from fairscale.nn import FullyShardedDataParallel
 
 
2 changes: 1 addition & 1 deletion tests/experimental/wgit/test_sha1_store.py
@@ -11,8 +11,8 @@
 import torch
 from torch import nn
 
-from fair_dev.testing.testing import objects_are_equal
 from fairscale.experimental.wgit.sha1_store import SHA1_Store
+from fairscale.fair_dev.testing.testing import objects_are_equal
 
 # Get the absolute path of the parent at the beginning before any os.chdir(),
 # so that we can proper clean it up at any CWD.
2 changes: 1 addition & 1 deletion tests/experimental/wgit/test_signal_sparsity.py
@@ -6,8 +6,8 @@
 import pytest
 import torch
 
-from fair_dev.testing.testing import objects_are_equal
 from fairscale.experimental.wgit.signal_sparsity import SignalSparsity, random_sparse_mask
+from fairscale.fair_dev.testing.testing import objects_are_equal
 
 # Our own tolerance
 ATOL = 1e-6
2 changes: 1 addition & 1 deletion tests/experimental/wgit/test_signal_sparsity_profiling.py
@@ -8,8 +8,8 @@
 import pytest
 import torch
 
-from fair_dev.testing.testing import objects_are_equal, skip_if_no_cuda
 from fairscale.experimental.wgit.signal_sparsity_profiling import EnergyConcentrationProfile as ECP
+from fairscale.fair_dev.testing.testing import objects_are_equal, skip_if_no_cuda
 
 # Our own tolerance
 ATOL = 1e-6
2 changes: 1 addition & 1 deletion tests/nn/checkpoint/test_checkpoint_activations.py
@@ -10,7 +10,7 @@
 import torch.nn as nn
 from torch.utils.checkpoint import checkpoint as torch_checkpoint_wrapper
 
-from fair_dev.testing.testing import skip_if_no_cuda
+from fairscale.fair_dev.testing.testing import skip_if_no_cuda
 from fairscale.internal import torch_version
 from fairscale.nn.checkpoint.checkpoint_activations import checkpoint_wrapper, disable_checkpointing
 from fairscale.nn.misc import FlattenParamsWrapper
2 changes: 1 addition & 1 deletion tests/nn/checkpoint/test_checkpoint_activations_norm.py
@@ -14,7 +14,7 @@
 from torch.nn import BatchNorm2d, LayerNorm, Linear, Sequential
 from torch.optim import SGD
 
-from fair_dev.testing.testing import objects_are_equal
+from fairscale.fair_dev.testing.testing import objects_are_equal
 from fairscale.internal import torch_version
 from fairscale.nn.checkpoint.checkpoint_activations import checkpoint_wrapper
 
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp.py
@@ -18,7 +18,7 @@
 from torch import nn
 import torch.distributed
 
-from fair_dev.testing.testing import (
+from fairscale.fair_dev.testing.testing import (
     DeviceAndTypeCheckModule,
     DummyProcessGroup,
     dist_init,
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_freezing_weights.py
@@ -21,7 +21,7 @@
 from torch.nn.parallel import DistributedDataParallel
 import torch.optim as optim
 
-from fair_dev.testing.testing import dist_init, objects_are_equal, rmf, skip_if_single_gpu, teardown
+from fairscale.fair_dev.testing.testing import dist_init, objects_are_equal, rmf, skip_if_single_gpu, teardown
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 
 
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_grad_acc.py
@@ -12,7 +12,7 @@
 from parameterized import parameterized
 import torch
 
-from fair_dev.testing.testing import DummyProcessGroup, make_cudnn_deterministic, objects_are_equal
+from fairscale.fair_dev.testing.testing import DummyProcessGroup, make_cudnn_deterministic, objects_are_equal
 from fairscale.nn.data_parallel import FullyShardedDataParallel
 
 from .test_fsdp import DistributedTest, NestedWrappedModule, rename_test, spawn_and_init
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_hf_transformer_eval.py
@@ -6,7 +6,7 @@
 import torch
 from torch import nn
 
-from fair_dev.testing.testing import dist_init
+from fairscale.fair_dev.testing.testing import dist_init
 from fairscale.nn import FullyShardedDataParallel as FSDP
 from fairscale.nn import auto_wrap, enable_wrap
 
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_input.py
@@ -16,7 +16,7 @@
 from torch.nn import Linear, Module
 from torch.optim import SGD
 
-from fair_dev.testing.testing import dist_init, rmf, skip_if_no_cuda, teardown
+from fairscale.fair_dev.testing.testing import dist_init, rmf, skip_if_no_cuda, teardown
 from fairscale.internal import torch_version
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 from fairscale.nn.data_parallel import TrainingState
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_memory.py
@@ -18,7 +18,7 @@
 from torch.nn.parallel import DistributedDataParallel
 import torch.optim as optim
 
-from fair_dev.testing.testing import dist_init, dump_all_tensors, skip_if_single_gpu, teardown, temp_files_ctx
+from fairscale.fair_dev.testing.testing import dist_init, dump_all_tensors, skip_if_single_gpu, teardown, temp_files_ctx
 from fairscale.internal import torch_version
 from fairscale.internal.parallel import get_process_group_cached
 from fairscale.nn import checkpoint_wrapper
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_metadata.py
@@ -14,7 +14,7 @@
 import torch.nn as nn
 from torch.optim import Adam
 
-from fair_dev.testing.testing import in_temporary_directory, skip_if_single_gpu, temp_files_ctx
+from fairscale.fair_dev.testing.testing import in_temporary_directory, skip_if_single_gpu, temp_files_ctx
 from fairscale.nn import FullyShardedDataParallel
 from tests.nn.data_parallel.test_fsdp import DistributedTest, MixtureOfExperts, rename_test, spawn_and_init
 
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_multiple_forward.py
@@ -17,7 +17,7 @@
 from torch.nn import Linear, Module
 from torch.optim import SGD
 
-from fair_dev.testing.testing import dist_init, skip_if_single_gpu, teardown
+from fairscale.fair_dev.testing.testing import dist_init, skip_if_single_gpu, teardown
 from fairscale.internal import torch_version
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 from fairscale.nn.data_parallel import TrainingState
@@ -20,7 +20,7 @@
 from torch.nn.parallel import DistributedDataParallel
 import torch.optim as optim
 
-from fair_dev.testing.testing import dist_init, skip_if_single_gpu, teardown, temp_files_ctx
+from fairscale.fair_dev.testing.testing import dist_init, skip_if_single_gpu, teardown, temp_files_ctx
 from fairscale.internal import torch_version
 from fairscale.nn import checkpoint_wrapper
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_multiple_wrapping.py
@@ -17,7 +17,7 @@
 from torch.nn import Linear, Module, Sequential
 from torch.optim import SGD
 
-from fair_dev.testing.testing import dist_init, skip_if_no_cuda, teardown
+from fairscale.fair_dev.testing.testing import dist_init, skip_if_no_cuda, teardown
 from fairscale.internal import torch_version
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 from fairscale.nn.data_parallel import TrainingState
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_offload.py
@@ -25,7 +25,7 @@
     pytestmark = pytest.mark.skipif(True, reason=ie.msg)
     pass
 
-from fair_dev.testing.testing import dist_init, spawn_for_all_world_sizes
+from fairscale.fair_dev.testing.testing import dist_init, spawn_for_all_world_sizes
 from fairscale.nn.checkpoint.checkpoint_activations import checkpoint_wrapper
 from fairscale.nn.data_parallel import FullyShardedDataParallel, OffloadConfig, TrainingState
 
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_optimizer_utils.py
@@ -12,7 +12,7 @@
 from torch import nn
 from torch.optim import SGD, Adadelta, Adam # type: ignore
 
-from fair_dev.testing.testing import dist_init, objects_are_equal, spawn_for_all_world_sizes
+from fairscale.fair_dev.testing.testing import dist_init, objects_are_equal, spawn_for_all_world_sizes
 from fairscale.internal.params import recursive_copy_to_device
 from fairscale.nn.data_parallel import FullyShardedDataParallel, get_fsdp_instances
 from fairscale.nn.data_parallel.fsdp_optim_utils import is_singleton_tensor
8 changes: 7 additions & 1 deletion tests/nn/data_parallel/test_fsdp_overlap.py
@@ -19,7 +19,13 @@
 import torch.multiprocessing as mp
 import torch.nn as nn
 
-from fair_dev.testing.testing import dist_init, get_cycles_per_ms, skip_if_single_gpu, teardown, temp_files_ctx
+from fairscale.fair_dev.testing.testing import (
+    dist_init,
+    get_cycles_per_ms,
+    skip_if_single_gpu,
+    teardown,
+    temp_files_ctx,
+)
 from fairscale.internal import torch_version
 from fairscale.nn import enable_wrap, wrap
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_pre_backward_hook.py
@@ -13,7 +13,7 @@
 import torch
 from torch.nn import Linear, Module
 
-from fair_dev.testing.testing import dist_init, skip_if_no_cuda, teardown, temp_files_ctx
+from fairscale.fair_dev.testing.testing import dist_init, skip_if_no_cuda, teardown, temp_files_ctx
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 
 
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_regnet.py
@@ -33,7 +33,7 @@
 from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.optim import SGD
 
-from fair_dev.testing.testing import (
+from fairscale.fair_dev.testing.testing import (
     dist_init,
     objects_are_equal,
     rmf,
8 changes: 7 additions & 1 deletion tests/nn/data_parallel/test_fsdp_shared_weights.py
@@ -17,7 +17,13 @@
 from torch.nn import Linear, Module
 from torch.optim import SGD
 
-from fair_dev.testing.testing import dist_init, objects_are_equal, skip_if_single_gpu, teardown, temp_files_ctx
+from fairscale.fair_dev.testing.testing import (
+    dist_init,
+    objects_are_equal,
+    skip_if_single_gpu,
+    teardown,
+    temp_files_ctx,
+)
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 
 
4 changes: 2 additions & 2 deletions tests/nn/data_parallel/test_fsdp_shared_weights_mevo.py
@@ -17,15 +17,15 @@
 import torch.multiprocessing as mp
 from torch.optim import SGD
 
-from fair_dev.testing.testing import (
+from fairscale.experimental.nn import MEVO
+from fairscale.fair_dev.testing.testing import (
     dist_init,
     in_circle_ci,
     objects_are_equal,
     skip_if_single_gpu,
     teardown,
     temp_files_ctx,
 )
-from fairscale.experimental.nn import MEVO
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 
 VOCAB = 4
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_state_dict.py
@@ -11,7 +11,7 @@
 import torch
 from torch import nn
 
-from fair_dev.testing.testing import dist_init, objects_are_equal, skip_if_cuda, teardown, temp_files_ctx
+from fairscale.fair_dev.testing.testing import dist_init, objects_are_equal, skip_if_cuda, teardown, temp_files_ctx
 from fairscale.internal import torch_version
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_fsdp_uneven.py
@@ -18,7 +18,7 @@
 from torch.nn import Linear, Sequential
 from torch.optim import SGD
 
-from fair_dev.testing.testing import dist_init, skip_if_single_gpu, teardown
+from fairscale.fair_dev.testing.testing import dist_init, skip_if_single_gpu, teardown
 from fairscale.internal import torch_version
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 from fairscale.nn.data_parallel.fully_sharded_data_parallel import TrainingState
@@ -13,7 +13,7 @@
 import torch.distributed
 import torch.multiprocessing as mp
 
-from fair_dev.testing.testing import dist_init, skip_if_single_gpu, teardown, temp_files_ctx
+from fairscale.fair_dev.testing.testing import dist_init, skip_if_single_gpu, teardown, temp_files_ctx
 from fairscale.nn.checkpoint.checkpoint_activations import checkpoint_wrapper
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 
2 changes: 1 addition & 1 deletion tests/nn/data_parallel/test_sharded_ddp_features.py
@@ -16,7 +16,7 @@
 import torch.multiprocessing as mp
 from torch.nn import Linear, Sequential
 
-from fair_dev.testing.testing import (
+from fairscale.fair_dev.testing.testing import (
     GPT2,
     SGDWithPausingCompute,
     available_devices,
7 changes: 6 additions & 1 deletion tests/nn/data_parallel/test_sharded_ddp_pytorch_parity.py
@@ -19,7 +19,12 @@
 from torch.nn import Linear, Sequential
 from torch.nn.parallel import DistributedDataParallel as DDP
 
-from fair_dev.testing.testing import check_same_model_params, skip_if_no_cuda, skip_if_single_gpu, temp_files_ctx
+from fairscale.fair_dev.testing.testing import (
+    check_same_model_params,
+    skip_if_no_cuda,
+    skip_if_single_gpu,
+    temp_files_ctx,
+)
 from fairscale.internal import torch_version
 from fairscale.nn.data_parallel import ShardedDataParallel
 from fairscale.optim import OSS
2 changes: 1 addition & 1 deletion tests/nn/misc/test_flatten_params_wrapper.py
@@ -10,7 +10,7 @@
 
 import torch
 
-from fair_dev.testing.testing import objects_are_equal
+from fairscale.fair_dev.testing.testing import objects_are_equal
 from fairscale.nn import FlattenParamsWrapper
 
 
2 changes: 1 addition & 1 deletion tests/nn/model_parallel/test_cross_entropy.py
@@ -23,7 +23,7 @@
 import torch
 import torch.nn.functional as F
 
-from fair_dev.testing.testing import IdentityLayer, dist_init, set_random_seed, spawn_for_all_world_sizes
+from fairscale.fair_dev.testing.testing import IdentityLayer, dist_init, set_random_seed, spawn_for_all_world_sizes
 from fairscale.nn.model_parallel import initialize as mpu
 from fairscale.nn.model_parallel.cross_entropy import vocab_parallel_cross_entropy
 from fairscale.nn.model_parallel.mappings import scatter_to_model_parallel_region
2 changes: 1 addition & 1 deletion tests/nn/model_parallel/test_initialize.py
@@ -22,7 +22,7 @@
 
 import torch
 
-from fair_dev.testing.testing import dist_init, spawn_for_all_world_sizes
+from fairscale.fair_dev.testing.testing import dist_init, spawn_for_all_world_sizes
 from fairscale.nn.model_parallel import initialize as mpu
 
 
