Skip to content

Commit

Permalink
Merge branch 'master' into dynamic
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorghi12 committed Jul 23, 2018
2 parents 3425f66 + 6c4068e commit b4f6ff9
Show file tree
Hide file tree
Showing 224 changed files with 3,510 additions and 2,200 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ torch/csrc/nn/THNN.cpp
torch/csrc/nn/THNN.cwrap
torch/lib/*.a*
torch/lib/*.dll*
torch/lib/*.exe*
torch/lib/*.dylib*
torch/lib/*.h
torch/lib/*.lib
Expand Down
9 changes: 6 additions & 3 deletions .jenkins/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,13 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
(cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_aten_asan(3)")
fi

export ATEN_DISABLE_AVX=
export ATEN_DISABLE_AVX2=
if [[ "${JOB_BASE_NAME}" == *-NO_AVX-* ]]; then
export ATEN_CPU_CAPABILITY=default
elif [[ "${JOB_BASE_NAME}" == *-NO_AVX2-* ]]; then
export ATEN_CPU_CAPABILITY=avx
export ATEN_DISABLE_AVX=1
fi
if [[ "${JOB_BASE_NAME}" == *-NO_AVX2-* ]]; then
export ATEN_DISABLE_AVX2=1
fi

test_python_nn() {
Expand Down
4 changes: 0 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,6 @@ endif()
# ---[ CMake scripts + modules
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

if (MSVC AND ${BUILD_SHARED_LIBS})
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

# ---[ CMake build directories
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/Backtrace.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <ATen/optional.h>
#include <ATen/Backtrace.h>

#include <functional>
#include <memory>
Expand Down
6 changes: 4 additions & 2 deletions aten/src/ATen/Backtrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
#include <string>
#include <typeinfo>

#include <ATen/ATenGeneral.h>

namespace at {
/// Utility to demangle a C++ symbol name.
std::string demangle(const char* name);
AT_API std::string demangle(const char* name);

/// Returns the printable name of the type.
template <typename T>
Expand All @@ -19,7 +21,7 @@ inline const char* demangle_type() {
#endif // __GXX_RTTI
}

std::string get_backtrace(
AT_API std::string get_backtrace(
size_t frames_to_skip = 0,
size_t maximum_number_of_frames = 64,
bool skip_python_frames = true);
Expand Down
110 changes: 0 additions & 110 deletions aten/src/ATen/CUDAGuard.h
Original file line number Diff line number Diff line change
@@ -1,110 +0,0 @@
#pragma once

#include <ATen/ArrayRef.h>
#include <ATen/CUDAStream.h>
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>

#include <cstddef>
#include <vector>

namespace at {

/// A variant of `DeviceGuard` that additionally understands CUDA streams: it
/// can set, and on destruction restore, the current CUDA stream as well as
/// the current CUDA device. Because a CUDA stream is intrinsically bound to
/// one device, making a stream current *also* makes that stream's device the
/// current device.
struct CUDAGuard {
  /// Default constructor. Changes neither the current stream nor the current
  /// device until `set_stream` or `set_device` is called.
  CUDAGuard() = default;

  /// Makes `stream` and its associated device current (calls `set_stream`).
  explicit CUDAGuard(const CUDAStream& stream) {
    set_stream(stream);
  }

  /// Makes `device` the current device (calls `set_device`).
  explicit CUDAGuard(int32_t device) {
    set_device(device);
  }

  CUDAGuard(const CUDAGuard&) = delete;
  CUDAGuard& operator=(const CUDAGuard&) = delete;

  /// Move-constructs this `CUDAGuard` from another `CUDAGuard`. The
  /// moved-from `CUDAGuard` is modified such that its destruction has no
  /// effect (does not reset the stream or device).
  CUDAGuard(CUDAGuard&& other) noexcept = default;

  /// Move-assigns this `CUDAGuard` from another `CUDAGuard`. The moved-from
  /// guard's saved stream list is emptied so that its destructor does not
  /// reset any stream; the device part is delegated to `DeviceGuard`'s move
  /// assignment. Streams previously saved by *this* guard are discarded
  /// without being restored.
  ///
  /// Self-move-assignment is a no-op. Without this check the saved streams
  /// would be cleared and the destructor would silently restore nothing.
  CUDAGuard& operator=(CUDAGuard&& other) {
    if (this == &other) {
      return *this;
    }
    device_guard_ = std::move(other.device_guard_);
    original_streams_ = std::move(other.original_streams_);
    other.original_streams_.clear();
    return *this;
  }

  /// Resets the CUDA stream on each device to the one that was active when
  /// `set_stream` was first called. Does nothing stream-related if
  /// `set_stream` was never called (the saved list is empty).
  ~CUDAGuard() {
    for (size_t device = 0; device < original_streams_.size(); ++device) {
      globalContext().uncheckedSetCurrentCUDAStreamOnDevice(
          device, original_streams_[device]);
    }
  }

  /// Sets the current CUDA device to the device associated with the given
  /// stream, and then sets the current stream on that device to the one given.
  void set_stream(const CUDAStream& stream) {
    device_guard_.set_index(stream.device());
    // Snapshot the current stream of every device exactly once, on the first
    // call, so the destructor can put them all back.
    if (original_streams_.empty()) {
      const size_t device_count = globalContext().getNumGPUs();
      original_streams_.reserve(device_count);
      for (size_t device = 0; device < device_count; ++device) {
        original_streams_.push_back(
            globalContext().getCurrentCUDAStreamOnDevice(device));
      }
    }
    globalContext().setCurrentCUDAStreamOnDevice(
        device_guard_.last_index(), stream);
  }

  /// Sets the CUDA device to the given one.
  void set_device(int32_t device) {
    device_guard_.set_index(device);
  }

  /// Returns the CUDA streams that were active in the first call to
  /// `set_stream`. If there was no such call, the returned container is
  /// empty.
  ArrayRef<CUDAStream> original_streams() const noexcept {
    return original_streams_;
  }

  /// Returns the device that was set upon construction of the guard.
  int32_t original_device() const noexcept {
    return device_guard_.original_index();
  }

  /// Returns the last device that was set via `set_device`, if any.
  int32_t last_device() const noexcept {
    return device_guard_.last_index();
  }

 private:
  /// The guard for the current device.
  DeviceGuard device_guard_;
  /// The original streams that were active on all devices.
  std::vector<CUDAStream> original_streams_;
};

} // namespace at
197 changes: 0 additions & 197 deletions aten/src/ATen/CUDAStream.cpp
Original file line number Diff line number Diff line change
@@ -1,197 +0,0 @@
#include "ATen/CUDAStream.h"
#include "ATen/Error.h"
#include "ATen/detail/CUDAHooksInterface.h"

#include <mutex>

// Internal implementation is entirely hidden
// Per-stream bookkeeping record used by the detail::CUDAStream_* functions
// in this file.
struct CUDAStreamInternals {
// False for the per-device default-stream records filled in by
// initDefaultCUDAStreams(); true for records returned by
// CUDAStream_createAndRetainWithOptions(). Only destructible records are
// reference counted and destroyed.
bool is_destructible;
// Owner count. Initialised to 0 for default records and 1 for created ones;
// afterwards adjusted by CUDAStream_retain(), CUDAStream_free() and
// CUDAStream_uncheckedFree(), and only when is_destructible is true.
std::atomic<int> refcount;
// Index of the device this stream belongs to.
int64_t device; // Note: cudaGetDevice works with int32_t, not int64_t
// Underlying CUDA stream handle; DEFAULT_STREAM (0) for default records.
cudaStream_t stream;
};

namespace at {

namespace detail {

/*
* Stream state
*/
// Handle value that denotes the default stream on a device.
static constexpr cudaStream_t DEFAULT_STREAM = 0;

// Ensures initDefaultCUDAStreams() runs only once (see initCUDAStreamsOnce()).
static std::once_flag init_flag;
// Device count as reported by getCUDAHooks().getNumGPUs() during
// initDefaultCUDAStreams().
static int64_t num_gpus;
// Array of num_gpus default-stream records, indexed by device. Allocated in
// initDefaultCUDAStreams(); no matching free is visible in this file.
static CUDAStreamInternals* default_streams;
// Per-thread array of num_gpus pointers: the current stream of each device
// for the calling thread. Lazily allocated by initCUDAStreamsOnce().
static thread_local CUDAStreamInternals** current_streams = nullptr;

// Creates a(n indestructible) default stream for each device
// Note: the default stream on each device is signified by a zero
// value for the pointer, and so is not actually created as usual.
// In particular, we don't need to switch devices when creating the
// streams.
static void initDefaultCUDAStreams() {
num_gpus = getCUDAHooks().getNumGPUs();
default_streams = (CUDAStreamInternals*) malloc(num_gpus * sizeof(CUDAStreamInternals));
for (auto i = decltype(num_gpus){0}; i < num_gpus; ++i) {
default_streams[i].is_destructible = false;
default_streams[i].refcount = 0;
default_streams[i].device = i;
default_streams[i].stream = DEFAULT_STREAM;
}
}

// Init front-end to ensure initialization only occurs once
static void initCUDAStreamsOnce() {
// Inits default streams (once, globally)
std::call_once(init_flag, initDefaultCUDAStreams);

// Inits current streams (thread local) to default streams
if (current_streams) return;
current_streams = (CUDAStreamInternals**) malloc(num_gpus * sizeof(CUDAStreamInternals*));
for (auto i = decltype(num_gpus){0}; i < num_gpus; ++i) {
current_streams[i] = &default_streams[i];
}
}

/*
* Pointer-based stream API
*/

// Helper that asks DynamicCUDAInterface for the current device index and
// widens it to the int64_t used throughout this file.
static inline int64_t current_device() {
  int device_index;
  DynamicCUDAInterface::get_device(&device_index);
  return static_cast<int64_t>(device_index);
}

// Helper to verify the GPU index is valid: asserts 0 <= device < num_gpus.
// num_gpus is only assigned by initDefaultCUDAStreams(), so the callers in
// this file run initCUDAStreamsOnce() before calling this.
static inline void check_gpu(int64_t device) {
AT_ASSERT(device >= 0 && device < num_gpus);
}

// Returns the default-stream record for `device`. Initialises the stream
// state on first use and asserts that `device` is a valid index. The record
// is not retained; default records are not reference counted.
CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device) {
initCUDAStreamsOnce();
check_gpu(device);
return &default_streams[device];
}
// Returns the default-stream record of the device reported by
// current_device().
CUDAStreamInternals* CUDAStream_getDefaultStream() {
return CUDAStream_getDefaultStreamOnDevice(current_device());
}

// Creates (and retains) a new CUDA stream on the current device.
//
// `flags` and `priority` are forwarded unchanged to
// DynamicCUDAInterface::cuda_stream_create_with_priority. The returned record
// is destructible and starts with a reference count of 1, owned by the
// caller; release it with CUDAStream_free / CUDAStream_uncheckedFree.
//
// Fails an AT_ASSERT if the record cannot be allocated.
CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority) {
  CUDAStreamInternals* internals =
      static_cast<CUDAStreamInternals*>(malloc(sizeof(CUDAStreamInternals)));
  AT_ASSERT(internals != nullptr);
  try {
    internals->is_destructible = true;
    internals->refcount = 1;
    internals->device = current_device();
    DynamicCUDAInterface::cuda_stream_create_with_priority(&internals->stream, flags, priority);
  } catch (...) {
    // Whether the calls above can throw is not visible from this file; if
    // they do, do not leak the half-initialised record.
    free(internals);
    throw;
  }
  return internals;
}

// Note: although these methods are not marked "unsafe", when they are used in
// a multithreaded environment the caller must be sure that streams are valid
// when they're requested. These methods will throw an error if an
// invalid stream is requested.
//
// Returns the calling thread's current stream for `device` after retaining
// it on behalf of the caller. Initialises stream state on first use and
// asserts that `device` is a valid index.
// NOTE(review): the retain happens inside AT_ASSERT, so this relies on
// AT_ASSERT always evaluating its argument - confirm against its definition.
CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device) {
initCUDAStreamsOnce();
check_gpu(device);
auto cur = current_streams[device];
AT_ASSERT(CUDAStream_retain(cur));
return cur;
}
// Same as CUDAStream_getAndRetainCurrentStreamOnDevice, for the device
// reported by current_device().
CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream() {
return CUDAStream_getAndRetainCurrentStreamOnDevice(current_device());
}

// Note: these unsafe methods do not retain the stream before returning it.
// This is unsafe behavior and these methods SHOULD NOT BE USED.
// They are here only for legacy compatibility.
//
// Returns the calling thread's current stream for `device` without touching
// its reference count. Initialises stream state on first use and asserts
// that `device` is a valid index.
CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device) {
initCUDAStreamsOnce();
check_gpu(device);
return current_streams[device];
}
// Same as CUDAStream_getCurrentStreamOnDeviceUnsafe, for the device reported
// by current_device(). The returned stream is not retained.
CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe() {
return CUDAStream_getCurrentStreamOnDeviceUnsafe(current_device());
}

// Makes `ptr` the calling thread's current stream for `device`.
// Asserts that `device` is a valid index, that `ptr` is non-null, and that
// `ptr` belongs to `device`. The new stream is retained and the previously
// current stream is released via CUDAStream_free.
// NOTE(review): the retain happens inside AT_ASSERT, so this relies on
// AT_ASSERT always evaluating its argument - confirm against its definition.
void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* ptr) {
initCUDAStreamsOnce();
check_gpu(device);
AT_ASSERT(ptr);
AT_ASSERT(ptr->device == device);
AT_ASSERT(CUDAStream_retain(ptr));

// The retain above precedes this release, so re-setting the stream that is
// already current does not drop its count to zero in between.
CUDAStream_free(current_streams[device]);
current_streams[device] = ptr;
}

// Variant of CUDAStream_setStreamOnDevice with no assertions: `device` is not
// range-checked and `ptr` is not checked for null or for belonging to
// `device`. The previously current stream is released with
// CUDAStream_uncheckedFree and `ptr` is stored as the new current stream.
// NOTE(review): unlike CUDAStream_setStreamOnDevice, `ptr` is NOT retained
// here, yet a later set on this device will release it. Unless callers
// deliberately hand over a reference, this looks like it can unbalance the
// reference count of a destructible stream - confirm against callers.
void CUDAStream_uncheckedSetStreamOnDevice(int64_t device, CUDAStreamInternals* ptr) {
initCUDAStreamsOnce();
CUDAStream_uncheckedFree(current_streams[device]);
current_streams[device] = ptr;
}

// Makes `ptr` the current stream on the device reported by current_device();
// all checks are those of CUDAStream_setStreamOnDevice.
void CUDAStream_setStream(CUDAStreamInternals* ptr) {
CUDAStream_setStreamOnDevice(current_device(), ptr);
}

// Getters
// Returns the CUDA stream handle stored in `ptr`; asserts `ptr` is non-null.
cudaStream_t CUDAStream_stream(CUDAStreamInternals* ptr) {
AT_ASSERT(ptr);
return ptr->stream;
}

// Returns the device index stored in `ptr`; asserts `ptr` is non-null.
int64_t CUDAStream_device(CUDAStreamInternals* ptr) {
AT_ASSERT(ptr);
return ptr->device;
}

// Memory management
// Note: only destructible (non-default) streams are ref counted
//
// Adds one reference to `ptr` (asserted non-null). Indestructible records are
// left untouched and always report success. For destructible records the
// result is true only if the count after the increment is greater than 1,
// i.e. the record already had at least one owner.
bool CUDAStream_retain(CUDAStreamInternals* ptr) {
  AT_ASSERT(ptr);
  if (!ptr->is_destructible) {
    return true;
  }
  const int updated_count = ++ptr->refcount;
  return updated_count > 1;
}

// Drops one reference from `ptr`. When the last reference goes away the CUDA
// stream is destroyed, the record is freed and `ptr` is reset to nullptr;
// otherwise `ptr` is left as it was.
void CUDAStream_free(CUDAStreamInternals*& ptr) {
  // Null records, records with a null stream handle, and indestructible
  // records are never counted down or destroyed.
  if (!ptr || !ptr->stream || !ptr->is_destructible) {
    return;
  }
  // Other owners remain: nothing more to do.
  if (--ptr->refcount > 0) {
    return;
  }
  AT_ASSERT(ptr->refcount == 0);
  DynamicCUDAInterface::cuda_stream_destroy(ptr->stream);
  free(ptr);
  ptr = nullptr;
}
// Variant of CUDAStream_free that skips the refcount assertion and destroys
// the stream through DynamicCUDAInterface::unchecked_cuda_stream_destroy.
// When the last reference goes away the record is freed and `ptr` is reset
// to nullptr; otherwise `ptr` is left as it was.
void CUDAStream_uncheckedFree(CUDAStreamInternals*& ptr) {
  // Null records, records with a null stream handle, and indestructible
  // records are never counted down or destroyed.
  if (!ptr || !ptr->stream || !ptr->is_destructible) {
    return;
  }
  // Other owners remain: nothing more to do.
  if (--ptr->refcount > 0) {
    return;
  }
  DynamicCUDAInterface::unchecked_cuda_stream_destroy(ptr->stream);
  free(ptr);
  ptr = nullptr;
}

} // namespace detail

/*
* CUDAStream functions
*/

// Copy constructor
// Shares `other`'s internals record: asserts that `other` has one, retains
// it, and then points this object at the same record.
// NOTE(review): the retain happens inside AT_ASSERT, so this relies on
// AT_ASSERT always evaluating its argument - confirm against its definition.
CUDAStream::CUDAStream(const CUDAStream& other) {
AT_ASSERT(other.internals_);
AT_ASSERT(detail::CUDAStream_retain(other.internals_));

internals_ = other.internals_;
}

// Move constructor
// Takes over `other`'s internals record by swapping the two pointers; the
// reference count is not touched. Asserts that `other` has a record.
// NOTE(review): the swap reads this object's internals_ before this
// constructor has assigned it. That is only well-defined if the class
// declaration gives internals_ a default member initializer (not visible
// here) - confirm. If so, `other` is left holding that default value.
CUDAStream::CUDAStream(CUDAStream&& other) {
AT_ASSERT(other.internals_);

std::swap(internals_, other.internals_);
}

} // namespace at

0 comments on commit b4f6ff9

Please sign in to comment.