Merge pull request #931 from danieldk/maintenance/merge-v9-thincai
Merge `v9` into `thinc.ai`
danieldk committed Apr 18, 2024
2 parents 4c84103 + f348090 commit 98a3118
Showing 44 changed files with 1,306 additions and 1,241 deletions.
15 changes: 3 additions & 12 deletions .github/workflows/tests.yml
@@ -87,7 +87,9 @@ jobs:
      - name: Run mypy
        run: python -m mypy thinc --no-implicit-reexport
-        if: matrix.python_version != '3.6'
+        if: |
+          matrix.python_version != '3.6' &&
+          matrix.python_version != '3.7'
      - name: Delete source directory
        run: rm -rf thinc
@@ -150,14 +152,3 @@ jobs:

      - name: Run tests with extras
        run: python -m pytest --pyargs thinc --cov=thinc --cov-report=term -p thinc.tests.enable_tensorflow -p thinc.tests.enable_mxnet
-
-      - name: Run tests for thinc-apple-ops
-        run: |
-          pip uninstall -y tensorflow
-          pip install thinc-apple-ops
-          python -m pytest --pyargs thinc_apple_ops
-        if: matrix.os == 'macos-latest' && matrix.python_version == '3.10'
-
-      - name: Run tests with thinc-apple-ops
-        run: python -m pytest --pyargs thinc
-        if: matrix.os == 'macos-latest' && matrix.python_version == '3.10'
2 changes: 1 addition & 1 deletion requirements.txt
@@ -25,7 +25,7 @@ pytest-cov>=2.7.0,<5.0.0
coverage>=5.0.0,<8.0.0
mock>=2.0.0,<3.0.0
flake8>=3.5.0,<3.6.0
-mypy>=1.0.0,<1.1.0; python_version >= "3.7"
+mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8"
types-mock>=0.1.1
types-contextvars>=0.1.2; python_version < "3.7"
types-dataclasses>=0.1.3; python_version < "3.7"
18 changes: 14 additions & 4 deletions setup.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
import platform
import sys
from setuptools.command.build_ext import build_ext
from sysconfig import get_path
@@ -13,16 +14,16 @@
# http://docs.cython.org/en/latest/src/userguide/source_files_and_compilation.html#compiler-options
Options.docstrings = True

ACCELERATE = "thinc.backends._accelerate"
APPLE_OPS = ["thinc.backends.apple_ops", ACCELERATE]

PACKAGES = find_packages()
MOD_NAMES = [
    "thinc.backends.cblas",
    "thinc.backends.linalg",
    "thinc.backends.numpy_ops",
    "thinc.extra.search",
    "thinc.layers.sparselinear",
    "thinc.layers.premap_ids",
-]
+] + (APPLE_OPS if platform.system() == "Darwin" else [])
COMPILE_OPTIONS = {
    "msvc": ["/Ox", "/EHsc"],
    "other": ["-O3", "-Wno-strict-prototypes", "-Wno-unused-function", "-std=c++11"],
@@ -80,7 +81,16 @@ def setup_package():
    ext_modules = []
    for name in MOD_NAMES:
        mod_path = name.replace(".", "/") + ".pyx"
-        ext = Extension(name, [mod_path], language="c++", include_dirs=include_dirs)
+        if name == ACCELERATE:
+            ext = Extension(
+                name,
+                [mod_path],
+                language="c++",
+                include_dirs=include_dirs,
+                libraries=["blas"],
+            )
+        else:
+            ext = Extension(name, [mod_path], language="c++", include_dirs=include_dirs)
        ext_modules.append(ext)
    print("Cythonizing sources")
    ext_modules = cythonize(
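The hunk above only adds the Accelerate-backed modules to the build list on macOS. A minimal smoke-test sketch for an installed build (assuming thinc built from this commit; the module path comes from `ACCELERATE` above):

# Hypothetical smoke test: the _accelerate extension is only compiled
# when platform.system() == "Darwin", so guard the import accordingly.
import platform

if platform.system() == "Darwin":
    from thinc.backends import _accelerate  # built with libraries=["blas"]
    print("Accelerate gemm available:", hasattr(_accelerate, "gemm"))
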
2 changes: 1 addition & 1 deletion thinc/about.py
@@ -1,2 +1,2 @@
__version__ = "8.2.2"
__version__ = "9.0.0"
__release__ = True
13 changes: 10 additions & 3 deletions thinc/api.py
@@ -119,11 +119,13 @@
)
from .optimizers import SGD, Adam, Optimizer, RAdam
from .schedules import (
    Schedule,
    compounding,
    constant,
    constant_then,
    cyclic_triangular,
    decaying,
    plateau,
    slanted_triangular,
    warmup_linear,
)
@@ -160,6 +162,11 @@
    xp2torch,
)

try:
    from .backends import AppleOps
except ImportError:
    AppleOps = None

# fmt: off
__all__ = [
    # .config
@@ -179,8 +186,8 @@
    # .optimizers
    "Adam", "RAdam", "SGD", "Optimizer",
    # .schedules
-    "cyclic_triangular", "warmup_linear", "constant", "constant_then",
-    "decaying", "slanted_triangular", "compounding",
+    "Schedule", "cyclic_triangular", "warmup_linear", "constant", "constant_then",
+    "decaying", "slanted_triangular", "compounding", "plateau",
    # .types
    "Ragged", "Padded", "ArgsKwargs", "Unserializable",
    # .util
@@ -196,7 +203,7 @@
    "has_cupy",
    # .backends
    "get_ops", "set_current_ops", "get_current_ops", "use_ops",
-    "Ops", "CupyOps", "MPSOps", "NumpyOps", "set_gpu_allocator",
+    "Ops", "AppleOps", "CupyOps", "MPSOps", "NumpyOps", "set_gpu_allocator",
    "use_pytorch_for_gpu_memory", "use_tensorflow_for_gpu_memory",
    # .layers
    "Dropout", "Embed", "expand_window", "HashEmbed", "LayerNorm", "Linear",
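Downstream code can rely on the guarded export above: `thinc.api.AppleOps` is the class on macOS builds and `None` elsewhere. A minimal usage sketch under that assumption:

# Sketch: fall back to the current ops when AppleOps did not build.
import numpy
from thinc.api import AppleOps, get_current_ops

ops = AppleOps() if AppleOps is not None else get_current_ops()
x = numpy.ones((4, 2), dtype="f")
y = numpy.ones((2, 3), dtype="f")
print(type(ops).__name__, ops.gemm(x, y).shape)  # e.g. ("AppleOps", (4, 3))
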
33 changes: 15 additions & 18 deletions thinc/backends/__init__.py
@@ -19,13 +21,21 @@
from .numpy_ops import NumpyOps
from .ops import Ops

try:
    from .apple_ops import AppleOps
except ImportError:
    AppleOps = None

context_ops: ContextVar[Optional[Ops]] = ContextVar("context_ops", default=None)
context_pools: ContextVar[dict] = ContextVar("context_pools", default={})

# Internal use of thread-local storage only for detecting cases where a Jupyter
# notebook might not have preserved contextvars across cells.
_GLOBAL_STATE = {"ops": None}

# Thread-local state.
_LOCAL_STATE = threading.local()


def set_gpu_allocator(allocator: str) -> None:  # pragma: no cover
    """Route GPU memory allocation via PyTorch or tensorflow.
@@ -80,10 +88,6 @@ def use_tensorflow_for_gpu_memory() -> None:  # pragma: no cover


def _import_extra_cpu_backends():
-    try:
-        from thinc_apple_ops import AppleOps
-    except ImportError:
-        pass
    try:
        from thinc_bigendian_ops import BigEndianOps
    except ImportError:
@@ -152,22 +156,14 @@ def contextvars_eq_thread_ops() -> bool:
    return False


-def _get_thread_state():
+def _get_thread_state() -> threading.local:
    """Get a thread-specific state variable that inherits from a global
    state when it's created."""
-    thread: threading.Thread = threading.current_thread()
-    if not hasattr(thread, "__local"):
-        thread.__local = _create_thread_local(_GLOBAL_STATE)
-    return thread.__local
-
-
-def _create_thread_local(
-    attrs: Dict[str, Any], local_class: Type[threading.local] = threading.local
-):
-    obj = local_class()
-    for name, value in attrs.items():
-        setattr(obj, name, value)
-    return obj
+    if not hasattr(_LOCAL_STATE, "initialized") or not _LOCAL_STATE.initialized:
+        for name, value in _GLOBAL_STATE.items():
+            setattr(_LOCAL_STATE, name, value)
+        _LOCAL_STATE.initialized = True
+    return _LOCAL_STATE


__all__ = [
@@ -176,6 +172,7 @@ def _create_thread_local(
    "use_ops",
    "ParamServer",
    "Ops",
    "AppleOps",
    "CupyOps",
    "MPSOps",
    "NumpyOps",
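For reference, a short sketch of the ops-selection helpers this module exports (standard thinc API; the "apple" backend only exists on macOS builds of this commit, so the sketch uses "numpy"):

# get_current_ops() reads the contextvar set by use_ops()/set_current_ops().
from thinc.api import get_current_ops, use_ops

with use_ops("numpy"):
    ops = get_current_ops()
    print(ops.name)  # "numpy"
# Leaving the block restores the previously active ops.
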
40 changes: 40 additions & 0 deletions thinc/backends/_accelerate.pxd
@@ -0,0 +1,40 @@
cdef extern from "Accelerate/Accelerate.h":
    enum CBLAS_ORDER: CblasRowMajor, CblasColMajor
    enum CBLAS_TRANSPOSE: CblasNoTrans, CblasTrans, CblasConjTrans
    enum CBLAS_UPLO: CblasUpper, CblasLower
    enum CBLAS_DIAG: CblasNonUnit, CblasUnit
    enum CBLAS_SIDE: CblasLeft, CblasRight

    # BLAS level 1 routines

    void cblas_sswap(int M, float *x, int incX, float *y, int incY) nogil
    void cblas_sscal(int N, float alpha, float *x, int incX) nogil
    void cblas_scopy(int N, float *x, int incX, float *y, int incY) nogil
    void cblas_saxpy(int N, float alpha, float *x, int incX, float *y, int incY) nogil
    float cblas_sdot(int N, float *x, int incX, float *y, int incY) nogil
    float cblas_snrm2(int N, float *x, int incX) nogil
    float cblas_sasum(int N, float *x, int incX) nogil
    int cblas_isamax(int N, float *x, int incX) nogil

    # BLAS level 2 routines
    void cblas_sgemv(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
                     float alpha, float *A, int lda, float *x, int incX,
                     float beta, float *y, int incY) nogil

    void cblas_sger(CBLAS_ORDER Order, int M, int N, float alpha, float *x,
                    int incX, float *y, int incY, float *A, int lda) nogil

    # BLAS level 3 routines
    void cblas_sgemm(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA,
                     CBLAS_TRANSPOSE TransB, int M, int N, int K,
                     float alpha, float *A, int lda, float *B, int ldb,
                     float beta, float *C, int ldc) nogil


cdef void sgemm(bint TransA, bint TransB, int M, int N, int K,
                float alpha, const float* A, int lda, const float *B,
                int ldb, float beta, float* C, int ldc) nogil


cdef void saxpy(int N, float alpha, const float* X, int incX,
                float *Y, int incY) nogil
75 changes: 75 additions & 0 deletions thinc/backends/_accelerate.pyx
@@ -0,0 +1,75 @@
cimport numpy as np
from libc.stdint cimport uintptr_t

import numpy


cpdef np.ndarray gemm(float[:, ::1] A, float[:, ::1] B,
                      bint trans1=False, bint trans2=False,
                      np.ndarray out=None):
    cdef int nM = A.shape[0] if not trans1 else A.shape[1]
    cdef int nK = A.shape[1] if not trans1 else A.shape[0]
    cdef int nK_b = B.shape[0] if not trans2 else B.shape[1]
    cdef int nN = B.shape[1] if not trans2 else B.shape[0]

    cdef float[:, ::1] C = out

    if out is None:
        out = numpy.empty((nM, nN), dtype="f")
        C = out
    else:
        if C.shape[0] != nM or C.shape[1] != nN:
            msg = "Shape mismatch for output matrix, was: (%d, %d), expected (%d, %d)"
            raise ValueError(msg % (C.shape[0], C.shape[1], nM, nN))

    if nK != nK_b:
        msg = "Shape mismatch for gemm: (%d, %d), (%d, %d)"
        raise ValueError(msg % (nM, nK, nK_b, nN))

    if nM == 0 or nK == 0 or nN == 0:
        return out

    cblas_sgemm(
        CblasRowMajor,
        CblasTrans if trans1 else CblasNoTrans,
        CblasTrans if trans2 else CblasNoTrans,
        nM,
        nN,
        nK,
        1.0,
        &A[0, 0],
        A.shape[1],
        &B[0, 0],
        B.shape[1],
        0.0,
        &C[0, 0],
        C.shape[1]
    )
    return out


cdef void sgemm(bint TransA, bint TransB, int M, int N, int K,
                float alpha, const float* A, int lda, const float *B,
                int ldb, float beta, float* C, int ldc) nogil:
    cblas_sgemm(
        CblasRowMajor,
        CblasTrans if TransA else CblasNoTrans,
        CblasTrans if TransB else CblasNoTrans,
        M,
        N,
        K,
        alpha,
        A,
        lda,
        B,
        ldb,
        beta,
        C,
        ldc
    )


cdef void saxpy(int N, float alpha, const float* X, int incX,
                float *Y, int incY) nogil:
    cblas_saxpy(N, alpha, X, incX, Y, incY)
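
A short usage sketch for the cpdef `gemm` entry point above (macOS-only build; per the shape checks, `A` is `(M, K)` and `B` is `(K, N)` unless the transpose flags are set):

# Sketch: call the Accelerate-backed gemm directly with C-contiguous
# float32 arrays, optionally reusing a preallocated output buffer.
import numpy
from thinc.backends._accelerate import gemm

A = numpy.random.rand(2, 3).astype("f")
B = numpy.random.rand(3, 4).astype("f")
C = gemm(A, B)       # allocates a new (2, 4) output
gemm(A, B, out=C)    # writes into C, shape-checked against (nM, nN)
print(numpy.allclose(C, A @ B))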
39 changes: 39 additions & 0 deletions thinc/backends/apple_ops.pyx
@@ -0,0 +1,39 @@
from typing import Optional

import numpy

from ._accelerate import gemm

from ._accelerate cimport saxpy, sgemm
from .cblas cimport CBlas, set_saxpy, set_sgemm

from .. import registry
from ..types import Floats2d
from .numpy_ops import NumpyOps


@registry.ops("AppleOps")
class AppleOps(NumpyOps):
    """Thinc Ops class that calls into Apple's native libraries for some
    operations. Other operations fall back to numpy."""

    name = "apple"
    xp = numpy

    def cblas(self) -> CBlas:
        cdef CBlas cblas = CBlas()
        set_saxpy(cblas, saxpy)
        set_sgemm(cblas, sgemm)
        return cblas

    def gemm(
        self,
        x: Floats2d,
        y: Floats2d,
        out: Optional[Floats2d] = None,
        trans1: bool = False,
        trans2: bool = False,
    ) -> Floats2d:
        """Perform General Matrix Multiplication (GeMM) and optionally store
        the result in the specified output variable.
        """
        return gemm(x, y, out=out, trans1=trans1, trans2=trans2)
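
Because the class is registered as "AppleOps", it can also be resolved by name through thinc's registry, e.g. from a config file. A sketch, assuming a macOS build where this module compiled:

# Sketch: look the backend up in the ops registry instead of importing it.
from thinc.api import registry

ops_cls = registry.ops.get("AppleOps")
ops = ops_cls()
print(ops.name)  # "apple"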
13 changes: 12 additions & 1 deletion thinc/backends/cblas.pxd
@@ -1,8 +1,11 @@
from libcpp.memory cimport shared_ptr

ctypedef void (*sgemm_ptr)(bint transA, bint transB, int M, int N, int K,
-                          float alpha, const float* A, int lda, const float *B,
+                          float alpha, const float* A, int lda, const float* B,
                           int ldb, float beta, float* C, int ldc) nogil
ctypedef void (*dgemm_ptr)(bint transA, bint transB, int M, int N, int K,
                           double alpha, const double* A, int lda, const double* B,
                           int ldb, double beta, double* C, int ldc) nogil


ctypedef void (*saxpy_ptr)(int N, float alpha, const float* X, int incX,
@@ -12,6 +15,8 @@ ctypedef void (*saxpy_ptr)(int N, float alpha, const float* X, int incX,
ctypedef void (*daxpy_ptr)(int N, double alpha, const double* X, int incX,
                           double *Y, int incY) nogil

ctypedef void (*sscal_ptr)(int N, float alpha, float* X, int incX) nogil
ctypedef void (*dscal_ptr)(int N, double alpha, double* X, int incX) nogil

# Forward-declaration of the BlasFuncs struct. This struct must be opaque, so
# that consumers of the CBlas class cannot become dependent on its size or
@@ -32,6 +37,12 @@ cdef class CBlas:
cdef daxpy_ptr daxpy(CBlas cblas) nogil
cdef saxpy_ptr saxpy(CBlas cblas) nogil
cdef sgemm_ptr sgemm(CBlas cblas) nogil
cdef dgemm_ptr dgemm(CBlas cblas) nogil
cdef sscal_ptr sscal(CBlas cblas) nogil
cdef dscal_ptr dscal(CBlas cblas) nogil
cdef void set_daxpy(CBlas cblas, daxpy_ptr daxpy) nogil
cdef void set_saxpy(CBlas cblas, saxpy_ptr saxpy) nogil
cdef void set_sgemm(CBlas cblas, sgemm_ptr sgemm) nogil
cdef void set_dgemm(CBlas cblas, dgemm_ptr dgemm) nogil
cdef void set_sscal(CBlas cblas, sscal_ptr sscal) nogil
cdef void set_dscal(CBlas cblas, dscal_ptr dscal) nogil