From c533035f3d8feae2f2ab95b9ef0b67be9eed7235 Mon Sep 17 00:00:00 2001
From: John Kirkham <kirkhamj@janelia.hhmi.org>
Date: Fri, 23 Nov 2018 22:42:33 -0500
Subject: [PATCH 1/5] Only patch-up `array` buffer on Python 2/3

When initializing `cybuffer` with an `array, ensure that we only patch
up the `format` and associated attributes for the `array` buffer on
Python 2/3. This workaround is needed for Python 2 as the `array` type
does not support the (new) buffer protocol. So we need to use the old
buffer protocol and then the (new) buffer protocol with that result.
This loses information about the `array` in the process, which is the
bulk of the reason this is needed. Python 3's `array` supports the (new)
buffer protocol; so, we can largely ignore these workarounds. However on
Python 2/3, we have the `"u"` (unicode) format for the `array`, which is
a little broken. So we workaround those issues by casting the array to
an unsigned integer of equivalent width to the unicode type. However the
`"u"` format for `array` is deprecated and scheduled for removal in
Python 4. So there should be no need to do any of this work on Python 4.
As such, check whether Python 2/3 is in use when checking if the data is
provided is an `array`. That way this whole branch will automatically be
removed on Python 4 when it lands.
---
 src/cybuffer.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cybuffer.pyx b/src/cybuffer.pyx
index 883d42d..7b34fe6 100644
--- a/src/cybuffer.pyx
+++ b/src/cybuffer.pyx
@@ -162,7 +162,7 @@ cdef class cybuffer(object):
         # Workaround some special cases with the builtin array
         cdef size_t len_nd_b
         cdef int n_1
-        if isinstance(self.obj, array):
+        if (PY2K or PY3K) and isinstance(self.obj, array):
             # Fix-up typecode
             typecode = self.obj.typecode
             if typecode == "B":
@@ -170,7 +170,7 @@ cdef class cybuffer(object):
             elif PY2K and typecode == "c":
                 self._format = UBYTE_TC
                 return
-            elif (PY2K or PY3K) and typecode == "u":
+            elif typecode == "u":
                 if Py_UNICODE_SIZE == 2:
                     self._format = UCS2_TC
                 elif Py_UNICODE_SIZE == 4:

From 071aade2e3e8d6d9990d9f0167fc0d7cf88f1c67 Mon Sep 17 00:00:00 2001
From: John Kirkham <kirkhamj@janelia.hhmi.org>
Date: Fri, 23 Nov 2018 23:14:08 -0500
Subject: [PATCH 2/5] Define `Py_UNICODE_SIZE` at Cython compile-time

Previously we dealt with `Py_UNICODE_SIZE` as a C compile-time constant,
which would inject the value during the C preprocessor pass. Later
compilation optimizations by the C compiler likely removed unneeded
branches caused by this. Though this is pretty needless as Cython is
generating these branches that we later drop. Not to mention Cython
sometimes assigns constants like these to intermediate or stack local
variables, which can hinder compiler optimizations. It would be better
yet to have Cython not define these branches at all.

To that end, we make `Py_UNICODE_SIZE` a Cython compile-time constant
that is set in `config.pxi` along with other compile-time constants. As
a result, Cython injects these directly in `cybuffer.pxd` and
`cybuffer.pxi` before even performing any compilation. Once it goes to
compile the Cython code to C, it recognizes `Py_UNICODE_SIZE` is a
compile-time constant and evaluates all expressions it is used in. As a
result, the Cython branches with `Py_UNICODE_SIZE` are eliminated during
compile-time leaving only one unconditional code path in C.

In order to discern what `Py_UNICODE_SIZE` should be at the Python
level, we inspect the `itemsize` of an empty unicode (i.e. `"u"`)
`array`'s `itemsize` in `setup.py`. Based on this we can determine
`Py_UNICODE_SIZE`'s value. We then inject this into `config.pxi`. This
value is only inspected and added for Python 2/3 as the unicode `array`
is scheduled for removal in Python 4. As we only check this value
within a branch of code that should be removed on Python 4 anyways,
leaving `Py_UNICODE_SIZE` undefined shouldn't be an issue for Python 4
builds.

While `Py_UNICODE_SIZE` is defined in Python headers, we don't need to
be worried about our definition of `Py_UNICODE_SIZE` clashing with it.
The reason being Cython guarantees the assigned value will injected into
the source. So the definition will have already been eliminated by the
time the C code is generated. Further we only use `Py_UNICODE_SIZE` in
comparisons to other constants. So Cython will inject the value and then
evaluate these comparisons resulting in them being turned into always
`True` or `False` branches. These in turn are optimized away.
---
 setup.py         | 7 +++++++
 src/cybuffer.pyx | 2 --
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 42592e4..0745845 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import array
 import glob
 import os
 import sys
@@ -68,6 +69,12 @@ def run_tests(self):
             "DEF PY2K = " + str(sys.version_info.major == 2) + "\n",
             "DEF PY3K = " + str(sys.version_info.major == 3) + "\n"
         ])
+        if sys.version_info.major < 4:
+            Py_UNICODE_SIZE = array.array('u').itemsize
+            f.writelines([
+                "DEF Py_UNICODE_SIZE = " + str(Py_UNICODE_SIZE) + "\n",
+            ])
+
     with open("src/version.pxi", "w") as f:
         f.writelines([
             "__version__ = " + "\"" + str(version) + "\""
diff --git a/src/cybuffer.pyx b/src/cybuffer.pyx
index 7b34fe6..e79293b 100644
--- a/src/cybuffer.pyx
+++ b/src/cybuffer.pyx
@@ -33,8 +33,6 @@ include "version.pxi"
 
 
 cdef extern from "Python.h":
-    size_t Py_UNICODE_SIZE
-
     object PyMemoryView_FromObject(object obj)
 
 

From c89c982ba39833e6a6836b77651c571c4209dad0 Mon Sep 17 00:00:00 2001
From: John Kirkham <kirkhamj@janelia.hhmi.org>
Date: Fri, 23 Nov 2018 23:28:47 -0500
Subject: [PATCH 3/5] Drop character array format assignment on Python 2

On Python 2, the `format` of all `array`s is `uint8` (i.e. `"B"`). The
reason being that all `array`s on Python 2 only support the old buffer
protocol. As such they need to be converted through that path to get
access to use them with the (new) buffer protocol. Doing so results in
such objects being treated as if they are of `uint8` type. Since that is
how we want character (i.e. `"c"`) arrays treated on Python 2 (they
don't exist on Python 3), then there is nothing we need to here. This
includes changing the `_format`. So drop that line.
---
 src/cybuffer.pyx | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/cybuffer.pyx b/src/cybuffer.pyx
index e79293b..6c92bba 100644
--- a/src/cybuffer.pyx
+++ b/src/cybuffer.pyx
@@ -166,7 +166,6 @@ cdef class cybuffer(object):
             if typecode == "B":
                 return
             elif PY2K and typecode == "c":
-                self._format = UBYTE_TC
                 return
             elif typecode == "u":
                 if Py_UNICODE_SIZE == 2:

From 35781156db426f22afad1a6e979da9c334dc810a Mon Sep 17 00:00:00 2001
From: John Kirkham <kirkhamj@janelia.hhmi.org>
Date: Fri, 23 Nov 2018 23:35:30 -0500
Subject: [PATCH 4/5] Fuse no-op `array` cases

Go ahead and join the different no-op cases with `array` types into one
condition. Just makes it easier to see that all of these are just being
skipped. Besides Cython will break this out however it sees fit in C
code. So there is no benefit to us doing anything special here.
---
 src/cybuffer.pyx | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/cybuffer.pyx b/src/cybuffer.pyx
index 6c92bba..549080f 100644
--- a/src/cybuffer.pyx
+++ b/src/cybuffer.pyx
@@ -163,9 +163,7 @@ cdef class cybuffer(object):
         if (PY2K or PY3K) and isinstance(self.obj, array):
             # Fix-up typecode
             typecode = self.obj.typecode
-            if typecode == "B":
-                return
-            elif PY2K and typecode == "c":
+            if typecode == "B" or (PY2K and typecode == "c"):
                 return
             elif typecode == "u":
                 if Py_UNICODE_SIZE == 2:

From 7b0903904c701f162e06c72945a1a8d2580d7ba7 Mon Sep 17 00:00:00 2001
From: John Kirkham <kirkhamj@janelia.hhmi.org>
Date: Sat, 24 Nov 2018 00:23:39 -0500
Subject: [PATCH 5/5] Set itemsize and format for `array` on Python 2

Go ahead and set `itemsize` at the same time that `format` is being set
for the Python 2 `array` cases. These two properties are connected and
naturally go together. So this makes a bit more sense from a code
readability point of view.
---
 src/cybuffer.pyx | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/cybuffer.pyx b/src/cybuffer.pyx
index 549080f..0057e1b 100644
--- a/src/cybuffer.pyx
+++ b/src/cybuffer.pyx
@@ -166,17 +166,18 @@ cdef class cybuffer(object):
             if typecode == "B" or (PY2K and typecode == "c"):
                 return
             elif typecode == "u":
+                if PY2K:
+                    self.itemsize = Py_UNICODE_SIZE
                 if Py_UNICODE_SIZE == 2:
                     self._format = UCS2_TC
                 elif Py_UNICODE_SIZE == 4:
                     self._format = UCS4_TC
             elif PY2K:
+                self.itemsize = self.obj.itemsize
                 self._format = typecode
 
-            # Adjust itemsize, shape, and strides based on casting
+            # Adjust shape and strides based on casting
             if PY2K:
-                self.itemsize = self.obj.itemsize
-
                 len_nd_b = self._buf.ndim * sizeof(Py_ssize_t)
                 self._shape = <Py_ssize_t*>cpython.mem.PyMem_Malloc(len_nd_b)
                 self._strides = <Py_ssize_t*>cpython.mem.PyMem_Malloc(len_nd_b)