In [1]:
%%html
<style>
.language-cpp {
  /* Size C++ code blocks to their content, but never smaller than the container. */
  height: auto;
  width: auto;
  min-height: 100% !important;
  min-width: 100% !important;
}
.reveal pre code {
  line-height: 1.1em;
  max-height: 768px;
  max-width: 2048px;
}
.rendered_html pre, .rendered_html code {
  font-size: 100%;
}
</style>

<center>
    <img src="https://upload.wikimedia.org/wikipedia/commons/a/a8/%D0%9B%D0%9E%D0%93%D0%9E_%D0%A8%D0%90%D0%94.png" width=500px/>
    <font>Python 2020</font><br/>
    <br/>
    <br/>
    <b style="font-size: 2em">Bindings & Extensions</b><br/>
    <br/>
    <font>Мирон Левков</font><br/>
</center>

<center>
    ![img](attachment:img/bindings_1.jpg)
</center>

# Problems:
* You have lots of useful C/C++ code
* You want to use C++ for the bottlenecks of your app

# Extensions

1. Take C++ code
2. Define interaction between C++ and Py
3. Compile it as shared library
4. Use in Py code


```cpp
#include <stdexcept>
#include <string>

// Thrown by cpp_concat when either argument is an empty string.
class EmptyArgumentException : public std::runtime_error {
    // Reuse std::runtime_error's constructors; the message is available via what().
    using std::runtime_error::runtime_error;
};

// Returns `l` followed by `r`.
// Throws EmptyArgumentException if either string is empty.
std::string cpp_concat(const std::string& l, const std::string& r) {
    if (l.empty() || r.empty()) {
        throw EmptyArgumentException{"Empty argument passed"};
    }
    return l + r;
}
```

### Describe py function
```cpp
// Step 1: only the signature of the Python-callable function.
// `args` carries the call arguments; nothing is parsed yet, so the function
// simply returns NULL.
static PyObject* py_concat(PyObject* /* self */, PyObject* args) {
    PyObject* result = NULL;
    return result;
}
```

### Describe py function
```cpp
// Step 2: unpack the call arguments into two C strings.
static PyObject* py_concat(PyObject* /* self */, PyObject* args) {
    PyObject* result = NULL;
    const char *l, *r;
    // "ss" requests two string arguments; if parsing fails we return NULL
    // to report the failure to the caller.
    if (!PyArg_ParseTuple(args, "ss", &l, &r)) {
        return NULL;
    }
    return result;
}
```

### Describe py function
```cpp
// Step 3: call the C++ implementation and wrap its result in a Python object.
// NOTE: cpp_concat throws EmptyArgumentException on empty input and nothing
// in this version catches it yet.
static PyObject* py_concat(PyObject* /* self */, PyObject* args) {
    PyObject* result = NULL;
    const char *l, *r;
    if (!PyArg_ParseTuple(args, "ss", &l, &r)) {
        return NULL;
    }
    
    auto concatenated = cpp_concat({l}, {r});
    // Build the Python return value from the C++ string's character data.
    result = Py_BuildValue("s", concatenated.data());
    if (result == NULL) {
        return result;
    }
    return result;
}
```

### Describe py function
```cpp
// Python-callable wrapper around cpp_concat.
//
// Expects two string arguments and returns their concatenation as a new
// Python str. Returns NULL with a Python exception set on failure:
//   * argument parsing errors are reported by PyArg_ParseTuple itself;
//   * EmptyArgumentException (and any other C++ exception) becomes RuntimeError.
static PyObject* py_concat(PyObject* /* self */, PyObject* args) {
    const char *l, *r;
    if (!PyArg_ParseTuple(args, "ss", &l, &r)) {
        return NULL;
    }

    // No C++ exception may escape into the interpreter's C frames, so every
    // exception type is translated into a Python error here.
    try {
        const std::string concatenated = cpp_concat(l, r);
        // Pass the length explicitly instead of re-scanning for the terminator.
        return PyUnicode_FromStringAndSize(
            concatenated.data(), static_cast<Py_ssize_t>(concatenated.size()));
    } catch (const EmptyArgumentException& e) {
        PyErr_SetString(PyExc_RuntimeError, e.what());
    } catch (const std::exception& e) {
        PyErr_SetString(PyExc_RuntimeError, e.what());
    } catch (...) {
        PyErr_SetString(PyExc_RuntimeError, "Unknown C++ exception");
    }
    return NULL;
}
```

### Describe py module
```cpp
// Method table of the module: one entry per Python-visible function.
static PyMethodDef methods[] = {
    // {Python name, C function, calling convention flag, docstring}
    {"concat", py_concat, METH_VARARGS, "Concat 2 non-empty strings"},
    {NULL, NULL, 0, NULL}        /* Sentinel */
};


// Module definition passed to PyModule_Create.
// NOTE(review): the "module name" string below is what the demo prints for
// `libconcat.__name__` ("concat module"); it would conventionally match the
// import name `libconcat` -- confirm before changing.
static struct PyModuleDef module = {
    PyModuleDef_HEAD_INIT,
    "concat module",    /* module name */
    NULL,               /* documentation, may be NULL */
    -1,                 /* size of per-interpreter module state,
                         * -1 if the module keeps state in globals
                         */
    methods
};

```

### Describe py module
```cpp
// Note: the part after `PyInit_` (here `libconcat`) is the name the module is
// imported by; it matches the shared library `libconcat.so` and `import libconcat`.
PyMODINIT_FUNC
PyInit_libconcat() {
    return PyModule_Create(&module);
}

```

### Compile
* use python3-config to get flags python was compiled with
```bash
g++ $(python3-config --cflags --ldflags) -shared --std=c++17 \
concat.cpp -o libconcat.so
```

In [2]:
import libconcat

# Prints the name string stored in the module definition.
print(libconcat.__name__)

l = 'a' * 10000
r = 'b' * 10000
res = libconcat.concat(l, r)
print(len(res))

# Deliberate failure: an empty argument raises RuntimeError (see the output below).
libconcat.concat('', r)

concat module
20000


RuntimeError: Empty argument passed

# Anything about memory management?

```cpp
// Python-callable function that concatenates all strings of a list.
//
// Expects a single list argument with at least two items. Returns NULL with
// RuntimeError set if the argument is not a list or is shorter than two items.
static PyObject* py_concat_list(PyObject* self, PyObject* args) {
    // `list` is a BORROWED reference: PyArg_ParseTuple does not increment the
    // refcount for "O", the args tuple keeps the object alive. It therefore
    // must NOT be Py_DECREF'ed anywhere in this function.
    PyObject* list;
    if (!PyArg_ParseTuple(args, "O", &list)) {
        return NULL;
    }
    // PyList_Size returns Py_ssize_t (an int could truncate) and must only be
    // called on real lists; anything else is folded into the same error below.
    Py_ssize_t n = PyList_Check(list) ? PyList_Size(list) : -1;
    if (n < 2) {
        PyErr_SetString(PyExc_RuntimeError, "Sequence of len >= 2 required");
        return NULL;
    }
    // `result` is declared and filled by the loop shown on the following slides.
    for (Py_ssize_t i = 0; i < n; ++i) {
        /*
         * process i-th item somehow
         */
    }
    return result;
}
```

### What's inside loop
```cpp
// First, naive version of the loop: it contains no reference counting calls.
// (The next version of this loop adds the Py_INCREF / Py_DECREF handling.)
PyObject *item = NULL, *result = NULL;

for (int i = 0; i < n; ++i) {
    item = PyList_GetItem(list, i);
    // The first item only seeds the accumulator.
    if (result == NULL) {
        result = item;
        continue;
    }
    
    // Pack (result, item) into an argument object and reuse py_concat on it.
    PyObject* subcall_args = Py_BuildValue("OO", result, item);
    PyObject* subcall_res = py_concat(self, subcall_args);
    // py_concat failed; give up and report the failure to our caller.
    if (subcall_res == NULL) {
        return NULL;
    }
    result = subcall_res;
}
```

### What's inside loop
```cpp
// Loop body of py_concat_list with correct reference counting.
// Invariant: whenever `result` is non-NULL it is a reference OWNED by this code.
PyObject *item = NULL, *result = NULL;
for (int i = 0; i < n; ++i) {
    // Borrowed reference (owned by the list): never DECREF `item`.
    item = PyList_GetItem(list, i);
    if (item == NULL) {
        Py_XDECREF(result);
        return NULL;
    }
    if (result == NULL) {
        // Seed the accumulator; take our own reference to keep the invariant.
        result = item;
        Py_INCREF(result);
        continue;
    }
    // New reference; the tuple holds its own references to `result` and `item`.
    PyObject* subcall_args = Py_BuildValue("OO", result, item);
    if (subcall_args == NULL) {
        Py_DECREF(result);
        return NULL;
    }
    PyObject* subcall_res = py_concat(self, subcall_args);
    Py_DECREF(subcall_args);
    // The previous accumulator is no longer needed, whether the call succeeded or not.
    Py_DECREF(result);
    if (subcall_res == NULL) {
        // `list` is borrowed from the args tuple, so it is NOT released here.
        return NULL;
    }
    result = subcall_res;
}
```

## Check refcounts for our code

In [3]:
from sys import getrefcount

l = 'a' * 10000
r = 'b' * 10000

# NOTE(review): the parameter name `str` shadows the builtin inside this helper.
def print_refcounts(str):
    """Print the label `str` followed by the current refcounts of the globals `l` and `r`."""
    print('{}: \tl-refcount = {}, r-refcount = {}'.format(str, getrefcount(l), getrefcount(r)))

print_refcounts('before')
# The list references `l` twice and `r` once.
args_list = [l, r, l]
print_refcounts('list created')
# The call receives a fresh temporary list; the counts printed afterwards
# show whether the extension left any extra references behind.
res = libconcat.concat_list([l, r, l])
print_refcounts('after call')
del(args_list)
print_refcounts('list deleted')

before: 	l-refcount = 2, r-refcount = 2
list created: 	l-refcount = 4, r-refcount = 3
after call: 	l-refcount = 4, r-refcount = 3
list deleted: 	l-refcount = 2, r-refcount = 2


## Memory leaks


Try removing all the DECREFs and running the following code:
```py
a = 'a' * 100
b = 'b' * 100
c = 'c' * 100

for i in range(10000000000):
    res = libconcat.concat_list([a, b, c])
```

<center>
    ![](img/htop.png)
</center>

# Too much boilerplate?

# Cython (static compiler)

```cython
# distutils: language = c++
# (The directive above makes `cythonize` generate and build C++ code, which
# the libcpp declarations below require.)
from libcpp.string cimport string

# `except +` converts a C++ exception thrown by cpp_concat into a Python exception.
cdef extern from "concat.h":
    string cpp_concat(string l, string r) except +


def concat(str l, str r):
    """Return ``l + r`` computed by the C++ ``cpp_concat``.

    Both strings are passed to C++ as UTF-8 bytes and the result is decoded
    back to ``str``. An exception thrown by ``cpp_concat`` is raised as a
    Python exception.
    """
    cdef string res = cpp_concat(l.encode('utf-8'), r.encode('utf-8'))
    return res.decode('utf-8')

def concat_list(list args):
    """Concatenate all strings in ``args`` (at least two) with ``cpp_concat``.

    Raises ``RuntimeError`` if fewer than two items are given.
    """
    if len(args) < 2:
        raise RuntimeError("Expected at least 2 arguments")

    # Accumulate on the C++ side so intermediate results are never decoded.
    cdef string res = args[0].encode('utf-8')
    for i in range(1, len(args)):
        res = cpp_concat(res, args[i].encode('utf-8'))
    return res.decode('utf-8')
```

### Compile
```bash
# cyconcat.pyx => cyconcat.cpp (the module uses libcpp, so C++ output is required)
cython --cplus cyconcat.pyx

# cyconcat.cpp => cyconcat.so
g++ $(python3-config --cflags --ldflags) -shared --std=c++17 cyconcat.cpp -o cyconcat.so
```
#### OR
```bash
# cyconcat.pyx => cyconcat.so (built in place, next to the source)
cythonize -i cyconcat.pyx
```

In [4]:
import cyconcat

print(cyconcat.__name__)
l = 'a' * 10000
r = 'b' * 10000
res = cyconcat.concat(l, r)
print(len(res))

# Deliberate failure: the C++ exception surfaces as RuntimeError (see the output below).
cyconcat.concat('', r)

cyconcat
20000


RuntimeError: Empty argument passed

# Serialization

1. JSON / YAML
    * human readable
    * slow (text formats)
2. numpy
    * binary
    * very inconvenient for arbitrary data
3. [pickle](https://docs.python.org/3.8/library/pickle.html) 
    * binary
    * unsafe 
    * has various [side effects](https://stackoverflow.com/questions/29704139/pickle-in-python3-doesnt-work-for-large-data-saving)
    * python-only
4. [protobuf](https://developers.google.com/protocol-buffers/docs/pythontutorial)
5. [flatbuf](https://google.github.io/flatbuffers/)

### Protobuf / Flatbuf

##### Common:

1. Describe data schema in special IDL
2. Use protoc / flatc tool to generate source code
3. Use generated code in your program

##### Distinct:
1. Protobuf serialization/deserialization needs less code
2. Flatbuf allows reading data partially
3. [Flexbuffers](https://google.github.io/flatbuffers/flexbuffers.html)

<center>
    ![img](img/comparison.png)
</center>

### Flatbuf. Describe schema in IDL
```
// Generated Python modules are imported from the `lecture.example` package.
namespace lecture.example;

table EmptyT {}

table NonEmptyT {
    x:byte;
    y:[int];
    // Deprecated: the generated Python module has no AddZ helper for this field.
    z:int (deprecated);
}

// The content of AnyT is exactly one of these tables.
union Union {
    Empty:EmptyT,
    NonEmpty:NonEmptyT
}

table AnyT {
    content:Union;
}
root_type AnyT;
```

### Flatbuf. Compile schema with flatc
```bash
flatc --python schema.fbs
```

### Flatbuf. Use created module in Python

In [5]:
import flatbuffers

from lecture.example import (
    AnyT,
    EmptyT,
    NonEmptyT,
    Union
)

def list_specific_methods(obj):
    """Print the names from ``dir(obj)`` that do not start with ``__``, then an empty line."""
    visible_names = []
    for name in dir(obj):
        if name.startswith('__'):
            continue
        visible_names.append(name)
    print(visible_names)
    print()

# Note: doesn't contain AddZ because z is `deprecated`
list_specific_methods(NonEmptyT)

list_specific_methods(AnyT)

list_specific_methods(Union.Union)

['NonEmptyT', 'NonEmptyTAddX', 'NonEmptyTAddY', 'NonEmptyTEnd', 'NonEmptyTStart', 'NonEmptyTStartYVector', 'flatbuffers']

['AnyT', 'AnyTAddContent', 'AnyTAddContentType', 'AnyTEnd', 'AnyTStart', 'flatbuffers']

['Empty', 'NONE', 'NonEmpty']



In [6]:
# First buffer: a NonEmptyT table with no fields set, used as the root object.
builder = flatbuffers.Builder(initialSize=0)

NonEmptyT.NonEmptyTStart(builder)
non_empty_table = NonEmptyT.NonEmptyTEnd(builder)
builder.Finish(non_empty_table)

first = builder.Output()
print('first:\t\t {}'.format(first))


# Second buffer: the same empty NonEmptyT, now stored as the union content of AnyT.
builder = flatbuffers.Builder(initialSize=0)

# The nested table is finished before the enclosing AnyT table is started.
NonEmptyT.NonEmptyTStart(builder)
non_empty_table = NonEmptyT.NonEmptyTEnd(builder)

AnyT.AnyTStart(builder)
AnyT.AnyTAddContent(builder, non_empty_table)
# Record which union member `content` holds.
AnyT.AnyTAddContentType(builder, Union.Union.NonEmpty)
any_table = AnyT.AnyTEnd(builder)
builder.Finish(any_table)

second = builder.Output()
print('second:\t\t {}'.format(second))
# Checks whether the bytes of the first buffer appear verbatim inside the second one.
print('first in second: {}'.format(first in second))

first:		 bytearray(b'\x08\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00')
second:		 bytearray(b'\x0c\x00\x00\x00\x08\x00\x0c\x00\x07\x00\x08\x00\x08\x00\x00\x00\x00\x00\x00\x02\x08\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00')
first in second: True


In [7]:
# Read the root AnyT table directly from the serialized bytes.
data = AnyT.AnyT.GetRootAsAnyT(second, 0)

assert data.ContentType() == Union.Union.NonEmpty
# View the union payload as a NonEmptyT located at the same bytes/position.
content = NonEmptyT.NonEmptyT()
content.Init(data.Content().Bytes, data.Content().Pos)

# No fields were set when building, so the output shows x = 0 and an empty y.
print('x = {}, y.len = {}'.format(content.X(), content.YLength()))

x = 0, y.len = 0


### Protobuf. Describe schema in IDL*
```proto
syntax = "proto3";

package lecture.example;

message EmptyT {}

message NonEmptyT {
    /* forbid reuse of field id 2 (useful for removed or deprecated fields) */
    reserved 2;
    int32 x = 1;
    repeated int32 y = 3;
}

message AnyT {
    oneof content {
        EmptyT empty_content = 1;
        NonEmptyT non_empty_content = 2;
    }
}
```



*proto3

### Protobuf. Compile schema with protoc
```bash
protoc --python_out=protolecture schema.proto
```

In [8]:
from protolecture import schema_pb2

from io import BytesIO

# In-memory byte buffer that later cells write serialized messages into.
buf = BytesIO()

any_table = schema_pb2.AnyT()
empty_content = schema_pb2.EmptyT()

non_empty_table = schema_pb2.NonEmptyT()
non_empty_table.y.extend([1, 2, 3])

list_specific_methods(any_table)

# Deliberate failure: assigning a message object to a message field is
# rejected with AttributeError (see the output below).
any_table.non_empty_content = non_empty_table

['ByteSize', 'Clear', 'ClearExtension', 'ClearField', 'CopyFrom', 'DESCRIPTOR', 'DiscardUnknownFields', 'Extensions', 'FindInitializationErrors', 'FromString', 'HasExtension', 'HasField', 'IsInitialized', 'ListFields', 'MergeFrom', 'MergeFromString', 'ParseFromString', 'RegisterExtension', 'SerializePartialToString', 'SerializeToString', 'SetInParent', 'UnknownFields', 'WhichOneof', '_CheckCalledFromGeneratedFile', '_SetListener', '_extensions_by_name', '_extensions_by_number', 'empty_content', 'non_empty_content']



AttributeError: Assignment not allowed to field "non_empty_content" in protocol message object.

In [9]:
# Merging into `empty_content` selects that branch of the `content` oneof.
any_table.empty_content.MergeFrom(empty_content)

print(any_table.WhichOneof("content"))
print()

# Writing to fields of `non_empty_content` switches the oneof to that branch
# (the second WhichOneof output below).
any_table.non_empty_content.x = non_empty_table.x
any_table.non_empty_content.y.MergeFrom(non_empty_table.y)

print(any_table.WhichOneof("content"))
print()

print('x = {}, y = {}'.format(any_table.non_empty_content.x, any_table.non_empty_content.y))
print()

empty_content

non_empty_content

x = 0, y = [1, 2, 3]



In [10]:
print('buf = {}'.format(any_table.SerializeToString()))
print()
buf.write(any_table.SerializeToString())

# Round trip: clear the message and restore it from the serialized bytes.
any_table.Clear()
any_table.ParseFromString(buf.getvalue())
print(any_table)
print()

print('Be careful:\n')
# Changing `non_empty_table` does not affect `any_table`: the next print
# still shows y = 1, 2, 3 and no x.
non_empty_table.x = 2
non_empty_table.ClearField("y")
non_empty_table.y.extend([2, 3, 4])
print(any_table)
print()

any_table_second = schema_pb2.AnyT()
any_table_second.non_empty_content.CopyFrom(non_empty_table)

# `buf` already holds the first message, so this write appends a second one.
# Parsing the concatenated bytes yields ONE message: in the output x comes
# from the second message and the two y lists are joined.
buf.write(any_table_second.SerializeToString())
any_table.ParseFromString(buf.getvalue())
print(any_table)

buf = b'\x12\x05\x1a\x03\x01\x02\x03'

non_empty_content {
  y: 1
  y: 2
  y: 3
}


Be careful:

non_empty_content {
  y: 1
  y: 2
  y: 3
}


non_empty_content {
  x: 2
  y: 1
  y: 2
  y: 3
  y: 2
  y: 3
  y: 4
}

