In [1]:
!pip install "thinc>=8.0.0a0" ml_datasets "tqdm>=4.41"



In [2]:
from thinc.api import prefer_gpu
# Presumably switches Thinc to the GPU when one is usable; the value echoed as
# this cell's output is what the call returned.
prefer_gpu()


False

In [3]:
import ml_datasets
from tqdm import tqdm
from thinc.api import fix_random_seed

In [10]:
# Fix the random seed (0) once, before any model is built or trained in this notebook.
fix_random_seed(0)

def train_model(data, model, optimizer, n_iter, batch_size):
    """Train ``model`` for ``n_iter`` epochs and report progress after each one.

    Args:
        data: Pair of pairs, ``((train_X, train_y), (test_X, test_y))``.
        model: Model exposing ``initialize``, ``ops.multibatch``,
            ``begin_update`` and ``finish_update``.
        optimizer: Object handed to ``model.finish_update`` after every batch.
        n_iter: Number of passes over the training data.
        batch_size: Batch size used for both training and evaluation.

    Prints one tab-separated line per epoch: epoch index, the summed squared
    error accumulated over that epoch, and the score returned by ``evaluate``
    on the test split.
    """
    train_split, test_split = data
    train_X, train_y = train_split
    test_X, test_y = test_split
    # A handful of examples is passed so the model can be initialized from data.
    model.initialize(X=train_X[:5], Y=train_y[:5])
    for epoch in range(n_iter):
        epoch_loss = 0.0
        minibatches = model.ops.multibatch(batch_size, train_X, train_y, shuffle=True)
        for inputs, targets in tqdm(minibatches, leave=False):
            guesses, backprop = model.begin_update(inputs)
            gradients = []
            for idx, guess in enumerate(guesses):
                # The prediction error is used both as the gradient that is
                # backpropagated and, squared, as the reported loss.
                residual = guess - targets[idx]
                gradients.append(residual)
                epoch_loss += (residual ** 2).sum()
            backprop(gradients)
            model.finish_update(optimizer)
        accuracy = evaluate(model, test_X, test_y, batch_size)
        print(f"{epoch}\t{epoch_loss:.2f}\t{accuracy:.3f}")
        
def evaluate(model, test_X, test_Y, batch_size):
    """Return the fraction of rows whose predicted class matches the gold class.

    Args:
        model: Model exposing ``ops.multibatch`` and ``predict``.
        test_X: Evaluation inputs, batched together with ``test_Y``.
        test_Y: Gold outputs; each item is indexed as a 2-d array whose
            ``argmax(axis=1)`` gives the gold class per row.
        batch_size: Number of items per evaluation batch.

    Returns:
        Accuracy as a Python ``float``. An empty evaluation set yields ``0.0``
        instead of raising ``ZeroDivisionError``.
    """
    correct = 0
    total = 0
    for X, Y in model.ops.multibatch(batch_size, test_X, test_Y):
        Yh = model.predict(X)
        for yh, y in zip(Yh, Y):
            # Compare the highest-scoring class per row with the gold class.
            correct += (y.argmax(axis=1) == yh.argmax(axis=1)).sum()
            total += y.shape[0]
    if total == 0:
        # Nothing was scored, so there is no meaningful ratio to compute.
        return 0.0
    return float(correct / total)

## Composing the model in code

Here's the model definition, using the `>>` operator for the `chain` combinator.
The `strings2arrays` transform converts a sequence of strings to a list of arrays. `with_array` 
transforms sequences (the sequences of arrays) into a contiguous 2-dimensional array on the way into 
and out of the model it wraps. This means our model has the following signature: 
`Model[Sequence[str], Sequence[Array2d]]`.


In [11]:
from thinc.api import Model, chain, strings2arrays, with_array, HashEmbed, expand_window, Relu, Softmax, Adam, warmup_linear

# Hyperparameters shared by the model definition and the training call.
width = 32  # nO of the embedding and of both hidden layers
vector_width = 16  # passed to HashEmbed as nV
nr_classes = 17  # nO of the final Softmax layer
learn_rate = 0.001
n_iter = 10
batch_size = 128

# Inside this context manager `>>` is bound to `chain`, so `a >> b` composes layers.
with Model.define_operators({">>": chain}):
    model = strings2arrays() >> with_array(
        HashEmbed(nO=width, nV=vector_width, column=0)
        # 1-step convolution; window_size=1 presumably adds one neighbour on each
        # side, which is why the next layer takes nI = width * 3.
        >> expand_window(window_size=1) # 1-step Convolution
        >> Relu(nO=width, nI=width * 3)
        >> Relu(nO=width, nI=width)
        >> Softmax(nO=nr_classes, nI=width)
    )
optimizer = Adam(learn_rate)

## Composing the model via config file

If we want to rebuild the model defined above in a config file, we first need to break down its structure:

* `chain` (any number of positional arguments)
  * `strings2arrays` (no arguments)
  * `with_array` (one argument **layer**)
    * **layer:** `chain` (any number of positional arguments)
      * `HashEmbed`
      * `expand_window`
      * `Relu`
      * `Relu`
      * `Softmax`

`chain` takes a variable number of positional arguments (the layers to compose). 
In the config, positional arguments can be expressed using `*` in the dot notation. For example, 
`model.layer` could describe a function passed to `model` as the argument `layer`, while `model.*.relu` 
defines a positional argument passed to `model`. The name of the argument (e.g. `relu`) doesn't matter 
in this case; it just needs to be unique.

> ⚠️ **Important note:** It is recommended to use a hybrid approach: wrap the model definition in a registered function
> and configure it via the config.

In [12]:
# Config text that rebuilds the code-defined model: every `[model.*.<name>]`
# section is one positional layer for the `chain` named in its parent section.
# NOTE: `nI = 96` under relu1 is a literal equal to 3 * width (the code version
# writes `width * 3`); it has to be edited by hand if `width` changes.
CONFIG = """
[hyper_params]
width = 32
vector_width = 16
learn_rate = 0.001
n_tags = 17

[training]
n_iter = 10
batch_size = 128

[model]
@layers = "chain.v1"

[model.*.strings2arrays]
@layers = "strings2arrays.v1"

[model.*.with_array]
@layers = "with_array.v1"

[model.*.with_array.layer]
@layers = "chain.v1"

[model.*.with_array.layer.*.hashembed]
@layers = "HashEmbed.v1"
nO = ${hyper_params:width}
nV = ${hyper_params:vector_width}
column = 0

[model.*.with_array.layer.*.expand_window]
@layers = "expand_window.v1"
window_size = 1

[model.*.with_array.layer.*.relu1]
@layers = "Relu.v1"
nO = ${hyper_params:width}
nI = 96

[model.*.with_array.layer.*.relu2]
@layers = "Relu.v1"
nO = ${hyper_params:width}
nI = ${hyper_params:width}

[model.*.with_array.layer.*.softmax]
@layers = "Softmax.v1"
nO = ${hyper_params:n_tags}
nI = ${hyper_params:width}

[optimizer]
@optimizers = "Adam.v1"
learn_rate = ${hyper_params:learn_rate}
"""


from thinc.api import registry, Config

# Parse the config text into a Config object.
config = Config().from_str(CONFIG)
# `registry.make_from_config` is the pre-release spelling of this call; stable
# Thinc 8 releases expose it as `registry.resolve`. The install cell accepts
# either kind of version, so use whichever the installed Thinc provides.
resolve_config = getattr(registry, "resolve", None) or registry.make_from_config
loaded_config = resolve_config(config)

# Pull the constructed objects and the plain training settings out of the result.
model = loaded_config["model"]
optimizer = loaded_config["optimizer"]
n_iter = loaded_config["training"]["n_iter"]
batch_size = loaded_config["training"]["batch_size"]

In [13]:
# Load the `ud_ancora_pos_tags` dataset from ml_datasets and train the
# config-built model; train_model prints one line per epoch.
data = ml_datasets.ud_ancora_pos_tags()
train_model(data, model, optimizer, n_iter, batch_size)

8%|▊         | 9/112 [00:00<00:01, 86.79it/s]0	397584.09	0.373
  8%|▊         | 9/112 [00:00<00:01, 83.08it/s]1	307524.31	0.538
  8%|▊         | 9/112 [00:00<00:01, 83.08it/s]2	260264.22	0.584
  7%|▋         | 8/112 [00:00<00:01, 79.92it/s]3	239829.69	0.613
  8%|▊         | 9/112 [00:00<00:01, 82.13it/s]4	224328.70	0.627
  8%|▊         | 9/112 [00:00<00:01, 84.93it/s]5	212690.89	0.644
  8%|▊         | 9/112 [00:00<00:01, 80.85it/s]6	203882.17	0.657
  8%|▊         | 9/112 [00:00<00:01, 85.27it/s]7	197021.96	0.668
  8%|▊         | 9/112 [00:00<00:01, 81.19it/s]8	191324.42	0.678
 95%|█████████▍| 106/112 [00:01<00:00, 83.61it/s]9	186663.51	0.684


## Composing the model with code and config


In [14]:
import thinc
from thinc.api import Model, chain, strings2arrays, with_array, HashEmbed, expand_window, Relu, Softmax,\
    Adam, warmup_linear

@thinc.registry.layers("cnn_tagger.v1")
def create_cnn_tagger(width: int, vector_width: int, nr_classes: int = 17) -> Model:
    """Build the tagger model and register it under the name "cnn_tagger.v1".

    The model is ``strings2arrays`` followed by ``with_array`` wrapping
    HashEmbed -> expand_window -> Relu -> Relu -> Softmax.

    Args:
        width: Output size (``nO``) of the embedding and of both Relu layers.
        vector_width: Value passed to ``HashEmbed`` as ``nV``.
        nr_classes: Output size (``nO``) of the Softmax layer; defaults to 17.

    Returns:
        The composed model.
    """
    # Inside this context manager `>>` is bound to `chain`.
    with Model.define_operators({">>": chain}):
        model = strings2arrays() >> with_array(
            HashEmbed(nO=width, nV=vector_width, column=0)
            >> expand_window(window_size=1)
            # The first hidden layer takes three times `width` as input, matching
            # the window expansion applied just before it.
            >> Relu(nO=width, nI=width * 3)
            >> Relu(nO=width, nI=width)
            >> Softmax(nO=nr_classes, nI=width)
        )
    return model

# Config for the registered "cnn_tagger.v1" function: apart from "@layers", the
# keys in [model] carry the same names as create_cnn_tagger's parameters
# (width, vector_width, nr_classes).
CONFIG = """
[hyper_params]
width = 32
vector_width = 16
learn_rate = 0.001

[training]
n_iter = 10
batch_size = 128

[model]
@layers = "cnn_tagger.v1"
width = ${hyper_params:width}
vector_width = ${hyper_params:vector_width}
nr_classes = 17

[optimizer]
@optimizers = "Adam.v1"
learn_rate = ${hyper_params:learn_rate}
"""

# `registry.make_from_config` is the pre-release spelling of this call; stable
# Thinc 8 releases expose it as `registry.resolve`. Use whichever the installed
# Thinc provides so the cell runs on both.
resolve_config = getattr(registry, "resolve", None) or registry.make_from_config
loaded_config = resolve_config(Config().from_str(CONFIG))

# Pull the constructed objects and the plain training settings out of the result.
model = loaded_config["model"]
optimizer = loaded_config["optimizer"]
n_iter = loaded_config["training"]["n_iter"]
batch_size = loaded_config["training"]["batch_size"]

data = ml_datasets.ud_ancora_pos_tags()

train_model(data, model, optimizer, n_iter, batch_size)

7%|▋         | 8/112 [00:00<00:01, 77.85it/s]0	394804.24	0.445
  8%|▊         | 9/112 [00:00<00:01, 81.89it/s]1	281690.39	0.552
  8%|▊         | 9/112 [00:00<00:01, 79.89it/s]2	251534.66	0.578
  8%|▊         | 9/112 [00:00<00:01, 83.73it/s]3	233599.77	0.607
  8%|▊         | 9/112 [00:00<00:01, 86.54it/s]4	220240.38	0.630
  8%|▊         | 9/112 [00:00<00:01, 83.18it/s]5	209728.11	0.645
  8%|▊         | 9/112 [00:00<00:01, 85.49it/s]6	201995.64	0.658
  8%|▊         | 9/112 [00:00<00:01, 78.35it/s]7	196320.43	0.668
  8%|▊         | 9/112 [00:00<00:01, 83.07it/s]8	191455.18	0.675
 97%|█████████▋| 109/112 [00:01<00:00, 86.97it/s]9	187216.77	0.685
