# Vadim's tools for ML research

In [15]:
! pip install -e .

Obtaining file:///home/vadim/Documents/code/philips-nlp/vadim-ml-tools
Installing collected packages: vadim-ml
  Running setup.py develop for vadim-ml
Successfully installed vadim-ml
You are using pip version 19.0.2, however version 19.0.3 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.


## IO tools

Save and load any data with zero friction

In [1]:
from vadim_ml.io import load_file, dump_file

In [4]:
# Persist a small dict to a single file named 'xy.pickle'.
dump_file({'x': 0.5, 'y': 0.1}, 'xy.pickle')

In [5]:
load_file('xy.pickle')

{'x': 0.5, 'y': 0.1}

In [6]:
# Same call shape with a '.json' target; the `cat xy.json` output further down
# shows that the resulting file holds JSON text.
dump_file({'x': 10, 'y': 11}, 'xy.json')

In [7]:
load_file('xy.json')

{'x': 10, 'y': 11}

In [8]:
!cat xy.json

{"x": 10, "y": 11}

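It can even save and load sequences as multiple files. In the example below the target has no extension, and each dictionary key ends up in its own file inside the `xy` directory: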

In [2]:
# The target has no file extension: the `ls xy` listing below shows that this
# produces one .pickle file per dictionary key.
dump_file({'x': 0.5, 'y': 0.1}, 'xy')

In [3]:
# Load back what was dumped under 'xy' and convert it to a plain dict for display.
# NOTE(review): presumably load_file returns a lazy / mapping-like object here,
# which is why dict() is needed — confirm against vadim_ml.io.
dict(load_file('xy'))

{'y': 0.1, 'x': 0.5}

In [5]:
!ls xy

x.pickle  y.pickle


## Memoization

Memoize any function

In [7]:
from vadim_ml.memoize import memoize

In [9]:
from time import sleep

@memoize
def square(x):
    """Return ``x`` multiplied by itself, after a 10-second pause.

    The pause stands in for an expensive computation, so that the effect of
    the ``memoize`` decorator shows up in the timings of repeated calls.
    """
    sleep(10)  # deliberately slow
    product = x * x
    return product

In [12]:
%%time

# The recorded wall time (~10 s) shows that the function body, including its
# sleep, actually ran for this call.
print(square(117))

13689
CPU times: user 0 ns, sys: 2.76 ms, total: 2.76 ms
Wall time: 10 s


In [13]:
%%time

# Same argument again: the recorded wall time drops to microseconds, i.e. the
# 10 s body was not executed a second time.
print(square(117))

13689
CPU times: user 323 µs, sys: 114 µs, total: 437 µs
Wall time: 422 µs


The function cache can also persist on disk if you need it to

In [15]:
from vadim_ml.memoize import disk_memoize

In [16]:
@disk_memoize('squares')
def square(x):
    """Return ``x`` multiplied by itself.

    Wrapped with ``disk_memoize('squares')``; after ``square(15)`` is called,
    the notebook's ``ls squares`` output lists a ``15.pickle`` entry.

    NOTE: this intentionally rebinds the name ``square`` that an earlier cell
    defined with ``@memoize`` (the slow, sleeping variant).
    """
    product = x * x
    return product

In [17]:
square(15)

225

In [18]:
!ls squares

15.pickle


## Text Segmentation

A class for working with annotated texts

In [1]:
from vadim_ml.segmentation import TextSegmentation

In [2]:
# Annotate a sentence with labelled character spans.
# Each annotation is ((start, end), label); `end` is exclusive, so (0, 1) covers
# 'I' and (14, 17) covers 'you'.
seg = TextSegmentation(
    'I do not like you',
    [
        ((0, 1), 'person'),
        ((14, 17), 'person'),
    ],
)

In [3]:
from IPython.display import HTML

HTML(seg.html())

In [4]:
seg

<person>I</person> do not like <person>you</person>

In [5]:
seg.get_label(4) # Char 4 is not within an annotation, so nothing is displayed for this cell (presumably the label is None — see get_segment(7) below)

In [6]:
seg.get_label(15)

'person'

In [8]:
seg.get_segment(0)

((0, 1), 'person')

In [9]:
# Char 7 lies between the two annotations: the result is the unannotated gap
# spanning (1, 14), reported with label None.
seg.get_segment(7)

((1, 14), None)

In [10]:
seg.get_segment(14)

((14, 17), 'person')