In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext
import torchtext.data as data
from torchtext.data import Field, BucketIterator
import os
import typing

In [17]:
# Maximum allowed length of either side of an example; `filter_pred` drops
# every example whose source or target is longer than this.
MAX_LEN = 1_000

In [18]:
# Both sides are wrapped in the same sentence-boundary markers; only the
# source field additionally sets `include_lengths=True`.
SRC_TEXT = Field(
    init_token="<s>",
    eos_token="</s>",
    include_lengths=True,
)
TRG_TEXT = Field(
    init_token="<s>",
    eos_token="</s>",
)

In [19]:
# (attribute name, field) pairs; the names "source" and "target" are the
# example attributes read by `filter_pred` and `save_dataset`.
data_fields = [
    ("source", SRC_TEXT),
    ("target", TRG_TEXT),
]

def filter_pred(x: "torchtext.data.Example") -> bool:
    """Tell whether an example is short enough to keep.

    Args:
        x: An example exposing sized ``source`` and ``target`` attributes
            (not a plain string, despite what the previous annotation said).

    Returns:
        True when both ``len(x.source)`` and ``len(x.target)`` are at most
        ``MAX_LEN``; False otherwise.
    """
    return len(x.source) <= MAX_LEN and len(x.target) <= MAX_LEN

# Load the parallel corpus stored next to each other as `train.diff` and
# `train.msg`. Per the torchtext TranslationDataset API, `exts` and `fields`
# are paired in order, so `.diff` feeds "source" and `.msg` feeds "target".
# Examples rejected by `filter_pred` are dropped while loading.
dataset = torchtext.datasets.TranslationDataset(
    fields=data_fields,
    exts=(".diff", ".msg"),
    path="/workspace/tmp/dataset_10k.bpe/train",
    filter_pred=filter_pred,
)

In [20]:
# Keep at most the first 10,000 examples and re-wrap them in a plain Dataset
# that uses the same fields and the same length filter.
dataset = torchtext.data.Dataset(
    examples=dataset[:10_000],
    fields=data_fields,
    filter_pred=filter_pred,
)

In [21]:
# torchtext gotcha: `split_ratio` is given as [train, test, valid], but
# `Dataset.split` RETURNS the splits as (train, valid, test). Unpacking in the
# returned order makes each name match its contents:
# train = 80%, test = 15%, valid = 5%.
train_data, valid_data, test_data = dataset.split(split_ratio=[0.8, 0.15, 0.05])

In [22]:
def save_dataset(
    dataset: "torchtext.data.Dataset",
    filename: str,
    exts: typing.Tuple[str, str]
):
    """Write a parallel dataset to two text files, one example per line.

    For every example in ``dataset.examples`` the items of ``example.source``
    are joined with single spaces and written as one line to
    ``<filename>.<exts[0]>``; ``example.target`` is written the same way to
    ``<filename>.<exts[1]>``. Line *i* of both files belongs to the same
    example.

    Args:
        dataset: Object exposing an ``examples`` iterable whose items have
            ``source`` and ``target`` attributes (iterables of strings).
        filename: Output path prefix, without extension. Missing parent
            directories are created; a bare prefix without a directory part
            writes into the current working directory.
        exts: Extensions (without the leading dot) for the source and target
            files, in that order.
    """
    # os.makedirs("") raises, so only create a directory when the prefix has one.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    source_filename = f"{filename}.{exts[0]}"
    target_filename = f"{filename}.{exts[1]}"

    # Mode "w" truncates existing output files, so nothing has to be removed
    # beforehand. The encoding is pinned because the tokens contain non-ASCII
    # characters and the platform default encoding may not be UTF-8.
    with open(source_filename, mode="w", encoding="utf-8") as source, \
            open(target_filename, mode="w", encoding="utf-8") as target:
        for example in dataset.examples:
            source.write(" ".join(example.source) + "\n")
            target.write(" ".join(example.target) + "\n")

In [23]:
# Persist the training split as prepared/train.diff and prepared/train.msg.
save_dataset(
    dataset=train_data,
    filename="/workspace/tmp/dataset_10k.bpe/prepared/train",
    exts=("diff", "msg"),
)

In [24]:
# Persist `valid_data` as prepared/valid.diff and prepared/valid.msg.
save_dataset(
    dataset=valid_data,
    filename="/workspace/tmp/dataset_10k.bpe/prepared/valid",
    exts=("diff", "msg"),
)

In [25]:
# Persist `test_data` as prepared/test.diff and prepared/test.msg.
save_dataset(
    dataset=test_data,
    filename="/workspace/tmp/dataset_10k.bpe/prepared/test",
    exts=("diff", "msg"),
)

In [26]:
# Sanity check: number of space-separated tokens in one tokenized sample diff,
# for comparison against MAX_LEN. The sample is assembled from adjacent string
# literals (one per "<nl>" marker); the line break embedded in the sample is
# spelled as an explicit "\n" so the literal stays valid Python.
SAMPLE_DIFF = (
    "▁ <file> ▁readme . md ▁ <chunk> ▁ <nl> "
    "▁flex x ▁ <nl> "
    "▁= ==== ▁ <nl> "
    "▁ <nl> "
    "▁ <del> ▁flex x ▁is ▁a ▁python ▁to ok it ▁for ▁creating ▁graph ical ▁user ▁inter faces ▁( gui ' s ), ▁ <nl> "
    "▁ <del> ▁that ▁uses ▁web ▁tech nolog y ▁for ▁its ▁rendering . ▁you ▁can ▁use ▁flex x ▁to ▁create ▁ <nl> "
    "▁ <del> ▁de sktop ▁applications ▁as ▁well ▁as ▁web ▁applications . ▁flex x ▁can ▁also ▁be ▁ <nl> "
    "▁ <del> ▁used ▁from ▁within ▁the ▁jupyter ▁notebook . ▁ <nl> "
    "▁ <del> ▁ <nl> "
    "▁ <del> ▁flex x ▁is ▁p ure ▁python , ▁and ▁has ▁no ▁dependencies ▁other ▁than ▁the ▁browser ▁ <nl> "
    "▁ <del> ▁that ' s ▁already ▁installed ▁on ▁the ▁system . ▁to ▁run ▁apps ▁in ▁de sktop - mode , ▁ <nl> "
    "▁ <del> ▁we ▁recommend ▁having ▁fi re fo x ▁installed . ▁ <nl> "
    "▁ <add> ▁flex x ▁is ▁a ▁cross - platform , ▁p ure ▁python ▁to ok it ▁for ▁creating ▁graph ical ▁ <nl> "
    "▁ <add> ▁user ▁inter faces ▁( gui ' s ), ▁that ▁uses ▁web ▁tech nolog y ▁for ▁its ▁rendering . ▁ <nl> "
    "▁ <add> ▁you ▁can ▁use ▁flex x ▁to ▁create ▁de sktop ▁applications ▁as ▁well ▁as ▁web ▁ <nl> "
    "▁ <add> ▁applications . ▁ <nl> "
    "▁ <add> ▁ <nl> "
    "▁ <add> ▁flex x ▁can ▁also ▁be ▁used ▁to ▁run ▁a ▁sub set ▁of ▁python ▁in ▁a ▁web ▁runtime ▁( e . g . ▁ <nl> "
    "▁ <add> ▁node js ), ▁and ▁can ▁be ▁used ▁from ▁within ▁the ▁jupyter ▁notebook . ▁ <nl> "
    "▁ <add> ▁ <nl> "
    "▁ <add> ▁flex x ▁is ▁p ure ▁python , ▁and ▁its ▁only ▁dependencies ▁are ▁tornado ▁and ▁a ▁ <nl> "
    "▁ <add> ▁browser . ▁to ▁run ▁apps ▁in ▁de sktop - mode , ▁we ▁recommend ▁having ▁fi re fo x ▁ <nl> "
    "▁ <add> ▁installed . ▁ <nl> "
    "▁ <nl> "
    "▁flex x ▁consist s ▁of ▁several ▁modules ▁which ▁can ▁be ▁in divid ually ▁used ; ▁none ▁ <nl> "
    "▁of ▁the ▁modules ▁are ▁imported ▁by ▁default . ▁ <nl> "
    "▁ <file> ▁start . rst ▁ <chunk> ▁getting ▁started ▁ <nl> "
    "▁dependencies ▁ <nl> "
    "▁------------ ▁ <nl> "
    "▁ <nl> "
    "▁ <del> ▁flex x ▁has ▁no ▁dependencies . ▁to ▁use ▁`` flex x . ui `` ▁you ▁need ▁a ▁browser . ▁to ▁ <nl> "
    "▁ <del> ▁run ▁apps ▁that ▁look ▁like ▁de sktop ▁apps , ▁we ▁recommend ▁having ▁fi re fo x ▁ <nl> "
    "▁ <del> ▁installed . \n▁ <nl> "
    "▁ <add> ▁the ▁only ▁dependency ▁of ▁flex x ▁is ▁tornado ▁( a ▁p ure ▁python ▁package ). ▁to ▁use ▁ <nl> "
    "▁ <add> ▁`` flex x . ui `` ▁you ▁need ▁a ▁browser . ▁to ▁run ▁apps ▁that ▁look ▁like ▁de sktop ▁ <nl> "
    "▁ <add> ▁apps , ▁we ▁recommend ▁having ▁fi re fo x ▁installed . ▁ <nl> "
    "▁ <nl> "
    "▁developers ▁that ▁want ▁to ▁run ▁the ▁tests ▁need : ▁ <nl> "
    "▁ <nl> "
    "▁ <nl>"
)
len(SAMPLE_DIFF.split(" "))

492