In [1]:
import os
import re
import random
import hashlib

from enum import Enum
from collections import ChainMap

import numpy as np

from torch.autograd import Variable
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

from scipy.io import wavfile

import librosa

import matplotlib.pyplot as plt
import IPython.display as ipd
%matplotlib inline

In [2]:
#########
# Utils #
#########

class SimpleCache(dict):
    def __init__(self, limit):
        super().__init__()
        self.limit = limit
        self.n_keys = 0

    def __setitem__(self, key, value):
        if key in self.keys():
            super().__setitem__(key, value)
        elif self.n_keys < self.limit:
            self.n_keys += 1
            super().__setitem__(key, value)
        return value

class SerializableModule(nn.Module):
    def __init__(self):
        super().__init__()

    def save(self, filename):
        torch.save(self.state_dict(), filename)

    def load(self, filename):
        self.load_state_dict(torch.load(filename, map_location=lambda storage, loc: storage))

def play_wav(wav_path):
    sample_rate, samples = wavfile.read(wav_path)
    return ipd.Audio(samples, rate=sample_rate)

In [3]:
class DatasetType(Enum):
    TRAIN = 0
    DEV = 1
    TEST = 2

In [8]:
# useful PyTorch abstraction
class SpeechDataset(data.Dataset):
    def __init__(self, data, set_type, config):
        ####
        # data – {wave_file_path : label}
        # set_type – DatasetType (dev, train or test)
        # config.bg_noise_files – will be used to noise wavs during training (see preprocess func)
        ####
        super().__init__()
        self.audio_files = list(data.keys())
        self.audio_labels = list(data.values())
        self.set_type = set_type
        config["bg_noise_files"] = list(filter(lambda x: x.endswith("wav"), config.get("bg_noise_files", [])))
        self.bg_noise_audio = [librosa.core.load(file, sr=16000)[0] for file in config["bg_noise_files"]]
        self.unknown_prob = config["unknown_prob"]
        self.silence_prob = config["silence_prob"]
        self.noise_prob = config["noise_prob"]
        self.n_dct = config["n_dct_filters"]
        self.n_mels = config["n_mels"]
        self.input_length = config["input_length"]
        self.timeshift_ms = config["timeshift_ms"]
        self.filters = librosa.filters.dct(config["n_dct_filters"], config["n_mels"])
        self._audio_cache = SimpleCache(config["cache_size"])
        self._file_cache = SimpleCache(config["cache_size"])
        n_unk = len(list(filter(lambda x: x == 1, self.audio_labels)))
        self.n_silence = int(self.silence_prob * (len(self.audio_labels) - n_unk))
    
    # PyTorch needs this function
    def __getitem__(self, index):
        if index >= len(self.audio_labels):
            return self.preprocess(None, silence=True), 0
        return self.preprocess(self.audio_files[index]), self.audio_labels[index]

    # PyTorch needs this function
    def __len__(self):
        return len(self.audio_labels) + self.n_silence

    # Our extra functions go below:
    def _timeshift_audio(self, data):
        shift = (16000 * self.timeshift_ms) // 1000
        shift = random.randint(-shift, shift)
        a = -min(0, shift)
        b = max(0, shift)
        data = np.pad(data, (a, b), "constant")
        return data[:len(data) - a] if a else data[b:]
    
    def _preprocess_audio(self, data, n_mels, dct_filters):
        data = librosa.feature.melspectrogram(
            data, 
            sr=16000, 
            n_mels=n_mels, 
            hop_length=160, 
            n_fft=480, 
            fmin=20, 
            fmax=4000
        )
        data[data > 0] = np.log(data[data > 0])
        data = [np.matmul(dct_filters, x) for x in np.split(data, data.shape[1], axis=1)]
        data = np.array(data, order="F").squeeze(2).astype(np.float32)
        return data

    def preprocess(self, example, silence=False):
        if silence:
            example = "__silence__"
        if random.random() < 0.7:
            try:
                return self._audio_cache[example]
            except KeyError:
                pass
        
        in_len = self.input_length
        if self.bg_noise_audio:
            # take a random samples chunk of a random noise audio
            bg_noise = random.choice(self.bg_noise_audio)
            a = random.randint(0, len(bg_noise) - in_len - 1)
            bg_noise = bg_noise[a:a + in_len]
        else:
            bg_noise = np.zeros(in_len)
        
        use_clean = (self.set_type != DatasetType.TRAIN)
        if use_clean:
            bg_noise = np.zeros(in_len)
        if silence:
            data = np.zeros(in_len, dtype=np.float32)
        else:
            file_data = self._file_cache.get(example)
            data = librosa.core.load(example, sr=16000)[0] if file_data is None else file_data
            self._file_cache[example] = data
        data = np.pad(data, (0, max(0, in_len - len(data))), "constant")
        if not use_clean:
            data = self._timeshift_audio(data)

        if random.random() < self.noise_prob or silence:
            a = random.random() * 0.1
            data = np.clip(a * bg_noise + data, -1, 1)
        data = torch.from_numpy(self._preprocess_audio(data, self.n_mels, self.filters))
        self._audio_cache[example] = data
        return data

### Define config

In [4]:
config = dict(
    data_folder="./../raw_data/train/audio",
    wanted_words="yes no up down left right on off stop go".split(),
    
    seed=0,
    
    output_file="./../models/model.pt", 
    gpu_no=0,
    cache_size=32768,
    
    n_epochs=26,
    momentum=0.9, 
    weight_decay=0.00001,
    lr=[0.1, 0.01, 0.001], 
    schedule=[3000, 6000], 
    batch_size=64, 
    dev_every=1,
    use_nesterov=False,
    
    group_speakers_by_id=True,
    
    silence_prob=0.1,
    noise_prob=0.8,
    unknown_prob=0.1, # percentage of unknown waves in the train/dev/test dataset
    
    input_length=16000,
    timeshift_ms=100,
    # Below are two parameters of Discrete Cosine Transform (http://en.wikipedia.org/wiki/Discrete_cosine_transform)
    n_dct_filters=40, 
    n_mels=40,
    
    train_pct=80,
    dev_pct=10,
    test_pct=10
)

In [5]:
seed = config["seed"]
torch.manual_seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed(seed)
random.seed(seed)

In [6]:
folder = config["data_folder"]
wanted_words = config["wanted_words"]
unknown_prob = config["unknown_prob"]
train_pct = config["train_pct"]
dev_pct = config["dev_pct"]
test_pct = config["test_pct"]

LABEL_UNKNOWN = "__unknown__"
LABEL_SILENCE = "__silence__"

words = {word: i + 2 for i, word in enumerate(wanted_words)}
words.update({LABEL_SILENCE:0, LABEL_UNKNOWN:1})
sets = [{}, {}, {}]  # [train, dev, test] => {wav_name: label}
unknowns = [0] * 3 # number of wavs for each dataset to take from collected unknown waves according to unknown_prob
bg_noise_files = []
unknown_files = []

In [7]:
words

{'__silence__': 0,
 '__unknown__': 1,
 'down': 5,
 'go': 11,
 'left': 6,
 'no': 3,
 'off': 9,
 'on': 8,
 'right': 7,
 'stop': 10,
 'up': 4,
 'yes': 2}

### prep datasets

In [9]:
max_no_wavs = 2**27 - 1

for folder_name in os.listdir(folder):
    print("Process " + folder_name)
    path_name = os.path.join(folder, folder_name)
    is_bg_noise = False
    
    # Skip files
    if os.path.isfile(path_name):
        continue
    
    # Assign the label based on the current folder name
    if folder_name in words:
        label = words[folder_name]
    elif folder_name == "_background_noise_":
        is_bg_noise = True
    else:
        label = words[LABEL_UNKNOWN]

    for filename in os.listdir(path_name):
        wav_name = os.path.join(path_name, filename)
        
        # Just record background noises files and continue
        if is_bg_noise and os.path.isfile(wav_name):
            bg_noise_files.append(wav_name)
            continue
        # collect wav names when label is unknown
        elif label == words[LABEL_UNKNOWN]:
            unknown_files.append(wav_name)
            continue
        
        # Fill sets with {wave_name : label}
        if config["group_speakers_by_id"]:
            hashname = re.sub(r"_nohash_.*$", "", filename)
            bucket = int(hashlib.sha1(hashname.encode()).hexdigest(), 16)
            bucket = (bucket % (max_no_wavs + 1)) * (100. / max_no_wavs)
            if bucket < dev_pct:
                tag = DatasetType.DEV
            elif bucket < test_pct + dev_pct:
                tag = DatasetType.TEST
            else:
                tag = DatasetType.TRAIN
            sets[tag.value][wav_name] = label
        else:
            raise NotImplementedError("Speakers should be grouped for now :/")
            
# Mix in unknown wavs in datasets
# 1: compute how many files should be mixed in each dataset according to unknown_prob
for tag in range(len(sets)):
    unknowns[tag] = int(unknown_prob * len(sets[tag]))

# 2: shuffle unknown files
random.shuffle(unknown_files)

# 3: update datasets with unknown_files[a:b], where a:b is the interval
a = 0
for tag, dataset in enumerate(sets):
    b = a + unknowns[tag]
    unk_dict = {u: words[LABEL_UNKNOWN] for u in unknown_files[a:b]}
    dataset.update(unk_dict)
    a = b

train_cfg = ChainMap(dict(bg_noise_files=bg_noise_files), config)
test_cfg = ChainMap(dict(noise_prob=0), config)
datasets = (
    SpeechDataset(sets[0], DatasetType.TRAIN, train_cfg), 
    SpeechDataset(sets[1], DatasetType.DEV, test_cfg),
    SpeechDataset(sets[2], DatasetType.TEST, test_cfg)
)

print("Speech dataset created")

Process marvin
Process wow
Process down
Process dog
Process cat
Process on
Process yes
Process left
Process no
Process five
Process house
Process eight
Process _background_noise_
Process nine
Process stop
Process two
Process happy
Process seven
Process zero
Process .DS_Store
Process bird
Process sheila
Process go
Process six
Process off
Process bed
Process three
Process tree
Process up
Process four
Process one
Process right
Speech dataset created


In [10]:
train_set, dev_set, test_set = datasets

## Training

In [11]:
# https://arxiv.org/pdf/1710.10361.pdf
class SpeechResModel(SerializableModule):
    def __init__(self, config):
        super().__init__()
        n_labels = config["n_labels"]
        n_maps = config["n_feature_maps"]
        self.conv0 = nn.Conv2d(1, n_maps, (3, 3), padding=(1, 1), bias=False)
        if "res_pool" in config:
            self.pool = nn.AvgPool2d(config["res_pool"])

        self.n_layers = n_layers = config["n_layers"]
        dilation = config["use_dilation"]
        if dilation:
            self.convs = [nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(i // 3)), dilation=int(2**(i // 3)), 
                bias=False) for i in range(n_layers)]
        else:
            self.convs = [nn.Conv2d(n_maps, n_maps, (3, 3), padding=1, dilation=1, 
                bias=False) for _ in range(n_layers)]
        for i, conv in enumerate(self.convs):
            self.add_module("bn{}".format(i + 1), nn.BatchNorm2d(n_maps, affine=False))
            self.add_module("conv{}".format(i + 1), conv)
        self.output = nn.Linear(n_maps, n_labels)

    def forward(self, x):
        x = x.unsqueeze(1)
        for i in range(self.n_layers + 1):
            y = F.relu(getattr(self, "conv{}".format(i))(x))
            if i == 0:
                if hasattr(self, "pool"):
                    y = self.pool(y)
                old_x = y
            if i > 0 and i % 2 == 0:
                x = y + old_x
                old_x = x
            else:
                x = y
            if i > 0:
                x = getattr(self, "bn{}".format(i))(x)
        x = x.view(x.size(0), x.size(1), -1) # shape: (batch, feats, o3)
        x = torch.mean(x, 2)
        return self.output(x)

In [12]:
res15 = dict(n_labels=12, use_dilation=True, n_layers=13, n_feature_maps=45)
model = SpeechResModel(res15)

torch.cuda.set_device(config["gpu_no"])
model.cuda()

SpeechResModel(
  (conv0): Conv2d (1, 45, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(45, eps=1e-05, momentum=0.1, affine=False)
  (conv1): Conv2d (45, 45, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(45, eps=1e-05, momentum=0.1, affine=False)
  (conv2): Conv2d (45, 45, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(45, eps=1e-05, momentum=0.1, affine=False)
  (conv3): Conv2d (45, 45, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn4): BatchNorm2d(45, eps=1e-05, momentum=0.1, affine=False)
  (conv4): Conv2d (45, 45, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
  (bn5): BatchNorm2d(45, eps=1e-05, momentum=0.1, affine=False)
  (conv5): Conv2d (45, 45, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
  (bn6): BatchNorm2d(45, eps=1e-05, momentum=0.1, affine=False)
  (conv6): Conv2d (45, 45, 

In [22]:
def print_eval(name, scores, labels, loss, end="\n"):
    batch_size = labels.size(0)
    accuracy = (torch.max(scores, 1)[1].view(batch_size).data == labels.data).sum() / batch_size
    loss = loss.cpu().data.numpy()[0]
    print("{} accuracy: {:>5}, loss: {:<25}".format(name, accuracy, loss), end=end)
    return accuracy

def evaluate(config, model, test_loader):
    torch.cuda.set_device(config["gpu_no"])
    model.eval()
    criterion = nn.CrossEntropyLoss()
    results = []
    total = 0
    for model_in, labels in test_loader:
        model_in = Variable(model_in, requires_grad=False)
        
        model_in = model_in.cuda()
        labels = labels.cuda()
        
        scores = model(model_in)
        labels = Variable(labels, requires_grad=False)
        loss = criterion(scores, labels)
        results.append(print_eval("test", scores, labels, loss) * model_in.size(0))
        total += model_in.size(0)
    print("final test accuracy: {}".format(sum(results) / total))

In [23]:
optimizer = torch.optim.SGD(
    model.parameters(), 
    lr=config["lr"][0], 
    nesterov=config["use_nesterov"], 
    weight_decay=config["weight_decay"], 
    momentum=config["momentum"]
)
schedule_steps = config["schedule"]
schedule_steps.append(np.inf)
sched_idx = 0
criterion = nn.CrossEntropyLoss()
max_acc = 0

train_loader = data.DataLoader(train_set, batch_size=config["batch_size"], shuffle=True, drop_last=True)
dev_loader = data.DataLoader(dev_set, batch_size=min(len(dev_set), 16), shuffle=True)
test_loader = data.DataLoader(test_set, batch_size=min(len(test_set), 16), shuffle=True)
step_no = 0

for epoch_idx in range(config["n_epochs"]):
    for batch_idx, (model_in, labels) in enumerate(train_loader):
        model.train() # sets the model in training mode
        optimizer.zero_grad() # Zero gradients
        
        model_in = model_in.cuda()
        labels = labels.cuda()
        
        model_in = Variable(model_in, requires_grad=False)
        scores = model(model_in)
        
        labels = Variable(labels, requires_grad=False)
        loss = criterion(scores, labels)
        loss.backward() # perform a backward pass
        
        optimizer.step() # and update the weights
        step_no += 1
        if step_no > schedule_steps[sched_idx]:
            sched_idx += 1
            print("changing learning rate to {}".format(config["lr"][sched_idx]))
            optimizer = torch.optim.SGD(model.parameters(), lr=config["lr"][sched_idx],
                nesterov=config["use_nesterov"], momentum=config["momentum"], weight_decay=config["weight_decay"])
        print_eval("train step #{}".format(step_no), scores, labels, loss)

    if epoch_idx % config["dev_every"] == config["dev_every"] - 1:
        model.eval() # sets the model in evaluation mode
        accs = []
        for model_in, labels in dev_loader:
            model_in = Variable(model_in, requires_grad=False)
            model_in = model_in.cuda()
            labels = labels.cuda()
            scores = model(model_in)
            labels = Variable(labels, requires_grad=False)
            loss = criterion(scores, labels)
            loss_numeric = loss.cpu().data.numpy()[0]
            accs.append(print_eval("dev", scores, labels, loss))
        avg_acc = np.mean(accs)
        print("final dev accuracy: {}".format(avg_acc))
        if avg_acc > max_acc:
            print("saving best model...")
            max_acc = avg_acc
            model.save(config["output_file"])
evaluate(config, model, test_loader)

train step #1 accuracy: 0.078125, loss: 2.478188991546631        
train step #2 accuracy: 0.09375, loss: 2.5016794204711914       
train step #3 accuracy: 0.03125, loss: 2.4945156574249268       
train step #4 accuracy: 0.03125, loss: 2.4963042736053467       
train step #5 accuracy: 0.15625, loss: 2.475476026535034        
train step #6 accuracy: 0.09375, loss: 2.4538135528564453       
train step #7 accuracy: 0.078125, loss: 2.4516944885253906       
train step #8 accuracy: 0.125, loss: 2.4377880096435547       
train step #9 accuracy: 0.125, loss: 2.455156087875366        
train step #10 accuracy: 0.234375, loss: 2.4315528869628906       
train step #11 accuracy: 0.203125, loss: 2.424142599105835        
train step #12 accuracy: 0.078125, loss: 2.423679828643799        
train step #13 accuracy: 0.125, loss: 2.3908770084381104       
train step #14 accuracy: 0.109375, loss: 2.348961114883423        
train step #15 accuracy: 0.171875, loss: 2.3722429275512695       
train step #16 acc

train step #125 accuracy: 0.546875, loss: 1.5074219703674316       
train step #126 accuracy: 0.53125, loss: 1.3601495027542114       
train step #127 accuracy: 0.40625, loss: 1.4737846851348877       
train step #128 accuracy: 0.515625, loss: 1.2262933254241943       
train step #129 accuracy: 0.46875, loss: 1.4745912551879883       
train step #130 accuracy: 0.4375, loss: 1.5535123348236084       
train step #131 accuracy: 0.515625, loss: 1.4332613945007324       
train step #132 accuracy: 0.609375, loss: 1.1841676235198975       
train step #133 accuracy: 0.59375, loss: 1.2551127672195435       
train step #134 accuracy:   0.5, loss: 1.3790018558502197       
train step #135 accuracy: 0.515625, loss: 1.314753770828247        
train step #136 accuracy: 0.609375, loss: 1.3310905694961548       
train step #137 accuracy: 0.546875, loss: 1.3479448556900024       
train step #138 accuracy: 0.515625, loss: 1.3389321565628052       
train step #139 accuracy: 0.671875, loss: 1.1904875040054

train step #247 accuracy: 0.71875, loss: 0.9378808736801147       
train step #248 accuracy: 0.6875, loss: 1.0291564464569092       
train step #249 accuracy: 0.71875, loss: 0.812649130821228        
train step #250 accuracy: 0.6875, loss: 1.0779352188110352       
train step #251 accuracy: 0.703125, loss: 0.8310824632644653       
train step #252 accuracy: 0.609375, loss: 1.1507413387298584       
train step #253 accuracy: 0.578125, loss: 1.1010152101516724       
train step #254 accuracy: 0.703125, loss: 0.8615227341651917       
train step #255 accuracy: 0.765625, loss: 0.796261727809906        
train step #256 accuracy: 0.65625, loss: 1.0495136976242065       
train step #257 accuracy: 0.734375, loss: 0.8052343726158142       
train step #258 accuracy: 0.828125, loss: 0.6626462936401367       
train step #259 accuracy: 0.671875, loss: 1.0098766088485718       
train step #260 accuracy: 0.71875, loss: 1.0065826177597046       
train step #261 accuracy: 0.6875, loss: 0.97114396095275

dev accuracy: 0.9375, loss: 0.5912676453590393       
dev accuracy:  0.75, loss: 0.9014200568199158       
dev accuracy: 0.8125, loss: 0.6786653995513916       
dev accuracy: 0.6875, loss: 0.992845892906189        
dev accuracy: 0.9375, loss: 0.5874707698822021       
dev accuracy: 0.875, loss: 0.8343795537948608       
dev accuracy: 0.625, loss: 1.5827196836471558       
dev accuracy: 0.6875, loss: 1.1307759284973145       
dev accuracy:  0.75, loss: 0.7607854604721069       
dev accuracy:   0.5, loss: 1.5315074920654297       
dev accuracy: 0.8125, loss: 0.7784029245376587       
dev accuracy: 0.9375, loss: 0.4699128270149231       
dev accuracy: 0.875, loss: 0.5683741569519043       
dev accuracy: 0.8125, loss: 0.8013444542884827       
dev accuracy: 0.625, loss: 1.3412015438079834       
dev accuracy:  0.75, loss: 0.9572815299034119       
dev accuracy: 0.875, loss: 0.6469597816467285       
dev accuracy: 0.875, loss: 0.6679229736328125       
dev accuracy: 0.625, loss: 0.977177739

dev accuracy: 0.875, loss: 0.8064447641372681       
dev accuracy:  0.75, loss: 1.0199278593063354       
dev accuracy:  0.75, loss: 0.7074865102767944       
dev accuracy: 0.8125, loss: 0.5280083417892456       
dev accuracy:  0.75, loss: 0.9609752297401428       
dev accuracy: 0.6875, loss: 1.2425576448440552       
dev accuracy: 0.6875, loss: 1.175414800643921        
dev accuracy: 0.8125, loss: 0.7339799404144287       
dev accuracy: 0.625, loss: 1.2195992469787598       
dev accuracy:  0.75, loss: 0.915190577507019        
dev accuracy:  0.75, loss: 0.576938271522522        
dev accuracy: 0.6875, loss: 1.3360960483551025       
dev accuracy: 0.6666666666666666, loss: 0.45454859733581543      
final dev accuracy: 0.7463487972508592
saving best model...
train step #348 accuracy: 0.671875, loss: 0.8103102445602417       
train step #349 accuracy: 0.796875, loss: 0.6990839838981628       
train step #350 accuracy: 0.796875, loss: 0.6088122129440308       
train step #351 accuracy: 0.7

train step #459 accuracy: 0.875, loss: 0.5210042595863342       
train step #460 accuracy:  0.75, loss: 0.7224792838096619       
train step #461 accuracy: 0.84375, loss: 0.47671282291412354      
train step #462 accuracy: 0.875, loss: 0.38984692096710205      
train step #463 accuracy: 0.8125, loss: 0.6320125460624695       
train step #464 accuracy: 0.6875, loss: 0.8698963522911072       
train step #465 accuracy: 0.8125, loss: 0.6444043517112732       
train step #466 accuracy: 0.890625, loss: 0.5241720676422119       
train step #467 accuracy: 0.765625, loss: 0.7012597322463989       
train step #468 accuracy: 0.796875, loss: 0.5877965092658997       
train step #469 accuracy: 0.8125, loss: 0.6945542097091675       
train step #470 accuracy: 0.78125, loss: 0.5800239443778992       
train step #471 accuracy: 0.796875, loss: 0.6350106000900269       
train step #472 accuracy: 0.796875, loss: 0.5584632158279419       
train step #473 accuracy: 0.8125, loss: 0.4628323018550873       
t

train step #582 accuracy:  0.75, loss: 0.7426272630691528       
train step #583 accuracy: 0.8125, loss: 0.6042454242706299       
train step #584 accuracy: 0.90625, loss: 0.30631110072135925      
train step #585 accuracy: 0.859375, loss: 0.39225053787231445      
train step #586 accuracy: 0.796875, loss: 0.6309025287628174       
train step #587 accuracy: 0.84375, loss: 0.49928557872772217      
train step #588 accuracy: 0.765625, loss: 0.7556442618370056       
train step #589 accuracy: 0.875, loss: 0.49124133586883545      
train step #590 accuracy: 0.796875, loss: 0.528489887714386        
train step #591 accuracy: 0.8125, loss: 0.5292081832885742       
train step #592 accuracy: 0.84375, loss: 0.6959702968597412       
train step #593 accuracy: 0.828125, loss: 0.5038726925849915       
train step #594 accuracy: 0.859375, loss: 0.5226267576217651       
train step #595 accuracy: 0.828125, loss: 0.7122702598571777       
train step #596 accuracy: 0.90625, loss: 0.3491179645061493  

dev accuracy: 0.875, loss: 0.3237675726413727       
dev accuracy: 0.875, loss: 0.3418372571468353       
dev accuracy: 0.9375, loss: 0.3483610451221466       
dev accuracy: 0.875, loss: 0.49154767394065857      
dev accuracy: 0.875, loss: 0.4506339132785797       
dev accuracy:  0.75, loss: 0.5983823537826538       
dev accuracy: 0.9375, loss: 0.36624225974082947      
dev accuracy: 0.875, loss: 0.4401472210884094       
dev accuracy: 0.875, loss: 0.31067901849746704      
dev accuracy:   1.0, loss: 0.21311646699905396      
dev accuracy: 0.8125, loss: 0.6689149141311646       
dev accuracy: 0.9375, loss: 0.15862426161766052      
dev accuracy:  0.75, loss: 0.6081251502037048       
dev accuracy:  0.75, loss: 0.48125046491622925      
dev accuracy: 0.8125, loss: 0.37910744547843933      
dev accuracy: 0.8125, loss: 0.41911160945892334      
dev accuracy: 0.625, loss: 1.1301543712615967       
dev accuracy: 0.8125, loss: 0.5299848914146423       
dev accuracy: 0.8125, loss: 0.420652866

dev accuracy:  0.75, loss: 0.7855867147445679       
dev accuracy: 0.9375, loss: 0.47214406728744507      
dev accuracy: 0.9375, loss: 0.14810220897197723      
dev accuracy: 0.9375, loss: 0.2138228416442871       
dev accuracy: 0.875, loss: 0.43536248803138733      
dev accuracy: 0.8125, loss: 0.4386327266693115       
dev accuracy: 0.8125, loss: 0.5777715444564819       
dev accuracy: 0.875, loss: 0.5499103665351868       
dev accuracy: 0.9375, loss: 0.3005184233188629       
dev accuracy:   1.0, loss: 0.08977630734443665      
dev accuracy: 0.8125, loss: 0.4867893159389496       
dev accuracy: 0.9375, loss: 0.25651901960372925      
dev accuracy: 0.8125, loss: 0.4921709895133972       
dev accuracy: 0.8125, loss: 0.5077401399612427       
dev accuracy:  0.75, loss: 0.9920893311500549       
dev accuracy: 0.875, loss: 0.4165487587451935       
dev accuracy: 0.8125, loss: 0.7027818560600281       
dev accuracy:   1.0, loss: 0.12817656993865967      
dev accuracy:   1.0, loss: 0.116493

train step #795 accuracy: 0.875, loss: 0.4300798773765564       
train step #796 accuracy: 0.828125, loss: 0.47023075819015503      
train step #797 accuracy: 0.859375, loss: 0.44225990772247314      
train step #798 accuracy: 0.828125, loss: 0.5496166944503784       
train step #799 accuracy: 0.828125, loss: 0.41226160526275635      
train step #800 accuracy: 0.828125, loss: 0.45089051127433777      
train step #801 accuracy: 0.8125, loss: 0.5296439528465271       
train step #802 accuracy: 0.890625, loss: 0.31095290184020996      
train step #803 accuracy: 0.796875, loss: 0.6017547845840454       
train step #804 accuracy: 0.890625, loss: 0.3610886037349701       
train step #805 accuracy: 0.921875, loss: 0.26930898427963257      
train step #806 accuracy: 0.921875, loss: 0.2654650807380676       
train step #807 accuracy: 0.875, loss: 0.3787534236907959       
train step #808 accuracy: 0.90625, loss: 0.2744883596897125       
train step #809 accuracy: 0.828125, loss: 0.6665169596672

train step #917 accuracy: 0.875, loss: 0.45573586225509644      
train step #918 accuracy: 0.84375, loss: 0.49747925996780396      
train step #919 accuracy: 0.84375, loss: 0.5419078469276428       
train step #920 accuracy: 0.90625, loss: 0.3713459074497223       
train step #921 accuracy: 0.890625, loss: 0.3206481337547302       
train step #922 accuracy: 0.828125, loss: 0.4395195245742798       
train step #923 accuracy: 0.84375, loss: 0.3987141251564026       
train step #924 accuracy: 0.9375, loss: 0.17606176435947418      
train step #925 accuracy: 0.921875, loss: 0.27387237548828125      
train step #926 accuracy: 0.859375, loss: 0.4233781397342682       
train step #927 accuracy: 0.90625, loss: 0.35055840015411377      
train step #928 accuracy: 0.890625, loss: 0.34047025442123413      
train step #929 accuracy: 0.90625, loss: 0.35087624192237854      
train step #930 accuracy: 0.875, loss: 0.5025882124900818       
train step #931 accuracy: 0.796875, loss: 0.45883050560951233 

train step #1039 accuracy: 0.875, loss: 0.34834015369415283      
train step #1040 accuracy: 0.96875, loss: 0.14337563514709473      
train step #1041 accuracy: 0.890625, loss: 0.5409772396087646       
dev accuracy: 0.9375, loss: 0.13661377131938934      
dev accuracy: 0.875, loss: 0.41968411207199097      
dev accuracy:  0.75, loss: 0.4515356421470642       
dev accuracy: 0.8125, loss: 0.42663857340812683      
dev accuracy: 0.9375, loss: 0.23460820317268372      
dev accuracy: 0.9375, loss: 0.476374089717865        
dev accuracy:  0.75, loss: 0.6494755744934082       
dev accuracy: 0.9375, loss: 0.23526640236377716      
dev accuracy: 0.8125, loss: 0.6152892112731934       
dev accuracy: 0.8125, loss: 0.4512729048728943       
dev accuracy: 0.9375, loss: 0.19095994532108307      
dev accuracy: 0.8125, loss: 0.6441839337348938       
dev accuracy:  0.75, loss: 0.6308186650276184       
dev accuracy: 0.9375, loss: 0.176998570561409        
dev accuracy: 0.8125, loss: 0.482545733451843

dev accuracy:   1.0, loss: 0.09273332357406616      
dev accuracy: 0.875, loss: 0.2900233864784241       
dev accuracy:  0.75, loss: 0.9368761777877808       
dev accuracy:   1.0, loss: 0.029333919286727905     
dev accuracy: 0.875, loss: 0.2312087118625641       
dev accuracy: 0.8125, loss: 0.8382302522659302       
dev accuracy: 0.875, loss: 0.8819791078567505       
dev accuracy:   1.0, loss: 0.040294647216796875     
dev accuracy: 0.8125, loss: 0.5750263333320618       
dev accuracy: 0.8125, loss: 0.6414759755134583       
dev accuracy:   1.0, loss: 0.08809548616409302      
dev accuracy: 0.9375, loss: 0.24410560727119446      
dev accuracy: 0.9375, loss: 0.22655707597732544      
dev accuracy:   1.0, loss: 0.061563730239868164     
dev accuracy: 0.875, loss: 0.620944619178772        
dev accuracy: 0.9375, loss: 0.26764407753944397      
dev accuracy: 0.9375, loss: 0.2170284539461136       
dev accuracy:   1.0, loss: 0.045192569494247437     
dev accuracy: 0.875, loss: 0.6354770660

train step #1127 accuracy: 0.875, loss: 0.3970598578453064       
train step #1128 accuracy: 0.890625, loss: 0.4865233600139618       
train step #1129 accuracy: 0.890625, loss: 0.32048845291137695      
train step #1130 accuracy: 0.890625, loss: 0.41553977131843567      
train step #1131 accuracy: 0.890625, loss: 0.44445502758026123      
train step #1132 accuracy: 0.890625, loss: 0.432993620634079        
train step #1133 accuracy: 0.90625, loss: 0.3209252953529358       
train step #1134 accuracy: 0.875, loss: 0.40526729822158813      
train step #1135 accuracy: 0.875, loss: 0.4853951334953308       
train step #1136 accuracy: 0.828125, loss: 0.4337661862373352       
train step #1137 accuracy: 0.8125, loss: 0.5860119462013245       
train step #1138 accuracy: 0.875, loss: 0.3830842971801758       
train step #1139 accuracy: 0.9375, loss: 0.3065681457519531       
train step #1140 accuracy: 0.921875, loss: 0.337762713432312        
train step #1141 accuracy: 0.859375, loss: 0.454754

train step #1248 accuracy: 0.765625, loss: 0.6516322493553162       
train step #1249 accuracy: 0.890625, loss: 0.41513553261756897      
train step #1250 accuracy: 0.890625, loss: 0.28278297185897827      
train step #1251 accuracy: 0.90625, loss: 0.2188689410686493       
train step #1252 accuracy: 0.90625, loss: 0.25703465938568115      
train step #1253 accuracy: 0.90625, loss: 0.3125869631767273       
train step #1254 accuracy: 0.921875, loss: 0.21738377213478088      
train step #1255 accuracy: 0.90625, loss: 0.4379793405532837       
train step #1256 accuracy: 0.90625, loss: 0.3104671239852905       
train step #1257 accuracy: 0.9375, loss: 0.22452637553215027      
train step #1258 accuracy: 0.953125, loss: 0.1510835886001587       
train step #1259 accuracy: 0.9375, loss: 0.27977895736694336      
train step #1260 accuracy: 0.9375, loss: 0.2724549174308777       
train step #1261 accuracy: 0.90625, loss: 0.2753872871398926       
train step #1262 accuracy: 0.859375, loss: 0.3

train step #1369 accuracy: 0.90625, loss: 0.25182396173477173      
train step #1370 accuracy: 0.90625, loss: 0.2648330628871918       
train step #1371 accuracy: 0.9375, loss: 0.25114476680755615      
train step #1372 accuracy: 0.890625, loss: 0.338492751121521        
train step #1373 accuracy: 0.890625, loss: 0.40878820419311523      
train step #1374 accuracy: 0.96875, loss: 0.17527052760124207      
train step #1375 accuracy: 0.96875, loss: 0.22559255361557007      
train step #1376 accuracy: 0.90625, loss: 0.3173307180404663       
train step #1377 accuracy: 0.890625, loss: 0.4127441942691803       
train step #1378 accuracy: 0.96875, loss: 0.2464795708656311       
train step #1379 accuracy: 0.90625, loss: 0.2173355668783188       
train step #1380 accuracy: 0.9375, loss: 0.23760145902633667      
train step #1381 accuracy: 0.9375, loss: 0.26496750116348267      
train step #1382 accuracy: 0.890625, loss: 0.4454362094402313       
train step #1383 accuracy: 0.90625, loss: 0.436

dev accuracy: 0.6875, loss: 0.5648728609085083       
dev accuracy:   1.0, loss: 0.07607138156890869      
dev accuracy: 0.9375, loss: 0.23930621147155762      
dev accuracy: 0.9375, loss: 0.21174973249435425      
dev accuracy:   1.0, loss: 0.05567634105682373      
dev accuracy: 0.8125, loss: 0.8612437844276428       
dev accuracy: 0.875, loss: 0.29126420617103577      
dev accuracy: 0.9375, loss: 0.31933730840682983      
dev accuracy:  0.75, loss: 0.7491900324821472       
dev accuracy:  0.75, loss: 0.5903586149215698       
dev accuracy: 0.9375, loss: 0.2293095588684082       
dev accuracy: 0.875, loss: 0.35388994216918945      
dev accuracy: 0.9375, loss: 0.30672696232795715      
dev accuracy: 0.875, loss: 0.21125122904777527      
dev accuracy: 0.8125, loss: 0.37745049595832825      
dev accuracy: 0.875, loss: 0.32320940494537354      
dev accuracy: 0.875, loss: 0.4782350957393646       
dev accuracy: 0.9375, loss: 0.16813799738883972      
dev accuracy:  0.75, loss: 0.52127742

train step #1458 accuracy: 0.875, loss: 0.3753063678741455       
train step #1459 accuracy: 0.984375, loss: 0.08528263121843338      
train step #1460 accuracy: 0.90625, loss: 0.339921236038208        
train step #1461 accuracy: 0.890625, loss: 0.36979514360427856      
train step #1462 accuracy: 0.78125, loss: 0.6718323230743408       
train step #1463 accuracy: 0.921875, loss: 0.19448886811733246      
train step #1464 accuracy: 0.875, loss: 0.3747556805610657       
train step #1465 accuracy: 0.953125, loss: 0.23570767045021057      
train step #1466 accuracy: 0.9375, loss: 0.2827305793762207       
train step #1467 accuracy: 0.890625, loss: 0.38575753569602966      
train step #1468 accuracy: 0.9375, loss: 0.25192829966545105      
train step #1469 accuracy: 0.953125, loss: 0.23888149857521057      
train step #1470 accuracy: 0.921875, loss: 0.25770968198776245      
train step #1471 accuracy: 0.9375, loss: 0.17425931990146637      
train step #1472 accuracy: 0.9375, loss: 0.36287

train step #1579 accuracy: 0.9375, loss: 0.3204094171524048       
train step #1580 accuracy: 0.921875, loss: 0.3141712546348572       
train step #1581 accuracy: 0.90625, loss: 0.30199041962623596      
train step #1582 accuracy: 0.875, loss: 0.44556838274002075      
train step #1583 accuracy: 0.921875, loss: 0.2511078715324402       
train step #1584 accuracy: 0.9375, loss: 0.23064865171909332      
train step #1585 accuracy: 0.9375, loss: 0.206525057554245        
train step #1586 accuracy: 0.890625, loss: 0.2997817099094391       
train step #1587 accuracy: 0.953125, loss: 0.2078763246536255       
train step #1588 accuracy: 0.875, loss: 0.3427209258079529       
train step #1589 accuracy: 0.953125, loss: 0.15843838453292847      
train step #1590 accuracy: 0.921875, loss: 0.22334659099578857      
train step #1591 accuracy: 0.96875, loss: 0.19503656029701233      
train step #1592 accuracy: 0.953125, loss: 0.19688525795936584      
train step #1593 accuracy: 0.9375, loss: 0.24493

train step #1700 accuracy: 0.9375, loss: 0.27277612686157227      
train step #1701 accuracy: 0.84375, loss: 0.5153321027755737       
train step #1702 accuracy: 0.921875, loss: 0.32609713077545166      
train step #1703 accuracy: 0.890625, loss: 0.45879918336868286      
train step #1704 accuracy: 0.9375, loss: 0.24987037479877472      
train step #1705 accuracy: 0.921875, loss: 0.32031139731407166      
train step #1706 accuracy: 0.875, loss: 0.3123457431793213       
train step #1707 accuracy: 0.90625, loss: 0.3334541320800781       
train step #1708 accuracy: 0.953125, loss: 0.2073705941438675       
train step #1709 accuracy: 0.84375, loss: 0.4051705002784729       
train step #1710 accuracy: 0.9375, loss: 0.24321818351745605      
train step #1711 accuracy: 0.90625, loss: 0.404419481754303        
train step #1712 accuracy: 0.90625, loss: 0.29048919677734375      
train step #1713 accuracy: 0.984375, loss: 0.07012808322906494      
train step #1714 accuracy: 0.8125, loss: 0.57814

dev accuracy: 0.9375, loss: 0.17074714601039886      
dev accuracy: 0.9375, loss: 0.43493086099624634      
dev accuracy: 0.9375, loss: 0.18567496538162231      
dev accuracy: 0.8125, loss: 0.49400877952575684      
dev accuracy:   1.0, loss: 0.012523949146270752     
dev accuracy: 0.875, loss: 0.3263348340988159       
dev accuracy: 0.9375, loss: 0.17424261569976807      
dev accuracy: 0.9375, loss: 0.20940959453582764      
dev accuracy: 0.875, loss: 0.4159559905529022       
dev accuracy: 0.8125, loss: 0.8261871337890625       
dev accuracy: 0.875, loss: 0.3197575807571411       
dev accuracy:  0.75, loss: 0.7152577042579651       
dev accuracy:   1.0, loss: 0.006162285804748535     
dev accuracy: 0.9375, loss: 0.2043125331401825       
dev accuracy:   1.0, loss: 0.012962043285369873     
dev accuracy: 0.875, loss: 0.28372722864151         
dev accuracy: 0.8125, loss: 0.43673309683799744      
dev accuracy: 0.875, loss: 0.2787126302719116       
dev accuracy: 0.9375, loss: 0.4061629

train step #1788 accuracy: 0.921875, loss: 0.28392526507377625      
train step #1789 accuracy: 0.9375, loss: 0.1807837039232254       
train step #1790 accuracy: 0.9375, loss: 0.15856072306632996      
train step #1791 accuracy: 0.9375, loss: 0.219178706407547        
train step #1792 accuracy: 0.953125, loss: 0.11065871268510818      
train step #1793 accuracy: 0.96875, loss: 0.12946096062660217      
train step #1794 accuracy: 0.96875, loss: 0.11576636880636215      
train step #1795 accuracy: 0.875, loss: 0.3528927266597748       
train step #1796 accuracy: 0.953125, loss: 0.20130720734596252      
train step #1797 accuracy: 0.859375, loss: 0.4245645999908447       
train step #1798 accuracy: 0.890625, loss: 0.36067649722099304      
train step #1799 accuracy: 0.828125, loss: 0.6236488819122314       
train step #1800 accuracy: 0.921875, loss: 0.19728030264377594      
train step #1801 accuracy: 0.90625, loss: 0.24545693397521973      
train step #1802 accuracy: 0.890625, loss: 0.3

train step #1909 accuracy: 0.9375, loss: 0.25642335414886475      
train step #1910 accuracy: 0.9375, loss: 0.20761661231517792      
train step #1911 accuracy: 0.96875, loss: 0.2051626741886139       
train step #1912 accuracy: 0.9375, loss: 0.2704734802246094       
train step #1913 accuracy: 0.90625, loss: 0.24450689554214478      
train step #1914 accuracy: 0.921875, loss: 0.19986765086650848      
train step #1915 accuracy: 0.984375, loss: 0.08678605407476425      
train step #1916 accuracy: 0.953125, loss: 0.2359011173248291       
train step #1917 accuracy: 0.953125, loss: 0.18983380496501923      
train step #1918 accuracy: 0.828125, loss: 0.5723716020584106       
train step #1919 accuracy: 0.90625, loss: 0.26813048124313354      
train step #1920 accuracy: 0.96875, loss: 0.13559895753860474      
train step #1921 accuracy: 0.875, loss: 0.3674353063106537       
train step #1922 accuracy: 0.90625, loss: 0.3711585998535156       
train step #1923 accuracy: 0.9375, loss: 0.22088

train step #2030 accuracy: 0.875, loss: 0.2540806829929352       
train step #2031 accuracy: 0.921875, loss: 0.2523178458213806       
train step #2032 accuracy: 0.953125, loss: 0.12498536705970764      
train step #2033 accuracy: 0.96875, loss: 0.12419728189706802      
train step #2034 accuracy: 0.96875, loss: 0.14718449115753174      
train step #2035 accuracy: 0.96875, loss: 0.18310217559337616      
train step #2036 accuracy: 0.9375, loss: 0.14859436452388763      
train step #2037 accuracy: 0.984375, loss: 0.16327224671840668      
train step #2038 accuracy: 0.890625, loss: 0.24994942545890808      
train step #2039 accuracy: 0.890625, loss: 0.4051487445831299       
train step #2040 accuracy: 0.890625, loss: 0.33764779567718506      
train step #2041 accuracy: 0.96875, loss: 0.12039888650178909      
train step #2042 accuracy: 0.90625, loss: 0.18928110599517822      
train step #2043 accuracy: 0.9375, loss: 0.1694669872522354       
train step #2044 accuracy: 0.96875, loss: 0.15

dev accuracy: 0.875, loss: 0.5887781381607056       
dev accuracy: 0.875, loss: 0.35662418603897095      
dev accuracy: 0.875, loss: 0.20575964450836182      
dev accuracy: 0.8125, loss: 0.2781214714050293       
dev accuracy:   1.0, loss: 0.08007384836673737      
dev accuracy:  0.75, loss: 0.5466129779815674       
dev accuracy: 0.9375, loss: 0.29618167877197266      
dev accuracy:   1.0, loss: 0.030552655458450317     
dev accuracy: 0.9375, loss: 0.22785545885562897      
dev accuracy: 0.875, loss: 0.18949614465236664      
dev accuracy: 0.9375, loss: 0.48886722326278687      
dev accuracy:   1.0, loss: 0.15980808436870575      
dev accuracy: 0.875, loss: 0.2495088130235672       
dev accuracy:   1.0, loss: 0.09037449955940247      
dev accuracy: 0.8125, loss: 0.2908087372779846       
dev accuracy:   1.0, loss: 0.028426319360733032     
dev accuracy: 0.9375, loss: 0.12236985564231873      
dev accuracy: 0.9375, loss: 0.19833175837993622      
dev accuracy: 0.9375, loss: 0.124810546

train step #2119 accuracy: 0.953125, loss: 0.24059177935123444      
train step #2120 accuracy: 0.90625, loss: 0.25753676891326904      
train step #2121 accuracy: 0.9375, loss: 0.16771200299263         
train step #2122 accuracy: 0.96875, loss: 0.1595279574394226       
train step #2123 accuracy: 0.921875, loss: 0.13384918868541718      
train step #2124 accuracy: 0.921875, loss: 0.44848042726516724      
train step #2125 accuracy: 0.96875, loss: 0.11082034558057785      
train step #2126 accuracy: 0.921875, loss: 0.40376460552215576      
train step #2127 accuracy: 0.9375, loss: 0.23242469131946564      
train step #2128 accuracy: 0.921875, loss: 0.2827228307723999       
train step #2129 accuracy: 0.953125, loss: 0.1953137218952179       
train step #2130 accuracy: 0.953125, loss: 0.1627611666917801       
train step #2131 accuracy: 0.953125, loss: 0.11877164244651794      
train step #2132 accuracy: 0.890625, loss: 0.36589622497558594      
train step #2133 accuracy: 0.921875, loss

train step #2240 accuracy:   1.0, loss: 0.04894676059484482      
train step #2241 accuracy: 0.9375, loss: 0.1566319316625595       
train step #2242 accuracy: 0.984375, loss: 0.0799124464392662       
train step #2243 accuracy: 0.9375, loss: 0.2635705769062042       
train step #2244 accuracy: 0.984375, loss: 0.10614529252052307      
train step #2245 accuracy: 0.90625, loss: 0.36931079626083374      
train step #2246 accuracy: 0.96875, loss: 0.08587421476840973      
train step #2247 accuracy: 0.9375, loss: 0.2536194920539856       
train step #2248 accuracy: 0.890625, loss: 0.38122424483299255      
train step #2249 accuracy: 0.921875, loss: 0.25182777643203735      
train step #2250 accuracy: 0.90625, loss: 0.27878010272979736      
train step #2251 accuracy: 0.921875, loss: 0.2762264013290405       
train step #2252 accuracy: 0.953125, loss: 0.1437874138355255       
train step #2253 accuracy: 0.90625, loss: 0.3035724461078644       
train step #2254 accuracy: 0.9375, loss: 0.1933

train step #2361 accuracy: 0.984375, loss: 0.1458454579114914       
train step #2362 accuracy:   1.0, loss: 0.05402247980237007      
train step #2363 accuracy: 0.84375, loss: 0.5098816752433777       
train step #2364 accuracy: 0.953125, loss: 0.24122637510299683      
train step #2365 accuracy: 0.953125, loss: 0.20345669984817505      
train step #2366 accuracy: 0.96875, loss: 0.09456497430801392      
train step #2367 accuracy: 0.953125, loss: 0.2928115129470825       
train step #2368 accuracy: 0.953125, loss: 0.23230794072151184      
train step #2369 accuracy: 0.921875, loss: 0.34709545969963074      
train step #2370 accuracy: 0.921875, loss: 0.21594272553920746      
train step #2371 accuracy: 0.96875, loss: 0.0889139473438263       
train step #2372 accuracy: 0.890625, loss: 0.3311232924461365       
train step #2373 accuracy: 0.96875, loss: 0.09493383020162582      
train step #2374 accuracy: 0.953125, loss: 0.20020011067390442      
train step #2375 accuracy: 0.953125, loss

dev accuracy: 0.875, loss: 0.3604118227958679       
dev accuracy: 0.875, loss: 0.8419930338859558       
dev accuracy:   1.0, loss: 0.07299163937568665      
dev accuracy:   1.0, loss: 0.01773509383201599      
dev accuracy:   1.0, loss: 0.10796685516834259      
dev accuracy: 0.9375, loss: 0.19014370441436768      
dev accuracy: 0.9375, loss: 0.3610663414001465       
dev accuracy: 0.875, loss: 0.2218931019306183       
dev accuracy:   1.0, loss: 0.039105236530303955     
dev accuracy: 0.9375, loss: 0.10669785737991333      
dev accuracy: 0.8125, loss: 0.565527617931366        
dev accuracy: 0.9375, loss: 0.43672165274620056      
dev accuracy: 0.9375, loss: 0.2053942084312439       
dev accuracy: 0.875, loss: 0.2811514735221863       
dev accuracy: 0.8125, loss: 0.8967019319534302       
dev accuracy: 0.9375, loss: 0.10607334971427917      
dev accuracy: 0.875, loss: 0.20062196254730225      
dev accuracy:   1.0, loss: 0.03627185523509979      
dev accuracy:   1.0, loss: 0.022162079

train step #2450 accuracy: 0.84375, loss: 0.5267018675804138       
train step #2451 accuracy: 0.921875, loss: 0.23073726892471313      
train step #2452 accuracy: 0.953125, loss: 0.22307565808296204      
train step #2453 accuracy: 0.953125, loss: 0.1744949221611023       
train step #2454 accuracy: 0.984375, loss: 0.08228781074285507      
train step #2455 accuracy: 0.921875, loss: 0.22355353832244873      
train step #2456 accuracy: 0.90625, loss: 0.2777535319328308       
train step #2457 accuracy:   1.0, loss: 0.10104800760746002      
train step #2458 accuracy: 0.96875, loss: 0.12518230080604553      
train step #2459 accuracy: 0.96875, loss: 0.22636079788208008      
train step #2460 accuracy: 0.9375, loss: 0.1704477220773697       
train step #2461 accuracy: 0.953125, loss: 0.16734765470027924      
train step #2462 accuracy: 0.984375, loss: 0.08370880782604218      
train step #2463 accuracy: 0.984375, loss: 0.10713957995176315      
train step #2464 accuracy: 0.953125, loss: 

train step #2570 accuracy: 0.9375, loss: 0.27945825457572937      
train step #2571 accuracy: 0.90625, loss: 0.2437368631362915       
train step #2572 accuracy: 0.96875, loss: 0.12854966521263123      
train step #2573 accuracy: 0.96875, loss: 0.12769150733947754      
train step #2574 accuracy: 0.9375, loss: 0.19582390785217285      
train step #2575 accuracy: 0.96875, loss: 0.17119964957237244      
train step #2576 accuracy: 0.9375, loss: 0.21632586419582367      
train step #2577 accuracy: 0.953125, loss: 0.13880442082881927      
train step #2578 accuracy: 0.953125, loss: 0.18090924620628357      
train step #2579 accuracy: 0.921875, loss: 0.21874406933784485      
train step #2580 accuracy: 0.921875, loss: 0.1958557814359665       
train step #2581 accuracy: 0.953125, loss: 0.0964122787117958       
train step #2582 accuracy: 0.9375, loss: 0.2347986251115799       
train step #2583 accuracy: 0.953125, loss: 0.13754555583000183      
train step #2584 accuracy: 0.90625, loss: 0.36

train step #2691 accuracy: 0.953125, loss: 0.22542870044708252      
train step #2692 accuracy: 0.9375, loss: 0.20791347324848175      
train step #2693 accuracy: 0.984375, loss: 0.10692887008190155      
train step #2694 accuracy: 0.96875, loss: 0.17399384081363678      
train step #2695 accuracy: 0.953125, loss: 0.19860410690307617      
train step #2696 accuracy: 0.9375, loss: 0.19255012273788452      
train step #2697 accuracy: 0.859375, loss: 0.30920663475990295      
train step #2698 accuracy: 0.953125, loss: 0.23443014919757843      
train step #2699 accuracy: 0.90625, loss: 0.2513686418533325       
train step #2700 accuracy: 0.859375, loss: 0.41801393032073975      
train step #2701 accuracy: 0.875, loss: 0.4980396032333374       
train step #2702 accuracy: 0.9375, loss: 0.22564321756362915      
train step #2703 accuracy: 0.953125, loss: 0.21567173302173615      
train step #2704 accuracy: 0.96875, loss: 0.12264379858970642      
train step #2705 accuracy: 0.828125, loss: 0.4

dev accuracy:   1.0, loss: 0.013109743595123291     
dev accuracy: 0.9375, loss: 0.15431305766105652      
dev accuracy: 0.9375, loss: 0.10746157169342041      
dev accuracy: 0.8125, loss: 0.8856987953186035       
dev accuracy:   1.0, loss: 0.04645121097564697      
dev accuracy:   1.0, loss: 0.005660742521286011     
dev accuracy: 0.9375, loss: 0.1532074511051178       
dev accuracy:   1.0, loss: 0.12436144053936005      
dev accuracy:  0.75, loss: 1.21307373046875         
dev accuracy: 0.9375, loss: 0.16380640864372253      
dev accuracy: 0.875, loss: 0.33315712213516235      
dev accuracy:   1.0, loss: 0.08373197913169861      
dev accuracy: 0.9375, loss: 0.14918747544288635      
dev accuracy: 0.875, loss: 0.21155264973640442      
dev accuracy: 0.875, loss: 0.18247991800308228      
dev accuracy: 0.9375, loss: 0.19773033261299133      
dev accuracy: 0.875, loss: 0.22855597734451294      
dev accuracy: 0.9375, loss: 0.21534907817840576      
dev accuracy:   1.0, loss: 0.068217128

train step #2780 accuracy: 0.921875, loss: 0.21029017865657806      
train step #2781 accuracy: 0.90625, loss: 0.3040958046913147       
train step #2782 accuracy: 0.90625, loss: 0.31018248200416565      
train step #2783 accuracy: 0.953125, loss: 0.2213825285434723       
train step #2784 accuracy: 0.953125, loss: 0.17187795042991638      
train step #2785 accuracy: 0.9375, loss: 0.20912164449691772      
train step #2786 accuracy: 0.9375, loss: 0.20568445324897766      
train step #2787 accuracy: 0.96875, loss: 0.08410293608903885      
train step #2788 accuracy: 0.953125, loss: 0.17092131078243256      
train step #2789 accuracy: 0.921875, loss: 0.21225351095199585      
train step #2790 accuracy: 0.96875, loss: 0.12703359127044678      
train step #2791 accuracy: 0.96875, loss: 0.1428348869085312       
train step #2792 accuracy: 0.90625, loss: 0.24289144575595856      
train step #2793 accuracy: 0.953125, loss: 0.18342013657093048      
train step #2794 accuracy: 0.9375, loss: 0.1

train step #2900 accuracy: 0.96875, loss: 0.1544559746980667       
train step #2901 accuracy: 0.984375, loss: 0.06766197830438614      
train step #2902 accuracy: 0.921875, loss: 0.276180624961853        
train step #2903 accuracy: 0.9375, loss: 0.20499414205551147      
train step #2904 accuracy:   1.0, loss: 0.0844520553946495       
train step #2905 accuracy: 0.96875, loss: 0.17689135670661926      
train step #2906 accuracy: 0.90625, loss: 0.31885185837745667      
train step #2907 accuracy: 0.9375, loss: 0.243546724319458        
train step #2908 accuracy: 0.984375, loss: 0.10833334177732468      
train step #2909 accuracy: 0.9375, loss: 0.21257808804512024      
train step #2910 accuracy: 0.953125, loss: 0.18035437166690826      
train step #2911 accuracy: 0.921875, loss: 0.33759164810180664      
train step #2912 accuracy: 0.984375, loss: 0.13688014447689056      
train step #2913 accuracy: 0.9375, loss: 0.2534477114677429       
train step #2914 accuracy: 0.953125, loss: 0.206

train step #3020 accuracy: 0.9375, loss: 0.31369900703430176      
train step #3021 accuracy: 0.921875, loss: 0.16383524239063263      
train step #3022 accuracy: 0.9375, loss: 0.20915141701698303      
train step #3023 accuracy: 0.890625, loss: 0.454181432723999        
train step #3024 accuracy: 0.9375, loss: 0.2692543864250183       
train step #3025 accuracy: 0.9375, loss: 0.2162887156009674       
train step #3026 accuracy: 0.96875, loss: 0.12333410233259201      
train step #3027 accuracy: 0.921875, loss: 0.25510966777801514      
train step #3028 accuracy: 0.953125, loss: 0.20500481128692627      
train step #3029 accuracy: 0.953125, loss: 0.18225215375423431      
train step #3030 accuracy: 0.96875, loss: 0.06256145238876343      
train step #3031 accuracy: 0.921875, loss: 0.26157504320144653      
train step #3032 accuracy:   1.0, loss: 0.03302788361907005      
train step #3033 accuracy: 0.984375, loss: 0.06684338301420212      
train step #3034 accuracy: 0.953125, loss: 0.16

dev accuracy: 0.9375, loss: 0.15523570775985718      
dev accuracy:   1.0, loss: 0.03770849108695984      
dev accuracy: 0.875, loss: 0.1969819813966751       
dev accuracy:   1.0, loss: 0.04541853070259094      
dev accuracy:   1.0, loss: 0.013702094554901123     
dev accuracy: 0.8125, loss: 0.4611934423446655       
dev accuracy: 0.875, loss: 0.2894343137741089       
dev accuracy: 0.9375, loss: 0.4462059438228607       
dev accuracy: 0.8125, loss: 0.3517889976501465       
dev accuracy:   1.0, loss: 0.04793006181716919      
dev accuracy: 0.9375, loss: 0.0836864709854126       
dev accuracy:   1.0, loss: 0.03719690442085266      
dev accuracy: 0.9375, loss: 0.2318563014268875       
dev accuracy:   1.0, loss: 0.0017844438552856445    
dev accuracy:   1.0, loss: 0.09698036313056946      
dev accuracy: 0.9375, loss: 0.22719994187355042      
dev accuracy:   1.0, loss: 0.05993497371673584      
dev accuracy: 0.9375, loss: 0.07870447635650635      
dev accuracy: 0.9375, loss: 0.16855195

dev accuracy:   1.0, loss: 0.05579903721809387      
dev accuracy:  0.75, loss: 0.41246163845062256      
dev accuracy: 0.875, loss: 0.2187284231185913       
dev accuracy:   1.0, loss: 0.007580280303955078     
dev accuracy: 0.875, loss: 0.326755553483963        
dev accuracy: 0.9375, loss: 0.2082662284374237       
dev accuracy: 0.875, loss: 0.4017694890499115       
dev accuracy:   1.0, loss: 0.03406389057636261      
dev accuracy: 0.9375, loss: 0.26837313175201416      
dev accuracy:   1.0, loss: 0.023754805326461792     
dev accuracy:   1.0, loss: 0.04785206913948059      
dev accuracy:   1.0, loss: 0.06384193897247314      
dev accuracy:   1.0, loss: 0.008936047554016113     
dev accuracy:   1.0, loss: 0.05725647509098053      
dev accuracy: 0.875, loss: 0.13625556230545044      
dev accuracy:   1.0, loss: 0.035515040159225464     
dev accuracy: 0.9375, loss: 0.175498366355896        
dev accuracy:   1.0, loss: 0.11223894357681274      
dev accuracy:   1.0, loss: 0.00361728668212

train step #3229 accuracy: 0.9375, loss: 0.2611315846443176       
train step #3230 accuracy: 0.9375, loss: 0.2147621214389801       
train step #3231 accuracy: 0.921875, loss: 0.3051956593990326       
train step #3232 accuracy: 0.953125, loss: 0.19230154156684875      
train step #3233 accuracy: 0.984375, loss: 0.10807580500841141      
train step #3234 accuracy: 0.9375, loss: 0.20683294534683228      
train step #3235 accuracy: 0.96875, loss: 0.08761800080537796      
train step #3236 accuracy: 0.953125, loss: 0.2000138908624649       
train step #3237 accuracy: 0.9375, loss: 0.19695596396923065      
train step #3238 accuracy: 0.96875, loss: 0.06945127248764038      
train step #3239 accuracy: 0.9375, loss: 0.2193683534860611       
train step #3240 accuracy: 0.953125, loss: 0.14929339289665222      
train step #3241 accuracy: 0.984375, loss: 0.03853510692715645      
train step #3242 accuracy: 0.953125, loss: 0.3053176999092102       
train step #3243 accuracy: 0.90625, loss: 0.27

train step #3349 accuracy: 0.953125, loss: 0.12404023110866547      
train step #3350 accuracy: 0.984375, loss: 0.10310447216033936      
train step #3351 accuracy: 0.984375, loss: 0.09925378859043121      
train step #3352 accuracy: 0.984375, loss: 0.1308746337890625       
train step #3353 accuracy: 0.96875, loss: 0.10744491964578629      
train step #3354 accuracy: 0.984375, loss: 0.05514006316661835      
train step #3355 accuracy: 0.9375, loss: 0.28121352195739746      
train step #3356 accuracy: 0.9375, loss: 0.18207788467407227      
train step #3357 accuracy: 0.984375, loss: 0.11426225304603577      
train step #3358 accuracy: 0.9375, loss: 0.15169765055179596      
train step #3359 accuracy: 0.984375, loss: 0.11361058801412582      
train step #3360 accuracy: 0.953125, loss: 0.1866425722837448       
train step #3361 accuracy: 0.953125, loss: 0.1803579330444336       
train step #3362 accuracy: 0.953125, loss: 0.13198243081569672      
train step #3363 accuracy:   1.0, loss: 0

train step #3470 accuracy: 0.96875, loss: 0.1583373099565506       
dev accuracy: 0.9375, loss: 0.46238839626312256      
dev accuracy: 0.9375, loss: 0.08516120910644531      
dev accuracy: 0.9375, loss: 0.07840868830680847      
dev accuracy:   1.0, loss: 0.0735611766576767       
dev accuracy: 0.8125, loss: 0.6548024415969849       
dev accuracy: 0.9375, loss: 0.37341657280921936      
dev accuracy: 0.8125, loss: 0.3123745620250702       
dev accuracy:   1.0, loss: 0.026959240436553955     
dev accuracy:   1.0, loss: 0.0038364529609680176    
dev accuracy:   1.0, loss: 0.02418971061706543      
dev accuracy: 0.9375, loss: 0.10565352439880371      
dev accuracy:   1.0, loss: 0.0006976723670959473    
dev accuracy:   1.0, loss: 0.0069983601570129395    
dev accuracy:   1.0, loss: 0.023881077766418457     
dev accuracy:   1.0, loss: 0.011651933193206787     
dev accuracy:   1.0, loss: 0.017617106437683105     
dev accuracy: 0.875, loss: 0.5232993364334106       
dev accuracy:   1.0, los

dev accuracy:   1.0, loss: 0.004177898168563843     
dev accuracy:   1.0, loss: 0.164450004696846        
dev accuracy:   1.0, loss: 0.06502178311347961      
dev accuracy: 0.8125, loss: 0.4591144025325775       
dev accuracy: 0.9375, loss: 0.09545236825942993      
dev accuracy: 0.9375, loss: 0.3923204839229584       
dev accuracy: 0.8125, loss: 0.5025646686553955       
dev accuracy: 0.8125, loss: 0.37625861167907715      
dev accuracy: 0.9375, loss: 0.10927307605743408      
dev accuracy:   1.0, loss: 0.04448208212852478      
dev accuracy:   1.0, loss: 0.0020351409912109375    
dev accuracy:   1.0, loss: 0.14774905145168304      
dev accuracy: 0.9375, loss: 0.12545956671237946      
dev accuracy:   1.0, loss: 0.007784098386764526     
dev accuracy: 0.9375, loss: 0.19770877063274384      
dev accuracy:   1.0, loss: 0.0435217022895813       
dev accuracy:   1.0, loss: 0.02350333333015442      
dev accuracy: 0.9375, loss: 0.09560823440551758      
dev accuracy:   1.0, loss: 0.02095174

train step #3559 accuracy: 0.984375, loss: 0.08426696807146072      
train step #3560 accuracy: 0.9375, loss: 0.1206575334072113       
train step #3561 accuracy: 0.96875, loss: 0.21307480335235596      
train step #3562 accuracy: 0.984375, loss: 0.039953235536813736     
train step #3563 accuracy: 0.953125, loss: 0.1731404811143875       
train step #3564 accuracy: 0.96875, loss: 0.14382155239582062      
train step #3565 accuracy: 0.96875, loss: 0.0953931212425232       
train step #3566 accuracy: 0.984375, loss: 0.058731116354465485     
train step #3567 accuracy: 0.96875, loss: 0.10193914920091629      
train step #3568 accuracy: 0.96875, loss: 0.1436309963464737       
train step #3569 accuracy:   1.0, loss: 0.036507926881313324     
train step #3570 accuracy: 0.921875, loss: 0.29696810245513916      
train step #3571 accuracy: 0.96875, loss: 0.14635375142097473      
train step #3572 accuracy: 0.921875, loss: 0.27320706844329834      
train step #3573 accuracy: 0.96875, loss: 0.1

train step #3680 accuracy: 0.96875, loss: 0.11835940927267075      
train step #3681 accuracy: 0.96875, loss: 0.22488607466220856      
train step #3682 accuracy: 0.984375, loss: 0.05411569029092789      
train step #3683 accuracy: 0.96875, loss: 0.1885293871164322       
train step #3684 accuracy: 0.953125, loss: 0.1458788365125656       
train step #3685 accuracy: 0.875, loss: 0.2718968987464905       
train step #3686 accuracy: 0.90625, loss: 0.3413228392601013       
train step #3687 accuracy: 0.953125, loss: 0.13558727502822876      
train step #3688 accuracy: 0.9375, loss: 0.1517074555158615       
train step #3689 accuracy: 0.96875, loss: 0.12015952169895172      
train step #3690 accuracy: 0.953125, loss: 0.13407252728939056      
train step #3691 accuracy: 0.953125, loss: 0.23234188556671143      
train step #3692 accuracy: 0.96875, loss: 0.13179491460323334      
train step #3693 accuracy: 0.9375, loss: 0.18404598534107208      
train step #3694 accuracy: 0.984375, loss: 0.08

train step #3801 accuracy: 0.984375, loss: 0.05054135620594025      
train step #3802 accuracy:   1.0, loss: 0.03319180756807327      
train step #3803 accuracy: 0.953125, loss: 0.1268559843301773       
train step #3804 accuracy: 0.96875, loss: 0.06762368977069855      
train step #3805 accuracy: 0.96875, loss: 0.11086992919445038      
train step #3806 accuracy: 0.984375, loss: 0.10124948620796204      
train step #3807 accuracy: 0.96875, loss: 0.17003993690013885      
train step #3808 accuracy: 0.96875, loss: 0.08687011152505875      
train step #3809 accuracy: 0.9375, loss: 0.2504938840866089       
train step #3810 accuracy: 0.96875, loss: 0.09258878976106644      
train step #3811 accuracy: 0.953125, loss: 0.1871068775653839       
train step #3812 accuracy: 0.96875, loss: 0.15247689187526703      
train step #3813 accuracy: 0.984375, loss: 0.08531872183084488      
train step #3814 accuracy: 0.96875, loss: 0.15148814022541046      
train step #3815 accuracy:   1.0, loss: 0.0309

dev accuracy: 0.875, loss: 0.18288134038448334      
dev accuracy:   1.0, loss: 0.06609082221984863      
dev accuracy:   1.0, loss: 0.17860618233680725      
dev accuracy: 0.875, loss: 0.16078117489814758      
dev accuracy: 0.9375, loss: 0.0940917432308197       
dev accuracy: 0.9375, loss: 0.31914740800857544      
dev accuracy: 0.9375, loss: 0.06139940023422241      
dev accuracy:   1.0, loss: 0.014971762895584106     
dev accuracy: 0.9375, loss: 0.21491451561450958      
dev accuracy: 0.9375, loss: 0.12844493985176086      
dev accuracy: 0.9375, loss: 0.31803175806999207      
dev accuracy:   1.0, loss: 0.006569623947143555     
dev accuracy:   1.0, loss: 0.007019162178039551     
dev accuracy: 0.9375, loss: 0.17475533485412598      
dev accuracy: 0.9375, loss: 0.10012134909629822      
dev accuracy:   1.0, loss: 0.008364617824554443     
dev accuracy: 0.9375, loss: 0.16497746109962463      
dev accuracy:   1.0, loss: 0.0054486095905303955    
dev accuracy: 0.875, loss: 0.52998673

train step #3890 accuracy: 0.9375, loss: 0.23084388673305511      
train step #3891 accuracy: 0.953125, loss: 0.2129623293876648       
train step #3892 accuracy: 0.984375, loss: 0.10155680775642395      
train step #3893 accuracy: 0.953125, loss: 0.12015719711780548      
train step #3894 accuracy: 0.9375, loss: 0.12934455275535583      
train step #3895 accuracy: 0.9375, loss: 0.28374606370925903      
train step #3896 accuracy: 0.96875, loss: 0.1402401477098465       
train step #3897 accuracy: 0.953125, loss: 0.08881061524152756      
train step #3898 accuracy: 0.9375, loss: 0.20694144070148468      
train step #3899 accuracy: 0.96875, loss: 0.1320151388645172       
train step #3900 accuracy: 0.9375, loss: 0.15320779383182526      
train step #3901 accuracy: 0.9375, loss: 0.24103385210037231      
train step #3902 accuracy:   1.0, loss: 0.05110858008265495      
train step #3903 accuracy: 0.953125, loss: 0.1720532774925232       
train step #3904 accuracy: 0.96875, loss: 0.0615991

train step #4011 accuracy:   1.0, loss: 0.016498588025569916     
train step #4012 accuracy: 0.953125, loss: 0.10763601958751678      
train step #4013 accuracy:   1.0, loss: 0.027046486735343933     
train step #4014 accuracy: 0.9375, loss: 0.2575177848339081       
train step #4015 accuracy: 0.984375, loss: 0.1356489658355713       
train step #4016 accuracy: 0.953125, loss: 0.19052153825759888      
train step #4017 accuracy: 0.921875, loss: 0.2625390291213989       
train step #4018 accuracy: 0.921875, loss: 0.27755987644195557      
train step #4019 accuracy:   1.0, loss: 0.06873574107885361      
train step #4020 accuracy: 0.984375, loss: 0.056685470044612885     
train step #4021 accuracy: 0.9375, loss: 0.22406920790672302      
train step #4022 accuracy: 0.9375, loss: 0.13431400060653687      
train step #4023 accuracy:   1.0, loss: 0.025258749723434448     
train step #4024 accuracy: 0.96875, loss: 0.15116623044013977      
train step #4025 accuracy: 0.984375, loss: 0.08390263

train step #4132 accuracy: 0.96875, loss: 0.10316977649927139      
train step #4133 accuracy: 0.953125, loss: 0.15889085829257965      
train step #4134 accuracy:   1.0, loss: 0.03855238854885101      
train step #4135 accuracy: 0.96875, loss: 0.1768135279417038       
train step #4136 accuracy: 0.96875, loss: 0.1777358204126358       
train step #4137 accuracy: 0.953125, loss: 0.12787675857543945      
train step #4138 accuracy: 0.96875, loss: 0.16748695075511932      
train step #4139 accuracy: 0.96875, loss: 0.13095518946647644      
train step #4140 accuracy: 0.96875, loss: 0.12833863496780396      
train step #4141 accuracy: 0.984375, loss: 0.0919640064239502       
train step #4142 accuracy: 0.9375, loss: 0.3194414973258972       
train step #4143 accuracy: 0.90625, loss: 0.39945507049560547      
train step #4144 accuracy: 0.984375, loss: 0.09355638921260834      
train step #4145 accuracy:   1.0, loss: 0.03234557807445526      
train step #4146 accuracy: 0.953125, loss: 0.1126

dev accuracy:   1.0, loss: 0.08565083146095276      
dev accuracy:   1.0, loss: 0.01144835352897644      
dev accuracy:   1.0, loss: 0.039845943450927734     
dev accuracy: 0.875, loss: 0.25061389803886414      
dev accuracy: 0.9375, loss: 0.12579280138015747      
dev accuracy: 0.9375, loss: 0.2347492128610611       
dev accuracy: 0.875, loss: 0.2241573929786682       
dev accuracy:   1.0, loss: 0.024410516023635864     
dev accuracy:   1.0, loss: 0.0018117427825927734    
dev accuracy: 0.875, loss: 0.37643975019454956      
dev accuracy:   1.0, loss: 0.09086428582668304      
dev accuracy:   1.0, loss: 0.061465173959732056     
dev accuracy:   1.0, loss: 0.0006180405616760254    
dev accuracy: 0.9375, loss: 0.41458895802497864      
dev accuracy: 0.9375, loss: 0.049928635358810425     
dev accuracy:   1.0, loss: 0.004205197095870972     
dev accuracy: 0.875, loss: 0.4799210727214813       
dev accuracy:   1.0, loss: 0.0008015632629394531    
dev accuracy: 0.875, loss: 0.3087798357009

train step #4221 accuracy: 0.96875, loss: 0.13511136174201965      
train step #4222 accuracy: 0.953125, loss: 0.15373186767101288      
train step #4223 accuracy:   1.0, loss: 0.02854049950838089      
train step #4224 accuracy: 0.984375, loss: 0.10303758829832077      
train step #4225 accuracy: 0.953125, loss: 0.14816182851791382      
train step #4226 accuracy: 0.984375, loss: 0.08871474862098694      
train step #4227 accuracy: 0.984375, loss: 0.07324448227882385      
train step #4228 accuracy: 0.96875, loss: 0.13076649606227875      
train step #4229 accuracy:   1.0, loss: 0.037475332617759705     
train step #4230 accuracy: 0.984375, loss: 0.10806995630264282      
train step #4231 accuracy: 0.953125, loss: 0.2838534116744995       
train step #4232 accuracy: 0.921875, loss: 0.24930573999881744      
train step #4233 accuracy: 0.984375, loss: 0.10063383728265762      
train step #4234 accuracy: 0.984375, loss: 0.07940340787172318      
train step #4235 accuracy: 0.921875, loss:

train step #4342 accuracy: 0.984375, loss: 0.11534465104341507      
train step #4343 accuracy: 0.90625, loss: 0.2756722569465637       
train step #4344 accuracy: 0.96875, loss: 0.07786349952220917      
train step #4345 accuracy: 0.96875, loss: 0.10235245525836945      
train step #4346 accuracy: 0.953125, loss: 0.21530696749687195      
train step #4347 accuracy: 0.984375, loss: 0.1130194142460823       
train step #4348 accuracy: 0.9375, loss: 0.16303062438964844      
train step #4349 accuracy: 0.96875, loss: 0.22909638285636902      
train step #4350 accuracy: 0.96875, loss: 0.13588756322860718      
train step #4351 accuracy: 0.96875, loss: 0.1714959442615509       
train step #4352 accuracy: 0.890625, loss: 0.27572616934776306      
train step #4353 accuracy: 0.953125, loss: 0.11454074829816818      
train step #4354 accuracy: 0.984375, loss: 0.041977658867836        
train step #4355 accuracy: 0.953125, loss: 0.1163717731833458       
train step #4356 accuracy: 0.984375, loss:

train step #4463 accuracy: 0.9375, loss: 0.262202650308609        
train step #4464 accuracy: 0.96875, loss: 0.12368176877498627      
train step #4465 accuracy: 0.984375, loss: 0.11516565829515457      
train step #4466 accuracy: 0.984375, loss: 0.09885182976722717      
train step #4467 accuracy: 0.984375, loss: 0.09380660951137543      
train step #4468 accuracy:   1.0, loss: 0.038348518311977386     
train step #4469 accuracy: 0.875, loss: 0.3906032145023346       
train step #4470 accuracy: 0.921875, loss: 0.18839436769485474      
train step #4471 accuracy: 0.984375, loss: 0.056465812027454376     
train step #4472 accuracy: 0.953125, loss: 0.1728716641664505       
train step #4473 accuracy:   1.0, loss: 0.02602892369031906      
train step #4474 accuracy: 0.953125, loss: 0.1303713023662567       
train step #4475 accuracy: 0.9375, loss: 0.2894901931285858       
train step #4476 accuracy: 0.890625, loss: 0.32206398248672485      
train step #4477 accuracy: 0.984375, loss: 0.088

dev accuracy: 0.875, loss: 0.6402831673622131       
dev accuracy:   1.0, loss: 0.06814226508140564      
dev accuracy: 0.9375, loss: 0.10582776367664337      
dev accuracy: 0.875, loss: 0.5743741393089294       
dev accuracy:   1.0, loss: 0.0005437135696411133    
dev accuracy: 0.9375, loss: 0.13516274094581604      
dev accuracy:   1.0, loss: 0.03757941722869873      
dev accuracy:   1.0, loss: 0.008365929126739502     
dev accuracy: 0.9375, loss: 0.32847633957862854      
dev accuracy: 0.9375, loss: 0.18961769342422485      
dev accuracy:   1.0, loss: 0.004434704780578613     
dev accuracy:   1.0, loss: 0.04294055700302124      
dev accuracy:   1.0, loss: 0.015132725238800049     
dev accuracy: 0.875, loss: 0.8960036039352417       
dev accuracy:   1.0, loss: 0.05061286687850952      
dev accuracy:   1.0, loss: 0.0014161467552185059    
dev accuracy: 0.9375, loss: 0.08415251970291138      
dev accuracy: 0.8125, loss: 0.5704669952392578       
dev accuracy:   1.0, loss: 0.01120659708

train step #4551 accuracy: 0.921875, loss: 0.25060340762138367      
train step #4552 accuracy: 0.984375, loss: 0.10730676352977753      
train step #4553 accuracy: 0.90625, loss: 0.26683443784713745      
train step #4554 accuracy: 0.96875, loss: 0.122425377368927        
train step #4555 accuracy: 0.96875, loss: 0.12211279571056366      
train step #4556 accuracy:   1.0, loss: 0.017552442848682404     
train step #4557 accuracy: 0.9375, loss: 0.19964180886745453      
train step #4558 accuracy: 0.96875, loss: 0.12392466515302658      
train step #4559 accuracy: 0.96875, loss: 0.0942886546254158       
train step #4560 accuracy: 0.9375, loss: 0.3219955265522003       
train step #4561 accuracy: 0.984375, loss: 0.08911438286304474      
train step #4562 accuracy: 0.953125, loss: 0.20620781183242798      
train step #4563 accuracy: 0.96875, loss: 0.16219907999038696      
train step #4564 accuracy: 0.984375, loss: 0.050460249185562134     
train step #4565 accuracy: 0.984375, loss: 0.07

train step #4672 accuracy: 0.96875, loss: 0.11495474725961685      
train step #4673 accuracy: 0.96875, loss: 0.0690605565905571       
train step #4674 accuracy: 0.96875, loss: 0.16801762580871582      
train step #4675 accuracy: 0.921875, loss: 0.2716233432292938       
train step #4676 accuracy: 0.984375, loss: 0.11955061554908752      
train step #4677 accuracy: 0.953125, loss: 0.22054405510425568      
train step #4678 accuracy: 0.984375, loss: 0.06388431787490845      
train step #4679 accuracy: 0.9375, loss: 0.25976914167404175      
train step #4680 accuracy: 0.9375, loss: 0.1251388043165207       
train step #4681 accuracy: 0.96875, loss: 0.11435248702764511      
train step #4682 accuracy: 0.984375, loss: 0.06993713229894638      
train step #4683 accuracy: 0.96875, loss: 0.12441820651292801      
train step #4684 accuracy: 0.984375, loss: 0.042651474475860596     
train step #4685 accuracy: 0.953125, loss: 0.16664625704288483      
train step #4686 accuracy: 0.921875, loss: 

train step #4793 accuracy: 0.953125, loss: 0.2072426825761795       
train step #4794 accuracy: 0.96875, loss: 0.09126567095518112      
train step #4795 accuracy: 0.9375, loss: 0.151482492685318        
train step #4796 accuracy:   1.0, loss: 0.048066750168800354     
train step #4797 accuracy: 0.96875, loss: 0.10850045830011368      
train step #4798 accuracy: 0.96875, loss: 0.10562703758478165      
train step #4799 accuracy: 0.984375, loss: 0.09084809571504593      
train step #4800 accuracy: 0.953125, loss: 0.2897529900074005       
train step #4801 accuracy: 0.9375, loss: 0.11354444175958633      
train step #4802 accuracy: 0.96875, loss: 0.11543406546115875      
train step #4803 accuracy:   1.0, loss: 0.03245566040277481      
train step #4804 accuracy: 0.953125, loss: 0.17760008573532104      
train step #4805 accuracy: 0.96875, loss: 0.12288714945316315      
train step #4806 accuracy: 0.96875, loss: 0.1777021288871765       
train step #4807 accuracy: 0.96875, loss: 0.050034

dev accuracy:   1.0, loss: 0.03830835223197937      
dev accuracy:   1.0, loss: 0.048653244972229004     
dev accuracy: 0.9375, loss: 0.22789211571216583      
dev accuracy:   1.0, loss: 0.04924607276916504      
dev accuracy:   1.0, loss: 0.041128456592559814     
dev accuracy: 0.9375, loss: 0.12117272615432739      
dev accuracy:   1.0, loss: 0.012014150619506836     
dev accuracy: 0.9375, loss: 0.1530616730451584       
dev accuracy: 0.9375, loss: 0.38479799032211304      
dev accuracy: 0.875, loss: 0.29625123739242554      
dev accuracy:   1.0, loss: 0.0061910152435302734    
dev accuracy:   1.0, loss: 0.004793226718902588     
dev accuracy: 0.875, loss: 0.3442337214946747       
dev accuracy:   1.0, loss: 0.003226727247238159     
dev accuracy: 0.875, loss: 0.16640815138816833      
dev accuracy:   1.0, loss: 0.011220663785934448     
dev accuracy: 0.8125, loss: 0.7238321900367737       
dev accuracy: 0.9375, loss: 0.13856077194213867      
dev accuracy: 0.875, loss: 0.28282934427

train step #4883 accuracy: 0.96875, loss: 0.12377788126468658      
train step #4884 accuracy: 0.953125, loss: 0.20185969769954681      
train step #4885 accuracy: 0.96875, loss: 0.1778212934732437       
train step #4886 accuracy: 0.96875, loss: 0.13668867945671082      
train step #4887 accuracy: 0.96875, loss: 0.2138916403055191       
train step #4888 accuracy: 0.96875, loss: 0.12117060273885727      
train step #4889 accuracy:   1.0, loss: 0.03776610642671585      
train step #4890 accuracy: 0.9375, loss: 0.17093728482723236      
train step #4891 accuracy: 0.9375, loss: 0.2386469542980194       
train step #4892 accuracy: 0.96875, loss: 0.11843361705541611      
train step #4893 accuracy: 0.96875, loss: 0.09457066655158997      
train step #4894 accuracy: 0.96875, loss: 0.09889949858188629      
train step #4895 accuracy: 0.96875, loss: 0.10378357023000717      
train step #4896 accuracy: 0.953125, loss: 0.140694260597229        
train step #4897 accuracy:   1.0, loss: 0.04864243

train step #5003 accuracy: 0.953125, loss: 0.09963131695985794      
train step #5004 accuracy: 0.9375, loss: 0.14836150407791138      
train step #5005 accuracy: 0.96875, loss: 0.17721664905548096      
train step #5006 accuracy: 0.96875, loss: 0.11254820972681046      
train step #5007 accuracy: 0.953125, loss: 0.18470820784568787      
train step #5008 accuracy: 0.96875, loss: 0.1697302907705307       
train step #5009 accuracy: 0.96875, loss: 0.17338815331459045      
train step #5010 accuracy: 0.984375, loss: 0.07731717824935913      
train step #5011 accuracy:   1.0, loss: 0.04576132446527481      
train step #5012 accuracy: 0.984375, loss: 0.047980379313230515     
train step #5013 accuracy: 0.96875, loss: 0.08840036392211914      
train step #5014 accuracy: 0.984375, loss: 0.07353799790143967      
train step #5015 accuracy: 0.953125, loss: 0.2989013195037842       
train step #5016 accuracy: 0.984375, loss: 0.10911531746387482      
train step #5017 accuracy: 0.984375, loss: 0

train step #5124 accuracy: 0.96875, loss: 0.09036459773778915      
train step #5125 accuracy: 0.953125, loss: 0.19660677015781403      
train step #5126 accuracy: 0.96875, loss: 0.13431183993816376      
train step #5127 accuracy: 0.984375, loss: 0.13915768265724182      
train step #5128 accuracy:   1.0, loss: 0.026291411370038986     
train step #5129 accuracy: 0.9375, loss: 0.14962482452392578      
train step #5130 accuracy: 0.953125, loss: 0.17430861294269562      
train step #5131 accuracy: 0.984375, loss: 0.06634679436683655      
train step #5132 accuracy:   1.0, loss: 0.04925929382443428      
train step #5133 accuracy: 0.984375, loss: 0.06909380853176117      
train step #5134 accuracy: 0.953125, loss: 0.12394096702337265      
train step #5135 accuracy: 0.984375, loss: 0.041738688945770264     
train step #5136 accuracy: 0.9375, loss: 0.21106301248073578      
train step #5137 accuracy: 0.921875, loss: 0.23751144111156464      
train step #5138 accuracy: 0.9375, loss: 0.142

dev accuracy: 0.875, loss: 0.28015613555908203      
dev accuracy: 0.875, loss: 0.2977180480957031       
dev accuracy: 0.9375, loss: 0.14497771859169006      
dev accuracy:   1.0, loss: 0.007581353187561035     
dev accuracy: 0.9375, loss: 0.13477560877799988      
dev accuracy:   1.0, loss: 0.004276752471923828     
dev accuracy:   1.0, loss: 0.05232959985733032      
dev accuracy: 0.9375, loss: 0.051752716302871704     
dev accuracy: 0.875, loss: 0.21665021777153015      
dev accuracy:   1.0, loss: 0.01994583010673523      
dev accuracy: 0.875, loss: 0.4927634000778198       
dev accuracy:   1.0, loss: 0.012760430574417114     
dev accuracy: 0.9375, loss: 0.13471314311027527      
dev accuracy: 0.9375, loss: 0.1931338906288147       
dev accuracy:   1.0, loss: 0.002848803997039795     
dev accuracy: 0.875, loss: 0.321913480758667        
dev accuracy: 0.9375, loss: 0.21730878949165344      
dev accuracy:   1.0, loss: 0.046158552169799805     
dev accuracy:   1.0, loss: 0.01837021112

train step #5212 accuracy: 0.96875, loss: 0.11471433937549591      
train step #5213 accuracy: 0.90625, loss: 0.22877031564712524      
train step #5214 accuracy: 0.9375, loss: 0.2189374417066574       
train step #5215 accuracy: 0.921875, loss: 0.23160859942436218      
train step #5216 accuracy:   1.0, loss: 0.04235924407839775      
train step #5217 accuracy: 0.9375, loss: 0.18681472539901733      
train step #5218 accuracy: 0.953125, loss: 0.14859679341316223      
train step #5219 accuracy: 0.9375, loss: 0.2092306911945343       
train step #5220 accuracy: 0.984375, loss: 0.0413106344640255       
train step #5221 accuracy: 0.953125, loss: 0.20264551043510437      
train step #5222 accuracy: 0.953125, loss: 0.17222054302692413      
train step #5223 accuracy: 0.953125, loss: 0.11801742017269135      
train step #5224 accuracy:   1.0, loss: 0.027958832681179047     
train step #5225 accuracy: 0.953125, loss: 0.18798434734344482      
train step #5226 accuracy: 0.96875, loss: 0.0976

train step #5332 accuracy: 0.953125, loss: 0.16336661577224731      
train step #5333 accuracy: 0.953125, loss: 0.13188232481479645      
train step #5334 accuracy: 0.9375, loss: 0.24321550130844116      
train step #5335 accuracy:   1.0, loss: 0.09022980183362961      
train step #5336 accuracy: 0.9375, loss: 0.1258690357208252       
train step #5337 accuracy: 0.9375, loss: 0.20279407501220703      
train step #5338 accuracy: 0.953125, loss: 0.35095077753067017      
train step #5339 accuracy: 0.953125, loss: 0.19798791408538818      
train step #5340 accuracy: 0.96875, loss: 0.09318052977323532      
train step #5341 accuracy:   1.0, loss: 0.029455646872520447     
train step #5342 accuracy: 0.9375, loss: 0.2587393820285797       
train step #5343 accuracy: 0.953125, loss: 0.14546769857406616      
train step #5344 accuracy:   1.0, loss: 0.026381880044937134     
train step #5345 accuracy: 0.984375, loss: 0.051442138850688934     
train step #5346 accuracy:   1.0, loss: 0.0460480116

train step #5453 accuracy: 0.96875, loss: 0.16853326559066772      
train step #5454 accuracy: 0.953125, loss: 0.18540948629379272      
train step #5455 accuracy: 0.984375, loss: 0.06751817464828491      
train step #5456 accuracy: 0.96875, loss: 0.09102292358875275      
train step #5457 accuracy: 0.96875, loss: 0.0881003588438034       
train step #5458 accuracy: 0.953125, loss: 0.10960527509450912      
train step #5459 accuracy: 0.96875, loss: 0.16270852088928223      
train step #5460 accuracy: 0.953125, loss: 0.1646304726600647       
train step #5461 accuracy: 0.96875, loss: 0.1478610783815384       
train step #5462 accuracy: 0.953125, loss: 0.12201903015375137      
train step #5463 accuracy: 0.96875, loss: 0.12625117599964142      
train step #5464 accuracy: 0.96875, loss: 0.22903941571712494      
train step #5465 accuracy: 0.96875, loss: 0.14298617839813232      
train step #5466 accuracy: 0.96875, loss: 0.16065990924835205      
train step #5467 accuracy: 0.90625, loss: 0

dev accuracy:   1.0, loss: 0.00666472315788269      
dev accuracy:   1.0, loss: 0.013289839029312134     
dev accuracy:   1.0, loss: 0.0064723193645477295    
dev accuracy: 0.9375, loss: 0.10335728526115417      
dev accuracy: 0.9375, loss: 0.17077291011810303      
dev accuracy: 0.9375, loss: 0.1951839029788971       
dev accuracy: 0.9375, loss: 0.2760511040687561       
dev accuracy: 0.875, loss: 0.7465400695800781       
dev accuracy: 0.875, loss: 0.36847782135009766      
dev accuracy: 0.9375, loss: 0.13076052069664001      
dev accuracy:   1.0, loss: 0.00464966893196106      
dev accuracy:   1.0, loss: 0.011913150548934937     
dev accuracy: 0.9375, loss: 0.5311250686645508       
dev accuracy:   1.0, loss: 0.0036867260932922363    
dev accuracy: 0.9375, loss: 0.16135790944099426      
dev accuracy: 0.8125, loss: 0.7712477445602417       
dev accuracy:   1.0, loss: 0.04744940996170044      
dev accuracy:   1.0, loss: 0.08154013752937317      
dev accuracy: 0.9375, loss: 0.23186886

dev accuracy:   1.0, loss: 0.026924967765808105     
dev accuracy:   1.0, loss: 0.012370914220809937     
dev accuracy:   1.0, loss: 0.12029567360877991      
dev accuracy: 0.9375, loss: 0.0912696123123169       
dev accuracy: 0.875, loss: 0.3876596987247467       
dev accuracy:   1.0, loss: 0.02726636826992035      
dev accuracy:   1.0, loss: 0.03758569061756134      
dev accuracy:   1.0, loss: 0.013623058795928955     
dev accuracy: 0.875, loss: 0.46620887517929077      
dev accuracy: 0.9375, loss: 0.20789484679698944      
dev accuracy: 0.9375, loss: 0.06753131747245789      
dev accuracy:   1.0, loss: 0.00046253204345703125   
final dev accuracy: 0.9510309278350515
train step #5553 accuracy: 0.9375, loss: 0.15859197080135345      
train step #5554 accuracy:   1.0, loss: 0.033583786338567734     
train step #5555 accuracy: 0.9375, loss: 0.16252782940864563      
train step #5556 accuracy: 0.984375, loss: 0.06792927533388138      
train step #5557 accuracy: 0.984375, loss: 0.05077045

train step #5664 accuracy: 0.96875, loss: 0.161833256483078        
train step #5665 accuracy: 0.96875, loss: 0.1557353436946869       
train step #5666 accuracy: 0.953125, loss: 0.16538596153259277      
train step #5667 accuracy: 0.984375, loss: 0.0559745654463768       
train step #5668 accuracy: 0.96875, loss: 0.10187430679798126      
train step #5669 accuracy: 0.984375, loss: 0.04988178238272667      
train step #5670 accuracy: 0.875, loss: 0.323489785194397        
train step #5671 accuracy:   1.0, loss: 0.030677393078804016     
train step #5672 accuracy: 0.921875, loss: 0.2678929567337036       
train step #5673 accuracy: 0.953125, loss: 0.11786240339279175      
train step #5674 accuracy: 0.96875, loss: 0.11249373853206635      
train step #5675 accuracy: 0.953125, loss: 0.09299282729625702      
train step #5676 accuracy: 0.9375, loss: 0.22160236537456512      
train step #5677 accuracy: 0.984375, loss: 0.05126003921031952      
train step #5678 accuracy: 0.96875, loss: 0.13

train step #5785 accuracy: 0.96875, loss: 0.10516121983528137      
train step #5786 accuracy: 0.921875, loss: 0.2953091859817505       
train step #5787 accuracy: 0.953125, loss: 0.1546696573495865       
train step #5788 accuracy: 0.984375, loss: 0.10949595272541046      
train step #5789 accuracy: 0.9375, loss: 0.15404930710792542      
train step #5790 accuracy: 0.984375, loss: 0.12595218420028687      
train step #5791 accuracy:   1.0, loss: 0.05310835689306259      
train step #5792 accuracy:   1.0, loss: 0.036905501037836075     
train step #5793 accuracy: 0.953125, loss: 0.17823953926563263      
train step #5794 accuracy: 0.96875, loss: 0.13162349164485931      
train step #5795 accuracy: 0.953125, loss: 0.11189203709363937      
train step #5796 accuracy: 0.984375, loss: 0.09589673578739166      
train step #5797 accuracy: 0.984375, loss: 0.03939260542392731      
train step #5798 accuracy:   1.0, loss: 0.026010222733020782     
train step #5799 accuracy: 0.953125, loss: 0.17

dev accuracy: 0.8125, loss: 0.3682008385658264       
dev accuracy:   1.0, loss: 0.0635019838809967       
dev accuracy: 0.9375, loss: 0.18562525510787964      
dev accuracy:   1.0, loss: 0.04396381974220276      
dev accuracy: 0.875, loss: 0.16721628606319427      
dev accuracy: 0.9375, loss: 0.1687123030424118       
dev accuracy: 0.9375, loss: 0.3093530833721161       
dev accuracy: 0.9375, loss: 0.20277021825313568      
dev accuracy: 0.9375, loss: 0.05707278847694397      
dev accuracy: 0.9375, loss: 0.18749459087848663      
dev accuracy: 0.875, loss: 0.3450584411621094       
dev accuracy:   1.0, loss: 0.022301852703094482     
dev accuracy:   1.0, loss: 0.011204510927200317     
dev accuracy: 0.9375, loss: 0.3591509759426117       
dev accuracy:   1.0, loss: 0.03277894854545593      
dev accuracy:   1.0, loss: 0.0046446919441223145    
dev accuracy:   1.0, loss: 0.015978634357452393     
dev accuracy:   1.0, loss: 0.0010006427764892578    
dev accuracy:   1.0, loss: 0.002697348

dev accuracy: 0.9375, loss: 0.19277344644069672      
dev accuracy:   1.0, loss: 0.037126749753952026     
dev accuracy:   1.0, loss: 0.05552339553833008      
dev accuracy:   1.0, loss: 0.010006368160247803     
dev accuracy: 0.875, loss: 0.6096985936164856       
dev accuracy:   1.0, loss: 0.08956032991409302      
dev accuracy: 0.875, loss: 0.23545362055301666      
dev accuracy: 0.9375, loss: 0.0656270682811737       
dev accuracy:   1.0, loss: 0.006414473056793213     
dev accuracy:   1.0, loss: 0.004535973072052002     
dev accuracy: 0.9375, loss: 0.2138020545244217       
dev accuracy: 0.875, loss: 0.16188926994800568      
dev accuracy: 0.875, loss: 0.48025253415107727      
dev accuracy:   1.0, loss: 0.006284236907958984     
dev accuracy: 0.9375, loss: 0.478180855512619        
dev accuracy:   1.0, loss: 0.07928770780563354      
dev accuracy:   1.0, loss: 0.008270442485809326     
dev accuracy: 0.9375, loss: 0.08214619755744934      
dev accuracy: 0.875, loss: 0.550263583660

train step #5995 accuracy: 0.953125, loss: 0.15958593785762787      
train step #5996 accuracy: 0.9375, loss: 0.14382441341876984      
train step #5997 accuracy: 0.9375, loss: 0.17872992157936096      
train step #5998 accuracy: 0.9375, loss: 0.406980037689209        
train step #5999 accuracy: 0.953125, loss: 0.20463746786117554      
train step #6000 accuracy: 0.96875, loss: 0.15600769221782684      
changing learning rate to 0.001
train step #6001 accuracy: 0.984375, loss: 0.06783555448055267      
train step #6002 accuracy: 0.984375, loss: 0.03793018311262131      
train step #6003 accuracy: 0.953125, loss: 0.20075950026512146      
train step #6004 accuracy: 0.96875, loss: 0.14176851511001587      
train step #6005 accuracy: 0.984375, loss: 0.07213695347309113      
train step #6006 accuracy: 0.9375, loss: 0.3469787836074829       
train step #6007 accuracy: 0.96875, loss: 0.10817914456129074      
train step #6008 accuracy: 0.984375, loss: 0.08231132477521896      
train step #6

train step #6115 accuracy: 0.9375, loss: 0.2632633447647095       
train step #6116 accuracy: 0.96875, loss: 0.08344903588294983      
train step #6117 accuracy: 0.984375, loss: 0.07458707690238953      
train step #6118 accuracy: 0.984375, loss: 0.09035255014896393      
train step #6119 accuracy: 0.96875, loss: 0.09990941733121872      
train step #6120 accuracy:   1.0, loss: 0.025484688580036163     
train step #6121 accuracy: 0.984375, loss: 0.05977160856127739      
train step #6122 accuracy: 0.90625, loss: 0.2839199900627136       
train step #6123 accuracy:   1.0, loss: 0.05026429519057274      
train step #6124 accuracy: 0.96875, loss: 0.07528889179229736      
train step #6125 accuracy: 0.953125, loss: 0.19331786036491394      
train step #6126 accuracy: 0.953125, loss: 0.20961910486221313      
train step #6127 accuracy: 0.984375, loss: 0.08748769015073776      
train step #6128 accuracy:   1.0, loss: 0.031339287757873535     
train step #6129 accuracy: 0.96875, loss: 0.06762

train step #6236 accuracy: 0.96875, loss: 0.1077486053109169       
train step #6237 accuracy: 0.96875, loss: 0.09408771991729736      
train step #6238 accuracy: 0.96875, loss: 0.15129512548446655      
train step #6239 accuracy: 0.953125, loss: 0.29725179076194763      
train step #6240 accuracy: 0.953125, loss: 0.18939171731472015      
train step #6241 accuracy: 0.953125, loss: 0.25305885076522827      
train step #6242 accuracy: 0.984375, loss: 0.059478722512722015     
train step #6243 accuracy: 0.96875, loss: 0.16162839531898499      
train step #6244 accuracy: 0.9375, loss: 0.1911977380514145       
train step #6245 accuracy: 0.984375, loss: 0.10133080184459686      
train step #6246 accuracy: 0.984375, loss: 0.052407875657081604     
dev accuracy:   1.0, loss: 0.011846780776977539     
dev accuracy:   1.0, loss: 0.02322348952293396      
dev accuracy:   1.0, loss: 0.10081607103347778      
dev accuracy:   1.0, loss: 0.002763509750366211     
dev accuracy:   1.0, loss: 0.029685

dev accuracy:   1.0, loss: 0.020976901054382324     
dev accuracy:   1.0, loss: 0.0031508207321166992    
dev accuracy: 0.875, loss: 0.36079853773117065      
dev accuracy:   1.0, loss: 0.07528644800186157      
dev accuracy: 0.9375, loss: 0.20121556520462036      
dev accuracy:   1.0, loss: 0.014987021684646606     
dev accuracy: 0.9375, loss: 0.12093770503997803      
dev accuracy: 0.9375, loss: 0.22082066535949707      
dev accuracy:   1.0, loss: 0.04633456468582153      
dev accuracy: 0.9375, loss: 0.10823707282543182      
dev accuracy: 0.9375, loss: 0.20745517313480377      
dev accuracy:   1.0, loss: 0.04125630855560303      
dev accuracy: 0.9375, loss: 0.4099663496017456       
dev accuracy: 0.875, loss: 0.444250226020813        
dev accuracy:   1.0, loss: 0.0007790327072143555    
dev accuracy: 0.9375, loss: 0.28085747361183167      
dev accuracy:   1.0, loss: 0.02799972891807556      
dev accuracy: 0.9375, loss: 0.1417424976825714       
dev accuracy: 0.9375, loss: 0.41811293

train step #6326 accuracy: 0.96875, loss: 0.09607339650392532      
train step #6327 accuracy: 0.984375, loss: 0.10323821008205414      
train step #6328 accuracy:   1.0, loss: 0.09502073377370834      
train step #6329 accuracy: 0.96875, loss: 0.16318300366401672      
train step #6330 accuracy: 0.921875, loss: 0.44760221242904663      
train step #6331 accuracy: 0.96875, loss: 0.1769443154335022       
train step #6332 accuracy: 0.96875, loss: 0.15373018383979797      
train step #6333 accuracy: 0.96875, loss: 0.16089241206645966      
train step #6334 accuracy: 0.984375, loss: 0.07982960343360901      
train step #6335 accuracy: 0.921875, loss: 0.1903121918439865       
train step #6336 accuracy: 0.921875, loss: 0.37193506956100464      
train step #6337 accuracy: 0.9375, loss: 0.1957014799118042       
train step #6338 accuracy:   1.0, loss: 0.040623944252729416     
train step #6339 accuracy: 0.96875, loss: 0.06767265498638153      
train step #6340 accuracy: 0.953125, loss: 0.224

train step #6447 accuracy: 0.984375, loss: 0.09388647973537445      
train step #6448 accuracy: 0.921875, loss: 0.29735398292541504      
train step #6449 accuracy: 0.984375, loss: 0.05346309766173363      
train step #6450 accuracy: 0.984375, loss: 0.09137376397848129      
train step #6451 accuracy: 0.96875, loss: 0.07880236208438873      
train step #6452 accuracy: 0.9375, loss: 0.16979797184467316      
train step #6453 accuracy: 0.96875, loss: 0.18083854019641876      
train step #6454 accuracy:   1.0, loss: 0.03757799044251442      
train step #6455 accuracy: 0.984375, loss: 0.09258288890123367      
train step #6456 accuracy: 0.96875, loss: 0.12940403819084167      
train step #6457 accuracy: 0.90625, loss: 0.2719402015209198       
train step #6458 accuracy: 0.96875, loss: 0.13919295370578766      
train step #6459 accuracy: 0.96875, loss: 0.10837845504283905      
train step #6460 accuracy: 0.953125, loss: 0.12520022690296173      
train step #6461 accuracy: 0.953125, loss: 0.

train step #6568 accuracy: 0.984375, loss: 0.09457144886255264      
train step #6569 accuracy: 0.953125, loss: 0.14373400807380676      
train step #6570 accuracy: 0.953125, loss: 0.17070282995700836      
train step #6571 accuracy: 0.953125, loss: 0.13005605340003967      
train step #6572 accuracy: 0.9375, loss: 0.29367583990097046      
train step #6573 accuracy: 0.921875, loss: 0.21920637786388397      
train step #6574 accuracy: 0.84375, loss: 0.40079373121261597      
train step #6575 accuracy: 0.9375, loss: 0.15143096446990967      
train step #6576 accuracy: 0.890625, loss: 0.3660019040107727       
train step #6577 accuracy: 0.953125, loss: 0.15351037681102753      
train step #6578 accuracy: 0.953125, loss: 0.15197773277759552      
train step #6579 accuracy: 0.96875, loss: 0.20164451003074646      
train step #6580 accuracy: 0.921875, loss: 0.2429141104221344       
train step #6581 accuracy: 0.921875, loss: 0.16050437092781067      
train step #6582 accuracy: 0.96875, loss

dev accuracy:   1.0, loss: 0.03763793408870697      
dev accuracy: 0.9375, loss: 0.24896849691867828      
dev accuracy:   1.0, loss: 0.02562454342842102      
dev accuracy: 0.9375, loss: 0.326870322227478        
dev accuracy: 0.9375, loss: 0.267590194940567        
dev accuracy:   1.0, loss: 0.0008026361465454102    
dev accuracy: 0.9375, loss: 0.16791054606437683      
dev accuracy:   1.0, loss: 0.011387228965759277     
dev accuracy: 0.9375, loss: 0.22585025429725647      
dev accuracy:   1.0, loss: 0.07914823293685913      
dev accuracy: 0.9375, loss: 0.1901056170463562       
dev accuracy:   1.0, loss: 0.06975582242012024      
dev accuracy: 0.9375, loss: 0.09505512565374374      
dev accuracy: 0.875, loss: 0.5274302959442139       
dev accuracy: 0.9375, loss: 0.31090304255485535      
dev accuracy: 0.9375, loss: 0.11438348889350891      
dev accuracy:   1.0, loss: 0.0019497871398925781    
dev accuracy:   1.0, loss: 0.04524838924407959      
dev accuracy:   1.0, loss: 0.00367432

train step #6657 accuracy: 0.921875, loss: 0.23973879218101501      
train step #6658 accuracy: 0.953125, loss: 0.13934370875358582      
train step #6659 accuracy:   1.0, loss: 0.04211851954460144      
train step #6660 accuracy: 0.953125, loss: 0.12026058882474899      
train step #6661 accuracy: 0.984375, loss: 0.08143698424100876      
train step #6662 accuracy: 0.953125, loss: 0.18824708461761475      
train step #6663 accuracy: 0.90625, loss: 0.25512897968292236      
train step #6664 accuracy: 0.984375, loss: 0.07231242954730988      
train step #6665 accuracy: 0.984375, loss: 0.06441941857337952      
train step #6666 accuracy: 0.96875, loss: 0.19332215189933777      
train step #6667 accuracy: 0.96875, loss: 0.08152563124895096      
train step #6668 accuracy: 0.984375, loss: 0.07841363549232483      
train step #6669 accuracy: 0.96875, loss: 0.18120722472667694      
train step #6670 accuracy: 0.96875, loss: 0.07806355506181717      
train step #6671 accuracy: 0.953125, loss:

train step #6777 accuracy: 0.890625, loss: 0.34427666664123535      
train step #6778 accuracy: 0.984375, loss: 0.04061714559793472      
train step #6779 accuracy:   1.0, loss: 0.0486442968249321       
train step #6780 accuracy: 0.96875, loss: 0.12449666857719421      
train step #6781 accuracy: 0.96875, loss: 0.06807629764080048      
train step #6782 accuracy: 0.953125, loss: 0.11819697916507721      
train step #6783 accuracy: 0.984375, loss: 0.0877562090754509       
train step #6784 accuracy: 0.984375, loss: 0.06018584221601486      
train step #6785 accuracy: 0.9375, loss: 0.183604434132576        
train step #6786 accuracy: 0.984375, loss: 0.0891999751329422       
train step #6787 accuracy: 0.984375, loss: 0.09598958492279053      
train step #6788 accuracy: 0.953125, loss: 0.21969416737556458      
train step #6789 accuracy:   1.0, loss: 0.04807471111416817      
train step #6790 accuracy: 0.96875, loss: 0.08471186459064484      
train step #6791 accuracy: 0.953125, loss: 0.

train step #6898 accuracy: 0.984375, loss: 0.08051954209804535      
train step #6899 accuracy: 0.953125, loss: 0.24383467435836792      
train step #6900 accuracy: 0.953125, loss: 0.17925770580768585      
train step #6901 accuracy:   1.0, loss: 0.02008882164955139      
train step #6902 accuracy: 0.984375, loss: 0.056461308151483536     
train step #6903 accuracy: 0.9375, loss: 0.1440097838640213       
train step #6904 accuracy: 0.984375, loss: 0.0877528041601181       
train step #6905 accuracy: 0.984375, loss: 0.13986489176750183      
train step #6906 accuracy: 0.96875, loss: 0.15457861125469208      
train step #6907 accuracy: 0.953125, loss: 0.12869565188884735      
train step #6908 accuracy: 0.921875, loss: 0.34781739115715027      
train step #6909 accuracy: 0.953125, loss: 0.16820783913135529      
train step #6910 accuracy: 0.984375, loss: 0.10524725168943405      
train step #6911 accuracy: 0.96875, loss: 0.1134318858385086       
train step #6912 accuracy: 0.921875, loss

dev accuracy:   1.0, loss: 0.010660916566848755     
dev accuracy: 0.9375, loss: 0.2809474766254425       
dev accuracy:   1.0, loss: 0.00963371992111206      
dev accuracy: 0.9375, loss: 0.2648515999317169       
dev accuracy: 0.9375, loss: 0.20173980295658112      
dev accuracy: 0.875, loss: 0.670669674873352        
dev accuracy: 0.875, loss: 0.20135363936424255      
dev accuracy: 0.9375, loss: 0.09085169434547424      
dev accuracy:   1.0, loss: 0.10309787094593048      
dev accuracy: 0.9375, loss: 0.22090312838554382      
dev accuracy: 0.9375, loss: 0.08434462547302246      
dev accuracy: 0.9375, loss: 0.07388970255851746      
dev accuracy:  0.75, loss: 0.5026309490203857       
dev accuracy:   1.0, loss: 0.0034048259258270264    
dev accuracy: 0.875, loss: 0.3307146728038788       
dev accuracy:   1.0, loss: 0.0054558515548706055    
dev accuracy:   1.0, loss: 0.051929861307144165     
dev accuracy:   1.0, loss: 0.02108672261238098      
dev accuracy:   1.0, loss: 0.0213801264

train step #6988 accuracy: 0.96875, loss: 0.15578816831111908      
train step #6989 accuracy: 0.96875, loss: 0.11319506168365479      
train step #6990 accuracy: 0.984375, loss: 0.041628748178482056     
train step #6991 accuracy: 0.9375, loss: 0.24947942793369293      
train step #6992 accuracy: 0.96875, loss: 0.17526468634605408      
train step #6993 accuracy: 0.984375, loss: 0.15023601055145264      
train step #6994 accuracy: 0.984375, loss: 0.09663040935993195      
train step #6995 accuracy: 0.921875, loss: 0.22652767598628998      
train step #6996 accuracy:   1.0, loss: 0.009741045534610748     
train step #6997 accuracy: 0.96875, loss: 0.1306990534067154       
train step #6998 accuracy: 0.96875, loss: 0.14134153723716736      
train step #6999 accuracy: 0.9375, loss: 0.24144954979419708      
train step #7000 accuracy: 0.9375, loss: 0.19062936305999756      
train step #7001 accuracy: 0.953125, loss: 0.168755903840065        
train step #7002 accuracy: 0.953125, loss: 0.255

train step #7109 accuracy: 0.96875, loss: 0.14882586896419525      
train step #7110 accuracy: 0.96875, loss: 0.11358008533716202      
train step #7111 accuracy: 0.96875, loss: 0.15081371366977692      
train step #7112 accuracy:   1.0, loss: 0.02622693032026291      
train step #7113 accuracy: 0.96875, loss: 0.13481901586055756      
train step #7114 accuracy: 0.9375, loss: 0.16383515298366547      
train step #7115 accuracy: 0.96875, loss: 0.09131362289190292      
train step #7116 accuracy: 0.984375, loss: 0.059363290667533875     
train step #7117 accuracy: 0.984375, loss: 0.059499964118003845     
train step #7118 accuracy: 0.9375, loss: 0.28998279571533203      
train step #7119 accuracy: 0.9375, loss: 0.24834956228733063      
train step #7120 accuracy: 0.984375, loss: 0.08453995734453201      
train step #7121 accuracy: 0.90625, loss: 0.33665284514427185      
train step #7122 accuracy: 0.96875, loss: 0.11928164213895798      
train step #7123 accuracy: 0.96875, loss: 0.058557

train step #7230 accuracy: 0.875, loss: 0.3470655679702759       
train step #7231 accuracy: 0.953125, loss: 0.17444546520709991      
train step #7232 accuracy: 0.953125, loss: 0.3073539435863495       
train step #7233 accuracy: 0.96875, loss: 0.10739441215991974      
train step #7234 accuracy: 0.984375, loss: 0.04014206677675247      
train step #7235 accuracy: 0.953125, loss: 0.18842217326164246      
train step #7236 accuracy: 0.921875, loss: 0.2678539752960205       
train step #7237 accuracy: 0.984375, loss: 0.10863620042800903      
train step #7238 accuracy: 0.953125, loss: 0.17638736963272095      
train step #7239 accuracy: 0.953125, loss: 0.21369525790214539      
train step #7240 accuracy: 0.96875, loss: 0.16247335076332092      
train step #7241 accuracy: 0.984375, loss: 0.07395559549331665      
train step #7242 accuracy: 0.96875, loss: 0.16233083605766296      
train step #7243 accuracy: 0.96875, loss: 0.09047819674015045      
train step #7244 accuracy: 0.953125, loss

dev accuracy: 0.8125, loss: 1.118895411491394        
dev accuracy: 0.9375, loss: 0.14863727986812592      
dev accuracy:   1.0, loss: 0.0013254284858703613    
dev accuracy: 0.9375, loss: 0.1383323222398758       
dev accuracy: 0.9375, loss: 0.3475629985332489       
dev accuracy: 0.9375, loss: 0.38446253538131714      
dev accuracy: 0.9375, loss: 0.22925594449043274      
dev accuracy: 0.9375, loss: 0.348439484834671        
dev accuracy: 0.875, loss: 0.15677809715270996      
dev accuracy:   1.0, loss: 0.031107664108276367     
dev accuracy:   1.0, loss: 0.003433316946029663     
dev accuracy: 0.9375, loss: 0.4097076654434204       
dev accuracy: 0.9375, loss: 0.16836468875408173      
dev accuracy:   1.0, loss: 0.007263898849487305     
dev accuracy: 0.8125, loss: 0.999207615852356        
dev accuracy:   1.0, loss: 0.007795363664627075     
dev accuracy:   1.0, loss: 0.04957154393196106      
dev accuracy: 0.9375, loss: 0.11924052238464355      
dev accuracy: 0.875, loss: 0.235451

train step #7320 accuracy: 0.953125, loss: 0.09906415641307831      
train step #7321 accuracy: 0.96875, loss: 0.07272082567214966      
train step #7322 accuracy: 0.96875, loss: 0.10061116516590118      
train step #7323 accuracy: 0.9375, loss: 0.087965227663517        
train step #7324 accuracy: 0.96875, loss: 0.08385388553142548      
train step #7325 accuracy: 0.953125, loss: 0.1367209255695343       
train step #7326 accuracy:   1.0, loss: 0.05427667871117592      
train step #7327 accuracy: 0.984375, loss: 0.10377312451601028      
train step #7328 accuracy: 0.953125, loss: 0.13600406050682068      
train step #7329 accuracy: 0.9375, loss: 0.24096517264842987      
train step #7330 accuracy: 0.96875, loss: 0.13042034208774567      
train step #7331 accuracy: 0.984375, loss: 0.06066380441188812      
train step #7332 accuracy: 0.96875, loss: 0.13336707651615143      
train step #7333 accuracy: 0.953125, loss: 0.16055689752101898      
train step #7334 accuracy: 0.984375, loss: 0.0

train step #7441 accuracy: 0.984375, loss: 0.118315190076828        
train step #7442 accuracy: 0.984375, loss: 0.12750576436519623      
train step #7443 accuracy: 0.984375, loss: 0.1349499225616455       
train step #7444 accuracy: 0.984375, loss: 0.06213555112481117      
train step #7445 accuracy:   1.0, loss: 0.012033767998218536     
train step #7446 accuracy: 0.96875, loss: 0.10377046465873718      
train step #7447 accuracy: 0.984375, loss: 0.10333125293254852      
train step #7448 accuracy: 0.953125, loss: 0.18838432431221008      
train step #7449 accuracy: 0.984375, loss: 0.03910970687866211      
train step #7450 accuracy: 0.9375, loss: 0.1541096568107605       
train step #7451 accuracy: 0.953125, loss: 0.15092241764068604      
train step #7452 accuracy: 0.96875, loss: 0.11488748341798782      
train step #7453 accuracy: 0.953125, loss: 0.1333899199962616       
train step #7454 accuracy: 0.953125, loss: 0.15747518837451935      
train step #7455 accuracy:   1.0, loss: 0

train step #7562 accuracy: 0.953125, loss: 0.20286232233047485      
train step #7563 accuracy: 0.984375, loss: 0.08957047015428543      
train step #7564 accuracy: 0.90625, loss: 0.26873651146888733      
train step #7565 accuracy:   1.0, loss: 0.03440907597541809      
train step #7566 accuracy: 0.953125, loss: 0.1284884512424469       
train step #7567 accuracy: 0.921875, loss: 0.25084418058395386      
train step #7568 accuracy: 0.96875, loss: 0.11861266195774078      
train step #7569 accuracy: 0.96875, loss: 0.1423175036907196       
train step #7570 accuracy: 0.9375, loss: 0.25692519545555115      
train step #7571 accuracy: 0.953125, loss: 0.20700973272323608      
train step #7572 accuracy: 0.9375, loss: 0.25084707140922546      
train step #7573 accuracy: 0.96875, loss: 0.12966673076152802      
train step #7574 accuracy: 0.984375, loss: 0.13907185196876526      
train step #7575 accuracy: 0.984375, loss: 0.048987582325935364     
train step #7576 accuracy: 0.921875, loss: 0.

dev accuracy:   1.0, loss: 0.018157511949539185     
dev accuracy:   1.0, loss: 0.02501162886619568      
dev accuracy: 0.875, loss: 0.2925023138523102       
dev accuracy: 0.875, loss: 0.4754074811935425       
dev accuracy:   1.0, loss: 0.046578675508499146     
dev accuracy:   1.0, loss: 0.018877923488616943     
dev accuracy: 0.9375, loss: 0.1974990963935852       
dev accuracy:   1.0, loss: 0.06078742444515228      
dev accuracy:   1.0, loss: 0.03196236491203308      
dev accuracy: 0.875, loss: 0.5305222868919373       
dev accuracy: 0.8125, loss: 0.8262996673583984       
dev accuracy: 0.9375, loss: 0.07797347009181976      
dev accuracy:   1.0, loss: 0.00978115200996399      
dev accuracy:   1.0, loss: 0.05959522724151611      
dev accuracy: 0.875, loss: 0.5216858386993408       
dev accuracy: 0.9375, loss: 0.15131089091300964      
dev accuracy:   1.0, loss: 0.032848238945007324     
dev accuracy: 0.875, loss: 0.4861498475074768       
dev accuracy: 0.9375, loss: 0.138565540313

train step #7651 accuracy:   1.0, loss: 0.027847066521644592     
train step #7652 accuracy:   1.0, loss: 0.14776800572872162      
train step #7653 accuracy: 0.984375, loss: 0.0584028959274292       
train step #7654 accuracy: 0.984375, loss: 0.08095162361860275      
train step #7655 accuracy: 0.96875, loss: 0.10842885076999664      
train step #7656 accuracy: 0.953125, loss: 0.08696398884057999      
train step #7657 accuracy: 0.9375, loss: 0.20786729454994202      
train step #7658 accuracy: 0.953125, loss: 0.18876025080680847      
train step #7659 accuracy: 0.984375, loss: 0.08696503192186356      
train step #7660 accuracy: 0.96875, loss: 0.09886463731527328      
train step #7661 accuracy: 0.984375, loss: 0.06645660102367401      
train step #7662 accuracy: 0.9375, loss: 0.1857527792453766       
train step #7663 accuracy: 0.921875, loss: 0.2924327850341797       
train step #7664 accuracy: 0.9375, loss: 0.18922697007656097      
train step #7665 accuracy: 0.9375, loss: 0.29795

train step #7772 accuracy: 0.953125, loss: 0.21808725595474243      
train step #7773 accuracy: 0.96875, loss: 0.089014433324337        
train step #7774 accuracy: 0.953125, loss: 0.27078181505203247      
train step #7775 accuracy: 0.96875, loss: 0.12492413073778152      
train step #7776 accuracy: 0.96875, loss: 0.11242654919624329      
train step #7777 accuracy: 0.984375, loss: 0.1106577217578888       
train step #7778 accuracy: 0.9375, loss: 0.14621001482009888      
train step #7779 accuracy: 0.984375, loss: 0.04855787381529808      
train step #7780 accuracy: 0.953125, loss: 0.12336300313472748      
train step #7781 accuracy:   1.0, loss: 0.01705615222454071      
train step #7782 accuracy: 0.96875, loss: 0.10025908052921295      
train step #7783 accuracy: 0.984375, loss: 0.10923793166875839      
train step #7784 accuracy: 0.953125, loss: 0.19565103948116302      
train step #7785 accuracy: 0.953125, loss: 0.19301171600818634      
train step #7786 accuracy: 0.96875, loss: 0

train step #7892 accuracy: 0.9375, loss: 0.23160070180892944      
train step #7893 accuracy: 0.96875, loss: 0.08955350518226624      
train step #7894 accuracy: 0.96875, loss: 0.1280711591243744       
train step #7895 accuracy: 0.9375, loss: 0.2210352122783661       
train step #7896 accuracy: 0.96875, loss: 0.13734132051467896      
train step #7897 accuracy: 0.953125, loss: 0.15004557371139526      
train step #7898 accuracy: 0.921875, loss: 0.19570116698741913      
train step #7899 accuracy: 0.9375, loss: 0.2642933130264282       
train step #7900 accuracy: 0.953125, loss: 0.130264014005661        
train step #7901 accuracy:   1.0, loss: 0.029833845794200897     
train step #7902 accuracy: 0.9375, loss: 0.20848841965198517      
train step #7903 accuracy: 0.9375, loss: 0.1483832448720932       
train step #7904 accuracy:   1.0, loss: 0.023183681070804596     
train step #7905 accuracy: 0.921875, loss: 0.3146612048149109       
train step #7906 accuracy: 0.9375, loss: 0.1675273627

dev accuracy:   1.0, loss: 0.0012089014053344727    
dev accuracy:   1.0, loss: 0.0015210211277008057    
dev accuracy: 0.9375, loss: 0.5334696173667908       
dev accuracy: 0.9375, loss: 0.14594441652297974      
dev accuracy:   1.0, loss: 0.0460965633392334       
dev accuracy:   1.0, loss: 0.02224394679069519      
dev accuracy: 0.9375, loss: 0.22635339200496674      
dev accuracy:   1.0, loss: 0.02660125494003296      
dev accuracy: 0.9375, loss: 0.219464510679245        
dev accuracy:   1.0, loss: 0.009130656719207764     
dev accuracy:   1.0, loss: 0.019348442554473877     
dev accuracy: 0.9375, loss: 0.1951681673526764       
dev accuracy: 0.875, loss: 0.3421807885169983       
dev accuracy: 0.875, loss: 0.24166706204414368      
dev accuracy: 0.9375, loss: 0.10772073268890381      
dev accuracy: 0.9375, loss: 0.26019129157066345      
dev accuracy:   1.0, loss: 0.05896899104118347      
dev accuracy:   1.0, loss: 0.04373243451118469      
dev accuracy: 0.9375, loss: 0.190083160

train step #7982 accuracy: 0.96875, loss: 0.13771182298660278      
train step #7983 accuracy: 0.96875, loss: 0.11703081429004669      
train step #7984 accuracy:   1.0, loss: 0.03965518996119499      
train step #7985 accuracy: 0.96875, loss: 0.11498327553272247      
train step #7986 accuracy: 0.96875, loss: 0.1321573704481125       
train step #7987 accuracy: 0.984375, loss: 0.10871998965740204      
train step #7988 accuracy:   1.0, loss: 0.042925313115119934     
train step #7989 accuracy: 0.984375, loss: 0.02679295837879181      
train step #7990 accuracy: 0.9375, loss: 0.24435342848300934      
train step #7991 accuracy: 0.96875, loss: 0.0931449607014656       
train step #7992 accuracy: 0.953125, loss: 0.1819518655538559       
train step #7993 accuracy: 0.96875, loss: 0.05994509533047676      
train step #7994 accuracy: 0.953125, loss: 0.17062737047672272      
train step #7995 accuracy: 0.984375, loss: 0.05818503722548485      
train step #7996 accuracy: 0.9375, loss: 0.27411

train step #8103 accuracy: 0.953125, loss: 0.14696243405342102      
train step #8104 accuracy: 0.96875, loss: 0.18543536961078644      
train step #8105 accuracy: 0.96875, loss: 0.1500849723815918       
train step #8106 accuracy: 0.984375, loss: 0.06543934345245361      
train step #8107 accuracy: 0.984375, loss: 0.03635247051715851      
train step #8108 accuracy: 0.953125, loss: 0.19467400014400482      
train step #8109 accuracy: 0.96875, loss: 0.10016871243715286      
train step #8110 accuracy: 0.953125, loss: 0.1907535046339035       
train step #8111 accuracy: 0.96875, loss: 0.12274104356765747      
train step #8112 accuracy: 0.953125, loss: 0.16700352728366852      
train step #8113 accuracy: 0.9375, loss: 0.16374291479587555      
train step #8114 accuracy: 0.953125, loss: 0.14096172153949738      
train step #8115 accuracy: 0.90625, loss: 0.33526912331581116      
train step #8116 accuracy: 0.984375, loss: 0.08096099644899368      
train step #8117 accuracy: 0.9375, loss: 

train step #8223 accuracy: 0.953125, loss: 0.16629527509212494      
train step #8224 accuracy: 0.96875, loss: 0.08732807636260986      
train step #8225 accuracy: 0.96875, loss: 0.16391180455684662      
train step #8226 accuracy: 0.96875, loss: 0.12449948489665985      
train step #8227 accuracy: 0.9375, loss: 0.13805033266544342      
train step #8228 accuracy: 0.984375, loss: 0.047969721257686615     
train step #8229 accuracy:   1.0, loss: 0.021245427429676056     
train step #8230 accuracy: 0.984375, loss: 0.0638519674539566       
train step #8231 accuracy: 0.984375, loss: 0.09534260630607605      
train step #8232 accuracy: 0.96875, loss: 0.2057095468044281       
train step #8233 accuracy: 0.953125, loss: 0.10490294545888901      
train step #8234 accuracy: 0.953125, loss: 0.13076843321323395      
train step #8235 accuracy: 0.984375, loss: 0.08338786661624908      
train step #8236 accuracy: 0.9375, loss: 0.21077264845371246      
train step #8237 accuracy: 0.953125, loss: 0.

dev accuracy:   1.0, loss: 0.010563135147094727     
dev accuracy: 0.875, loss: 0.6257886290550232       
dev accuracy:   1.0, loss: 0.00821039080619812      
dev accuracy:   1.0, loss: 0.010069310665130615     
dev accuracy: 0.9375, loss: 0.08152282238006592      
dev accuracy: 0.875, loss: 0.6887394189834595       
dev accuracy: 0.9375, loss: 0.417586088180542        
dev accuracy: 0.875, loss: 0.699939489364624        
dev accuracy: 0.9375, loss: 0.11044500768184662      
dev accuracy: 0.8125, loss: 1.2307108640670776       
dev accuracy:   1.0, loss: 0.0005266666412353516    
dev accuracy: 0.9375, loss: 0.3005008399486542       
dev accuracy: 0.8125, loss: 0.631926417350769        
dev accuracy:   1.0, loss: 0.005716979503631592     
dev accuracy:   1.0, loss: 0.01796334981918335      
dev accuracy: 0.9375, loss: 0.1511872261762619       
dev accuracy:   1.0, loss: 0.0010496973991394043    
dev accuracy: 0.9375, loss: 0.43177980184555054      
dev accuracy: 0.875, loss: 0.481374949

dev accuracy: 0.9375, loss: 0.11437249183654785      
dev accuracy: 0.9375, loss: 0.09433847665786743      
dev accuracy:   1.0, loss: 0.011026203632354736     
dev accuracy:   1.0, loss: 0.004000604152679443     
dev accuracy: 0.9375, loss: 0.06363540887832642      
dev accuracy:   1.0, loss: 0.02313646674156189      
dev accuracy: 0.9375, loss: 0.4097476899623871       
dev accuracy:   1.0, loss: 0.04983608424663544      
dev accuracy:   1.0, loss: 0.024756699800491333     
dev accuracy:   1.0, loss: 0.013160526752471924     
dev accuracy: 0.9375, loss: 0.17489305138587952      
dev accuracy: 0.9375, loss: 0.1827937364578247       
dev accuracy:   1.0, loss: 0.06503963470458984      
dev accuracy: 0.9375, loss: 0.44327348470687866      
dev accuracy: 0.9375, loss: 0.14657491445541382      
dev accuracy: 0.9375, loss: 0.14188598096370697      
dev accuracy:   1.0, loss: 0.007849186658859253     
dev accuracy:   1.0, loss: 0.005594313144683838     
dev accuracy: 0.9375, loss: 0.0770124

train step #8432 accuracy: 0.984375, loss: 0.1119348406791687       
train step #8433 accuracy: 0.90625, loss: 0.2840613126754761       
train step #8434 accuracy: 0.953125, loss: 0.18258437514305115      
train step #8435 accuracy: 0.96875, loss: 0.11276838183403015      
train step #8436 accuracy: 0.953125, loss: 0.19536356627941132      
train step #8437 accuracy:   1.0, loss: 0.030932500958442688     
train step #8438 accuracy: 0.921875, loss: 0.21287454664707184      
train step #8439 accuracy: 0.953125, loss: 0.168364018201828        
train step #8440 accuracy: 0.984375, loss: 0.10093899816274643      
train step #8441 accuracy: 0.96875, loss: 0.09760934114456177      
train step #8442 accuracy: 0.984375, loss: 0.04357560724020004      
train step #8443 accuracy: 0.953125, loss: 0.22331446409225464      
train step #8444 accuracy: 0.984375, loss: 0.08798878639936447      
train step #8445 accuracy: 0.9375, loss: 0.21463005244731903      
train step #8446 accuracy: 0.953125, loss:

train step #8553 accuracy: 0.96875, loss: 0.11093755811452866      
train step #8554 accuracy: 0.96875, loss: 0.07808391749858856      
train step #8555 accuracy: 0.9375, loss: 0.17318414151668549      
train step #8556 accuracy: 0.984375, loss: 0.06685248017311096      
train step #8557 accuracy: 0.921875, loss: 0.20728248357772827      
train step #8558 accuracy: 0.984375, loss: 0.054580941796302795     
train step #8559 accuracy: 0.953125, loss: 0.18273170292377472      
train step #8560 accuracy: 0.890625, loss: 0.40816837549209595      
train step #8561 accuracy: 0.984375, loss: 0.04778516665101051      
train step #8562 accuracy: 0.96875, loss: 0.0532558336853981       
train step #8563 accuracy: 0.953125, loss: 0.15538997948169708      
train step #8564 accuracy: 0.953125, loss: 0.32705527544021606      
train step #8565 accuracy: 0.96875, loss: 0.14133508503437042      
train step #8566 accuracy: 0.9375, loss: 0.19021075963974         
train step #8567 accuracy: 0.96875, loss: 

train step #8674 accuracy: 0.953125, loss: 0.18213427066802979      
train step #8675 accuracy: 0.9375, loss: 0.20949257910251617      
dev accuracy:   1.0, loss: 0.0013043880462646484    
dev accuracy: 0.9375, loss: 0.40115198493003845      
dev accuracy: 0.875, loss: 0.48600444197654724      
dev accuracy:   1.0, loss: 0.09324911236763         
dev accuracy: 0.9375, loss: 0.08630090951919556      
dev accuracy: 0.875, loss: 0.22274379432201385      
dev accuracy: 0.9375, loss: 0.13760776817798615      
dev accuracy: 0.9375, loss: 0.12736821174621582      
dev accuracy:   1.0, loss: 0.005049020051956177     
dev accuracy: 0.9375, loss: 0.10593780875205994      
dev accuracy:   1.0, loss: 0.004544854164123535     
dev accuracy: 0.9375, loss: 0.13703159987926483      
dev accuracy: 0.875, loss: 0.5978517532348633       
dev accuracy: 0.875, loss: 0.20229509472846985      
dev accuracy: 0.9375, loss: 0.09938670694828033      
dev accuracy: 0.875, loss: 0.18708056211471558      
dev accur

dev accuracy: 0.9375, loss: 0.22124391794204712      
dev accuracy: 0.875, loss: 0.28206878900527954      
dev accuracy:   1.0, loss: 0.14533492922782898      
dev accuracy:   1.0, loss: 0.03575374186038971      
dev accuracy: 0.9375, loss: 0.3385275602340698       
dev accuracy:   1.0, loss: 0.05175754427909851      
dev accuracy: 0.9375, loss: 0.0763901025056839       
dev accuracy:   1.0, loss: 0.017124563455581665     
dev accuracy: 0.9375, loss: 0.09128686785697937      
dev accuracy: 0.875, loss: 0.7584766149520874       
dev accuracy: 0.9375, loss: 0.09876704216003418      
dev accuracy:   1.0, loss: 0.029720723628997803     
dev accuracy: 0.9375, loss: 0.41954824328422546      
dev accuracy:   1.0, loss: 0.0001965165138244629    
dev accuracy: 0.875, loss: 0.18113529682159424      
dev accuracy:   1.0, loss: 0.0857158750295639       
dev accuracy: 0.9375, loss: 0.2521195709705353       
dev accuracy: 0.9375, loss: 0.06619849801063538      
dev accuracy: 0.9375, loss: 0.14213339

train step #8763 accuracy: 0.9375, loss: 0.2011992186307907       
train step #8764 accuracy: 0.96875, loss: 0.11307436972856522      
train step #8765 accuracy: 0.984375, loss: 0.06270374357700348      
train step #8766 accuracy: 0.984375, loss: 0.10123256593942642      
train step #8767 accuracy: 0.984375, loss: 0.0925520807504654       
train step #8768 accuracy: 0.953125, loss: 0.24216672778129578      
train step #8769 accuracy: 0.953125, loss: 0.16076302528381348      
train step #8770 accuracy: 0.921875, loss: 0.2862793803215027       
train step #8771 accuracy: 0.96875, loss: 0.13643169403076172      
train step #8772 accuracy: 0.96875, loss: 0.20923790335655212      
train step #8773 accuracy: 0.984375, loss: 0.1192551851272583       
train step #8774 accuracy: 0.984375, loss: 0.04288510978221893      
train step #8775 accuracy:   1.0, loss: 0.06836123764514923      
train step #8776 accuracy: 0.984375, loss: 0.07463914901018143      
train step #8777 accuracy:   1.0, loss: 0.

train step #8884 accuracy: 0.96875, loss: 0.0991571694612503       
train step #8885 accuracy: 0.96875, loss: 0.1680261790752411       
train step #8886 accuracy: 0.96875, loss: 0.1706988662481308       
train step #8887 accuracy: 0.953125, loss: 0.13455641269683838      
train step #8888 accuracy: 0.96875, loss: 0.11194086074829102      
train step #8889 accuracy:   1.0, loss: 0.029042057693004608     
train step #8890 accuracy: 0.984375, loss: 0.08233031630516052      
train step #8891 accuracy: 0.953125, loss: 0.23741625249385834      
train step #8892 accuracy: 0.984375, loss: 0.06875140964984894      
train step #8893 accuracy: 0.96875, loss: 0.06994032114744186      
train step #8894 accuracy: 0.96875, loss: 0.10164930671453476      
train step #8895 accuracy: 0.921875, loss: 0.22693946957588196      
train step #8896 accuracy: 0.953125, loss: 0.2036258578300476       
train step #8897 accuracy: 0.96875, loss: 0.11145221441984177      
train step #8898 accuracy:   1.0, loss: 0.02

train step #9005 accuracy: 0.953125, loss: 0.22378776967525482      
train step #9006 accuracy: 0.953125, loss: 0.1931251883506775       
train step #9007 accuracy: 0.96875, loss: 0.19103071093559265      
train step #9008 accuracy: 0.984375, loss: 0.03447716683149338      
train step #9009 accuracy: 0.96875, loss: 0.1613484025001526       
train step #9010 accuracy: 0.96875, loss: 0.11439330875873566      
train step #9011 accuracy: 0.96875, loss: 0.10079407691955566      
train step #9012 accuracy: 0.96875, loss: 0.08840620517730713      
train step #9013 accuracy: 0.953125, loss: 0.12818461656570435      
train step #9014 accuracy: 0.96875, loss: 0.05477406829595566      
train step #9015 accuracy: 0.96875, loss: 0.12988345324993134      
train step #9016 accuracy:   1.0, loss: 0.020442456007003784     
train step #9017 accuracy: 0.953125, loss: 0.11765875667333603      
train step #9018 accuracy: 0.96875, loss: 0.15828309953212738      
train step #9019 accuracy: 0.96875, loss: 0.1

dev accuracy: 0.9375, loss: 0.23195970058441162      
dev accuracy: 0.9375, loss: 0.34474772214889526      
dev accuracy:   1.0, loss: 0.04554641246795654      
dev accuracy: 0.875, loss: 0.3456769287586212       
dev accuracy:   1.0, loss: 0.004736065864562988     
dev accuracy:   1.0, loss: 0.024448364973068237     
dev accuracy: 0.9375, loss: 0.4244897663593292       
dev accuracy: 0.9375, loss: 0.42405635118484497      
dev accuracy: 0.875, loss: 0.19665458798408508      
dev accuracy:   1.0, loss: 0.1442660242319107       
dev accuracy: 0.9375, loss: 0.2419506013393402       
dev accuracy: 0.9375, loss: 0.18000692129135132      
dev accuracy: 0.8125, loss: 0.46109670400619507      
dev accuracy: 0.9375, loss: 0.10697042942047119      
dev accuracy:   1.0, loss: 0.01246267557144165      
dev accuracy:   1.0, loss: 0.024306774139404297     
dev accuracy: 0.9375, loss: 0.10578089952468872      
dev accuracy: 0.875, loss: 0.335683673620224        
dev accuracy: 0.9375, loss: 0.2108314

test accuracy:   1.0, loss: 0.013709187507629395     
test accuracy:   1.0, loss: 0.0327606201171875       
test accuracy: 0.9375, loss: 0.20119708776474         
test accuracy: 0.875, loss: 0.7215802669525146       
test accuracy:   1.0, loss: 0.0037711262702941895    
test accuracy:   1.0, loss: 0.017562448978424072     
test accuracy: 0.9375, loss: 0.20317262411117554      
test accuracy:   1.0, loss: 0.0036230087280273438    
test accuracy:   1.0, loss: 0.006030678749084473     
test accuracy:   1.0, loss: 0.009665250778198242     
test accuracy:   1.0, loss: 0.026600241661071777     
test accuracy: 0.9375, loss: 0.12577015161514282      
test accuracy:   1.0, loss: 0.003833949565887451     
test accuracy:   1.0, loss: 0.039680227637290955     
test accuracy:   1.0, loss: 0.011337071657180786     
test accuracy:   1.0, loss: 0.09160539507865906      
test accuracy:   1.0, loss: 0.024337172508239746     
test accuracy:   1.0, loss: 0.09897445142269135      
test accuracy: 0.9375, lo

In [53]:
for model_in, labels in test_loader:
    model_in = Variable(model_in, requires_grad=False).cuda()
    labels = labels.cuda()
    scores = model(model_in)
    break

In [84]:
print(labels[0])
print(int(scores[0].max(0)[1]))

3
3


---

### Make submission

In [48]:
label2word = {v: k.strip("_") for k, v in words.items()}

In [13]:
model.load(config["output_file"])

In [32]:
eval_folder = "./../raw_data/test/audio/"
eval_wavs = {
    os.path.join(eval_folder, file_path): os.path.join(eval_folder, file_path) # TODO: this is hack, be careful
        for file_path in os.listdir("./../raw_data/test/audio/")
            if file_path.endswith(".wav")
}

In [33]:
len(eval_wavs)

158538

In [34]:
eval_set = SpeechDataset(eval_wavs, DatasetType.TEST, test_cfg)
eval_loader = data.DataLoader(eval_set, batch_size=16)

In [50]:
with open("../submission.txt", "w") as sub:
    sub.write("fname,label\n")
    for model_in, wav_paths in eval_loader:
        scores = model(Variable(model_in, requires_grad=False).cuda())
        for i, scorei in enumerate(scores):
            sub.write(
                wav_paths[i].rsplit("/")[-1] + "," + label2word[int(scorei.max(0)[1])] + "\n"
            )

AttributeError: 'int' object has no attribute 'rsplit'

In [47]:
"no".strip("_")

'no'

In [36]:
print(label2word[int(scores[4].max(0)[1])])

no


In [37]:
play_wav(wav_paths[4])