In [1]:
from pathlib import Path
from torch.utils.data import DataLoader
import torch
import torch.optim as optim
import pandas as pd
import json
import tqdm
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import Dataset, DatasetDict
import evaluate
import os
from pprint import pprint

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
NUM_EPOCHS = 50
EXPERIMENT_NAME = "t5-small_falcon2-1eg0r"
EXPERIMENT_DIR = Path('experiments')
MODEL_ARTIFACTS = EXPERIMENT_DIR / EXPERIMENT_NAME
WEIGHTS_DIR = MODEL_ARTIFACTS / 'weights'
VALS_DIR = MODEL_ARTIFACTS / 'validations'
LINKS_PATH = 'falcon_links/1ents-gold_0rels/link_28246.json'

Make appropriate directoreis

In [2]:
WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)
VALS_DIR.mkdir(parents=True, exist_ok=True)

Defining the model and tokenizer

In [3]:
model_path = "t5-small"
tokenizer_path = "t5-small"

model = T5ForConditionalGeneration.from_pretrained(model_path, device_map ='auto')
tokenizer = T5Tokenizer.from_pretrained(tokenizer_path)

In [4]:
from pprint import pprint
pprint(model.hf_device_map)

{'': 0}


Define dataset maker

In [5]:
def split_dataframe(df):
  # ratios from Bannerjee
  train = 0.7
  dev = 0.1
  test = 0.2
  assert train + dev + test == 1.0
  data_len = len(df)
  train_set = Dataset.from_pandas(df[:round(data_len * train)])
  dev_set = Dataset.from_pandas(df[round(data_len * train):round(data_len* (train + dev))])
  test_set = Dataset.from_pandas(df[round(data_len * (train + dev)):])
  
  dataset = DatasetDict()
  dataset['train'] = train_set
  dataset['dev'] = dev_set
  dataset['test'] = test_set

  return dataset

Define dataset tokenizer

In [6]:
def tokenize_data(dataset, column):
  model_inputs = tokenizer(dataset[column], padding=True, truncation=True, return_tensors="pt")
  return model_inputs

Define unmasker

In [7]:
from pipeline import T5Converter
converter = T5Converter()

Defining the validation function

In [8]:
def val(val_dataloader, val_path = None):
  model.eval()
  eval_dict = []

  iters = len(val_dataloader)

  # progress_bar = tqdm.tqdm(iters, bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}")
  # progress_bar.set_description(f"Eval")

  correct_preds = 0
  total_preds = 0

  for val_batch in val_dataloader:
    batch = {}
    for k,v in val_batch.items():
      if k in {"input_ids", "labels", "attention_mask"}:
        batch[k] = v.to("cuda")

    with torch.no_grad():
      outputs = model(**batch)
    
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)
    for i, pred in enumerate(tokenizer.batch_decode(predictions)):
      gold = val_batch['gold'][i]
      gold = gold.strip().replace(" ","")
      gold2 = gold.replace(">", "> ").replace("<"," <").replace("  ", " ").strip()
      pred = pred.replace(" ","").replace("</s>", "").replace("<pad>","").replace('<unk>','').replace('<s>','').strip().replace(" ","")
      pred2 = pred.replace(">", "> ").replace("<"," <").replace("  ", " ").strip()
      entry_dict = {
        "Utte": val_batch['utterance'][i],
        "Anno": val_batch['annotated'][i],
        "Gold": val_batch['gold'][i],
        "Gene": pred, # THIS NEEDS TO BE UNMASKED
        "Gol2": converter._unmask_generic(gold2),
        "Gen2": converter._unmask_generic(pred2),
      }
      eval_dict.append(entry_dict)
      total_preds += 1
      if entry_dict['Gol2'] == entry_dict['Gen2']:
        correct_preds += 1
    # progress_bar.update(1)
  
  if val_path:
    with open(val_path, "w") as f:
      json.dump(eval_dict, f, indent=2)

  accuracy = correct_preds/total_preds

  meta = {
    'accuracy': f"{accuracy:.5f}"
  }
  
  model.train()
  return eval_dict, meta

In [9]:
def training_loop(df):
  print("beginning training")

  assert 'utterance' in df.columns
  assert 'annotated' in df.columns
  assert 'gold' in df.columns

  dataset = split_dataframe(df)
  tokenized_dataset = dataset \
    .map(lambda x: tokenize_data(x, 'gold'), batched=True) \
    .rename_column('input_ids', 'labels') \
    .map(lambda x: tokenize_data(x, 'annotated'), batched=True)

  tokenized_dataset.set_format("pt", columns=["input_ids", "attention_mask", "labels"], output_all_columns=True)
  print("data loaded")
  
  train_dataset = tokenized_dataset["train"]
  dev_dataset = tokenized_dataset["dev"]
  test_dataset = tokenized_dataset["test"]

  train_dataloader = DataLoader(train_dataset, batch_size = 10)
  dev_dataloader = DataLoader(dev_dataset, batch_size = 10)

  scalar = 0

  optimizer = optim.AdamW(model.parameters(), lr = 0.0015)
  lr_scheduler=transformers. \
    get_polynomial_decay_schedule_with_warmup(optimizer, 5000, 30000, power=0.5)
  
  epoch_data = {}

  for epoch in range(NUM_EPOCHS):
    print("\nBeginning Epoch:", epoch)
    i = 0
    iters = len(train_dataloader)
    for batch in train_dataloader:
      newbatch = {}
      for k,v in batch.items():
        if k in ["labels", "input_ids", "attention_mask"]:
          newbatch[k] = v.to("cuda")
      
      batch = newbatch
      newbatch = {}

      outputs = model(**batch)
      loss = outputs.loss
      scalar += loss.mean().item()

      if (i+1) % 100 == 0:
        print(f'iteration = {i+1}/{iters}, training loss={scalar/100}')
        scalar = 0

      loss /= 10 
      loss.mean().backward()
      if (i+1) % 1 == 0:
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
      
      del batch
      i += 1
    
    print(f"Validating epoch {epoch}")
    val_filename = f"val_{epoch}.json"
    _, meta = val(dev_dataloader, VALS_DIR / val_filename)
    pprint(meta)
    assert val_filename not in epoch_data
    epoch_data[val_filename] = meta

    with open(MODEL_ARTIFACTS / "meta_data.json", "w") as f:
      json.dump(epoch_data, f, indent=2)

    torch.save(model.state_dict(),
      WEIGHTS_DIR / f"cp_{epoch}.pth")
  print(f"\nValidating final")
  val_filename = f"val_final.json"

  _, meta = val(dev_dataloader, VALS_DIR / val_filename)

  epoch_data[val_filename] = meta

  pprint(meta)
  with open(MODEL_ARTIFACTS / "meta_data.json", "w") as f:
    json.dump(epoch_data, f, indent=2)

  torch.save(model.state_dict(),
    WEIGHTS_DIR / f"cp_final.pth")

Main

In [10]:
df_json = []
with open(LINKS_PATH) as f:
  data_json = json.load(f)

print(data_json[0])


[{'utterance': 'What periodical literature does Delta Air Lines use as a moutpiece?', 'ents': [{'uri': 'http://www.wikidata.org/entity/Q1002697', 'prefix': 'wd:', 'id': 'Q1002697'}, {'uri': 'http://www.wikidata.org/entity/Q188920', 'prefix': 'wd:', 'id': 'Q188920'}, {'uri': 'http://www.wikidata.org/entity/Q416938', 'prefix': 'wd:', 'id': 'Q416938'}], 'rels': []}, {'utterance': 'What periodical literature does Delta Air Lines use as a moutpiece?', 'fragments': ['[DEF]', 'wd:', 'Q1002697 periodical literature', '[DEF]', 'wd:', 'Q188920 Delta', '[DEF]', 'wd:', 'Q416938 Mouthpiece']}, {'inputs': 'What periodical literature does Delta Air Lines use as a moutpiece? <extra_id_59> <extra_id_53> Q1002697 periodical literature <extra_id_59> <extra_id_53> Q188920 Delta <extra_id_59> <extra_id_53> Q416938 Mouthpiece', 'labels': '<extra_id_6> <extra_id_21> <extra_id_39> <extra_id_19> <extra_id_33> <extra_id_53> q188920 <extra_id_54> p2813 <extra_id_39> <extra_id_38> <extra_id_39> <extra_id_54> p31 

In [11]:
# df_json = []
# with open('weekend.json') as f:
#   data_json = json.load(f)

for data in data_json:
  data_dict = {
    "utterance": data[0]["utterance"],
    "annotated": data[2]["inputs"],
    "gold": data[2]["labels"]
  }
  df_json.append(data_dict)

In [12]:
df = pd.DataFrame.from_dict(df_json)
df.head()

Unnamed: 0,utterance,annotated,gold
0,What periodical literature does Delta Air Line...,What periodical literature does Delta Air Line...,<extra_id_6> <extra_id_21> <extra_id_39> <extr...
1,Who is the child of Ranavalona Is husband?,Who is the child of Ranavalona Is husband? <ex...,<extra_id_6> <extra_id_39> <extra_id_19> <extr...
2,Is it true Jeff_Bridges occupation Lane Chandl...,Is it true Jeff_Bridges occupation Lane Chandl...,<extra_id_4> <extra_id_19> <extra_id_33> <extr...
3,What is the pre-requisite of phase matter of G...,What is the pre-requisite of phase matter of G...,<extra_id_6> <extra_id_39> <extra_id_19> <extr...
4,Which is the operating income for Qantas?,Which is the operating income for Qantas? <ext...,<extra_id_6> <extra_id_21> <extra_id_39> <extr...


In [13]:
training_loop(df)

beginning training


Map:   0%|          | 0/19771 [00:00<?, ? examples/s]

Map:   0%|          | 0/2824 [00:00<?, ? examples/s]

Map:   0%|          | 0/5649 [00:00<?, ? examples/s]

Map:   0%|          | 0/19771 [00:00<?, ? examples/s]

Map:   0%|          | 0/2824 [00:00<?, ? examples/s]

Map:   0%|          | 0/5649 [00:00<?, ? examples/s]

data loaded

Beginning Epoch: 0


iteration = 100/1978, training loss=11.375450992584229


iteration = 200/1978, training loss=4.416922595500946


iteration = 300/1978, training loss=2.117065905332565


iteration = 400/1978, training loss=1.2882678866386414


iteration = 500/1978, training loss=1.086832088828087


iteration = 600/1978, training loss=0.9539824879169464


iteration = 700/1978, training loss=0.8412085312604904


iteration = 800/1978, training loss=0.7387849116325378


iteration = 900/1978, training loss=0.650204561650753


iteration = 1000/1978, training loss=0.5742827352881431


iteration = 1100/1978, training loss=0.5264252337813378


iteration = 1200/1978, training loss=0.47143656194210054


iteration = 1300/1978, training loss=0.4158672150969505


iteration = 1400/1978, training loss=0.4059744003415108


iteration = 1500/1978, training loss=0.34937031492590903


iteration = 1600/1978, training loss=0.32352416336536405


iteration = 1700/1978, training loss=0.29328452825546264


iteration = 1800/1978, training loss=0.2714674296975136


iteration = 1900/1978, training loss=0.11165088333189488


Validating epoch 0


{'accuracy': '0.03435'}



Beginning Epoch: 1


iteration = 100/1978, training loss=1.2034206682443618


iteration = 200/1978, training loss=0.5885793939232826


iteration = 300/1978, training loss=0.4718750211596489


iteration = 400/1978, training loss=0.41088558912277223


iteration = 500/1978, training loss=0.3830428312718868


iteration = 600/1978, training loss=0.3645666262507439


iteration = 700/1978, training loss=0.318937134295702


iteration = 800/1978, training loss=0.304052252471447


iteration = 900/1978, training loss=0.28293692886829375


iteration = 1000/1978, training loss=0.26801058545708656


iteration = 1100/1978, training loss=0.2663019306957722


iteration = 1200/1978, training loss=0.25138097137212756


iteration = 1300/1978, training loss=0.23804212018847465


iteration = 1400/1978, training loss=0.23128192514181137


iteration = 1500/1978, training loss=0.21825378492474556


iteration = 1600/1978, training loss=0.22518265031278134


iteration = 1700/1978, training loss=0.2062483935058117


iteration = 1800/1978, training loss=0.19627703130245208


iteration = 1900/1978, training loss=0.07982515215873719


Validating epoch 1


{'accuracy': '0.13314'}



Beginning Epoch: 2


iteration = 100/1978, training loss=0.33429792791604995


iteration = 200/1978, training loss=0.19692666262388228


iteration = 300/1978, training loss=0.20396114453673364


iteration = 400/1978, training loss=0.17678092174232007


iteration = 500/1978, training loss=0.18043093532323837


iteration = 600/1978, training loss=0.183697105422616


iteration = 700/1978, training loss=0.16797387659549712


iteration = 800/1978, training loss=0.1687184602767229


iteration = 900/1978, training loss=0.16406406991183758


iteration = 1000/1978, training loss=0.15980811409652232


iteration = 1100/1978, training loss=0.15747010380029677


iteration = 1200/1978, training loss=0.15576050899922847


iteration = 1300/1978, training loss=0.14361597456037997


iteration = 1400/1978, training loss=0.16328697197139264


iteration = 1500/1978, training loss=0.16408470407128334


iteration = 1600/1978, training loss=0.1601589271426201


iteration = 1700/1978, training loss=0.14294610001146793


iteration = 1800/1978, training loss=0.14128215372562408


iteration = 1900/1978, training loss=0.05799531182274222


Validating epoch 2


{'accuracy': '0.22415'}



Beginning Epoch: 3


iteration = 100/1978, training loss=0.24120673533529044


iteration = 200/1978, training loss=0.1417400825023651


iteration = 300/1978, training loss=0.13181134328246116


iteration = 400/1978, training loss=0.12932567700743675


iteration = 500/1978, training loss=0.13377028442919253


iteration = 600/1978, training loss=0.13319156683981417


iteration = 700/1978, training loss=0.12317390199750662


iteration = 800/1978, training loss=0.12022815477102995


iteration = 900/1978, training loss=0.12140979908406735


iteration = 1000/1978, training loss=0.11476648285984993


iteration = 1100/1978, training loss=0.12303676500916481


iteration = 1200/1978, training loss=0.12393349800258875


iteration = 1300/1978, training loss=0.11091586966067553


iteration = 1400/1978, training loss=0.11563411246985197


iteration = 1500/1978, training loss=0.10899311341345311


iteration = 1600/1978, training loss=0.12116041455417871


iteration = 1700/1978, training loss=0.10671995431184769


iteration = 1800/1978, training loss=0.1036617811024189


iteration = 1900/1978, training loss=0.045588274262845514


Validating epoch 3


{'accuracy': '0.29037'}



Beginning Epoch: 4


iteration = 100/1978, training loss=0.19411171324551105


iteration = 200/1978, training loss=0.11986211773008108


iteration = 300/1978, training loss=0.11068983025848865


iteration = 400/1978, training loss=0.10959693968296051


iteration = 500/1978, training loss=0.11216063749045134


iteration = 600/1978, training loss=0.11205910854041576


iteration = 700/1978, training loss=0.10584972940385341


iteration = 800/1978, training loss=0.10031522629782558


iteration = 900/1978, training loss=0.10064620450139046


iteration = 1000/1978, training loss=0.09963256258517504


iteration = 1100/1978, training loss=0.10546553425490857


iteration = 1200/1978, training loss=0.10660779617726802


iteration = 1300/1978, training loss=0.09320085767656565


iteration = 1400/1978, training loss=0.09893813416361809


iteration = 1500/1978, training loss=0.09318720933049918


iteration = 1600/1978, training loss=0.1085455347970128


iteration = 1700/1978, training loss=0.09369589040055872


iteration = 1800/1978, training loss=0.092475247643888


iteration = 1900/1978, training loss=0.03990100769326091


Validating epoch 4


{'accuracy': '0.30843'}



Beginning Epoch: 5


iteration = 100/1978, training loss=0.1600280049815774


iteration = 200/1978, training loss=0.09633211821317672


iteration = 300/1978, training loss=0.09075488641858101


iteration = 400/1978, training loss=0.09300428401678801


iteration = 500/1978, training loss=0.08944733954966068


iteration = 600/1978, training loss=0.08919406849890947


iteration = 700/1978, training loss=0.091610961034894


iteration = 800/1978, training loss=0.08558330126106739


iteration = 900/1978, training loss=0.08719234343618154


iteration = 1000/1978, training loss=0.08176181642338634


iteration = 1100/1978, training loss=0.08448650818318129


iteration = 1200/1978, training loss=0.08206413526088


iteration = 1300/1978, training loss=0.07655458988621831


iteration = 1400/1978, training loss=0.08709169384092093


iteration = 1500/1978, training loss=0.07907516924664378


iteration = 1600/1978, training loss=0.08872916799038649


iteration = 1700/1978, training loss=0.07875374898314476


iteration = 1800/1978, training loss=0.08290789000689984


iteration = 1900/1978, training loss=0.037225230606272815


Validating epoch 5


{'accuracy': '0.35977'}



Beginning Epoch: 6


iteration = 100/1978, training loss=0.13569144231732935


iteration = 200/1978, training loss=0.08220080837607384


iteration = 300/1978, training loss=0.0743382296897471


iteration = 400/1978, training loss=0.07345765884965658


iteration = 500/1978, training loss=0.07590086363255978


iteration = 600/1978, training loss=0.07725667621940374


iteration = 700/1978, training loss=0.07330514665693044


iteration = 800/1978, training loss=0.07100003313273191


iteration = 900/1978, training loss=0.07322029992938042


iteration = 1000/1978, training loss=0.07274211207404732


iteration = 1100/1978, training loss=0.07263675475493074


iteration = 1200/1978, training loss=0.07104496899992227


iteration = 1300/1978, training loss=0.06787868166342377


iteration = 1400/1978, training loss=0.06993868697434663


iteration = 1500/1978, training loss=0.06368296856060623


iteration = 1600/1978, training loss=0.07416011344641447


iteration = 1700/1978, training loss=0.06866469360888004


iteration = 1800/1978, training loss=0.0676386708021164


iteration = 1900/1978, training loss=0.033251990545541045


Validating epoch 6


{'accuracy': '0.37252'}



Beginning Epoch: 7


iteration = 100/1978, training loss=0.11280821065418423


iteration = 200/1978, training loss=0.07199223117902875


iteration = 300/1978, training loss=0.06560131853446365


iteration = 400/1978, training loss=0.06628801411017776


iteration = 500/1978, training loss=0.06812881801277398


iteration = 600/1978, training loss=0.06838633552193642


iteration = 700/1978, training loss=0.06762854443863034


iteration = 800/1978, training loss=0.060117428004741666


iteration = 900/1978, training loss=0.06471144842915237


iteration = 1000/1978, training loss=0.06331022689118981


iteration = 1100/1978, training loss=0.061598554532974956


iteration = 1200/1978, training loss=0.06298325520008802


iteration = 1300/1978, training loss=0.056943948082625866


iteration = 1400/1978, training loss=0.06100261759012938


iteration = 1500/1978, training loss=0.05603576060384512


iteration = 1600/1978, training loss=0.06027255056425929


iteration = 1700/1978, training loss=0.059177229655906556


iteration = 1800/1978, training loss=0.056701854448765514


iteration = 1900/1978, training loss=0.029877544688060878


Validating epoch 7


{'accuracy': '0.39518'}



Beginning Epoch: 8


iteration = 100/1978, training loss=0.09701106635853649


iteration = 200/1978, training loss=0.06161156083457172


iteration = 300/1978, training loss=0.05301317749544978


iteration = 400/1978, training loss=0.05560277691110969


iteration = 500/1978, training loss=0.05505758821964264


iteration = 600/1978, training loss=0.056313158702105286


iteration = 700/1978, training loss=0.06102942862547934


iteration = 800/1978, training loss=0.05383901171386242


iteration = 900/1978, training loss=0.05682058679871261


iteration = 1000/1978, training loss=0.056229170523583887


iteration = 1100/1978, training loss=0.052603463353589175


iteration = 1200/1978, training loss=0.0541712680272758


iteration = 1300/1978, training loss=0.04938063709996641


iteration = 1400/1978, training loss=0.049921043775975706


iteration = 1500/1978, training loss=0.0470712739508599


iteration = 1600/1978, training loss=0.05255290045402944


iteration = 1700/1978, training loss=0.04729733840562403


iteration = 1800/1978, training loss=0.05180725861340761


iteration = 1900/1978, training loss=0.02699458645656705


Validating epoch 8


{'accuracy': '0.42847'}



Beginning Epoch: 9


iteration = 100/1978, training loss=0.08100020982325078


iteration = 200/1978, training loss=0.05105061963200569


iteration = 300/1978, training loss=0.04674410436302424


iteration = 400/1978, training loss=0.04697095594368875


iteration = 500/1978, training loss=0.047550665624439714


iteration = 600/1978, training loss=0.04873140565119684


iteration = 700/1978, training loss=0.048862017346546056


iteration = 800/1978, training loss=0.0435902491165325


iteration = 900/1978, training loss=0.048015792965888975


iteration = 1000/1978, training loss=0.04770342730917036


iteration = 1100/1978, training loss=0.043345575071871284


iteration = 1200/1978, training loss=0.04916356784291565


iteration = 1300/1978, training loss=0.04212380792014301


iteration = 1400/1978, training loss=0.043063826514407995


iteration = 1500/1978, training loss=0.042936028130352494


iteration = 1600/1978, training loss=0.04608893803320825


iteration = 1700/1978, training loss=0.0419924679864198


iteration = 1800/1978, training loss=0.04456443225964904


iteration = 1900/1978, training loss=0.024463521731086076


Validating epoch 9


{'accuracy': '0.43555'}



Beginning Epoch: 10


iteration = 100/1978, training loss=0.07003318023867905


iteration = 200/1978, training loss=0.04573654701933265


iteration = 300/1978, training loss=0.041204644832760096


iteration = 400/1978, training loss=0.042234270041808485


iteration = 500/1978, training loss=0.0408456514403224


iteration = 600/1978, training loss=0.040423614932224156


iteration = 700/1978, training loss=0.04230791201815009


iteration = 800/1978, training loss=0.04082719171885401


iteration = 900/1978, training loss=0.04097352245822549


iteration = 1000/1978, training loss=0.040047905934043226


iteration = 1100/1978, training loss=0.03791843213140964


iteration = 1200/1978, training loss=0.04065126820467412


iteration = 1300/1978, training loss=0.037760563269257544


iteration = 1400/1978, training loss=0.039470827532932164


iteration = 1500/1978, training loss=0.03501081523485482


iteration = 1600/1978, training loss=0.0393444303330034


iteration = 1700/1978, training loss=0.03667577457847074


iteration = 1800/1978, training loss=0.04068445141427219


iteration = 1900/1978, training loss=0.02188964222557843


Validating epoch 10


{'accuracy': '0.46601'}



Beginning Epoch: 11


iteration = 100/1978, training loss=0.05972043084912002


iteration = 200/1978, training loss=0.04184736331459135


iteration = 300/1978, training loss=0.036098110843449834


iteration = 400/1978, training loss=0.03607779195066541


iteration = 500/1978, training loss=0.03600994555279612


iteration = 600/1978, training loss=0.03492350625805557


iteration = 700/1978, training loss=0.037652563084848226


iteration = 800/1978, training loss=0.03338589599821717


iteration = 900/1978, training loss=0.03387762580066919


iteration = 1000/1978, training loss=0.035102459855843336


iteration = 1100/1978, training loss=0.03139636148232967


iteration = 1200/1978, training loss=0.034157380522228775


iteration = 1300/1978, training loss=0.030565300988964737


iteration = 1400/1978, training loss=0.03271256717853248


iteration = 1500/1978, training loss=0.0295759466663003


iteration = 1600/1978, training loss=0.034095889646559956


iteration = 1700/1978, training loss=0.030979993999935687


iteration = 1800/1978, training loss=0.03299978650175035


iteration = 1900/1978, training loss=0.019786165680270643


Validating epoch 11


{'accuracy': '0.46069'}



Beginning Epoch: 12


iteration = 100/1978, training loss=0.04737348987720907


iteration = 200/1978, training loss=0.03230720380321145


iteration = 300/1978, training loss=0.03198290806729347


iteration = 400/1978, training loss=0.031212423099204897


iteration = 500/1978, training loss=0.02912318276707083


iteration = 600/1978, training loss=0.031094205165281892


iteration = 700/1978, training loss=0.03162315003573894


iteration = 800/1978, training loss=0.02881980488076806


iteration = 900/1978, training loss=0.028911909107118845


iteration = 1000/1978, training loss=0.02900033408543095


iteration = 1100/1978, training loss=0.029568684352561832


iteration = 1200/1978, training loss=0.02945324477273971


iteration = 1300/1978, training loss=0.02758526723831892


iteration = 1400/1978, training loss=0.027449767540674658


iteration = 1500/1978, training loss=0.025601823660545052


iteration = 1600/1978, training loss=0.029961733527015894


iteration = 1700/1978, training loss=0.028085175751475618


iteration = 1800/1978, training loss=0.029275262244045735


iteration = 1900/1978, training loss=0.017724340655840932


Validating epoch 12


{'accuracy': '0.48938'}



Beginning Epoch: 13


iteration = 100/1978, training loss=0.042550201420672236


iteration = 200/1978, training loss=0.02766273407265544


iteration = 300/1978, training loss=0.023750703867990525


iteration = 400/1978, training loss=0.024970148392021656


iteration = 500/1978, training loss=0.024653182334732265


iteration = 600/1978, training loss=0.025352146562654526


iteration = 700/1978, training loss=0.02669843568233773


iteration = 800/1978, training loss=0.02294988505775109


iteration = 900/1978, training loss=0.025322399213910104


iteration = 1000/1978, training loss=0.024113861685618757


iteration = 1100/1978, training loss=0.022545432744082064


iteration = 1200/1978, training loss=0.023280334062874317


iteration = 1300/1978, training loss=0.022637509446358308


iteration = 1400/1978, training loss=0.02309002381982282


iteration = 1500/1978, training loss=0.021706558156292886


iteration = 1600/1978, training loss=0.024549993402324617


iteration = 1700/1978, training loss=0.021076346829067914


iteration = 1800/1978, training loss=0.02190012811915949


iteration = 1900/1978, training loss=0.015601940685883164


Validating epoch 13


{'accuracy': '0.49610'}



Beginning Epoch: 14


iteration = 100/1978, training loss=0.03492453345679678


iteration = 200/1978, training loss=0.022767363050952553


iteration = 300/1978, training loss=0.019969663517549634


iteration = 400/1978, training loss=0.02065965937799774


iteration = 500/1978, training loss=0.02153073578607291


iteration = 600/1978, training loss=0.019874964380869643


iteration = 700/1978, training loss=0.020599054760532455


iteration = 800/1978, training loss=0.018803444121731447


iteration = 900/1978, training loss=0.020786337186582387


iteration = 1000/1978, training loss=0.0225677776010707


iteration = 1100/1978, training loss=0.018614335909951478


iteration = 1200/1978, training loss=0.019008715914096683


iteration = 1300/1978, training loss=0.01968729027779773


iteration = 1400/1978, training loss=0.018540152623318137


iteration = 1500/1978, training loss=0.017422752593411132


iteration = 1600/1978, training loss=0.02001362561946735


iteration = 1700/1978, training loss=0.018202193322940728


iteration = 1800/1978, training loss=0.018567728511989116


iteration = 1900/1978, training loss=0.013500047058332711


Validating epoch 14


{'accuracy': '0.51452'}



Beginning Epoch: 15


iteration = 100/1978, training loss=0.02802969782729633


iteration = 200/1978, training loss=0.017916771059390157


iteration = 300/1978, training loss=0.016313808906124903


iteration = 400/1978, training loss=0.01653646384831518


iteration = 500/1978, training loss=0.016357029684004373


iteration = 600/1978, training loss=0.015475614063907414


iteration = 700/1978, training loss=0.016715878715040162


iteration = 800/1978, training loss=0.01469567378400825


iteration = 900/1978, training loss=0.0162994905479718


iteration = 1000/1978, training loss=0.017218886679038405


iteration = 1100/1978, training loss=0.01411176361492835


iteration = 1200/1978, training loss=0.015364012529607863


iteration = 1300/1978, training loss=0.014776291350717657


iteration = 1400/1978, training loss=0.014893494712887333


iteration = 1500/1978, training loss=0.01420695043343585


iteration = 1600/1978, training loss=0.01565031346050091


iteration = 1700/1978, training loss=0.015079890889755915


iteration = 1800/1978, training loss=0.014987405363935977


iteration = 1900/1978, training loss=0.011692948834970594


Validating epoch 15


{'accuracy': '0.51948'}



Beginning Epoch: 16


iteration = 100/1978, training loss=0.023723758103442377


iteration = 200/1978, training loss=0.015894144292687998


iteration = 300/1978, training loss=0.014643155268859119


iteration = 400/1978, training loss=0.01625283892441075


iteration = 500/1978, training loss=0.015607471977709793


iteration = 600/1978, training loss=0.01614462420111522


iteration = 700/1978, training loss=0.017333498692605646


iteration = 800/1978, training loss=0.014322840950335376


iteration = 900/1978, training loss=0.01612983638420701


iteration = 1000/1978, training loss=0.016724072460201567


iteration = 1100/1978, training loss=0.013753307315055282


iteration = 1200/1978, training loss=0.01582771370653063


iteration = 1300/1978, training loss=0.015550453930627555


iteration = 1400/1978, training loss=0.015102622910635546


iteration = 1500/1978, training loss=0.014267541811568663


iteration = 1600/1978, training loss=0.015367429753532634


iteration = 1700/1978, training loss=0.015203796411515213


iteration = 1800/1978, training loss=0.015212351501686499


iteration = 1900/1978, training loss=0.01138807891868055


Validating epoch 16


{'accuracy': '0.51948'}



Beginning Epoch: 17


iteration = 100/1978, training loss=0.02447669481916819


iteration = 200/1978, training loss=0.01664519009180367


iteration = 300/1978, training loss=0.01425170841277577


iteration = 400/1978, training loss=0.01676194371539168


iteration = 500/1978, training loss=0.016159690491622313


iteration = 600/1978, training loss=0.016750810312805696


iteration = 700/1978, training loss=0.016367668338352813


iteration = 800/1978, training loss=0.014967984860995785


iteration = 900/1978, training loss=0.01614946611924097


iteration = 1000/1978, training loss=0.016696997818071393


iteration = 1100/1978, training loss=0.013581203386420384


iteration = 1200/1978, training loss=0.01609652560437098


iteration = 1300/1978, training loss=0.014729958575917407


iteration = 1400/1978, training loss=0.014257713265833446


iteration = 1500/1978, training loss=0.01452192471944727


iteration = 1600/1978, training loss=0.016130530327791347


iteration = 1700/1978, training loss=0.014290297140250914


iteration = 1800/1978, training loss=0.014295119568705559


iteration = 1900/1978, training loss=0.011051367409527302


Validating epoch 17


{'accuracy': '0.51912'}



Beginning Epoch: 18


iteration = 100/1978, training loss=0.024053743311960717


iteration = 200/1978, training loss=0.01612125848827418
