# Text2SPARQL

This is a development workbook for getting the hang of training models.

In [1]:
import os
import torch

os.environ["CUDA_VISIBLE_DEVICES"]="1"
print(f"CUDA Available: {torch.cuda.is_available()}")

CUDA Available: True


In [2]:
DATASET_NAME = "lcquad2"
DATASET_FOLDER = "data"
DATASET_PATH = os.path.join(DATASET_FOLDER, DATASET_NAME)

ACCELERATE_USE = False 
ACCELERATE_STR = "-accelerate" if ACCELERATE_USE else ""

MODEL_NAME = "t5-small" # With t5-small, the non accelerated training works better than accelerated?
MODEL_TYPE = "text2sparql"
MODEL_FULL = f"{MODEL_TYPE}-{MODEL_NAME}-{DATASET_NAME}{ACCELERATE_STR}"

MODEL_FOLDER = "models"
MODEL_PATH = os.path.join(MODEL_FOLDER, MODEL_FULL)

EVALUATION_FOLDER = os.path.join(MODEL_PATH, "evaluations")
CHECKPOINT_FOLDER = os.path.join(MODEL_PATH, "checkpoints")

folders = [MODEL_FOLDER, EVALUATION_FOLDER, CHECKPOINT_FOLDER]

In [3]:
assert os.path.exists(DATASET_PATH)

for folder in folders:
    if not os.path.exists(folder):
        os.makedirs(folder)

## Preprocessing

Bannerjee does some preprocessing of the LCQuAD dataset,
I try to replicate that here.

First we load some files into memory

In [4]:
import json
import pickle
import os
from os.path import join
from pprint import pprint

assert DATASET_PATH.endswith("lcquad2")
lcquad2_dir = DATASET_PATH

# LCQuAD2 entity labels
with open(join(lcquad2_dir, "lcq2_labels.pickle"), "rb") as f:
    labels = pickle.load(f)

pprint("# Sample Entity Labels")
pprint(list(labels[k] for k in ['q51366', 'q15779', 'q23906217']))
print()

# Training Data has exactly the same file size as the official one
with open(join(lcquad2_dir, "train.json")) as f:
    data = json.load(f)

pprint("# Sample Question, Query")
pprint(list(data[1][k] for k in ["question", "sparql_wikidata"]))
print()

# Load the relation labels
with open(join(lcquad2_dir, "relations.json")) as f:
    rel_labels = json.load(f)

pprint("# Sample Relation Labels")
pprint(list(rel_labels[k] for k in ["P10", "P6"]))
print()

# Load the sparql vocabulary
with open(join(lcquad2_dir, "vocab.txt")) as f:
    vocab = list(map(lambda x: x.strip(), f.readlines()))
    vocab.append('null') # not too sure what this is for

pprint("# Sample SPARQL Vocab")
pprint(vocab[1:5])
print()

'# Sample Entity Labels'
['Chandrasekhar limit', 'toluene', 'Olympic victor, stadion']



'# Sample Question, Query'
["Who is the child of Ranavalona I's husband?",
 'SELECT ?answer WHERE { wd:Q169794 wdt:P26 ?X . ?X wdt:P22 ?answer}']

'# Sample Relation Labels'
['video', 'head of government']

'# Sample SPARQL Vocab'
['(', 'rdfs:label', 'by', 'ask']



Some labels are missing from the lcq2_labels.pickle,
and cause run time errors in the script.
We add them back here to avoid this problem
(though ideally we should find a better label to entity map)

In [5]:
labels['quercia']='null'
labels['qui']='null'
labels['}']='null'
labels['p5122'] = 'Ontario public library ID'.lower()
labels['p3888']='Boijmans artist ID'
labels['p5388']='Bulgarian Antarctic Gazetteer ID'
labels['p5151']='Israel Film Fund ID'
labels['p3633']='British Museum place ID'
labels['p1733']='Steam application ID'

Next we assign vocabularies to tokens.

In [6]:
vocab_dict = {}
for i, text in enumerate(vocab):
    vocab_dict[text] = f'<extra_id_{i}>'

pprint("# Sample Masked Tokens")
pprint([vocab_dict[k] for k in ['"', 'null', '?value']])
print()

'# Sample Masked Tokens'
['<extra_id_0>', '<extra_id_60>', '<extra_id_16>']



And adjust some labels to use the null token

In [7]:
for k in labels:
    if labels[k] is None:
        labels[k] = vocab_dict['null']
        # print(f'{k}: {labels[k]}')


## Some Useful Functions

In [8]:
def xprint(thing):
    pprint(thing)
    return thing

def compare(x, y=None):

    def _compare(z):
        pprint(f"Old: {x}")
        pprint(f"New: {z}")
    
    if not y:
        return lambda z : _compare(z)
    else:
        return lambda : _compare(y)

Now we reformate the dataset
- Note: it seems that Bannerjee replaces training data
that has no questions with the Natural Language version.

For reference these are the definition of each feature,
taken **verbatim** from their [homepage](https://sda.tech/projects/lc-quad-2/)
```
{
     "uid": a unique id number
     "sparql_wikidata": a sparql fro wikidata endpoint
     "sparql_dbpedia18": a sparql for DBpedia endpoint which has wikidata information
     "NNQT_question": system generated question,
     "question": Verbalised question,
     "paraphrased_question": paraphrased version of the verbalised question,
     "template_id": id for the template
     "template": template discription    
}
```

In [9]:
import re

data_x, data_y = [], []
data_x_shuffle = []

for i, inst in enumerate(data):
    wikisparql = inst['sparql_wikidata']
    if inst['question'] is None:
        question = inst['NNQT_question']
    else:
        question = inst['question']
    question = question.replace('{', '').replace('}', '')

    match_str = r"\'(.*?)\'"
    hashi = {}
    # To mask filter literals
    if re.search(match_str, wikisparql):
        lits=re.findall(match_str,wikisparql)
        # print(f"Old: {wikisparql}")
        for j, lit in enumerate(lits):
            idx = j + 1
            wikisparql = wikisparql.replace(f"'{lit.strip()}'", f"'###{idx}'")
            hashi[f'###{idx}'] = lit.strip()
        # print(f"New: {wikisparql}")
    
    # there is an extra space beacuse of http: and https:
    sparql = wikisparql.replace('(',' ( ').replace(')',' ) ') \
    .replace('{',' { '). \
    replace('}',' } ').replace('wd:','wd: ').replace('wdt:','wdt: '). \
    replace(' p:',' p: ').replace(' ps:',' ps: ').replace('pq:','pq: '). \
    replace(',',' , ').replace(",'",", '").replace("'"," ' ").replace('.',' . '). \
    replace('=',' = ').replace('  ',' ').lower()
    
    # print(f"sparql: {sparql}")
    # select distinct ?obj where { wd: q188920 wdt: p2813 ?obj . ?obj wdt: p31 wd: q1002697 } 

    _ents = re.findall( r'wd: (?:.*?) ', sparql) # ['wd: q188920 ', 'wd: q1002697 ']
    _ents_for_labels = re.findall( r'wd: (.*?) ', sparql) # ['q188920', 'q1002697']
    
    _rels = re.findall( r'wdt: (?:.*?) ',sparql)
    _rels += re.findall( r' p: (?:.*?) ',sparql)
    _rels += re.findall( r' ps: (?:.*?) ',sparql)
    _rels += re.findall( r'pq: (?:.*?) ',sparql) # ['wdt: p2813 ', 'wdt: p31 ']
    # Missing rdfs:label, not sure if that is important
    
    _rels_for_labels = re.findall( r'wdt: (.*?) ',sparql)
    _rels_for_labels += re.findall( r' p: (.*?) ',sparql)
    _rels_for_labels += re.findall( r' ps: (.*?) ',sparql)
    _rels_for_labels += re.findall( r'pq: (.*?) ',sparql) # ['p2813', 'p31']

    # print(_rels)
    # print(_rels_for_labels)
    for j in range(len(_ents_for_labels)):
        # print('Q'+_ents_for_labels[j][1:])
        if '}' in _ents[j]: # Entry 12686 is malformed
            # pprint(inst)
            # pprint(_ents)
            _ents[j]=''
        _ents[j]=_ents[j]+labels[_ents_for_labels[j]]+' '
        # wd: q36970 -> wd: q36970 Jackie Chan

    for j in range(len(_rels_for_labels)):
        if _rels_for_labels[j].upper() not in rel_labels:
            # For some reasons the original preprocess.py didnt convert to upper?
            rel_labels['P'+_rels_for_labels[j][1:]]=vocab_dict['null']
        _rels[j]=_rels[j]+rel_labels['P'+_rels_for_labels[j][1:]]+' '
        # wdt: p26 -> wdt: p26 spouse
    # print(_ents)

    _ents+=_rels
    # random.shuffle(_ents)
    # random.shuffle(_rels)

    # move to a function
    newvars = ['?vr0','?vr1','?vr2','?vr3','?vr4','?vr5']
    sparql_split = sparql.split()
    variables = set([x for x in sparql_split if x[0] == '?'])
    for j, var in enumerate(sorted(variables)):
        if var == '?maskvar1': #???
            print(sparql)
            continue
        sparql = sparql.replace(var, newvars[j]) # Normalize var names
    
    # old = compare(sparql)

    split = sparql.split()
    
    for j, item in enumerate(split):
        if item in vocab_dict:
            split[j] = vocab_dict[item]
    
    split = ' '.join(split).strip()
    # old(split)

    for keys in hashi:
        split = split.replace(keys, hashi[keys])
    
    data_y.append(split)

    for rel in _ents:
        rel=rel.replace('wd:',vocab_dict['wd:']+' ')
        rel=rel.replace('wdt:',vocab_dict['wdt:']+' ')
        old = compare(rel)
        if 'p:' in rel:
            if 'http' in rel:
                print(inst) # There are no more http
            rel=rel.replace('p:',vocab_dict['p:']+' ')
            # old(rel)
        rel=rel.replace('ps:',vocab_dict['ps:']+' ')
        rel=rel.replace('pq:',vocab_dict['pq:']+' ')
        question=question+' '+vocab_dict['[DEF]']+' '+rel
    data_x.append(question.strip())

assert len(data_x) == len(data_y)

Now we need to save the data.

In [10]:
import pandas as pd
df = pd.DataFrame({
    'x': data_x,
    'y': data_y,
    })

save_file = join(lcquad2_dir, 'preprocessed_data.csv')
df.to_csv(save_file)

In [11]:
df.head()

Unnamed: 0,x,y
0,What periodical literature does Delta Air Line...,<extra_id_6> <extra_id_21> <extra_id_39> <extr...
1,Who is the child of Ranavalona I's husband? <e...,<extra_id_6> <extra_id_39> <extra_id_19> <extr...
2,Is it true Jeff_Bridges occupation Lane Chandl...,<extra_id_4> <extra_id_19> <extra_id_33> <extr...
3,What is the pre-requisite of phase matter of G...,<extra_id_6> <extra_id_39> <extra_id_19> <extr...
4,Which is the operating income for Qantas? <ext...,<extra_id_6> <extra_id_21> <extra_id_39> <extr...


# Model

Now we need to generate a T5 model for fine tuning

In [12]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import transformers
# from accelerate import init_empty_weights, dispatch_model, infer_auto_device_map, load_checkpoint_and_dispatch
from huggingface_hub import hf_hub_download
import torch
import torch.nn as nn
import torch.optim as optim
import random
import math

model_name = "t5-small"

class Model(nn.Module):
    def __init__(self, model_name):
        super(Model, self).__init__()
        self.model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")

        pprint("# Model Device Map")
        pprint(self.model.hf_device_map)
        print()
    
    def forward(self, input):
        outputs = self.model(
            input_ids = input['input_ids'],
            labels = input['labels'],
            attention_mask = input['attention_mask'],
            output_hidden_states = True,
            output_attentions = True
        )

        return outputs.loss

# model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto") # Device_map splits the load over multiple GPUs, this seems to be quite new

And the Trainer

In [13]:
import tqdm

class Train:
    def __init__(self,data,data_val, model_name):
        self.data=data
        self.dev_data=data_val

        self.tokenizer=T5Tokenizer.from_pretrained(model_name)
        self.model=Model(model_name)
        # self.model.to(f'cuda:{self.model.device_ids[0]}')  
           
        # Modify lr?
        self.optimizer=optim.AdamW(self.model.parameters(),lr=0.0015)
        self.lr_scheduler=transformers. \
        get_polynomial_decay_schedule_with_warmup(self.optimizer, 5000, 30000,power=0.5)

        self.iters=60000
        self.print_every=100
        self.eval_every=8000
        # self.num_gpus=1
        self.eval_bs=6
        self.bs=5
        self.back_propogate=10
        
        self.train()

    def generate_batch(self):
        output=random.sample(self.data,self.bs)
        inp,label=[],[]
        for dat in output:
            inp.append(dat[0])
            label.append(dat[1])

        return inp,label

    def preprocess_function(self,inputs, targets):
        model_inputs=self.tokenizer(inputs, padding=True, \
                        return_tensors='pt',max_length=512, truncation=True)
        labels=self.tokenizer(targets,padding=True,max_length=512, truncation=True)

        if True:
            labels["input_ids"] = [
            [(l if l != self.tokenizer.pad_token_id else -100) \
             for l in label] for label in labels["input_ids"]
            ]
        labels['input_ids']=torch.tensor(labels['input_ids'])
        model_inputs["labels"]=labels["input_ids"].to(0)
        model_inputs["input_ids"]=model_inputs["input_ids"].to(0)
        model_inputs["attention_mask"]=model_inputs["attention_mask"].to(0)

        return model_inputs

    def val(self,o):
        pprint('# Validating...')
        self.model.eval()
        acc,bs,i=0,self.eval_bs,0
        saver=[]

        # progress_bar = tqdm.auto.tqdm(range(math.ceil(len(self.dev_data) / bs)))
        # progress_bar.set_description(f"Eval {o}")
           
        while i<len(self.dev_data):
            bs_=min(bs,len(self.dev_data)-i)
            if i % (100) == 0:
                print(f"Evaluation {i}/{len(self.dev_data)}")
            i+=bs_
            inp,label=[],[]
            for j in range(i-bs_,i):
                inp.append(self.dev_data[j][0])
                label.append(self.dev_data[j][1])
            

            input=self.preprocess_function(inp,label)

            output=self.model.model.generate(input_ids=input['input_ids'],
                      num_beams=10,attention_mask=input['attention_mask'], \
                        early_stopping=True, max_length=200,output_hidden_states=True,output_attentions=True)
            
            out=self.tokenizer.batch_decode(output,skip_special_tokens=False)

            for k in range(len(out)):
                #print(out[k].replace('<pad>','').replace('</s>','').strip())
                a1=out[k].replace('<pad>','').replace('</s>','').replace('<unk>','').replace('<s>','').strip().replace(' ','')
                a2=label[k].strip().replace(' ','')
                # print(a1, '       ', a2)
                saver.append({'input':inp[k],'gold':label[k].strip(),'generated':out[k].replace('<pad>',''). \
                      replace('</s>','').replace('<unk>','').replace('<s>','').strip()})
                if a1==a2:
                    acc+=1; #print('ttt')

            # progress_bar.update(1)
        
        file=open(join(EVALUATION_FOLDER, 'dev_result_'+str(o)+'.json'),'w')
        json.dump(saver,file)
        pprint(f'# Saved {file.name}')
        file.close()
        return 100*acc/len(self.dev_data)

    def train(self):

        scalar=0
        for i in range(self.iters):
            self.model.train()
            inp,label=self.generate_batch()
            input=self.preprocess_function(inp,label)
            loss=self.model(input)

            scalar+=loss.mean().item()
            if(i+1)%self.print_every==0:
                print('iteration={}, training loss={}'.format(i+1,scalar/self.print_every))
                scalar=0
            if(i + 1)%self.eval_every==0:
                acc=self.val(i+1)
                print('validation acc={}'.format(acc))

                torch.save(self.model.state_dict(),
                       join(CHECKPOINT_FOLDER,'checkpoint_'+str(i + 1)+'.pth'))
                
            loss/=self.back_propogate
            loss.mean().backward()
            if (i+1)%self.back_propogate:
                self.optimizer.step();
                self.lr_scheduler.step();
                self.optimizer.zero_grad()


In [14]:
data = df.values.tolist()
total_len = len(data)
final_data, final_data_dev = data[:total_len//10], data[total_len//10:]
pprint("# Beginning training")
trainer = Train(final_data, final_data_dev, "t5-small")

'# Beginning training'


'# Model Device Map'
{'': 0}



iteration=100, training loss=11.68093433380127


iteration=200, training loss=8.518257756233215


iteration=300, training loss=5.029802253246308


iteration=400, training loss=3.461314232349396


iteration=500, training loss=2.8436633574962618


iteration=600, training loss=2.4745264518260957


iteration=700, training loss=2.171669164896011


iteration=800, training loss=2.0378014171123504


iteration=900, training loss=1.897230726480484


iteration=1000, training loss=1.7125919997692107


iteration=1100, training loss=1.5562376964092255


iteration=1200, training loss=1.4146362733840943


iteration=1300, training loss=1.2593027675151824


iteration=1400, training loss=1.2143048638105391


iteration=1500, training loss=1.0890581375360489


iteration=1600, training loss=1.0457393914461135


iteration=1700, training loss=0.9536520016193389


iteration=1800, training loss=0.8556354784965515


iteration=1900, training loss=0.7776207837462425


iteration=2000, training loss=0.7281395408511162


iteration=2100, training loss=0.6402705600857734


iteration=2200, training loss=0.5694461117684841


iteration=2300, training loss=0.5467203672230244


iteration=2400, training loss=0.5002071578800679


iteration=2500, training loss=0.4771059563755989


iteration=2600, training loss=0.4164818511903286


iteration=2700, training loss=0.3959523817151785


iteration=2800, training loss=0.3533776617795229


iteration=2900, training loss=0.3205835447460413


iteration=3000, training loss=0.29292478643357756


iteration=3100, training loss=0.25061223044991493


iteration=3200, training loss=0.22458880811929702


iteration=3300, training loss=0.21164320416748525


iteration=3400, training loss=0.21521639987826346


iteration=3500, training loss=0.20626938857138158


iteration=3600, training loss=0.18509655974805356


iteration=3700, training loss=0.1805964794754982


iteration=3800, training loss=0.18615600045770406


iteration=3900, training loss=0.18740014426410198


iteration=4000, training loss=0.15377637673169375


iteration=4100, training loss=0.12840659299865365


iteration=4200, training loss=0.10762018034234642


iteration=4300, training loss=0.10043066561222076


iteration=4400, training loss=0.11161686838604509


iteration=4500, training loss=0.20100881585851313


iteration=4600, training loss=0.12783961219713091


iteration=4700, training loss=0.11476004648953676


iteration=4800, training loss=0.0946281873062253


iteration=4900, training loss=0.09457142231985927


iteration=5000, training loss=0.11044510031118988


iteration=5100, training loss=0.12904877767898143


iteration=5200, training loss=0.09858143635094166


iteration=5300, training loss=0.10570141004398464


iteration=5400, training loss=0.12095147122628987


iteration=5500, training loss=0.10651561226230115


iteration=5600, training loss=0.09951349444687367


iteration=5700, training loss=0.08561947772279382


iteration=5800, training loss=0.1088585552154109


iteration=5900, training loss=0.17774808701127767


iteration=6000, training loss=0.10273528386838734


iteration=6100, training loss=0.06922938629519194


iteration=6200, training loss=0.07376599319279194


iteration=6300, training loss=0.06951762332580984


iteration=6400, training loss=0.06036932306364179


iteration=6500, training loss=0.06682614974677563


iteration=6600, training loss=0.06299650401808321


iteration=6700, training loss=0.09015592405572534


iteration=6800, training loss=0.07906168318819255


iteration=6900, training loss=0.058951009064912796


iteration=7000, training loss=0.0517506666155532


iteration=7100, training loss=0.05544580911751837


iteration=7200, training loss=0.06145408872514963


iteration=7300, training loss=0.05960888257715851


iteration=7400, training loss=0.061040404862724246


iteration=7500, training loss=0.051557907229289414


iteration=7600, training loss=0.05974997684825212


iteration=7700, training loss=0.04540822071954608


iteration=7800, training loss=0.05488188307965174


iteration=7900, training loss=0.04934733151923865


iteration=8000, training loss=0.02944453700678423
'# Validating...'
Evaluation 0/21762


Evaluation 300/21762


Evaluation 600/21762


Evaluation 900/21762


Evaluation 1200/21762


Evaluation 1500/21762


Evaluation 1800/21762


Evaluation 2100/21762


Evaluation 2400/21762


Evaluation 2700/21762


Evaluation 3000/21762


Evaluation 3300/21762


Evaluation 3600/21762


Evaluation 3900/21762


Evaluation 4200/21762


Evaluation 4500/21762


Evaluation 4800/21762


Evaluation 5100/21762


Evaluation 5400/21762


Evaluation 5700/21762


Evaluation 6000/21762


Evaluation 6300/21762


Evaluation 6600/21762


Evaluation 6900/21762


Evaluation 7200/21762


Evaluation 7500/21762


Evaluation 7800/21762


Evaluation 8100/21762


Evaluation 8400/21762


Evaluation 8700/21762


Evaluation 9000/21762


Evaluation 9300/21762


Evaluation 9600/21762


Evaluation 9900/21762


Evaluation 10200/21762


Evaluation 10500/21762


Evaluation 10800/21762


Evaluation 11100/21762


Evaluation 11400/21762


Evaluation 11700/21762


Evaluation 12000/21762


Evaluation 12300/21762


Evaluation 12600/21762


Evaluation 12900/21762


Evaluation 13200/21762


Evaluation 13500/21762


Evaluation 13800/21762


Evaluation 14100/21762


Evaluation 14400/21762


Evaluation 14700/21762


Evaluation 15000/21762


Evaluation 15300/21762


Evaluation 15600/21762


Evaluation 15900/21762


Evaluation 16200/21762


Evaluation 16500/21762


Evaluation 16800/21762


Evaluation 17100/21762


Evaluation 17400/21762


Evaluation 17700/21762


Evaluation 18000/21762


Evaluation 18300/21762


Evaluation 18600/21762


Evaluation 18900/21762


Evaluation 19200/21762


Evaluation 19500/21762


Evaluation 19800/21762


Evaluation 20100/21762


Evaluation 20400/21762


Evaluation 20700/21762


Evaluation 21000/21762


Evaluation 21300/21762


Evaluation 21600/21762


'# Saved models/text2sparql-t5-small-lcquad2/evaluations/dev_result_8000.json'
validation acc=73.3847991912508


iteration=8100, training loss=0.03294706489192322


iteration=8200, training loss=0.03755561514757574


iteration=8300, training loss=0.04994904987514019


iteration=8400, training loss=0.05518811824731529


iteration=8500, training loss=0.04169673192547634


iteration=8600, training loss=0.03691276517929509


iteration=8700, training loss=0.03841483066906221


iteration=8800, training loss=0.05018469580099918


iteration=8900, training loss=0.038026418818626555


iteration=9000, training loss=0.03740756177227013


iteration=9100, training loss=0.04271751801483333


iteration=9200, training loss=0.046319140224950386


iteration=9300, training loss=0.04580391664640047


iteration=9400, training loss=0.04871678913244978


iteration=9500, training loss=0.031119341609301045


iteration=9600, training loss=0.031649535668548195


iteration=9700, training loss=0.04432465181453153


iteration=9800, training loss=0.03954547755420208


iteration=9900, training loss=0.029651777610415594


iteration=10000, training loss=0.02869790207594633


iteration=10100, training loss=0.03135451696929522


iteration=10200, training loss=0.03130619916482828


iteration=10300, training loss=0.043044542209245265


iteration=10400, training loss=0.04098012858070433


iteration=10500, training loss=0.047186924633570014


iteration=10600, training loss=0.06743550366489216


iteration=10700, training loss=0.043831586469896135


iteration=10800, training loss=0.03871364279650152


iteration=10900, training loss=0.05622860243078321


iteration=11000, training loss=0.057349178912118076


iteration=11100, training loss=0.03783080320456065


iteration=11200, training loss=0.03862080397317186


iteration=11300, training loss=0.034215473603690046


iteration=11400, training loss=0.04181267573963851


iteration=11500, training loss=0.031207858286798


iteration=11600, training loss=0.028323133358499036


iteration=11700, training loss=0.04076180849340744


iteration=11800, training loss=0.03019289775053039


iteration=11900, training loss=0.020805408257292584


iteration=12000, training loss=0.02636081784963608


iteration=12100, training loss=0.037046904641319996


iteration=12200, training loss=0.030314526164438575


iteration=12300, training loss=0.01860415267525241


iteration=12400, training loss=0.0199753880337812


iteration=12500, training loss=0.022800073347752914


iteration=12600, training loss=0.01983498790941667


iteration=12700, training loss=0.025369206922478044


iteration=12800, training loss=0.0274547673529014


iteration=12900, training loss=0.025067756068019663


iteration=13000, training loss=0.03456396237597801


iteration=13100, training loss=0.03594946165685542


iteration=13200, training loss=0.0344157694443129


iteration=13300, training loss=0.026168820175807923


iteration=13400, training loss=0.027886954582063483


iteration=13500, training loss=0.0347567981807515


iteration=13600, training loss=0.018101534108282066


iteration=13700, training loss=0.016649721899593715


iteration=13800, training loss=0.01621468555327738


iteration=13900, training loss=0.01938500930205919


iteration=14000, training loss=0.010973504139692523


iteration=14100, training loss=0.012482370805810206


iteration=14200, training loss=0.020017139817064163


iteration=14300, training loss=0.034349256692221386


iteration=14400, training loss=0.026009750380180777


iteration=14500, training loss=0.016063123291532973


iteration=14600, training loss=0.017731243299786002


iteration=14700, training loss=0.021703170516411775


iteration=14800, training loss=0.023382003159495072


iteration=14900, training loss=0.021049112039036116


iteration=15000, training loss=0.026902333309990353


iteration=15100, training loss=0.0400279939500615


iteration=15200, training loss=0.03168815111974254


iteration=15300, training loss=0.01116099199221935


iteration=15400, training loss=0.012029863004863727


iteration=15500, training loss=0.032451396900287366


iteration=15600, training loss=0.025687736253021284


iteration=15700, training loss=0.019196195572149008


iteration=15800, training loss=0.011753193220647518


iteration=15900, training loss=0.018033917599823326


iteration=16000, training loss=0.01535936556669185
'# Validating...'
Evaluation 0/21762


Evaluation 300/21762


Evaluation 600/21762


Evaluation 900/21762


Evaluation 1200/21762


Evaluation 1500/21762


Evaluation 1800/21762


Evaluation 2100/21762


Evaluation 2400/21762


Evaluation 2700/21762


Evaluation 3000/21762


Evaluation 3300/21762


Evaluation 3600/21762


Evaluation 3900/21762


Evaluation 4200/21762


Evaluation 4500/21762


Evaluation 4800/21762


Evaluation 5100/21762


Evaluation 5400/21762


Evaluation 5700/21762


Evaluation 6000/21762


Evaluation 6300/21762


Evaluation 6600/21762


Evaluation 6900/21762


Evaluation 7200/21762


Evaluation 7500/21762


Evaluation 7800/21762


Evaluation 8100/21762


Evaluation 8400/21762


Evaluation 8700/21762


Evaluation 9000/21762


Evaluation 9300/21762


Evaluation 9600/21762


Evaluation 9900/21762


Evaluation 10200/21762


Evaluation 10500/21762


Evaluation 10800/21762


Evaluation 11100/21762


Evaluation 11400/21762


Evaluation 11700/21762


Evaluation 12000/21762


Evaluation 12300/21762


Evaluation 12600/21762


Evaluation 12900/21762


Evaluation 13200/21762


Evaluation 13500/21762


Evaluation 13800/21762


Evaluation 14100/21762


Evaluation 14400/21762


Evaluation 14700/21762


Evaluation 15000/21762


Evaluation 15300/21762


Evaluation 15600/21762


Evaluation 15900/21762


Evaluation 16200/21762


Evaluation 16500/21762


Evaluation 16800/21762


Evaluation 17100/21762


Evaluation 17400/21762


Evaluation 17700/21762


Evaluation 18000/21762


Evaluation 18300/21762


Evaluation 18600/21762


Evaluation 18900/21762


Evaluation 19200/21762


Evaluation 19500/21762


Evaluation 19800/21762


Evaluation 20100/21762


Evaluation 20400/21762


Evaluation 20700/21762


Evaluation 21000/21762


Evaluation 21300/21762


Evaluation 21600/21762


'# Saved models/text2sparql-t5-small-lcquad2/evaluations/dev_result_16000.json'
validation acc=77.92022792022792


iteration=16100, training loss=0.01193765327159781


iteration=16200, training loss=0.008324217274785042


iteration=16300, training loss=0.014461278868257068


iteration=16400, training loss=0.010518124184454792


iteration=16500, training loss=0.014188007708871736


iteration=16600, training loss=0.014196348193800078


iteration=16700, training loss=0.013074350571259856


iteration=16800, training loss=0.011070950418652501


iteration=16900, training loss=0.016525650286348538


iteration=17000, training loss=0.012030173663224559


iteration=17100, training loss=0.014689188578049652


iteration=17200, training loss=0.012740716100961436


iteration=17300, training loss=0.0216739118041005


iteration=17400, training loss=0.01963382113201078


iteration=17500, training loss=0.011876624500146135


iteration=17600, training loss=0.014838150940922787


iteration=17700, training loss=0.044888900635996834


iteration=17800, training loss=0.01671699766360689


iteration=17900, training loss=0.012243973266449756


iteration=18000, training loss=0.01556724679947365


iteration=18100, training loss=0.014973615964991041


iteration=18200, training loss=0.02451908033603104


iteration=18300, training loss=0.021423860291251914


iteration=18400, training loss=0.021321707180468365


iteration=18500, training loss=0.014959254361747299


iteration=18600, training loss=0.01709485085099004


iteration=18700, training loss=0.010481261933746282


iteration=18800, training loss=0.026783581983181648


iteration=18900, training loss=0.011206911240296904


iteration=19000, training loss=0.010493223184603266


iteration=19100, training loss=0.012298255498171784


iteration=19200, training loss=0.006531550422369037


iteration=19300, training loss=0.009858416690985906


iteration=19400, training loss=0.012718421403405955


iteration=19500, training loss=0.01073770678602159


iteration=19600, training loss=0.0051533809823740735


iteration=19700, training loss=0.00783466008520918


iteration=19800, training loss=0.011227096386137418


iteration=19900, training loss=0.007533691846620058


iteration=20000, training loss=0.018195124969352036


iteration=20100, training loss=0.02144956966396421


iteration=20200, training loss=0.014345587427378632


iteration=20300, training loss=0.010421564997523091


iteration=20400, training loss=0.011875393231166527


iteration=20500, training loss=0.0030130602829740384


iteration=20600, training loss=0.016737293678015703


iteration=20700, training loss=0.015252510691061616


iteration=20800, training loss=0.008747603224328486


iteration=20900, training loss=0.007446558859955985


iteration=21000, training loss=0.00938962906730012


iteration=21100, training loss=0.007050451752729714


iteration=21200, training loss=0.011446439155406551


iteration=21300, training loss=0.007832081243832363


iteration=21400, training loss=0.015760464143531862


iteration=21500, training loss=0.011548353010439313


iteration=21600, training loss=0.010488668816688005


iteration=21700, training loss=0.0044497172800038245


iteration=21800, training loss=0.0036485180861200207


iteration=21900, training loss=0.013129704037273768


iteration=22000, training loss=0.007890710095234682


iteration=22100, training loss=0.01584499355085427


iteration=22200, training loss=0.01276922088523861


iteration=22300, training loss=0.006269786341581493


iteration=22400, training loss=0.005167481104726903


iteration=22500, training loss=0.0019485134939168348


iteration=22600, training loss=0.007105796065661707


iteration=22700, training loss=0.006374271776148816


iteration=22800, training loss=0.002816573168747709


iteration=22900, training loss=0.0089566294697579


iteration=23000, training loss=0.008072454308712623


iteration=23100, training loss=0.009565999795595416


iteration=23200, training loss=0.005816492479934823


iteration=23300, training loss=0.0065722160549921685


iteration=23400, training loss=0.01215682912763441


iteration=23500, training loss=0.023103550158266442


iteration=23600, training loss=0.008431106352363714


iteration=23700, training loss=0.005582983728818363


iteration=23800, training loss=0.0031521035970217782


iteration=23900, training loss=0.00726623789727455


iteration=24000, training loss=0.00892301041909377
'# Validating...'
Evaluation 0/21762


Evaluation 300/21762


Evaluation 600/21762


Evaluation 900/21762


Evaluation 1200/21762


Evaluation 1500/21762


Evaluation 1800/21762


Evaluation 2100/21762


Evaluation 2400/21762


Evaluation 2700/21762


Evaluation 3000/21762


Evaluation 3300/21762


Evaluation 3600/21762


Evaluation 3900/21762


Evaluation 4200/21762


Evaluation 4500/21762


Evaluation 4800/21762


Evaluation 5100/21762


Evaluation 5400/21762


Evaluation 5700/21762


Evaluation 6000/21762


Evaluation 6300/21762


Evaluation 6600/21762


Evaluation 6900/21762


Evaluation 7200/21762


Evaluation 7500/21762


Evaluation 7800/21762


Evaluation 8100/21762


Evaluation 8400/21762


Evaluation 8700/21762


Evaluation 9000/21762


Evaluation 9300/21762


Evaluation 9600/21762


Evaluation 9900/21762


Evaluation 10200/21762


Evaluation 10500/21762


Evaluation 10800/21762


Evaluation 11100/21762


Evaluation 11400/21762


Evaluation 11700/21762


Evaluation 12000/21762


Evaluation 12300/21762


Evaluation 12600/21762


Evaluation 12900/21762


Evaluation 13200/21762


Evaluation 13500/21762


Evaluation 13800/21762


Evaluation 14100/21762


Evaluation 14400/21762


Evaluation 14700/21762


Evaluation 15000/21762
