In [None]:
# !pip install datasets

Collecting datasets
  Using cached datasets-3.1.0-py3-none-any.whl (480 kB)
Collecting pyarrow>=15.0.0
  Using cached pyarrow-18.0.0.tar.gz (1.1 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
[?25hCollecting xxhash
  Using cached xxhash-3.5.0-cp39-cp39-macosx_10_9_x86_64.whl (31 kB)
Collecting multiprocess<0.70.17
  Using cached multiprocess-0.70.16-py39-none-any.whl (133 kB)
Collecting dill<0.3.9,>=0.3.0
  Using cached dill-0.3.8-py3-none-any.whl (116 kB)
Collecting requests>=2.32.2
  Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Collecting fsspec[http]<=2024.9.0,>=2023.1.0
  Using cached fsspec-2024.9.0-py3-none-any.whl (179 kB)
Collecting huggingface-hub>=0.23.0
  Using cached huggingface_hub-0.26.2-py3-none-any.whl (447 kB)
Collecting tqdm>=4.66.3
  Using cached tqdm-4.66.6-py3-none-any.whl (78 kB)
Building wheels for collected packages: pyarrow
  Building wheel

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast
from datasets import load_dataset
from tokenizers import Tokenizer
from tokenizers.models import WordPiece
from tokenizers import normalizers
from tokenizers.normalizers import NFD, Lowercase, StripAccents
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.trainers import WordPieceTrainer
from tokenizers import decoders
from pathlib import Path
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
import random
import gc

ModuleNotFoundError: No module named 'datasets'

In [None]:
%env TOKENIZERS_PARALLELISM = false

env: TOKENIZERS_PARALLELISM=false


In [None]:
from datasets import load_dataset
import ast

def prepare_dataset(batch_size=32, max_length=128, train_size=10000, val_size=1000, test_size=1000):
    dataset = load_dataset("PedroCJardim/QASports")

    train_dataset = dataset["train"].select(range(min(train_size, len(dataset["train"]))))
    val_dataset = dataset["validation"].select(range(min(val_size, len(dataset["validation"]))))
    test_dataset = dataset["test"].select(range(min(test_size, len(dataset["test"]))))

    train_context = pd.Series(train_dataset["context"])
    val_context = pd.Series(val_dataset["context"])
    # test_context = pd.Series(test_dataset["context"])

    # full_df = pd.concat([train_context,val_context,test_context])
    full_df = pd.concat([train_context,val_context])
    full = list(full_df)
    random.shuffle(full)

    def preprocess_function(examples):
        questions_preprocess = [q.strip() for q in examples["question"] if q]
        answers_preprocess = [ast.literal_eval(a)["text"] for a in examples["answer"] if a]

        df_preprocess = pd.DataFrame(questions_preprocess, columns=['questions'])
        df_preprocess['answers'] = answers_preprocess
        df_preprocess = df_preprocess[df_preprocess["answers"] != ""]
        return df_preprocess

    train_dataset = preprocess_function(train_dataset)
    val_dataset = preprocess_function(val_dataset)
    test_dataset = preprocess_function(test_dataset)
    
    # Reset the index of the DataFrame
    train_dataset = train_dataset.reset_index(drop=True)
    val_dataset = val_dataset.reset_index(drop=True)
    test_dataset = test_dataset.reset_index(drop=True)


    return train_dataset, val_dataset, test_dataset, full

train_dataset, val_dataset, test_dataset, full = prepare_dataset()

In [5]:
train_dataset

Unnamed: 0,questions,answers
0,How many field goals did Kobe Bryant score?,9936
1,Which Lakers forward scored a Finals record 61...,Elgin Baylor
2,Who owns the building?,City of Memphis
3,"Who was the team's president, front office man...",Rick Pitino
4,What college did Marbury commit to?,Georgia Tech
...,...,...
6518,How many points did Beverly score in a win ove...,16
6519,When was the 2017 NBA All-Star Game played?,"February 19, 2017"
6520,"On what date did Westbrook score 20 points, 14...",October 27
6521,Who made a hard box out after a free throw on ...,Plenette Pierson


In [None]:
# train BERT tokenizer on vocabulary from the sportsQA dataset "context" column
bert_tokenizer = Tokenizer(WordPiece(unk_token="<unk>"))
bert_tokenizer.normalizer = normalizers.Sequence([Lowercase()])
bert_tokenizer.pre_tokenizer = Whitespace()
bert_tokenizer.decoder = decoders.WordPiece()
trainer = WordPieceTrainer(special_tokens=["<unk>","<pad>","<bos>","<eos>"])
bert_tokenizer.train_from_iterator(full,trainer)
bert_tokenizer.enable_padding(
    pad_id=bert_tokenizer.token_to_id('<pad>'),
    length=128,
    pad_token='<pad>'
)
bert_tokenizer.enable_truncation(128)

base = Path('tokenizer',)
base.mkdir(exist_ok=True,parents=True)
bert_tokenizer.save(str(base / 'sportsQA_context.json'))






In [None]:
{train_dataset.loc[1,'questions']}

In [7]:
x = bert_tokenizer.encode(f"<bos>{train_dataset.loc[1,'questions']}<eos>")
for a,b in zip(x.ids, x.tokens):
    if b!= '<pad>':
        print(f'{a} : {b}')
    
print('\n',bert_tokenizer.decode(x.ids),'\n\n')

x = bert_tokenizer.encode(f"<bos>{train_dataset.loc[1,'answers']}<eos>")
for a,b in zip(x.ids, x.tokens):
    if b!= '<pad>':
        print(f'{a} : {b}')
    
print('\n',bert_tokenizer.decode(x.ids))

2 : <bos>
1217 : which
793 : lakers
973 : forward
1385 : scored
40 : a
756 : finals
773 : record
1380 : 61
697 : points
33 : ?
3 : <eos>

 which lakers forward scored a finals record 61 points? 


2 : <bos>
5708 : elgin
2413 : baylor
3 : <eos>

 elgin baylor


In [8]:
class Dataset:
    def __init__(self,df):
        self.df = df
    def __len__(self,):
        return len(self.df)
    def __getitem__(self,idx):
        sample = self.df.iloc[idx,:]
        en,lang2 = sample['questions'], sample['answers']
        start_token = "<bos>"
        qn = bert_tokenizer.encode(f'<bos>{en.strip()}<eos>').ids
        ans = bert_tokenizer.encode(f'<bos>{lang2.strip()}<eos>').ids
        ans_shift = ans.copy()
        ans_shift[:-1] = ans[1:]
        ans_shift[-1] = bert_tokenizer.token_to_id('<pad>')
        
        qn = torch.tensor(qn,dtype=torch.long)
        ans = torch.tensor(ans,dtype=torch.long)
        ans_shift = torch.tensor(ans_shift,dtype=torch.long)
        ans_shift[ans_shift==1]=-100
        return qn,ans,ans_shift

In [9]:
train_ds = Dataset(train_dataset)
val_ds = Dataset(val_dataset)

In [10]:
# https://github.com/bzhangGo/rmsnorm/blob/master/rmsnorm_torch.py
class RMSNorm(nn.Module):
    def __init__(self, d, p=-1., eps=1e-8, bias=False):
        """
            Root Mean Square Layer Normalization
        :param d: model size
        :param p: partial RMSNorm, valid value [0, 1], default -1.0 (disabled)
        :param eps:  epsilon value, default 1e-8
        :param bias: whether use bias term for RMSNorm, disabled by
            default because RMSNorm doesn't enforce re-centering invariance.
        """
        super(RMSNorm, self).__init__()

        self.eps = eps
        self.d = d
        self.p = p
        self.bias = bias

        self.scale = nn.Parameter(torch.ones(d))
        self.register_parameter("scale", self.scale)

        if self.bias:
            self.offset = nn.Parameter(torch.zeros(d))
            self.register_parameter("offset", self.offset)

    def forward(self, x):
        if self.p < 0. or self.p > 1.:
            norm_x = x.norm(2, dim=-1, keepdim=True)
            d_x = self.d
        else:
            partial_size = int(self.d * self.p)
            partial_x, _ = torch.split(x, [partial_size, self.d - partial_size], dim=-1)

            norm_x = partial_x.norm(2, dim=-1, keepdim=True)
            d_x = partial_size

        rms_x = norm_x * d_x ** (-1. / 2)
        x_normed = x / (rms_x + self.eps)

        if self.bias:
            return self.scale * x_normed + self.offset

        return self.scale * x_normed


class MultiheadAttention(nn.Module):
    def __init__(self, dim, n_heads, dropout=0.):
        super().__init__()
        self.dim = dim
        self.n_heads = n_heads
        assert dim % n_heads == 0, 'dim should be div by n_heads'
        self.head_dim = self.dim // self.n_heads
        self.q = nn.Linear(dim,dim,bias=False)
        self.k = nn.Linear(dim,dim,bias=False)
        self.v = nn.Linear(dim,dim,bias=False)
        self.attn_dropout = nn.Dropout(dropout)
        self.scale = self.head_dim ** -0.5
        self.out_proj = nn.Linear(dim,dim,bias=False)
        
    def forward(self,q,k,v,mask=None):
        batch,t,c = q.shape
        q = self.q(q)
        k = self.k(k)
        v = self.v(v)
        q = q.view(batch,q.size(1),self.n_heads,self.head_dim).permute(0,2,1,3)
        k = k.view(batch,k.size(1),self.n_heads,self.head_dim).permute(0,2,1,3)
        v = v.view(batch,v.size(1),self.n_heads,self.head_dim).permute(0,2,1,3)
        
        qkT = torch.matmul(q,k.transpose(-1,-2)) * self.scale
        qkT = self.attn_dropout(qkT)
        
        if mask is not None:
            mask = mask.to(dtype=qkT.dtype,device=qkT.device)
            a,b = qkT.size(-2), qkT.size(-1)
            qkT = qkT.masked_fill(mask[:,:,:a,:b]==0,float('-inf'))
            
        qkT = F.softmax(qkT,dim=-1)
            
        attn = torch.matmul(qkT,v)
        attn = attn.permute(0,2,1,3).contiguous().view(batch,t,c)
        out = self.out_proj(attn)
        return out
    


class FeedForward(nn.Module):
    def __init__(self,dim,dropout=0.):
        super().__init__()
        self.feed_forward = nn.Sequential(
            nn.Linear(dim,dim*4,bias=False),
            nn.Dropout(dropout),
            nn.GELU(),
            nn.Linear(dim*4,dim,bias=False)
        )
        
    def forward(self, x):
        return self.feed_forward(x)
    


class EncoderBlock(nn.Module):
    def __init__(self, dim, n_heads, attn_dropout=0., mlp_dropout=0.):
        super().__init__()
        self.attn = MultiheadAttention(dim,n_heads,attn_dropout)
        self.ffd = FeedForward(dim,mlp_dropout)
        self.ln_1 = RMSNorm(dim)
        self.ln_2 = RMSNorm(dim)
        
    def forward(self,x,mask=None):
        x = self.ln_1(x)
        x = x + self.attn(x,x,x,mask)
        x = self.ln_2(x)
        x = x + self.ffd(x)
        return x
    


class DecoderBlock(nn.Module):
    def __init__(self, dim, n_heads, attn_dropout=0., mlp_dropout=0.):
        super().__init__()
        self.self_attn = MultiheadAttention(dim,n_heads,attn_dropout)
        self.cross_attn = MultiheadAttention(dim,n_heads,attn_dropout)
        self.ln_1 = RMSNorm(dim)
        self.ln_2 = RMSNorm(dim)
        self.ln_3 = RMSNorm(dim)
        self.ffd = FeedForward(dim,mlp_dropout)
        
    def forward(self, x, enc_out, src_mask, tgt_mask):
        x = self.ln_1(x)
        x = x + self.self_attn(x,x,x,tgt_mask)
        x = self.ln_2(x)
        x = x + self.cross_attn(x,enc_out,enc_out,src_mask) # decoder: q, encoder: k,v
        x = self.ln_3(x)
        x = x + self.ffd(x)
        
        return x
    


class Embedding(nn.Module):
    def __init__(self,vocab_size,max_len,dim):
        super().__init__()
        self.max_len = max_len
        self.class_embedding = nn.Embedding(vocab_size,dim)
        self.pos_embedding = nn.Embedding(max_len,dim)
    def forward(self,x):
        x = self.class_embedding(x)
        pos = torch.arange(0,x.size(1),device=x.device)
        x = x + self.pos_embedding(pos)
        return x
    


class Seq2SeqTransformer(nn.Module):
    def __init__(self, config):
        
        super().__init__()
        
        self.embedding = Embedding(config['vocab_size'],config['max_len'],config['dim'])
        
        self.depth = config['depth']
        self.encoders = nn.ModuleList([
            EncoderBlock(
                dim=config['dim'],
                n_heads=config['n_heads'],
                attn_dropout=config['attn_dropout'],
                mlp_dropout=config['mlp_dropout']
            ) for _ in range(self.depth)
        ])
        self.decoders = nn.ModuleList([
            DecoderBlock(
                dim=config['dim'],
                n_heads=config['n_heads'],
                attn_dropout=config['attn_dropout'],
                mlp_dropout=config['mlp_dropout']
            ) for _ in range(self.depth)
        ])
        
        self.ln_f = RMSNorm(config['dim'])
        self.lm_head = nn.Linear(config['dim'],config['vocab_size'],bias=False)
        
        self.embedding.class_embedding.weight = self.lm_head.weight
        
        self.pad_token_id = config['pad_token_id']
        self.register_buffer('tgt_mask',torch.tril(torch.ones(1,1,config['max_len'],config['max_len'])))
    
        self.apply(self._init_weights)
        
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
    
    def create_src_mask(self,src):
        return (src != self.pad_token_id).unsqueeze(1).unsqueeze(2) # N, 1, 1, src_len
    
    def forward(self, src, tgt, labels=None):
        
        src_mask = self.create_src_mask(src)
        
        enc_out = self.embedding(src)
        dec_out = self.embedding(tgt)
        
        for i in range(self.depth):
            enc_out = self.encoders[i](enc_out,mask=src_mask)
            dec_out = self.decoders[i](dec_out,enc_out,src_mask=src_mask,tgt_mask=self.tgt_mask)
            
        dec_out = self.ln_f(dec_out)
        
        if labels is not None:
            lm_logits = self.lm_head(dec_out)
            loss = F.cross_entropy(lm_logits.view(-1, lm_logits.shape[-1]), labels.view(-1))
            return loss
        
        lm_logits = self.lm_head(dec_out[:,[-1],:])
        return lm_logits
    
    def generate(self,src,max_tokens=80,temperature=1.0,deterministic=False,eos=5,bos=None):
        tgt = torch.ones(1,1).long() * bos
        tgt = tgt.to(src.device)
        for _ in range(max_tokens):
            out = self(src,tgt)
            out = out[:,-1,:] / temperature
            probs = F.softmax(out,dim=-1)
            if deterministic:
                next_token = torch.argmax(probs,dim=-1,keepdim=True)
            else:
                next_token = torch.multinomial(probs,num_samples=1)
            tgt = torch.cat([tgt,next_token],dim=1)
            if next_token.item() == eos:
                break
            
        return tgt.cpu().flatten()

In [11]:
config = {
    'dim': 768,
    'n_heads': 12,
    'attn_dropout': 0.1,
    'mlp_dropout': 0.1,
    'depth': 6,
    'vocab_size': bert_tokenizer.get_vocab_size(),  # Set to tokenizer vocabulary size
    'max_len': 128,
    'pad_token_id': bert_tokenizer.token_to_id('<pad>')
}

In [12]:
model = Seq2SeqTransformer(config).to('cuda')
print(sum([p.numel() for p in model.parameters() if p.requires_grad]))

122252544


In [13]:
print(model)

Seq2SeqTransformer(
  (embedding): Embedding(
    (class_embedding): Embedding(30000, 768)
    (pos_embedding): Embedding(128, 768)
  )
  (encoders): ModuleList(
    (0-5): 6 x EncoderBlock(
      (attn): MultiheadAttention(
        (q): Linear(in_features=768, out_features=768, bias=False)
        (k): Linear(in_features=768, out_features=768, bias=False)
        (v): Linear(in_features=768, out_features=768, bias=False)
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (out_proj): Linear(in_features=768, out_features=768, bias=False)
      )
      (ffd): FeedForward(
        (feed_forward): Sequential(
          (0): Linear(in_features=768, out_features=3072, bias=False)
          (1): Dropout(p=0.1, inplace=False)
          (2): GELU(approximate='none')
          (3): Linear(in_features=3072, out_features=768, bias=False)
        )
      )
      (ln_1): RMSNorm()
      (ln_2): RMSNorm()
    )
  )
  (decoders): ModuleList(
    (0-5): 6 x DecoderBlock(
      (self_attn): M

In [14]:
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=128,shuffle=True,pin_memory=True,num_workers=2)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=128,shuffle=False,pin_memory=True,num_workers=2)
print(len(train_dl), len(val_dl))


51 6


In [15]:
test_dataset.head()

Unnamed: 0,questions,answers
0,How many.429.000.875 3.7 2002 Minnesota 3 1?,14.3
1,Who punched Kent Benson?,Abdul-Jabbar
2,How many regular-season games did the Huskies ...,126
3,In what year did Mitchell pledge her allegianc...,2013
4,When did the NBL merge with the BAA?,"August 3, 1949"


In [16]:
test_samples = [(test_dataset.loc[i,'questions'],test_dataset.loc[i,'answers']) for i in range(len(test_dataset))]


In [17]:
epochs = 32
train_losses = []
valid_losses = []
best_val_loss = 1e9

all_tl = []
all_lr = []

optim = torch.optim.Adam(model.parameters(),lr=1e-4)
sched = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optim,
    T_0=250,
    eta_min=1e-8
)

scaler = GradScaler()

  scaler = GradScaler()


In [18]:
for ep in tqdm(range(epochs)):
    model.train()
    trl = 0.
    tprog = tqdm(enumerate(train_dl),total=len(train_dl))
    for i, batch in tprog:
        with autocast():
            src, tgt, labels = [b.to('cuda') for b in batch]
            loss = model(src,tgt,labels)
            scaler.scale(loss).backward()
            scaler.unscale_(optim)
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0, norm_type=2)
            scaler.step(optim)
            scaler.update()
            optim.zero_grad()
            sched.step(ep + i / len(train_dl))
            all_lr.append(sched.get_last_lr())
            trl += loss.item()
            all_tl.append(loss.item())
            tprog.set_description(f'train step loss: {loss.item():.4f}')
    train_losses.append(trl/len(train_dl))
    
    gc.collect()
    torch.cuda.empty_cache()
        
    model.eval()
    with torch.no_grad():
        vrl = 0.
        vprog = tqdm(enumerate(val_dl),total=len(val_dl))
        for i, batch in vprog:
            with autocast():
                src, tgt, labels = [b.to('cuda') for b in batch]
                loss = model(src,tgt,labels)
                vrl += loss.item()
                vprog.set_description(f'valid step loss: {loss.item():.4f}')
        vloss = vrl/len(val_dl)
        valid_losses.append(vloss)
        print(f'epoch {ep} | train_loss: {train_losses[-1]:.4f} valid_loss: {valid_losses[-1]:.4f}')
        
        if vloss < best_val_loss:
            best_val_loss = vloss
            
            print('saving best model...')
            sd = model.state_dict()
            torch.save(sd,'./kaggle_checkpoint/best_model.pt')
            
        print('saving epoch checkpoint...')
        sd = model.state_dict()
        torch.save(sd,f'./kaggle_checkpoint/checkpoint_model_epoch{ep}.pt')
        # TODO: finish fixing this part of the code (tokenizing stuff)
        print('predicting with current epoch model...')
        for (src,tgt) in random.choices(test_samples,k=5):
            input_ids = bert_tokenizer.encode(f"<bos>{src}<eos>").ids
            input_ids = torch.tensor(input_ids,dtype=torch.long).unsqueeze(0).to('cuda')

            bos = bert_tokenizer.token_to_id('<bos>')
            
            tgt_out = model.generate(input_ids,bos=bos,deterministic=True)
            tgt_out = bert_tokenizer.decode(tgt_out.numpy())
            print(f'\nQuestion: {src} \nAnswer: {tgt} \nModelAnswer: {tgt_out}\n')

        # for (src,tgt,lang_id) in random.choices(test_samples,k=5):
        #     input_ids = bert_tokenizer.encode(f"<s-en>{src}</s>").ids
        #     input_ids = torch.tensor(input_ids,dtype=torch.long).unsqueeze(0).to('cuda')
        #     if lang_id == 'hi':
        #         bos = bert_tokenizer.token_to_id('<s-hi>')
        #     else:
        #         bos = bert_tokenizer.token_to_id('<s-te>')
        #     tgt_out = model.generate(input_ids,bos=bos,deterministic=True)
        #     tgt_out = bert_tokenizer.decode(tgt_out.numpy())
        #     print(f'\nEN: {src} \n{lang_id.upper()}: {tgt} \n{lang_id.upper()}_MODEL: {tgt_out}\n')

        print('-'*30,'\n\n')
    
    gc.collect()
    torch.cuda.empty_cache()

  0%|          | 0/32 [00:00<?, ?it/s]
  with autocast():

train step loss: 10.5885:   0%|          | 0/51 [00:01<?, ?it/s][A
train step loss: 10.5885:   2%|▏         | 1/51 [00:01<00:57,  1.16s/it][A
train step loss: 8.1371:   2%|▏         | 1/51 [00:01<00:57,  1.16s/it] [A
train step loss: 8.1371:   4%|▍         | 2/51 [00:01<00:34,  1.42it/s][A
train step loss: 8.0859:   4%|▍         | 2/51 [00:01<00:34,  1.42it/s][A
train step loss: 8.0859:   6%|▌         | 3/51 [00:01<00:26,  1.81it/s][A
train step loss: 8.0827:   6%|▌         | 3/51 [00:02<00:26,  1.81it/s][A
train step loss: 8.0827:   8%|▊         | 4/51 [00:02<00:22,  2.05it/s][A
train step loss: 7.7721:   8%|▊         | 4/51 [00:02<00:22,  2.05it/s][A
train step loss: 7.7721:  10%|▉         | 5/51 [00:02<00:20,  2.22it/s][A
train step loss: 7.7856:  10%|▉         | 5/51 [00:03<00:20,  2.22it/s][A
train step loss: 7.7856:  12%|█▏        | 6/51 [00:03<00:19,  2.33it/s][A
train step loss: 7.8641:  12%|█▏        | 6/51

epoch 0 | train_loss: 6.5329 valid_loss: 5.2362
saving best model...
saving epoch checkpoint...
predicting with current epoch model...

Question: Where is the arena located? 
Answer: Des Moines, Iowa 
ModelAnswer: the the the the the the the the the the the the the the the,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


Question: Who voted Brunson to be the best passer in high school basketball prior to his senior season? 
Answer: peers 
ModelAnswer: the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the


Question: How many games did the home team lose in the first 5 games of the series? 
Answer: each 
ModelAnswer: three


Question: For what sports network was Elliott a color commentator? 
Answer: 6-7 
ModelAns

  3%|▎         | 1/32 [00:28<14:37, 28.32s/it]


Question: On what date was NBA 2K23 Arcade Edition released? 
Answer: October 18, 2022 
ModelAnswer: february february, 2008,,, and the the the the,,, 2008,,,, 2008,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 4.8370:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 4.8370:   2%|▏         | 1/51 [00:00<00:21,  2.27it/s][A
train step loss: 5.3000:   2%|▏         | 1/51 [00:00<00:21,  2.27it/s][A
train step loss: 5.3000:   4%|▍         | 2/51 [00:00<00:19,  2.45it/s][A
train step loss: 4.9930:   4%|▍         | 2/51 [00:01<00:19,  2.45it/s][A
train step loss: 4.9930:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 5.5041:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 5.5041:   8%|▊         | 4/51 [00:01<00:18,  2.54it/s][A
train step loss: 4.9880:   8%|▊         | 4/51 [00:01<00:18,  2.54it/s][A
train step loss: 4.9880:  10%|▉         | 5/51 [00:01<00:17,  2.56it/s][A
train step loss: 4.9646:  10%|▉         | 5/51 [00:02<00:17,  2.56it/s][A
train step loss: 4.9646:  12%|█▏        | 6/51 [00:02<00:17,  2.57it/s][A
train step loss: 4.9291:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 1 | train_loss: 4.8480 valid_loss: 4.6446
saving best model...
saving epoch checkpoint...
predicting with current epoch model...

Question: When did The Dream trade Tamera Young to the Chicago Sky? 
Answer: August 12, 2009 
ModelAnswer: february 23, 2009


Question: How many 60-win teams had New York's three big men won? 
Answer: two 
ModelAnswer: three


Question: How long was the NBA's Most Valuable Player drought? 
Answer: 47-year 
ModelAnswer: three


Question: How many overtimes did the game go into? 
Answer: three 
ModelAnswer: three



  6%|▋         | 2/32 [00:55<13:41, 27.40s/it]


Question: Who played for Boston Celtics from 1950 to 1963? 
Answer: Cousy, BobBob Cousy 
ModelAnswer: the celtics

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 4.4139:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 4.4139:   2%|▏         | 1/51 [00:00<00:21,  2.35it/s][A
train step loss: 4.1890:   2%|▏         | 1/51 [00:00<00:21,  2.35it/s][A
train step loss: 4.1890:   4%|▍         | 2/51 [00:00<00:19,  2.48it/s][A
train step loss: 4.1716:   4%|▍         | 2/51 [00:01<00:19,  2.48it/s][A
train step loss: 4.1716:   6%|▌         | 3/51 [00:01<00:19,  2.52it/s][A
train step loss: 4.3745:   6%|▌         | 3/51 [00:01<00:19,  2.52it/s][A
train step loss: 4.3745:   8%|▊         | 4/51 [00:01<00:18,  2.54it/s][A
train step loss: 3.9253:   8%|▊         | 4/51 [00:01<00:18,  2.54it/s][A
train step loss: 3.9253:  10%|▉         | 5/51 [00:01<00:18,  2.55it/s][A
train step loss: 4.3751:  10%|▉         | 5/51 [00:02<00:18,  2.55it/s][A
train step loss: 4.3751:  12%|█▏        | 6/51 [00:02<00:17,  2.56it/s][A
train step loss: 4.3200:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 2 | train_loss: 4.1359 valid_loss: 4.4651
saving best model...
saving epoch checkpoint...
predicting with current epoch model...

Question: How long had Billups been an unrestricted free agent? 
Answer: several days 
ModelAnswer: four


Question: What is the name of the school at the University of New Hampshire? 
Answer: New Hampshire Wildcats 
ModelAnswer: the nba nba nba nba


Question: What team did Bynum play against on April 12? 
Answer: San Antonio Spurs 
ModelAnswer: philadelphia 76ers


Question: In what year did he place third in the NBA Rookie of the Year voting? 
Answer: 2007-08 
ModelAnswer: 2008



  9%|▉         | 3/32 [01:22<13:09, 27.22s/it]


Question: How many players remained with the team by January 1, 2014? 
Answer: six 
ModelAnswer: four

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 3.6179:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 3.6179:   2%|▏         | 1/51 [00:00<00:22,  2.25it/s][A
train step loss: 3.8532:   2%|▏         | 1/51 [00:00<00:22,  2.25it/s][A
train step loss: 3.8532:   4%|▍         | 2/51 [00:00<00:20,  2.43it/s][A
train step loss: 3.6214:   4%|▍         | 2/51 [00:01<00:20,  2.43it/s][A
train step loss: 3.6214:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 3.4905:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 3.4905:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 3.8470:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 3.8470:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 3.5288:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 3.5288:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 3.7539:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 3 | train_loss: 3.5660 valid_loss: 4.3550
saving best model...
saving epoch checkpoint...
predicting with current epoch model...

Question: How many years was Shailee Lehening a letterwinner and starter at Sublette High School? 
Answer: four 
ModelAnswer: three


Question: What team did Jerry West play for after Hot Rod Hundley's graduation? 
Answer: Mountaineers 
ModelAnswer: new york knicks


Question: Who was Hubie Brown in 1976-1981? 
Answer: Mike Fratello 
ModelAnswer: the winner


Question: How many consecutive games did he score 30 or more points in? 
Answer: three 
ModelAnswer: three



 12%|█▎        | 4/32 [01:49<12:40, 27.17s/it]


Question: Jordan is a regular recipient of what strategy? 
Answer: Hack-a-Shaq 
ModelAnswer: the nba

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 3.1093:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 3.1093:   2%|▏         | 1/51 [00:00<00:21,  2.30it/s][A
train step loss: 3.1239:   2%|▏         | 1/51 [00:00<00:21,  2.30it/s][A
train step loss: 3.1239:   4%|▍         | 2/51 [00:00<00:19,  2.45it/s][A
train step loss: 2.9908:   4%|▍         | 2/51 [00:01<00:19,  2.45it/s][A
train step loss: 2.9908:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 3.1236:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 3.1236:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 3.1836:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 3.1836:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 3.0900:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 3.0900:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 2.8995:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 4 | train_loss: 3.0528 valid_loss: 4.3605
saving epoch checkpoint...
predicting with current epoch model...

Question: How many NBA teams did he play for in the 2004 NBA season? 
Answer: six 
ModelAnswer: four


Question: What was the former name of the Albany Sharp Shooters? 
Answer: SouthGA Blues 
ModelAnswer: the year


Question: What format was the first time the Finals were played? 
Answer: best-of-five 
ModelAnswer: $ 10 million


Question: How many points did Gay average on his freshman year at Connecticut? 
Answer: 11.8 
ModelAnswer: 14



 16%|█▌        | 5/32 [02:15<12:07, 26.94s/it]


Question: What team acquired Derek Harper in a midseason trade? 
Answer: Dallas Mavericks 
ModelAnswer: the celtics

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 2.6384:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 2.6384:   2%|▏         | 1/51 [00:00<00:21,  2.30it/s][A
train step loss: 2.6291:   2%|▏         | 1/51 [00:00<00:21,  2.30it/s][A
train step loss: 2.6291:   4%|▍         | 2/51 [00:00<00:20,  2.45it/s][A
train step loss: 2.3963:   4%|▍         | 2/51 [00:01<00:20,  2.45it/s][A
train step loss: 2.3963:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 2.4179:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 2.4179:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 2.5703:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 2.5703:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 2.5718:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 2.5718:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 2.5351:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 5 | train_loss: 2.5559 valid_loss: 4.4374
saving epoch checkpoint...
predicting with current epoch model...

Question: Who is the Western Conference champion? 
Answer: Utah Jazz 
ModelAnswer: los angeles


Question: Who is the best basketball player I've ever seen? 
Answer: Michael Jeffrey Jordan 
ModelAnswer: the lakers


Question: How many points did he score in the second half of the Wizards' 108-97 win over the Bucks? 
Answer: 19 of his career-high 21 
ModelAnswer: 12


Question: On what date did the season begin? 
Answer: October 27, 2016 
ModelAnswer: february 26



 19%|█▉        | 6/32 [02:42<11:35, 26.74s/it]


Question: How many points did the Thunder score in the Miami Heat 121? 
Answer: 106 
ModelAnswer: 16

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 2.1433:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 2.1433:   2%|▏         | 1/51 [00:00<00:21,  2.33it/s][A
train step loss: 2.0733:   2%|▏         | 1/51 [00:00<00:21,  2.33it/s][A
train step loss: 2.0733:   4%|▍         | 2/51 [00:00<00:19,  2.46it/s][A
train step loss: 1.9369:   4%|▍         | 2/51 [00:01<00:19,  2.46it/s][A
train step loss: 1.9369:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 2.1632:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 2.1632:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 1.9698:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 1.9698:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 2.0061:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 2.0061:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 2.0968:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 6 | train_loss: 2.0641 valid_loss: 4.5091
saving epoch checkpoint...
predicting with current epoch model...

Question: Who was the head coach of the Shock? 
Answer: Laimbeer 
ModelAnswer: steve nash


Question: What is the first time the NBA gives out a Most Improved Player award at the end of a season? 
Answer: This season 
ModelAnswer: february 14, 2017


Question: How did the Warriors and Celtics tie their regular season games? 
Answer: 1-1 
ModelAnswer: three


Question: What team defeated the Kansas Jayhawks on March 18, 2008? 
Answer: Memphis Tigers 
ModelAnswer: the celtics



 22%|██▏       | 7/32 [03:08<11:05, 26.62s/it]


Question: When was Lawler honored with a star on the Hollywood Walk of Fame? 
Answer: March 3, 2016 
ModelAnswer: february 23, 2017

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 1.6488:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 1.6488:   2%|▏         | 1/51 [00:00<00:21,  2.29it/s][A
train step loss: 1.5566:   2%|▏         | 1/51 [00:00<00:21,  2.29it/s][A
train step loss: 1.5566:   4%|▍         | 2/51 [00:00<00:20,  2.45it/s][A
train step loss: 1.6358:   4%|▍         | 2/51 [00:01<00:20,  2.45it/s][A
train step loss: 1.6358:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 1.5796:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 1.5796:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 1.5851:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 1.5851:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 1.6165:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 1.6165:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 1.5101:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 7 | train_loss: 1.5786 valid_loss: 4.6025
saving epoch checkpoint...
predicting with current epoch model...

Question: What do Harrell's younger brothers call themselves? 
Answer: Cadarius and Quatauis 
ModelAnswer: five - star game


Question: How many points did the Thunder score in the Miami Heat 121? 
Answer: 106 
ModelAnswer: 25


Question: What is the name of the PBA Press Corps Coach of the Year? 
Answer: Perry Ronquillo 
ModelAnswer: the game


Question: What was the original name of the Albuquerque Thunderbird? 
Answer: Huntsville Flight 
ModelAnswer: the nba all - star game



 25%|██▌       | 8/32 [03:34<10:38, 26.59s/it]


Question: How many rebounds did Holiday have for the West? 
Answer: five 
ModelAnswer: 5. 3

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 1.0563:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 1.0563:   2%|▏         | 1/51 [00:00<00:21,  2.32it/s][A
train step loss: 1.1814:   2%|▏         | 1/51 [00:00<00:21,  2.32it/s][A
train step loss: 1.1814:   4%|▍         | 2/51 [00:00<00:19,  2.46it/s][A
train step loss: 1.1069:   4%|▍         | 2/51 [00:01<00:19,  2.46it/s][A
train step loss: 1.1069:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 1.0974:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 1.0974:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 1.1642:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 1.1642:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 1.1394:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 1.1394:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 1.0611:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 8 | train_loss: 1.1377 valid_loss: 4.6019
saving epoch checkpoint...
predicting with current epoch model...

Question: How many big 3-pointers did Cole make in Game 4 of the 2012 NBA Finals? 
Answer: two 
ModelAnswer: six


Question: Who punched Kent Benson? 
Answer: Abdul-Jabbar 
ModelAnswer: the winner


Question: What is the seating capacity in the expanded arena? 
Answer: 12,666 
ModelAnswer: 5, 500, and 1, and 10, and 1, and 10, and the year


Question: How many games did the Heat have in the last 25 games? 
Answer: 11-14 
ModelAnswer: six



 28%|██▊       | 9/32 [04:01<10:10, 26.53s/it]


Question: Who is the owner of G Billy Gabor Dan Gadzuric Deng Gai Elmer Gainer? 
Answer: Bill Gaines 
ModelAnswer: ray allen

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.8126:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.8126:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.8295:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.8295:   4%|▍         | 2/51 [00:00<00:19,  2.45it/s][A
train step loss: 0.7523:   4%|▍         | 2/51 [00:01<00:19,  2.45it/s][A
train step loss: 0.7523:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.7499:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.7499:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.7561:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.7561:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.8035:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.8035:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.7131:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 9 | train_loss: 0.7597 valid_loss: 4.7331
saving epoch checkpoint...
predicting with current epoch model...

Question: On what date did Giannis suffer an injury to his left knee? 
Answer: June 29, 2021 
ModelAnswer: february 21, 2013


Question: How many assists did Dwyane Wade surpass? 
Answer: 2,867 
ModelAnswer: 15


Question: How many points and 21 rebounds was he in a 113-102 loss to the Phoenix Suns? 
Answer: 29 
ModelAnswer: 17


Question: Who was the winningest coach in BC history? 
Answer: Skinner 
ModelAnswer: james harden



 31%|███▏      | 10/32 [04:27<09:43, 26.54s/it]


Question: When was James elected the first vice president of the NBPA? 
Answer: February 13, 2015 
ModelAnswer: january 24, 2010

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.4658:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.4658:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.4426:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.4426:   4%|▍         | 2/51 [00:00<00:19,  2.46it/s][A
train step loss: 0.4790:   4%|▍         | 2/51 [00:01<00:19,  2.46it/s][A
train step loss: 0.4790:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.4766:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.4766:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.4917:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.4917:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.4406:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.4406:  12%|█▏        | 6/51 [00:02<00:17,  2.54it/s][A
train step loss: 0.4844:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 10 | train_loss: 0.4586 valid_loss: 4.8257
saving epoch checkpoint...
predicting with current epoch model...

Question: What team claimed the first seed in the Western Conference? 
Answer: The Lakers 
ModelAnswer: the lakers


Question: What team drafted Florida State forward Jonathan Isaac? 
Answer: Orlando 
ModelAnswer: philadelphia 76ers


Question: Who was the most improved player in the Portland Trail Blazers? 
Answer: Mahmoud Abdul-Rauf 
ModelAnswer: alvin robertson


Question: How many rounds are there in the Play-Off System? 
Answer: three 
ModelAnswer: 16



 34%|███▍      | 11/32 [04:54<09:18, 26.62s/it]


Question: What league was Parsons willing to play in? 
Answer: G League 
ModelAnswer: nba

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.2243:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.2243:   2%|▏         | 1/51 [00:00<00:22,  2.26it/s][A
train step loss: 0.2625:   2%|▏         | 1/51 [00:00<00:22,  2.26it/s][A
train step loss: 0.2625:   4%|▍         | 2/51 [00:00<00:20,  2.43it/s][A
train step loss: 0.3075:   4%|▍         | 2/51 [00:01<00:20,  2.43it/s][A
train step loss: 0.3075:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.2722:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.2722:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.2663:   8%|▊         | 4/51 [00:02<00:18,  2.52it/s][A
train step loss: 0.2663:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.2638:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.2638:  12%|█▏        | 6/51 [00:02<00:17,  2.54it/s][A
train step loss: 0.2407:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 11 | train_loss: 0.2614 valid_loss: 4.9416
saving epoch checkpoint...
predicting with current epoch model...

Question: Who was the first NBA player drafted directly out of high school in 20 years? 
Answer: Garnett 
ModelAnswer: indiana pacers


Question: Where are the final three rounds held? 
Answer: St. Joseph Civic Arena 
ModelAnswer: san antonio, tx, and bill sharman and volleyball and the north carolina state warriors, the north carolina carolina carolina state university, the los angeles, the north carolina carolina


Question: What was the nickname of the Cougars? 
Answer: Phi Slama Jama 
ModelAnswer: 26 56. 317


Question: Who held the previous record of 442 points in March 1983? 
Answer: Isiah Thomas 
ModelAnswer: the lakers



 38%|███▊      | 12/32 [05:21<08:53, 26.66s/it]


Question: What did Bradley block several times? 
Answer: shots 
ModelAnswer: a player of the - 74 53 31. 631 and a a a & & & m m m m a a 2 2 2 2 2 2 2 - 6 2017 - 2017 - two

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.1593:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.1593:   2%|▏         | 1/51 [00:00<00:22,  2.25it/s][A
train step loss: 0.1381:   2%|▏         | 1/51 [00:00<00:22,  2.25it/s][A
train step loss: 0.1381:   4%|▍         | 2/51 [00:00<00:20,  2.42it/s][A
train step loss: 0.1999:   4%|▍         | 2/51 [00:01<00:20,  2.42it/s][A
train step loss: 0.1999:   6%|▌         | 3/51 [00:01<00:19,  2.48it/s][A
train step loss: 0.1461:   6%|▌         | 3/51 [00:01<00:19,  2.48it/s][A
train step loss: 0.1461:   8%|▊         | 4/51 [00:01<00:18,  2.50it/s][A
train step loss: 0.1433:   8%|▊         | 4/51 [00:02<00:18,  2.50it/s][A
train step loss: 0.1433:  10%|▉         | 5/51 [00:02<00:18,  2.52it/s][A
train step loss: 0.1559:  10%|▉         | 5/51 [00:02<00:18,  2.52it/s][A
train step loss: 0.1559:  12%|█▏        | 6/51 [00:02<00:17,  2.54it/s][A
train step loss: 0.1657:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 12 | train_loss: 0.1587 valid_loss: 4.8729
saving epoch checkpoint...
predicting with current epoch model...

Question: How many conference champions are there? 
Answer: four 
ModelAnswer: 1


Question: What city had been idle for five days prior to its start? 
Answer: Minneapolis 
ModelAnswer: milwaukee bucks


Question: Who played for Boston Celtics from 1950 to 1963? 
Answer: Cousy, BobBob Cousy 
ModelAnswer: walter brown


Question: What was the Lakers' record in the 2008-09 season? 
Answer: 65-17 
ModelAnswer: 15 - 11



 41%|████      | 13/32 [05:47<08:25, 26.59s/it]


Question: What was the name of Barkley's daughter named after? 
Answer: Christiana Mall 
ModelAnswer: cheyenne

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.1050:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.1050:   2%|▏         | 1/51 [00:00<00:21,  2.29it/s][A
train step loss: 0.0988:   2%|▏         | 1/51 [00:00<00:21,  2.29it/s][A
train step loss: 0.0988:   4%|▍         | 2/51 [00:00<00:20,  2.45it/s][A
train step loss: 0.0956:   4%|▍         | 2/51 [00:01<00:20,  2.45it/s][A
train step loss: 0.0956:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.1049:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.1049:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.1043:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.1043:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.1189:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.1189:  12%|█▏        | 6/51 [00:02<00:17,  2.54it/s][A
train step loss: 0.1028:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 13 | train_loss: 0.1138 valid_loss: 4.9375
saving epoch checkpoint...
predicting with current epoch model...

Question: How much of a percentage did Ball shoot from three? 
Answer: 39.2 
ModelAnswer: 7. 5 % - 2. 5 % % % - 2. 5 % %


Question: Who was the winningest coach in BC history? 
Answer: Skinner 
ModelAnswer: james harden - 1973 - 74 53 31. 631


Question: What format was the first time the Finals were played? 
Answer: best-of-five 
ModelAnswer: 1985


Question: What was the record of the Dallas cowboys in 1989? 
Answer: 1-15 
ModelAnswer: 26 - 24, 9 - 34



 44%|████▍     | 14/32 [06:14<07:58, 26.59s/it]


Question: How many minutes per game did Harkless average during the 2014-15 season? 
Answer: 15 
ModelAnswer: 27. 4

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0824:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0824:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.0836:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.0836:   4%|▍         | 2/51 [00:00<00:19,  2.46it/s][A
train step loss: 0.0648:   4%|▍         | 2/51 [00:01<00:19,  2.46it/s][A
train step loss: 0.0648:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0642:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0642:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0781:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0781:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0869:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0869:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.1061:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 14 | train_loss: 0.0820 valid_loss: 5.0063
saving epoch checkpoint...
predicting with current epoch model...

Question: Who was traded to the Charlotte Bobcats for rookie Brandan Wright? 
Answer: Richardson 
ModelAnswer: dallas mavericks


Question: What was the name of the Independent 4 4.500? 
Answer: 1902-03 
ModelAnswer: 23 - 5


Question: Where is Eddie Griffin? 
Answer: 867 
ModelAnswer: philadelphia, new york


Question: Which team won Game 7 of the NBA Finals? 
Answer: Pistons 
ModelAnswer: los angeles lakers



 47%|████▋     | 15/32 [06:40<07:31, 26.54s/it]


Question: What is Pippen's given name on his birth certificate? 
Answer: Scotty 
ModelAnswer: the year and two two two two - star game

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0548:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0548:   2%|▏         | 1/51 [00:00<00:22,  2.27it/s][A
train step loss: 0.1195:   2%|▏         | 1/51 [00:00<00:22,  2.27it/s][A
train step loss: 0.1195:   4%|▍         | 2/51 [00:00<00:20,  2.43it/s][A
train step loss: 0.0589:   4%|▍         | 2/51 [00:01<00:20,  2.43it/s][A
train step loss: 0.0589:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0550:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0550:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0539:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0539:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0625:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0625:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0516:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 15 | train_loss: 0.0617 valid_loss: 4.9665
saving epoch checkpoint...
predicting with current epoch model...

Question: How many players in NBA history made at least 600 free throws in one season? 
Answer: four 
ModelAnswer: four


Question: Who is the General Manager of Kroenke Sports & Entertainment? 
Answer: Calvin Booth 
ModelAnswer: october 27 and october october october october october october october october october october october october october october october october october 27 27 27 27 27 27 - star game


Question: What year was the team in Orlando? 
Answer: 2012-13 
ModelAnswer: 2022


Question: When did Celtics score 10 of their season-high 47 points in overtime? 
Answer: November 20, 2017 
ModelAnswer: april 18, 2017 - 18 18 18 18 18



 50%|█████     | 16/32 [07:07<07:05, 26.60s/it]


Question: When did Dennis reconcile with his father? 
Answer: 2012 
ModelAnswer: 1990

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0471:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0471:   2%|▏         | 1/51 [00:00<00:21,  2.32it/s][A
train step loss: 0.0525:   2%|▏         | 1/51 [00:00<00:21,  2.32it/s][A
train step loss: 0.0525:   4%|▍         | 2/51 [00:00<00:20,  2.45it/s][A
train step loss: 0.0455:   4%|▍         | 2/51 [00:01<00:20,  2.45it/s][A
train step loss: 0.0455:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0418:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0418:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0683:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0683:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0481:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0481:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0444:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 16 | train_loss: 0.0517 valid_loss: 4.9799
saving epoch checkpoint...
predicting with current epoch model...

Question: What did Carter-Williams do during the 2012 offseason? 
Answer: worked on his game 
ModelAnswer: at least 20 points, 5. 6 rebounds, and 2. 4 assists


Question: What is the name of the American professional basketball team? 
Answer: The Washington Wizards 
ModelAnswer: minnesota timberwolves and the year award award - kentucky kentucky colonels 4, and the year award, the university of the north carolina carolina carolina at the north carolina carolina carolina state


Question: What team acquired Derek Harper in a midseason trade? 
Answer: Dallas Mavericks 
ModelAnswer: the game


Question: What is the league now called? 
Answer: semi-professional 
ModelAnswer: the lane, or key )



 53%|█████▎    | 17/32 [07:34<06:39, 26.61s/it]


Question: How many games did they lose to the Spurs? 
Answer: seven 
ModelAnswer: one

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0449:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0449:   2%|▏         | 1/51 [00:00<00:22,  2.27it/s][A
train step loss: 0.0453:   2%|▏         | 1/51 [00:00<00:22,  2.27it/s][A
train step loss: 0.0453:   4%|▍         | 2/51 [00:00<00:20,  2.44it/s][A
train step loss: 0.0449:   4%|▍         | 2/51 [00:01<00:20,  2.44it/s][A
train step loss: 0.0449:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0335:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0335:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0357:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0357:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0363:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0363:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0438:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 17 | train_loss: 0.0453 valid_loss: 5.0523
saving epoch checkpoint...
predicting with current epoch model...

Question: Who quarterbacked the Los Angeles Lakers one more time? 
Answer: Bob Cousy 
ModelAnswer: dallas mavericks


Question: What color do current members departing members depart in? 
Answer: pink 
ModelAnswer: white


Question: How many minutes did Bowen average in 27 games as a senior? 
Answer: 36.6 
ModelAnswer: 69


Question: How many daughters did Howard have by her 19th birthday? 
Answer: four 
ModelAnswer: three



 56%|█████▋    | 18/32 [08:01<06:13, 26.68s/it]


Question: How many games has XU won each of the last four years? 
Answer: 25 
ModelAnswer: 7

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0628:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0628:   2%|▏         | 1/51 [00:00<00:21,  2.33it/s][A
train step loss: 0.0386:   2%|▏         | 1/51 [00:00<00:21,  2.33it/s][A
train step loss: 0.0386:   4%|▍         | 2/51 [00:00<00:19,  2.46it/s][A
train step loss: 0.0581:   4%|▍         | 2/51 [00:01<00:19,  2.46it/s][A
train step loss: 0.0581:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.0309:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.0309:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0336:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0336:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0507:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0507:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0374:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 18 | train_loss: 0.0446 valid_loss: 4.9457
saving epoch checkpoint...
predicting with current epoch model...

Question: How many overtimes did the game go into? 
Answer: three 
ModelAnswer: 30


Question: What was the nickname of the Cougars? 
Answer: Phi Slama Jama 
ModelAnswer: municipal


Question: On what date did the season begin? 
Answer: October 27, 2016 
ModelAnswer: april 16


Question: What is the combined record of Texas A&M and Kansas State? 
Answer: 14-16 
ModelAnswer: 4 - 4



 59%|█████▉    | 19/32 [08:27<05:46, 26.63s/it]


Question: When was the Cobo Arena constructed? 
Answer: 1961-1978 
ModelAnswer: june 25, 2010

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0661:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0661:   2%|▏         | 1/51 [00:00<00:21,  2.32it/s][A
train step loss: 0.0326:   2%|▏         | 1/51 [00:00<00:21,  2.32it/s][A
train step loss: 0.0326:   4%|▍         | 2/51 [00:00<00:20,  2.44it/s][A
train step loss: 0.0517:   4%|▍         | 2/51 [00:01<00:20,  2.44it/s][A
train step loss: 0.0517:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0336:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0336:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0718:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0718:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0356:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0356:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0397:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 19 | train_loss: 0.0485 valid_loss: 5.1425
saving epoch checkpoint...
predicting with current epoch model...

Question: Who was the eventual national champion of the 2005-06 season? 
Answer: North Carolina 
ModelAnswer: the philadelphia 76ers


Question: When was the stadium opened? 
Answer: July 12, 2005 
ModelAnswer: september 15, 1974


Question: What team did the Warriors beat to win their first title? 
Answer: Mad Ants 
ModelAnswer: boston celtics


Question: When did the Houston Rockets trade him? 
Answer: August 2018 
ModelAnswer: february 14, 1995



 62%|██████▎   | 20/32 [08:54<05:19, 26.62s/it]


Question: How many rebounds did the NBA average? 
Answer: 20 
ModelAnswer: 5. 5

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0552:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0552:   2%|▏         | 1/51 [00:00<00:22,  2.26it/s][A
train step loss: 0.0540:   2%|▏         | 1/51 [00:00<00:22,  2.26it/s][A
train step loss: 0.0540:   4%|▍         | 2/51 [00:00<00:20,  2.43it/s][A
train step loss: 0.0369:   4%|▍         | 2/51 [00:01<00:20,  2.43it/s][A
train step loss: 0.0369:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0316:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0316:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0314:   8%|▊         | 4/51 [00:02<00:18,  2.52it/s][A
train step loss: 0.0314:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0328:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0328:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0356:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 20 | train_loss: 0.0394 valid_loss: 5.0622
saving epoch checkpoint...
predicting with current epoch model...

Question: When was the team's 68th win? 
Answer: March 31, 2016 
ModelAnswer: february 23, 2011 - october october - - -


Question: How many career points was he able to record? 
Answer: 10,000th 
ModelAnswer: 245


Question: Which team staged a furious second-half comeback to win 100-99? 
Answer: Lakers 
ModelAnswer: the magic


Question: What team did Bynum play against on April 12? 
Answer: San Antonio Spurs 
ModelAnswer: charlotte hornets



 66%|██████▌   | 21/32 [09:20<04:52, 26.62s/it]


Question: Who traded Bibby to the Washington Wizards in 2011? 
Answer: Atlanta 
ModelAnswer: the knicks

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0262:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0262:   2%|▏         | 1/51 [00:00<00:22,  2.27it/s][A
train step loss: 0.0338:   2%|▏         | 1/51 [00:00<00:22,  2.27it/s][A
train step loss: 0.0338:   4%|▍         | 2/51 [00:00<00:20,  2.43it/s][A
train step loss: 0.0248:   4%|▍         | 2/51 [00:01<00:20,  2.43it/s][A
train step loss: 0.0248:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0278:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0278:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0369:   8%|▊         | 4/51 [00:02<00:18,  2.52it/s][A
train step loss: 0.0369:  10%|▉         | 5/51 [00:02<00:18,  2.53it/s][A
train step loss: 0.0254:  10%|▉         | 5/51 [00:02<00:18,  2.53it/s][A
train step loss: 0.0254:  12%|█▏        | 6/51 [00:02<00:17,  2.53it/s][A
train step loss: 0.0391:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 21 | train_loss: 0.0339 valid_loss: 5.0111
saving epoch checkpoint...
predicting with current epoch model...

Question: Who was a Hall of Fame Harlem Renaissance player? 
Answer: William Pop Gates 
ModelAnswer: moses malone


Question: Who was replaced for the Grizzlies for $200,000? 
Answer: the main basketball court 
ModelAnswer: tom heinsohn


Question: When did Dennis reconcile with his father? 
Answer: 2012 
ModelAnswer: 2013


Question: On what date did Farmer undergo surgery to repair a torn lateral meniscus in his left knee? 
Answer: December 24, 2008 
ModelAnswer: may 16



 69%|██████▉   | 22/32 [09:47<04:25, 26.59s/it]


Question: When was Prohm named Murray State's 15th head coach? 
Answer: May 23, 2011 
ModelAnswer: 2022 - - 74 53 31. 631

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0321:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0321:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.0225:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.0225:   4%|▍         | 2/51 [00:00<00:19,  2.45it/s][A
train step loss: 0.0281:   4%|▍         | 2/51 [00:01<00:19,  2.45it/s][A
train step loss: 0.0281:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0277:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0277:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0308:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0308:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0246:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0246:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0277:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 22 | train_loss: 0.0333 valid_loss: 5.1802
saving epoch checkpoint...
predicting with current epoch model...

Question: What team won the 1953 NBA Playoffs? 
Answer: Syracuse Nationals 
ModelAnswer: los angeles lakers


Question: What is Europe's second-tier level competition after the top-tier EuroLeague and the Greek Basket League? 
Answer: FIBA Saporta Cup 
ModelAnswer: the game was not televised - - 74 53 31. 631


Question: Who was forced to fold the Colonels? 
Answer: John Y. Brown, Jr. 
ModelAnswer: 14


Question: How many sons does Chalmers have? 
Answer: three 
ModelAnswer: three



 72%|███████▏  | 23/32 [10:13<03:59, 26.60s/it]


Question: When did the NBL merge with the BAA? 
Answer: August 3, 1949 
ModelAnswer: 1949 - 50

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0244:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0244:   2%|▏         | 1/51 [00:00<00:21,  2.32it/s][A
train step loss: 0.0221:   2%|▏         | 1/51 [00:00<00:21,  2.32it/s][A
train step loss: 0.0221:   4%|▍         | 2/51 [00:00<00:19,  2.46it/s][A
train step loss: 0.0224:   4%|▍         | 2/51 [00:01<00:19,  2.46it/s][A
train step loss: 0.0224:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0224:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0224:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0226:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0226:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0321:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0321:  12%|█▏        | 6/51 [00:02<00:17,  2.54it/s][A
train step loss: 0.0469:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 23 | train_loss: 0.0280 valid_loss: 5.0631
saving epoch checkpoint...
predicting with current epoch model...

Question: How many points did Duke have at halftime? 
Answer: 14 
ModelAnswer: 17


Question: How many games behind the Carolina Cougars were the Tams in the Eastern Division? 
Answer: 33 
ModelAnswer: 14


Question: When did the Bucks announce that Joe Prunty would become head coach? 
Answer: December 20, 2015 
ModelAnswer: august 16, 2010


Question: Where is the Coliseum located? 
Answer: three 
ModelAnswer: atlanta, georgia, texas texas,, the



 75%|███████▌  | 24/32 [10:40<03:32, 26.60s/it]


Question: What is the name of the multi-purpose arena located in Minneapolis, Minnesota? 
Answer: Target Center 
ModelAnswer: roy wilkins auditorium

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0216:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0216:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.0315:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.0315:   4%|▍         | 2/51 [00:00<00:19,  2.46it/s][A
train step loss: 0.0279:   4%|▍         | 2/51 [00:01<00:19,  2.46it/s][A
train step loss: 0.0279:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.0222:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.0222:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0192:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0192:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0222:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0222:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0521:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 24 | train_loss: 0.0277 valid_loss: 5.1278
saving epoch checkpoint...
predicting with current epoch model...

Question: Who is the current point guard for the Memphis Grizzlies? 
Answer: Alex Conley 
ModelAnswer: scott skiles


Question: Who fights against Edison Miranda, Mikkel Kessler, and Allan Green? 
Answer: Andre Ward 
ModelAnswer: david robinson


Question: How many points did Portland have in the third quarter? 
Answer: eighteen 
ModelAnswer: 15


Question: Who was named co-MVP of the April 16 Jordan Brand Classic game? 
Answer: Davis 
ModelAnswer: kyrie irving



 78%|███████▊  | 25/32 [11:07<03:06, 26.58s/it]


Question: Who won the NBA championship in 1966? 
Answer: Philadelphia 76ers 
ModelAnswer: philadelphia 76ers the philadelphia 76ers

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0199:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0199:   2%|▏         | 1/51 [00:00<00:21,  2.29it/s][A
train step loss: 0.0178:   2%|▏         | 1/51 [00:00<00:21,  2.29it/s][A
train step loss: 0.0178:   4%|▍         | 2/51 [00:00<00:20,  2.44it/s][A
train step loss: 0.0184:   4%|▍         | 2/51 [00:01<00:20,  2.44it/s][A
train step loss: 0.0184:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0383:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0383:   8%|▊         | 4/51 [00:01<00:18,  2.51it/s][A
train step loss: 0.0258:   8%|▊         | 4/51 [00:02<00:18,  2.51it/s][A
train step loss: 0.0258:  10%|▉         | 5/51 [00:02<00:18,  2.53it/s][A
train step loss: 0.0297:  10%|▉         | 5/51 [00:02<00:18,  2.53it/s][A
train step loss: 0.0297:  12%|█▏        | 6/51 [00:02<00:17,  2.54it/s][A
train step loss: 0.0246:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 25 | train_loss: 0.0267 valid_loss: 5.0359
saving epoch checkpoint...
predicting with current epoch model...

Question: How many points did Jerry Lucas score to lead the Knicks to the win in the series? 
Answer: 26 
ModelAnswer: 31


Question: Who is the owner of G Billy Gabor Dan Gadzuric Deng Gai Elmer Gainer? 
Answer: Bill Gaines 
ModelAnswer: anthony mason 4, kentucky colonels 3 1973 - 74 53 31. 631


Question: How many points did Duke have in the game? 
Answer: eight 
ModelAnswer: 11


Question: What year did WWU win the DII Tournament? 
Answer: 2012 
ModelAnswer: 1968



 81%|████████▏ | 26/32 [11:34<02:40, 26.69s/it]


Question: How many points shy of his career high? 
Answer: two 
ModelAnswer: two two two

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0181:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0181:   2%|▏         | 1/51 [00:00<00:21,  2.34it/s][A
train step loss: 0.0179:   2%|▏         | 1/51 [00:00<00:21,  2.34it/s][A
train step loss: 0.0179:   4%|▍         | 2/51 [00:00<00:19,  2.47it/s][A
train step loss: 0.0251:   4%|▍         | 2/51 [00:01<00:19,  2.47it/s][A
train step loss: 0.0251:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.0140:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.0140:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0242:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0242:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0229:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0229:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0210:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 26 | train_loss: 0.0241 valid_loss: 5.1315
saving epoch checkpoint...
predicting with current epoch model...

Question: When did Mills score 28 points in the Tigers' season opener? 
Answer: 7 October 2011 
ModelAnswer: 2007


Question: Who was impressed with Bynum's development? 
Answer: Coach Pete Newell 
ModelAnswer: western conference


Question: What did Malone say about his relationship with his older children in 2018? 
Answer: Curry, Iguodala 
ModelAnswer: a 13, the remainder of the season


Question: How many points and 13.6 rebounds did Barkley average per game during the postseason? 
Answer: 26 
ModelAnswer: 19. 6



 84%|████████▍ | 27/32 [12:00<02:13, 26.61s/it]


Question: What is part of a dynamic urban center? 
Answer: entertainment 
ModelAnswer: a 29, 2014 - volleyball - star game - - 74 53 31. 631

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0166:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0166:   2%|▏         | 1/51 [00:00<00:21,  2.29it/s][A
train step loss: 0.0155:   2%|▏         | 1/51 [00:00<00:21,  2.29it/s][A
train step loss: 0.0155:   4%|▍         | 2/51 [00:00<00:20,  2.45it/s][A
train step loss: 0.0372:   4%|▍         | 2/51 [00:01<00:20,  2.45it/s][A
train step loss: 0.0372:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0150:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0150:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0257:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0257:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0351:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0351:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0388:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 27 | train_loss: 0.0253 valid_loss: 5.2128
saving epoch checkpoint...
predicting with current epoch model...

Question: When did Paul Westphal hire Elie? 
Answer: June 22, 2009 
ModelAnswer: june 25, 2010


Question: What is Pippen's given name on his birth certificate? 
Answer: Scotty 
ModelAnswer: the university of hawaii at mānoa 4, kentucky colonels 3 - - 74 53 31. 631


Question: Where was the All-Star Game held? 
Answer: Toyota Center in Houston, Texas 
ModelAnswer: los angeles memorial sports arena


Question: How many assists did Vaugn have by the end of his college career? 
Answer: 804 
ModelAnswer: two



 88%|████████▊ | 28/32 [12:27<01:46, 26.61s/it]


Question: In what city did Burke call her scheduled game on March 11? 
Answer: Dallas 
ModelAnswer: five

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0168:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0168:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.0258:   2%|▏         | 1/51 [00:00<00:21,  2.31it/s][A
train step loss: 0.0258:   4%|▍         | 2/51 [00:00<00:19,  2.45it/s][A
train step loss: 0.0246:   4%|▍         | 2/51 [00:01<00:19,  2.45it/s][A
train step loss: 0.0246:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0154:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0154:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0168:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0168:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0138:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0138:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0218:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 28 | train_loss: 0.0236 valid_loss: 5.0842
saving epoch checkpoint...
predicting with current epoch model...

Question: What team finished the season with a record of 46 wins and 32 losses? 
Answer: The Buccaneers 
ModelAnswer: the bulls


Question: After how many seasons as president, who took over? 
Answer: Scott Perry 
ModelAnswer: 8


Question: How many points did Orlando have in the fourth quarter in Game 1? 
Answer: 18 
ModelAnswer: 16


Question: What team acquired Derek Harper in a midseason trade? 
Answer: Dallas Mavericks 
ModelAnswer: the game



 91%|█████████ | 29/32 [12:53<01:19, 26.64s/it]


Question: Thompson averaged how many games in the 1976 Finals against the New York Nets and Erving? 
Answer: 28 
ModelAnswer: six and volleyball of the 3 1973 - 74 53 31. 631

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0159:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0159:   2%|▏         | 1/51 [00:00<00:22,  2.26it/s][A
train step loss: 0.0459:   2%|▏         | 1/51 [00:00<00:22,  2.26it/s][A
train step loss: 0.0459:   4%|▍         | 2/51 [00:00<00:20,  2.43it/s][A
train step loss: 0.0338:   4%|▍         | 2/51 [00:01<00:20,  2.43it/s][A
train step loss: 0.0338:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0234:   6%|▌         | 3/51 [00:01<00:19,  2.49it/s][A
train step loss: 0.0234:   8%|▊         | 4/51 [00:01<00:18,  2.52it/s][A
train step loss: 0.0240:   8%|▊         | 4/51 [00:02<00:18,  2.52it/s][A
train step loss: 0.0240:  10%|▉         | 5/51 [00:02<00:18,  2.53it/s][A
train step loss: 0.0338:  10%|▉         | 5/51 [00:02<00:18,  2.53it/s][A
train step loss: 0.0338:  12%|█▏        | 6/51 [00:02<00:17,  2.54it/s][A
train step loss: 0.0366:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 29 | train_loss: 0.0298 valid_loss: 5.2844
saving epoch checkpoint...
predicting with current epoch model...

Question: Who was the general manager of the Rockets? 
Answer: Daryl Morey 
ModelAnswer: jack mccloskey


Question: Who is the best player in the region? 
Answer: Alexander 
ModelAnswer: chris paul


Question: What was his average per game per game? 
Answer: 15.6 points 
ModelAnswer: 6. 7


Question: How many preseason games did he play? 
Answer: three 
ModelAnswer: nine



 94%|█████████▍| 30/32 [13:20<00:53, 26.76s/it]


Question: When did Lucas star on the U.S. Olympic basketball team? 
Answer: 1960 
ModelAnswer: 1974 4, kentucky colonels 3 1973 - 74 53 31. 631

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0278:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0278:   2%|▏         | 1/51 [00:00<00:21,  2.33it/s][A
train step loss: 0.0206:   2%|▏         | 1/51 [00:00<00:21,  2.33it/s][A
train step loss: 0.0206:   4%|▍         | 2/51 [00:00<00:19,  2.46it/s][A
train step loss: 0.0193:   4%|▍         | 2/51 [00:01<00:19,  2.46it/s][A
train step loss: 0.0193:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.0426:   6%|▌         | 3/51 [00:01<00:19,  2.51it/s][A
train step loss: 0.0426:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0150:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0150:  10%|▉         | 5/51 [00:01<00:18,  2.55it/s][A
train step loss: 0.0402:  10%|▉         | 5/51 [00:02<00:18,  2.55it/s][A
train step loss: 0.0402:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0158:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 30 | train_loss: 0.0240 valid_loss: 5.2964
saving epoch checkpoint...
predicting with current epoch model...

Question: Who coached the Blue Devils in the East Regional Final? 
Answer: Rick Pitino 
ModelAnswer: phil jackson and volleyball


Question: What was the best recruiting class by Liberty and in the Big South Conference? 
Answer: Hankinsonu2019s first recruiting class 
ModelAnswer: the year


Question: On what date did he return to action? 
Answer: November 19 
ModelAnswer: january 12, 2018 february 12, and october october october 3 1973 - 74 53 31. 631 and october 27 27 27 27 27 27, 2017, 2017, 2017


Question: What was the total tournament attendance? 
Answer: 763,607 
ModelAnswer: 11, 475 and volleyball



 97%|█████████▋| 31/32 [13:47<00:26, 26.66s/it]


Question: How many points did the Thunder score in the Miami Heat 121? 
Answer: 106 
ModelAnswer: 15

------------------------------ 





  0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0133:   0%|          | 0/51 [00:00<?, ?it/s][A
train step loss: 0.0133:   2%|▏         | 1/51 [00:00<00:21,  2.30it/s][A
train step loss: 0.0292:   2%|▏         | 1/51 [00:00<00:21,  2.30it/s][A
train step loss: 0.0292:   4%|▍         | 2/51 [00:00<00:20,  2.45it/s][A
train step loss: 0.0184:   4%|▍         | 2/51 [00:01<00:20,  2.45it/s][A
train step loss: 0.0184:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0173:   6%|▌         | 3/51 [00:01<00:19,  2.50it/s][A
train step loss: 0.0173:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0127:   8%|▊         | 4/51 [00:01<00:18,  2.53it/s][A
train step loss: 0.0127:  10%|▉         | 5/51 [00:01<00:18,  2.54it/s][A
train step loss: 0.0258:  10%|▉         | 5/51 [00:02<00:18,  2.54it/s][A
train step loss: 0.0258:  12%|█▏        | 6/51 [00:02<00:17,  2.55it/s][A
train step loss: 0.0174:  12%|█▏        | 6/51 [00:02<00:17,  2.5

epoch 31 | train_loss: 0.0227 valid_loss: 5.2572
saving epoch checkpoint...
predicting with current epoch model...

Question: Which two players were the only players in franchise history to receive this honor? 
Answer: Vince Carter and Chris Bosh 
ModelAnswer: sam jones, john havlicek, bill sharman, and john starks and john havlicek, and kentucky colonels 3 1973 - 74 53 31. 631


Question: Who graduated from UConn in 2010? 
Answer: Kalana Green 
ModelAnswer: iowa state


Question: What was MBS renamed to? 
Answer: People's Television Network 
ModelAnswer: the nba all - star game


Question: On what date did he return to action? 
Answer: November 19 
ModelAnswer: january 12, 2018 february 12, and october 27 27 27 january 2, 2017, 2018, and december 16, 2018 2018 2018 2018 2018 2018 2018



100%|██████████| 32/32 [14:13<00:00, 26.68s/it]


Question: How long did Carl Landry play for the Kings before joining the Kings? 
Answer: 4-year 
ModelAnswer: one - year

------------------------------ 





