In [1]:
#copied from https://github.com/aburkov/theLMbook/blob/main/news_decoder_language_model.ipynb
# Import required libraries
import os               # For file and path operations (check_file_exists, extract_dataset)
import urllib.request   # For downloading dataset files from URLs
import tarfile          # For extracting .tar.gz dataset archives
import torch            # Main PyTorch library for tensor operations and deep learning
import torch.nn as nn   # Neural network modules, layers, and utilities
import torch.nn.functional as F  # For softmax
from torch.utils.data import DataLoader, IterableDataset  # For efficient data loading
import random           # For setting random seeds
from tqdm import tqdm   # For progress bars
import math             # For computing perplexity using exp()
import re               # For preprocessing text (replacing numbers with placeholders)
from transformers import AutoTokenizer  # For loading pre-trained tokenizer
#import tempfile         # For temporary file handling during extraction
#import shutil           # For file operations during extraction

# ----------------------------
# Utility Functions
# ----------------------------

def set_seed(seed):
    """
    Seed every random number generator this script relies on.

    Seeds Python's `random` module and PyTorch (the CPU generator and all
    CUDA devices), and configures cuDNN for repeatable behavior.

    Args:
        seed (int): Seed value applied to all generators
    """
    # cuDNN: request deterministic algorithms and switch off the auto-tuner.
    # This trades some speed for consistency and may still not guarantee
    # bit-identical results in every case.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

    # PyTorch generators: CPU first, then every GPU
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Python's built-in generator
    random.seed(seed)

# ----------------------------
# Dataset Class
# ----------------------------

class IterableTextDataset(IterableDataset):
    """
    Streams (input, target) token pairs from a text file, one line at a time.

    The file is re-opened and read lazily on every iteration, so the corpus
    never has to fit in memory. The file is also scanned once at construction
    time to count its lines.

    Args:
        file_path (str): Path to the text file, one sentence per line
        tokenizer: Object exposing `encode(text, max_length=..., truncation=...)`
        max_length (int): Maximum number of tokens kept per sentence (default: 30)
    """
    def __init__(self, file_path, tokenizer, max_length=30):
        self.file_path = file_path      # Where the sentences live
        self.tokenizer = tokenizer      # Turns text into token IDs
        self.max_length = max_length    # Truncation limit passed to the tokenizer
        # Eagerly count lines so len(dataset) works
        self._count_sentences()

    def __iter__(self):
        """
        Iterates over the file and produces next-token-prediction pairs.

        Yields:
            tuple: (input_sequence, target_sequence) as LongTensors, where the
                   input drops the last token and the target drops the first,
                   so target[i] is the token that follows input[i]
        """
        with open(self.file_path, 'r', encoding="utf-8") as handle:
            for raw_line in handle:
                # Trim whitespace, then collapse every run of digits into "###"
                text = re.sub(r"\d+", "###", raw_line.strip())

                token_ids = self.tokenizer.encode(
                    text,
                    max_length=self.max_length,
                    truncation=True
                )

                # A pair needs at least one input token and one target token
                if len(token_ids) < 2:
                    continue

                inputs = torch.tensor(token_ids[:-1], dtype=torch.long)
                targets = torch.tensor(token_ids[1:], dtype=torch.long)
                yield inputs, targets

    def __len__(self):
        """
        Returns the number of lines in the file.

        Note: lines that encode to fewer than two tokens are skipped by
        __iter__, so iteration may yield fewer items than this value.
        """
        return self._num_sentences

    def _count_sentences(self):
        """Counts the lines of the file and caches the result in `_num_sentences`."""
        print(f"\nCounting sentences in {self.file_path}...")
        line_total = 0
        with open(self.file_path, 'r', encoding="utf-8") as handle:
            for _ in handle:
                line_total += 1
        self._num_sentences = line_total
        print(f"\nFound {self._num_sentences} sentences in {self.file_path}.")

## ----------------------------
## Download and prepare data
## ----------------------------

def create_collate_fn(tokenizer):
    """
    Builds the batching function handed to the DataLoader.

    The returned function right-pads every sequence in a batch with the
    tokenizer's padding token so all rows share the length of the longest one.

    Args:
        tokenizer: Object exposing `pad_token_id`

    Returns:
        function: Maps a list of (input, target) tensor pairs to a pair of
                  padded 2-D tensors (batch first)
    """
    def collate_fn(batch):
        # Transpose [(in, tgt), ...] into (in, in, ...), (tgt, tgt, ...)
        inputs, targets = zip(*batch)

        def pad(sequences):
            # The pad ID is looked up from the tokenizer on every call
            return nn.utils.rnn.pad_sequence(
                sequences,
                batch_first=True,
                padding_value=tokenizer.pad_token_id,
            )

        return pad(inputs), pad(targets)
    return collate_fn

def check_file_exists(filename):
    """
    Reports whether a path exists on disk.

    Relative paths are resolved against the current working directory.

    Args:
        filename (str): Path of the file (or directory) to look for
    Returns:
        bool: True when the path exists, False otherwise
    """
    exists = os.path.exists(filename)
    return exists

def download_file(url):
    """
    Downloads a file from the given URL if it doesn't exist locally.
    Uses a custom User-Agent to help prevent download blocks.

    The response is streamed to a temporary "<name>.part" file and only renamed
    to its final name once the transfer has finished. An interrupted download
    therefore never leaves a truncated archive behind that a later run would
    mistake for a complete one.

    Args:
        url (str): URL of the file to download
    Returns:
        str: Name of the downloaded file ("news.tar.gz")
    """
    import shutil  # Local import: only needed for the streaming copy below

    # Always use news.tar.gz as the filename, regardless of URL
    filename = "news.tar.gz"

    if check_file_exists(filename):
        print(f"\n{filename} already downloaded.")
        return filename

    print(f"\nDownloading dataset from {url}...")
    req = urllib.request.Request(
        url,
        headers={"User-Agent": "Mozilla/5.0"}
    )
    partial_path = filename + ".part"
    try:
        with urllib.request.urlopen(req) as response, open(partial_path, "wb") as out_file:
            # Copy in chunks instead of holding the whole archive in memory
            shutil.copyfileobj(response, out_file)
        # Atomic rename: the final name only ever refers to a complete file
        os.replace(partial_path, filename)
    finally:
        # After a successful rename the partial file is gone; on any failure
        # (including Ctrl-C) remove the leftover fragment
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("\nDownload completed.")
    return filename

def is_within_directory(directory, target):
    """
    Checks if a target path is within a specified directory.

    Both paths are made absolute (which also collapses ".." segments) and then
    compared component by component. A character-wise prefix comparison would
    wrongly accept siblings such as "/data_evil" for "/data".

    Args:
        directory (str): Base directory path
        target (str): Target path to check
    Returns:
        bool: True if target is the directory itself or lies beneath it
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)
    try:
        shared = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths cannot share a root (e.g. different drives on
        # Windows); such a target is certainly not inside the directory
        return False
    return shared == abs_directory

def extract_dataset(filename):
    """
    Extracts train.txt and test.txt from the downloaded archive.
    Includes debug information about archive contents.

    The archive is unpacked into the directory that contains it (the current
    directory when `filename` has no directory part), which is where the
    expected "news/train.txt" and "news/test.txt" paths are looked up.
    Every member is checked before extraction so that a crafted archive cannot
    write outside that directory.

    Args:
        filename (str): Name of the archive file
    Returns:
        tuple: Paths to extracted train and test files
    Raises:
        ValueError: If an archive member would be extracted outside the
            destination directory
        FileNotFoundError: If the expected files are missing after extraction
    """
    archive_dir = os.path.dirname(filename)
    data_dir = os.path.join(archive_dir, "news")
    train_path = os.path.join(data_dir, "train.txt")
    test_path = os.path.join(data_dir, "test.txt")

    if check_file_exists(train_path) and check_file_exists(test_path):
        print("\nData files already extracted.")
        return train_path, test_path

    # Unpack next to the archive so the files land where data_dir expects them
    dest_dir = archive_dir or "."

    print("\nListing archive contents:")
    with tarfile.open(filename, "r:gz") as tar:
        for member in tar.getmembers():
            print(f"\nArchive member: {member.name}")
            # Reject absolute names and "../" escapes before touching the disk
            if not is_within_directory(dest_dir, os.path.join(dest_dir, member.name)):
                raise ValueError(f"Unsafe path in archive: {member.name}")

        print("\nExtracting files...")
        if hasattr(tarfile, "data_filter"):
            # Extraction filters (where this Python provides them) also guard
            # against link-based escapes and special files
            tar.extractall(dest_dir, filter="data")
        else:
            tar.extractall(dest_dir)

    if not (check_file_exists(train_path) and check_file_exists(test_path)):
        raise FileNotFoundError("\nRequired files not found in the archive. Please check the paths above.")

    print("\nExtraction completed.")
    return train_path, test_path

def create_datasets(train_file, test_file, tokenizer, max_length=30):
    """
    Wraps the training and test files in streaming IterableTextDataset objects
    and reports how many sentences (lines) each one contains.

    Args:
        train_file (str): Path to training data file
        test_file (str): Path to test data file
        tokenizer: Tokenizer object for text processing
        max_length (int): Maximum sequence length passed to each dataset (default: 30)

    Returns:
        tuple: (train_dataset, test_dataset)
    """
    # Built in order: training split first, then the test split
    train_dataset, test_dataset = (
        IterableTextDataset(path, tokenizer, max_length)
        for path in (train_file, test_file)
    )

    # Report the size of each split
    print(f"\nTraining sentences: {len(train_dataset)}")
    print(f"\nTest sentences: {len(test_dataset)}")

    return train_dataset, test_dataset

def create_dataloaders(train_dataset, test_dataset, batch_size, collate_fn):
    """
    Wraps both datasets in DataLoaders that share the same batching settings.

    Args:
        train_dataset: Training dataset
        test_dataset: Test dataset
        batch_size (int): Number of sequences per batch
        collate_fn: Function that turns a list of samples into one batch

    Returns:
        tuple: (train_dataloader, test_dataloader)
    """
    # Identical settings for both splits; num_workers=0 loads data in the
    # main process
    loader_options = {
        "batch_size": batch_size,
        "collate_fn": collate_fn,
        "num_workers": 0,
    }
    train_dataloader = DataLoader(train_dataset, **loader_options)
    test_dataloader = DataLoader(test_dataset, **loader_options)
    return train_dataloader, test_dataloader

def download_and_prepare_data(url, batch_size, tokenizer, max_length=30):
    """
    Runs the whole data pipeline: download, extract, wrap in datasets, batch.

    Args:
        url (str): URL where the dataset archive can be downloaded
        batch_size (int): Batch size for data loading
        tokenizer: Tokenizer object for text processing
        max_length (int): Maximum sequence length for tokenization (default: 30)

    Returns:
        tuple: (train_dataloader, test_dataloader) - Ready-to-use data loaders
    """
    # Fetch the archive (skipped when already present) and unpack it
    archive_name = download_file(url)
    train_path, test_path = extract_dataset(archive_name)

    # Streaming datasets over the two extracted text files
    datasets = create_datasets(train_path, test_path, tokenizer, max_length)

    # Padding-aware batching, then the loaders themselves
    padder = create_collate_fn(tokenizer)
    return create_dataloaders(*datasets, batch_size, padder)

# ----------------------------
# Evaluation Functions
# ----------------------------

def compute_loss_and_perplexity(model, dataloader, tokenizer, criterion, device, max_sentences=1000):
    """
    Evaluates model performance by computing loss and perplexity on data.

    The model is switched to evaluation mode and left in that mode.

    Args:
        model (nn.Module): The language model to evaluate
        dataloader (DataLoader): Data loader containing batched sequences
        tokenizer: Tokenizer for handling special tokens like padding
        criterion: Loss function (usually CrossEntropyLoss). The per-batch
                   result is multiplied by the token count, so the code assumes
                   the criterion returns the mean loss over the tokens it is given
        device: Device to run computation on (cuda/cpu)
        max_sentences (int): Maximum number of sentences to evaluate (default: 1000)
                           Limits evaluation to a subset for faster validation.
                           Evaluation stops after the batch that reaches this
                           count, so slightly more sentences may be processed

    Returns:
        tuple: (average_loss, perplexity)
               - average_loss: Mean loss per token (excluding padding)
               - perplexity: exp(average_loss), lower is better
               Both values are NaN when no non-padding token was evaluated
               (for example, an empty dataloader).
    """
    # Set model to evaluation mode (disables dropout, etc.)
    model.eval()

    pad_id = tokenizer.pad_token_id

    # Initialize counters for loss calculation
    total_loss = 0.0          # Sum of per-token losses across all batches
    total_tokens = 0          # Number of non-padding target tokens seen
    sentences_processed = 0   # Number of sentences processed

    # Disable gradient computation for efficiency
    with torch.no_grad():
        for input_seq, target_seq in tqdm(dataloader, desc="Evaluating", leave=False):
            input_seq = input_seq.to(device)      # Shape: (batch_size, seq_len)
            target_seq = target_seq.to(device)    # Shape: (batch_size, seq_len)

            # Forward pass through the model
            logits = model(input_seq)             # Shape: (batch_size, seq_len, vocab_size)

            # Flatten so every position is one classification example
            logits = logits.reshape(-1, logits.size(-1))  # Shape: (batch_size * seq_len, vocab_size)
            target = target_seq.reshape(-1)               # Shape: (batch_size * seq_len)

            # Keep only positions whose target is a real (non-padding) token
            mask = target != pad_id
            num_tokens = int(mask.sum().item())

            # A batch without real tokens would make the mean loss NaN and
            # poison the running total, so it contributes nothing
            if num_tokens > 0:
                loss = criterion(logits[mask], target[mask])
                # Mean loss times token count recovers the batch's summed loss
                total_loss += loss.item() * num_tokens
                total_tokens += num_tokens

            # Update sentence counter and check if we've reached maximum
            sentences_processed += input_seq.size(0)
            if sentences_processed >= max_sentences:
                break

    # Nothing to average: report NaN instead of dividing by zero
    if total_tokens == 0:
        return float("nan"), float("nan")

    average_loss = total_loss / total_tokens   # Normalize loss by number of tokens
    perplexity = math.exp(average_loss)        # Convert loss to perplexity

    return average_loss, perplexity

def generate_text(model, start_string, tokenizer, device, max_length=50):
    """
    Generates text continuation from a given start string using greedy decoding.

    The model is switched to evaluation mode and left in that mode. Generation
    runs without gradient tracking, since no backward pass is needed here.

    Args:
        model (nn.Module): Trained language model; called with a
            (1, seq_len) tensor of token IDs and indexed as
            (batch, position, vocab) on its output
        start_string (str): Initial text to continue from
        tokenizer: Tokenizer for text processing
        device: Device to run generation on (cuda/cpu)
        max_length (int): Maximum total length (prompt plus generated tokens)

    Returns:
        str: The prompt followed by the generated continuation, decoded with
             special tokens removed

    Raises:
        ValueError: If start_string encodes to zero tokens (there would be no
            position to predict the first new token from)
    """
    # Set model to evaluation mode to disable dropout and other training-specific behaviors
    model.eval()

    # Convert input string to token indices
    input_indices = tokenizer.encode(start_string, add_special_tokens=False)
    if len(input_indices) == 0:
        raise ValueError("start_string must encode to at least one token")

    # Convert indices to tensor and move to specified device (GPU/CPU)
    input_tensor = torch.tensor([input_indices], dtype=torch.long).to(device)

    # Keep track of all generated tokens, starting with input sequence
    generated_indices = list(input_indices)

    # No gradients are needed for inference; skipping them avoids building an
    # autograd graph that grows with every generated token
    with torch.no_grad():
        # Generate tokens until we hit max length or end-of-sequence token
        for _ in range(max_length - len(input_indices)):
            # Run the full sequence and keep the prediction for the last position
            logits = model(input_tensor)[:, -1, :]

            # Prevent the model from generating unknown tokens
            if tokenizer.unk_token_id is not None:
                logits[:, tokenizer.unk_token_id] = float("-inf")

            # Greedy decoding: select the token with the highest score
            next_token = torch.argmax(logits, dim=-1)
            next_id = next_token.item()

            # Add the chosen token to our generated sequence
            generated_indices.append(next_id)

            # If we generate an end-of-sequence token, stop generation
            if next_id == tokenizer.eos_token_id:
                break

            # Append the new token to the model input for the next step
            input_tensor = torch.cat([input_tensor, next_token.unsqueeze(0)], dim=1)

    # Convert token indices back to text, removing any special tokens
    return tokenizer.decode(generated_indices, skip_special_tokens=True)

def save_model(model, tokenizer, model_name):
    """
    Writes a checkpoint and the tokenizer to "models/<model_name>/".

    The checkpoint ("<model_name>.pth") bundles the model's state dictionary
    with the architecture settings needed to rebuild it. The tokenizer is
    stored in a "tokenizer" sub-directory.

    Args:
        model (nn.Module): The trained model to save; read through its
            `embedding` and `layers` attributes
        tokenizer: The tokenizer used with the model
        model_name (str): Name used for the directory and the checkpoint file
    """
    # Make sure models/<model_name>/ exists
    target_dir = os.path.join("models", model_name)
    os.makedirs(target_dir, exist_ok=True)

    # Weights first, then the architecture description read off the model
    checkpoint = {"model_state_dict": model.state_dict()}
    checkpoint["model_config"] = {
        "vocab_size": len(tokenizer),
        "emb_dim": model.embedding.embedding_dim,
        # Head and block counts are recovered from the module lists
        "num_heads": len(model.layers[0].attn.heads),
        "num_blocks": len(model.layers),
        "pad_idx": model.embedding.padding_idx
    }
    torch.save(checkpoint, os.path.join(target_dir, f"{model_name}.pth"))

    # Tokenizer files go next to the checkpoint
    tokenizer.save_pretrained(os.path.join(target_dir, "tokenizer"))

    print(f"Model and tokenizer saved as '{model_name}'")

def load_model(model_name, device=None):
    """
    Restores a model and tokenizer from "models/<model_name>/".

    Args:
        model_name (str): Name of the model to load
        device: Device to load the model onto (if None, CUDA is used when
            available, otherwise the CPU)

    Returns:
        tuple: (loaded_model, loaded_tokenizer); the model is on `device` and
               in evaluation mode

    Raises:
        FileNotFoundError: If the model directory does not exist
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_dir = os.path.join("models", model_name)
    if not os.path.exists(model_dir):
        raise FileNotFoundError(f"No saved model found with name '{model_name}'")

    # Tokenizer lives in the "tokenizer" sub-directory
    tokenizer = AutoTokenizer.from_pretrained(os.path.join(model_dir, "tokenizer"))

    # NOTE(review): weights_only=False unpickles arbitrary objects, so only
    # load checkpoints from a trusted source.
    checkpoint = torch.load(
        os.path.join(model_dir, f"{model_name}.pth"),
        map_location=device,
        weights_only=False,
    )

    # Rebuild the architecture from the stored settings
    config = checkpoint["model_config"]
    model = DecoderLanguageModel(
        vocab_size=config["vocab_size"],
        emb_dim=config["emb_dim"],
        num_heads=config["num_heads"],
        num_blocks=config["num_blocks"],
        pad_idx=config["pad_idx"]
    )

    # Restore the weights, move to the target device, switch to inference mode
    model.load_state_dict(checkpoint["model_state_dict"])
    model.to(device)
    model.eval()

    print(f"\nModel '{model_name}' loaded successfully")
    return model, tokenizer

def get_hyperparameters():
    """
    Returns the fixed model and training settings used by this script.

    Returns:
        tuple: (emb_dim, num_heads, num_blocks, batch_size, learning_rate,
                num_epochs, context_size)
    """
    hyperparameters = (
        128,    # emb_dim: embedding dimensionality
        8,      # num_heads: attention heads per block
        2,      # num_blocks: number of decoder blocks
        128,    # batch_size: sequences per mini-batch
        0.001,  # learning_rate: optimizer step size
        1,      # num_epochs: passes over the training data
        30,     # context_size: maximum input sequence length
    )
    return hyperparameters

In [2]:
#copied
def initialize_weights(model):
    """
    Walks every sub-module of `model` and initializes its parameters in place,
    choosing the scheme by module type.

    Weight matrices get Xavier-uniform values, biases are zeroed, embeddings
    are drawn from N(0, 0.02) with the padding row zeroed, and RMSNorm scales
    are set to one. Modules of any other type are left untouched.
    """
    xavier = nn.init.xavier_uniform_

    for layer in model.modules():
        if isinstance(layer, nn.Linear):
            # Standard linear layer: Xavier weights, zero bias (when present)
            xavier(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)
            continue

        if isinstance(layer, nn.Embedding):
            # Small-variance normal init for token embeddings
            nn.init.normal_(layer.weight, mean=0, std=0.02)
            if layer.padding_idx is not None:
                # Keep the padding token's embedding at exactly zero
                with torch.no_grad():
                    layer.weight[layer.padding_idx].fill_(0)
            continue

        if isinstance(layer, AttentionHead):
            # Query, key and value projections, in that order
            for projection in (layer.W_Q, layer.W_K, layer.W_V):
                xavier(projection)
            continue

        if isinstance(layer, MultiHeadAttention):
            # Output projection applied to the concatenated heads
            xavier(layer.W_O)
            continue

        if isinstance(layer, DecoderLanguageModel):
            # Final projection onto the vocabulary
            xavier(layer.output)
            continue

        if isinstance(layer, RMSNorm):
            # A scale of one leaves the normalized values unchanged at the start
            nn.init.ones_(layer.scale)
            continue

        if isinstance(layer, MLP):
            # Feed-forward weights, then its biases
            xavier(layer.W_1)
            xavier(layer.W_2)
            nn.init.zeros_(layer.B_1)
            nn.init.zeros_(layer.B_2)

def rope(x, theta_base=10000.0):
    """
    Applies Rotary Position Embedding (RoPE) to a batch of sequences.

    Consecutive feature pairs (0,1), (2,3), ... are treated as 2-D vectors and
    rotated by an angle that grows with the token position. Pair p (counting
    from 0) at position t is rotated by t / theta_base ** (2 * p / emb_dim).

    Args:
        x: Input tensor of shape (batch_size, seq_len, emb_dim), emb_dim even
        theta_base: Base for computing rotation frequencies (default: 10000.0)

    Returns:
        Tensor of the same shape as x with the rotations applied
    """
    batch_size, seq_len, emb_dim = x.shape
    assert emb_dim % 2 == 0, "Embedding dimensionality must be even for RoPE"
    half_dim = emb_dim // 2

    # One rotation frequency per feature pair; the 1-based index with (p - 1)
    # in the exponent gives exponents 0, 2/emb_dim, 4/emb_dim, ...
    pair_index = torch.arange(1, half_dim + 1, dtype=torch.float32, device=x.device)
    inv_freq = 1.0 / (theta_base ** (2 * (pair_index - 1) / emb_dim))

    # Angle for every (position, pair); the leading axis broadcasts over the batch
    positions = torch.arange(0, seq_len, dtype=torch.float32, device=x.device)
    angles = positions[None, :, None] * inv_freq
    cos_part, sin_part = torch.cos(angles), torch.sin(angles)

    # View the features as (pair, 2) and pull out both coordinates of each pair
    pairs = x.reshape(batch_size, seq_len, half_dim, 2)
    first, second = pairs[..., 0], pairs[..., 1]

    # Standard 2-D rotation of every pair
    rotated_first = first * cos_part - second * sin_part
    rotated_second = first * sin_part + second * cos_part

    # Interleave the coordinates back into the original feature layout
    interleaved = torch.stack((rotated_first, rotated_second), dim=-1)
    return interleaved.reshape(batch_size, seq_len, emb_dim)

In [3]:
class MultiHeadAttention(nn.Module):
    """
    Multi-head self-attention: runs several AttentionHead modules side by side,
    concatenates their outputs along the feature axis and mixes them with an
    output projection.

    Args:
        emb_dim (int): Width of the input and output embeddings
        num_heads (int): Number of attention heads; must divide emb_dim evenly

    Raises:
        ValueError: If emb_dim is not divisible by num_heads
    """
    def __init__(self, emb_dim, num_heads):
        super().__init__()
        # Each head produces emb_dim // num_heads features. Unless the division
        # is exact, the concatenated heads would be narrower than emb_dim and
        # the projection in forward() would fail with a shape mismatch.
        if emb_dim % num_heads != 0:
            raise ValueError(
                f"emb_dim ({emb_dim}) must be divisible by num_heads ({num_heads})"
            )
        d_h = emb_dim // num_heads
        self.heads = nn.ModuleList([AttentionHead(emb_dim, d_h) for _ in range(num_heads)])
        # Allocated uninitialized; values must be set before use
        # (initialize_weights does this)
        self.W_O = nn.Parameter(torch.empty(emb_dim, emb_dim))

    def forward(self, x, mask):
        """
        Args:
            x: Input embeddings; the last dimension must be emb_dim
            mask: Attention mask forwarded unchanged to every head

        Returns:
            Tensor with the same last dimension (emb_dim) as the input
        """
        head_outputs = [head(x, mask) for head in self.heads]
        # Concatenate the per-head features back to width emb_dim
        x = torch.cat(head_outputs, dim=-1)
        return x @ self.W_O


In [4]:
class AttentionHead(nn.Module):
    """
    A single scaled dot-product self-attention head with rotary position
    embeddings applied to queries and keys.

    Args:
        emb_dim (int): Width of the input embeddings
        d_h (int): Width of this head's query/key/value vectors
    """
    def __init__(self, emb_dim, d_h):
        super().__init__()
        # Projection matrices are allocated uninitialized; values must be set
        # before use (initialize_weights does this)
        self.W_Q = nn.Parameter(torch.empty(emb_dim, d_h))
        self.W_K = nn.Parameter(torch.empty(emb_dim, d_h))
        self.W_V = nn.Parameter(torch.empty(emb_dim, d_h))
        self.d_h = d_h

    def forward(self, x, mask):
        """
        Args:
            x: Input embeddings whose last dimension is emb_dim
            mask: Tensor broadcastable to the score matrix; positions where it
                equals 0 are blocked from receiving attention

        Returns:
            Attention output whose last dimension is d_h
        """
        # Project, then encode positions by rotating queries and keys
        queries = rope(x @ self.W_Q)
        keys = rope(x @ self.W_K)
        values = x @ self.W_V

        # Similarity of every query with every key, scaled by sqrt(d_h)
        scale = math.sqrt(self.d_h)
        scores = (queries @ keys.transpose(-2, -1)) / scale

        # How the causal mask is applied: blocked entries are set to -inf,
        # which softmax turns into a weight of exactly zero. With a
        # lower-triangular mask, row i therefore only attends to columns <= i.
        scores = scores.masked_fill(mask == 0, float('-inf'))
        weights = torch.softmax(scores, dim=-1)

        # Weighted average of the value vectors
        return weights @ values

In [5]:
class MLP(nn.Module):
    """
    Position-wise feed-forward network: expand to 4 * emb_dim, apply ReLU,
    project back to emb_dim.

    Args:
        emb_dim (int): Width of the input and output embeddings
    """
    def __init__(self, emb_dim):
        super().__init__()
        # The hidden layer is four times wider than the embeddings, a common
        # convention for transformer feed-forward layers
        hidden_dim = 4 * emb_dim
        # Weights are allocated uninitialized and must be set before use
        # (initialize_weights does this); biases start at zero
        self.W_1 = nn.Parameter(torch.empty(emb_dim, hidden_dim))
        self.W_2 = nn.Parameter(torch.empty(hidden_dim, emb_dim))
        self.B_1 = nn.Parameter(torch.zeros(hidden_dim))
        self.B_2 = nn.Parameter(torch.zeros(emb_dim))

    def forward(self, x):
        """Applies linear -> ReLU -> linear along the last dimension of x."""
        hidden = torch.relu(x @ self.W_1 + self.B_1)
        return hidden @ self.W_2 + self.B_2

In [6]:
class RMSNorm(nn.Module):
    """
    Root-mean-square normalization with a learnable per-feature scale.

    Each vector along the last dimension is divided by the square root of
    (mean of its squared entries + epsilon) and multiplied by `scale`.

    Args:
        emb_dim (int): Size of the last dimension of the inputs
        epsilon (float): Added under the square root for numerical stability
    """
    def __init__(self, emb_dim, epsilon=1e-8):
        super().__init__()
        self.scale = nn.Parameter(torch.ones(emb_dim))
        self.epsilon = epsilon

    def forward(self, x):
        """Returns x normalized along its last dimension and scaled."""
        mean_square = x.pow(2).mean(dim=-1, keepdim=True)
        root_mean_square = torch.sqrt(mean_square + self.epsilon)
        return self.scale * x / root_mean_square


In [7]:
class DecoderBlock(nn.Module):
    """
    One transformer decoder block in pre-norm form: each sub-layer
    (self-attention, then the feed-forward network) sees a normalized copy of
    its input, and its output is added back onto the un-normalized input.

    Args:
        emb_dim (int): Width of the embeddings flowing through the block
        num_heads (int): Number of attention heads
    """
    def __init__(self, emb_dim, num_heads):
        super().__init__()
        self.norm1 = RMSNorm(emb_dim)   # Normalizes the attention input
        self.norm2 = RMSNorm(emb_dim)   # Normalizes the feed-forward input
        self.attn = MultiHeadAttention(emb_dim, num_heads)
        self.mlp = MLP(emb_dim)

    def forward(self, x, mask):
        """
        Args:
            x: Input embeddings
            mask: Attention mask handed to the attention sub-layer

        Returns:
            Tensor of the same shape as x
        """
        # Attention sub-layer with residual connection
        after_attention = x + self.attn(self.norm1(x), mask)
        # Feed-forward sub-layer with residual connection
        return after_attention + self.mlp(self.norm2(after_attention))

In [8]:
class DecoderLanguageModel(nn.Module):
    """
    Decoder-only transformer language model: token embedding, a stack of
    decoder blocks under a causal mask, and a projection onto the vocabulary.

    Args:
        vocab_size (int): Number of tokens in the vocabulary
        emb_dim (int): Width of the token embeddings
        num_heads (int): Attention heads per decoder block
        num_blocks (int): Number of decoder blocks
        pad_idx: Index of the padding token, passed to the embedding layer
    """
    def __init__(self, vocab_size, emb_dim, num_heads, num_blocks, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=pad_idx)
        blocks = [DecoderBlock(emb_dim, num_heads) for _ in range(num_blocks)]
        self.layers = nn.ModuleList(blocks)
        # Maps final hidden states to one score per vocabulary entry
        self.output = nn.Parameter(torch.rand(emb_dim, vocab_size))

    def forward(self, x):
        """
        Args:
            x: Token IDs of shape (batch_size, seq_len)

        Returns:
            Logits of shape (batch_size, seq_len, vocab_size)
        """
        hidden = self.embedding(x)
        _batch, seq_len, _width = hidden.shape

        # Lower-triangular ones: position i may attend to positions 0..i only
        causal_mask = torch.ones(seq_len, seq_len, device=hidden.device).tril()

        for block in self.layers:
            hidden = block(hidden, causal_mask)
        return hidden @ self.output

In [None]:
#copied

# ----------------------------
# Main training loop for a Decoder Language Model
# This script handles the entire training process including data loading,
# model training, validation, and text generation
# ----------------------------

if __name__ == "__main__":
    # End-to-end training script: builds the tokenizer, data loaders and model,
    # trains for num_epochs with periodic validation and sample generation,
    # then saves the model and tokenizer.

    # Initialize random seeds to ensure reproducible results
    set_seed(42)

    # Retrieve model architecture and training hyperparameters from configuration
    # emb_dim: dimensionality of input token and intermediary embeddings
    # num_heads: number of attention heads in each transformer block
    # num_blocks: number of transformer blocks in the model
    # batch_size: mini-batch size
    # learning_rate: step size for optimizer updates
    # num_epochs: number of complete passes through the training dataset
    # context_size: maximum input sequence length
    emb_dim, num_heads, num_blocks, batch_size, learning_rate, num_epochs, context_size = get_hyperparameters()

    # Initialize the tokenizer using Microsoft's Phi-3.5-mini model
    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")
    # Get padding token index for padding shorter sequences
    pad_idx = tokenizer.pad_token_id
    # Fail early with a clear message: pad_idx feeds the embedding, the loss's
    # ignore_index and the per-batch padding mask below.
    if pad_idx is None:
        raise ValueError(
            "Tokenizer defines no padding token; a pad token id is required "
            "for the loss mask and ignore_index."
        )

    # Check for CUDA-capable GPU and set the device accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Download the news dataset and create DataLoader objects for training and testing
    # DataLoaders handle batching and shuffling
    data_url = "https://www.thelmbook.com/data/news"
    train_dataloader, test_dataloader = download_and_prepare_data(
        data_url, batch_size, tokenizer, context_size
    )

    # Get the size of the vocabulary that the model needs to handle
    vocab_size = len(tokenizer)
    print(f"\nVocabulary size: {vocab_size}\n")

    # Initialize the Decoder language model with specified architecture parameters
    # vocab_size: determines output layer dimensionality
    # emb_dim: size of token embeddings and intermediary embeddings
    # num_heads: number of attention heads per transformer block
    # num_blocks: number of transformer blocks in the model
    # pad_idx: special token ID used for padding shorter sequences
    model = DecoderLanguageModel(
        vocab_size, emb_dim, num_heads, num_blocks, pad_idx
    )

    # Move the model to GPU if available
    model.to(device)

    # Initialize model weights using custom initialization scheme
    # This is important for stable training of deep neural networks
    initialize_weights(model)

    # Initialize the AdamW optimizer with specified learning rate
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

    # Initialize the loss function (Cross Entropy) for training
    # ignore_index=pad_idx ensures that padding tokens don't contribute to the loss
    criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

    # Calculate and display the total number of trainable parameters in the model
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"\nTotal trainable parameters: {total_params}\n")

    # Set evaluation interval (number of examples after which to perform validation)
    # 200,000 examples provides a good balance between training time and monitoring frequency
    eval_interval = 200_000

    # Interval statistics: everything seen since the previous periodic evaluation.
    # They live outside the epoch loop together with examples_processed so the
    # reported average always covers exactly the reported number of examples,
    # even when an interval straddles an epoch boundary.
    examples_processed = 0  # Counter for tracking progress toward next evaluation
    total_loss = 0.0        # Summed token-level loss since the last evaluation
    total_tokens = 0        # Non-padding tokens since the last evaluation

    # Define test contexts for generating sample text during evaluation
    contexts = [
        "Moscow",
        "New York",
        "A hurricane",
        "The President"
    ]

    # Main training loop - iterate through specified number of epochs
    for epoch in range(num_epochs):
        # Set model to training mode
        model.train()

        # Epoch statistics are tracked separately from the interval statistics,
        # because the latter are reset after every periodic evaluation.
        epoch_loss = 0.0      # Summed token-level loss across the whole epoch
        epoch_tokens = 0      # Non-padding tokens across the whole epoch

        # Create progress bar for monitoring training progress
        progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")

        # Iterate through batches in the training data
        for input_seq, target_seq in progress_bar:
            # Move input and target sequences to GPU if available
            input_seq = input_seq.to(device)
            target_seq = target_seq.to(device)

            # Create mask to exclude padding tokens from loss calculation
            target = target_seq.reshape(-1)
            mask = target != pad_idx
            num_tokens = int(mask.sum().item())

            # A batch with no real targets would give a mean loss over zero
            # elements (NaN) and poison the running totals; skip it.
            if num_tokens == 0:
                continue

            # Clear gradients from previous batch
            optimizer.zero_grad()

            # Forward pass: get model predictions for this batch
            # output shape: (batch_size, seq_len, vocab_size)
            logits = model(input_seq)

            # Reshape logits for loss computation
            logits = logits.reshape(-1, logits.size(-1))

            # Compute loss between model predictions and actual targets
            # Using masked versions to ignore padding tokens
            loss = criterion(logits[mask], target[mask])

            # Backward pass: compute gradients of loss with respect to model parameters
            loss.backward()

            # Update model parameters using calculated gradients
            optimizer.step()

            # Convert the mean loss back to a per-batch sum so averages over
            # several batches are weighted by their real token counts
            batch_loss = loss.item()
            loss_value = batch_loss * num_tokens

            # Accumulate loss and tokens for both interval and epoch statistics
            total_loss += loss_value
            total_tokens += num_tokens
            epoch_loss += loss_value
            epoch_tokens += num_tokens
            examples_processed += input_seq.size(0)

            # Update progress bar with current batch loss
            progress_bar.set_postfix({"loss": f"{batch_loss:.4f}"})

            # Periodic evaluation after processing specified number of examples
            if examples_processed >= eval_interval:
                # Calculate average loss over the last eval_interval examples
                avg_loss = total_loss / total_tokens
                print(f"\nAfter {examples_processed} examples, Average Loss: {avg_loss:.4f}")

                # Switch to evaluation mode
                model.eval()

                # No gradients are needed for validation or sampling; this is
                # harmless if the helpers already disable them internally.
                with torch.no_grad():
                    # Compute validation metrics
                    average_loss, perplexity = compute_loss_and_perplexity(
                        model, test_dataloader, tokenizer, criterion, device, max_sentences=1000
                    )
                    # Record validation
                    print(f"\nValidation Average Loss: {average_loss:.4f}, Perplexity: {perplexity:.2f}")

                    # Generate sample texts to qualitatively assess model performance
                    for context in contexts:
                        # Generate text continuation for each test context
                        generated_text = generate_text(
                            model=model,
                            start_string=context,
                            tokenizer=tokenizer,
                            device=device,
                            max_length=50
                        )
                        print(f"\nContext: {context}")
                        print(f"\nGenerated text: {generated_text}\n")

                # Switch back to training mode for continued training
                model.train()

                # Reset interval counters for next evaluation interval
                examples_processed = 0
                total_loss = 0.0
                total_tokens = 0

        # End-of-epoch reporting
        if epoch_tokens > 0:
            # Calculate and display average loss for the epoch
            avg_loss = epoch_loss / epoch_tokens
            print(f"\nEpoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
        else:
            # Handle edge case where no tokens were processed
            print(f"\nEpoch {epoch+1}/{num_epochs} completed.")

        # Perform end-of-epoch validation
        model.eval()

        with torch.no_grad():
            # Generate sample texts for qualitative assessment
            print("\nGenerating text based on contexts using generate_text:\n")
            for context in contexts:
                generated_text = generate_text(
                    model=model,
                    start_string=context,
                    tokenizer=tokenizer,
                    device=device,
                    max_length=50
                )
                print(f"\nContext: {context}")
                print(f"\nGenerated text: {generated_text}\n")

            average_loss, perplexity = compute_loss_and_perplexity(
                model, test_dataloader, tokenizer, criterion, device, max_sentences=1000
            )
            print(f"\nValidation Average Loss: {average_loss:.4f}, Perplexity: {perplexity:.2f}")

        # Reset to training mode for next epoch
        model.train()

    # Save the trained model and tokenizer for later use
    # This includes model architecture, weights, and tokenizer configuration
    model_name = "Decoder_LM"
    save_model(model, tokenizer, model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/665 [00:00<?, ?B/s]


Downloading dataset from https://www.thelmbook.com/data/news...

Download completed.

Listing archive contents:

Archive member: news

Archive member: news/train.txt

Archive member: news/test.txt

Extracting files...


  tar.extractall('.')



Extraction completed.

Counting sentences in news/train.txt...

Found 22034911 sentences in news/train.txt.

Counting sentences in news/test.txt...

Found 449693 sentences in news/test.txt.

Training sentences: 22034911

Test sentences: 449693

Vocabulary size: 32011


Total trainable parameters: 8589824



Epoch 1/1:   1%|          | 1561/172148 [02:37<4:21:18, 10.88it/s, loss=5.0870]


After 200064 examples, Average Loss: 5.3756



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:13, 18.11it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:06, 18.82it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.10it/s][A
                                                            [A


Validation Average Loss: 4.8355, Perplexity: 125.90

Context: Moscow

Generated text: Moscow has been charged with murdered in the ####s . 's ' . 's Office . 's Office . 's . 's . 's . 's . 's . 's . ' . ' . ' . ' .


Context: New York

Generated text: New York , who has been charged with murdered in the ####s . 's ' . 's Office . 's Office . 's Office . 's Office . 's . 's . 's . 's . 's .


Context: A hurricane

Generated text: A hurricane from the home in the ####s , which is a new . '' . 's Office . 's Office . 's Office . 's Office . 's Office . 's . ' . 's . ' . '



Epoch 1/1:   1%|          | 1564/172148 [02:41<28:37:45,  1.66it/s, loss=4.8098]


Context: The President

Generated text: The President of the ##-year-old was in the ####s of the ##-year-old . 's . 's . 's mother 's . '' . 's mother told the BBC . 's . 's . '



Epoch 1/1:   2%|▏         | 3125/172148 [05:18<5:09:05,  9.11it/s, loss=4.8347]


After 200064 examples, Average Loss: 4.7397



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:51, 15.19it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:55, 14.92it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:49, 15.27it/s][A
                                                            [A


Validation Average Loss: 4.6155, Perplexity: 101.04

Context: Moscow

Generated text: Moscow 's office has been charged with a number of fraud . '' . 's office said . ' '' . ' '' . ' '' . ' '' . 's office said . 's . ' . ' . ' . ' . '


Context: New York

Generated text: New York City 's side have been in the past few weeks . '' 's office said . ' '' . 'The family 's office . ' '' . 's office said . 's family . 's . 's . ' .


Context: A hurricane

Generated text: A hurricane , which was found in the village of Stanton , was also found in the village of Stanton , and the ##-year-old . 's mother told the court . ' . ' . ' . ' . ' .



Epoch 1/1:   2%|▏         | 3127/172148 [05:23<50:23:33,  1.07s/it, loss=4.6655]


Context: The President

Generated text: The President was a `` very good '' . '' and the family had a good job . '' . '' . '' . '' . '' . '' . '' . 's office said . 'We 'll be able to get the first time . '



Epoch 1/1:   3%|▎         | 4688/172148 [08:00<4:17:27, 10.84it/s, loss=4.6108]


After 200064 examples, Average Loss: 4.5857



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:19, 17.61it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:09, 18.56it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:02, 19.18it/s][A
                                                            [A


Validation Average Loss: 4.5055, Perplexity: 90.52

Context: Moscow

Generated text: Moscow is not yet clear whether the government is responsible for the attacks . '' 'The . ' '' 's official said . ' '' 'The . ' '' 'The . ' '' 'The . 'The only one of the most of the issue


Context: New York

Generated text: New York : The couple 's parents , who were killed in the attack . 's ##-year-old , who was killed . 's sister , said . ' I 'm not sure . 's . 's . 's .


Context: A hurricane

Generated text: A hurricane is a key part of the new era of the world 's most important . '' 's official said . ' '' 'The . ' '' 'The . ' '' 'The . ' . 's . 's 's



Epoch 1/1:   3%|▎         | 4690/172148 [08:04<32:00:46,  1.45it/s, loss=4.5663]


Context: The President

Generated text: The President 's office said the government would not be able to pay the number of people who have been killed . ' '' 'The . ' '' 'The . ' '' 'The . ' '' 's official said . 's . 's



Epoch 1/1:   4%|▎         | 6251/172148 [10:41<5:04:39,  9.08it/s, loss=4.4853]


After 200064 examples, Average Loss: 4.4960



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:43, 15.73it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:53, 15.03it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:54, 14.94it/s][A
                                                            [A


Validation Average Loss: 4.4314, Perplexity: 84.05

Context: Moscow

Generated text: Moscow has been a key role in the country 's economy . '' 'We 're not to be able to do the same . ' '' said . ' '' . ' '' the company said . 's `` to the `` . '' . ''


Context: New York

Generated text: New York City : The ##-year-old was arrested in the ####s . ' '' 'We 've been charged with the murder of the woman . 's mother . ' '' 's mother said . 's . 's . '


Context: A hurricane

Generated text: A hurricane , which is the most important thing to be the most important thing to be the most important thing to be the most important thing . '' ' '' . ' '' said . ' '' . 's said . 's . 's



Epoch 1/1:   4%|▎         | 6253/172148 [10:46<45:14:02,  1.02it/s, loss=4.4383]


Context: The President

Generated text: The President said the government had not yet been released . '' . ' '' 'We 're not to be able to do the same . ' '' 'We 're not to be able to-beat . 's . 's . '



Epoch 1/1:   5%|▍         | 7814/172148 [13:22<4:11:10, 10.90it/s, loss=4.4400]


After 200064 examples, Average Loss: 4.4371



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<02:58, 19.63it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:16, 17.88it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:09, 18.55it/s][A
                                                            [A


Validation Average Loss: 4.3852, Perplexity: 80.25

Context: Moscow

Generated text: Moscow has been a long-running warrant for the attack . '' . '' . '' 'We 're not to be able to do it . ' '' ' . ' '' ' . 's . ' '' ' . ' . ' . '


Context: New York

Generated text: New York City : The couple are now in the past . ' '' 's family . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' 's . 's . 's . 's . 's .


Context: A hurricane




Epoch 1/1:   5%|▍         | 7816/172148 [13:26<31:01:24,  1.47it/s, loss=4.3329]


Context: The President

Generated text: The President 's decision to be released on a statement from the Commons . ' '' 's decision to be released . ' '' ' . ' '' . ' '' 's . ' '' 's decision to be made . 's decision . '



Epoch 1/1:   5%|▌         | 9377/172148 [16:02<5:49:55,  7.75it/s, loss=4.3985]


After 200064 examples, Average Loss: 4.3974



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:57, 11.81it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:37, 12.65it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:28, 13.05it/s][A
                                                            [A


Validation Average Loss: 4.3429, Perplexity: 76.93

Context: Moscow

Generated text: Moscow has been a major international threat to the United States . '' . ' '' 's statement said . ' '' 'We 'll be able to get the first of the world 's most important . ' '' ' . ' . ' . '


Context: New York

Generated text: New York Mayor of the . 'We 'll be able to get the best of the world . ' '' 's report . ' '' 's statement . ' '' 's statement said . 's . 's . 's . 's


Context: A hurricane

Generated text: A hurricane center has been a huge fire for the past ## years . ' '' 's mother said . ' '' 'The . ' '' ' I 'll be able to get the same . ' and the way of the way . '



Epoch 1/1:   5%|▌         | 9379/172148 [16:06<44:33:11,  1.01it/s, loss=4.4746]


Context: The President

Generated text: The President said the government had `` no immediate effect '' . '' . '' 'We 'd have been in the past few weeks . ' '' ' I 'll be able to get the opportunity to get the opportunity to getaway . 's ``



Epoch 1/1:   6%|▋         | 10940/172148 [18:42<4:13:45, 10.59it/s, loss=4.3113]


After 200064 examples, Average Loss: 4.3638



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:00, 19.43it/s][A
Evaluating:   0%|          | 4/3514 [00:00<02:58, 19.66it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:00, 19.41it/s][A
                                                            [A


Validation Average Loss: 4.3155, Perplexity: 74.85

Context: Moscow

Generated text: Moscow has been a long-term . '' 'We 're not sure that 's what we 're going to be . ' '' 's report . ' '' 's a long time . ' . ' . ' . ' . ' he


Context: New York

Generated text: New York Times : The Duke of Cambridge is the first of the ##th century . ' '' 's report . ' '' 's report . ' '' 's report . ' '' 's . 's . 's . 's . '


Context: A hurricane

Generated text: A hurricane is a popular in the world . ' '' 's report said . ' '' 'We 're not sure how much it 's . ' '' ' he said . ' '' 's . ' ITV 's ' .



Epoch 1/1:   6%|▋         | 10942/172148 [18:47<32:06:52,  1.39it/s, loss=4.3302]


Context: The President

Generated text: The President of the House of Commons , who has been working with the Conservatives , said the party would be `` a very good decision '' . '' . '' 'We 've . '' 's . '' 's . 's . 's



Epoch 1/1:   7%|▋         | 12503/172148 [21:24<5:33:51,  7.97it/s, loss=4.4397]


After 200064 examples, Average Loss: 4.3322



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:31, 12.93it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:46, 12.24it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:28, 13.09it/s][A
                                                            [A


Validation Average Loss: 4.2958, Perplexity: 73.39

Context: Moscow

Generated text: Moscow has been a major part of the conflict in the region . '' ' '' said . ' '' 'The U.S. Department of Defence . ' '' 's . ' '' 's . ' . ' . ' . ' . '


Context: New York

Generated text: New York City 's Cathedral , which is a major part of the world 's most recent years in the world . ' '' 's report said . ' '' 'The . ' '' 's spokesman said . 's . '


Context: A hurricane

Generated text: A hurricane in the city of the city of the city of the city of the city of the city of the city of the city . '' 's report said . ' . ' '' 's . ' . ' . ' . ' .



Epoch 1/1:   7%|▋         | 12505/172148 [21:28<43:12:55,  1.03it/s, loss=4.2167]


Context: The President

Generated text: The President 's office said the government has been `` deeply disappointed '' . '' ' '' said . ' '' 'The . ' '' 'The . ' '' ' I 'm sure to be a `` a good idea of the `` a ``



Epoch 1/1:   8%|▊         | 14066/172148 [24:05<4:04:41, 10.77it/s, loss=4.3377]


After 200064 examples, Average Loss: 4.3086



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:48, 15.35it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:19, 17.59it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:09, 18.56it/s][A
                                                            [A


Validation Average Loss: 4.2743, Perplexity: 71.83

Context: Moscow

Generated text: Moscow has also been accused of terrorism and terrorists . '' . ' '' 's report . ' '' 's report said . ' '' . ' '' 's . ' '' 's . 'in-election 's `` a ``


Context: New York

Generated text: New York City Mayor Bill Clinton said : `` We are not going to be a good idea . '' ' '' . ' '' ' I 'm not sure . ' '' 's report . 's Day . 's . 's . '


Context: A hurricane

Generated text: A hurricane is a huge part of the world 's largest ever since . '' ' '' . ' '' 's mother said . ' '' ' I 'm not sure . ' '' 's . 's . ' . ' . '



Epoch 1/1:   8%|▊         | 14068/172148 [24:09<30:50:55,  1.42it/s, loss=4.3766]


Context: The President

Generated text: The President has said that the government has been `` a very important step '' . '' ' '' . ' '' ' he said . ' '' ' I 'm not sure that he 's a good . 's been a good . ' . '



Epoch 1/1:   9%|▉         | 15629/172148 [26:47<6:01:08,  7.22it/s, loss=4.2180]


After 200064 examples, Average Loss: 4.2923



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<05:05, 11.49it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:55, 11.88it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:29, 12.99it/s][A
                                                            [A


Validation Average Loss: 4.2489, Perplexity: 70.03

Context: Moscow

Generated text: Moscow 's government has been accused of being a `` very dangerous '' and the country 's government . '' . '' . '' . '' . '' . '' . ' '' said . 's . 's . 's . ' . 's


Context: New York

Generated text: New York-based company , which is owned by the company 's company , has been in the UK since #### . ' '' . ' '' . ' '' said . ' '' . ' '' . 's . 's . 's . '


Context: A hurricane

Generated text: A hurricane in the area is now in the area . ' '' said . ' '' 'The ##-year-old was the first time . ' '' . ' '' said . ' '' ' . ' . ' . ' and 's



Epoch 1/1:   9%|▉         | 15631/172148 [26:53<58:40:29,  1.35s/it, loss=4.2927]


Context: The President

Generated text: The President is accused of being a member of the . ' '' ' and the 's family . ' '' ' . ' '' ' . ' '' . ' '' said . ' '' . ' . 's . ' . ' . ' . '



Epoch 1/1:  10%|▉         | 17191/172148 [29:30<3:55:23, 10.97it/s, loss=4.2537]


After 200064 examples, Average Loss: 4.2769



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<02:56, 19.87it/s][A
Evaluating:   0%|          | 4/3514 [00:00<02:59, 19.56it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:11, 18.28it/s][A
                                                            [A


Validation Average Loss: 4.2390, Perplexity: 69.34

Context: Moscow

Generated text: Moscow 's government has been criticised by the government 's government . '' ' '' . ' '' 'The United Nations said . ' '' 'The . ' '' 'The . ' '' 's . 's . 's . 's


Context: New York

Generated text: New York City Mayor Michael Bloomberg said : 'The United States is a very important step . ' '' 's report . ' '' 'The . ' '' ' and . ' '' 's . 's . 's . 's .


Context: A hurricane

Generated text: A hurricane is the most common in the world . '' ' '' he said . ' '' 'The . ' '' 'The . ' '' 'We 're not sure the person 's family 's 's best ' . 's



Epoch 1/1:  10%|▉         | 17194/172148 [29:34<24:49:46,  1.73it/s, loss=4.2656]


Context: The President

Generated text: The President is the first president of the United Nations , which is a major political leader of the country 's government . '' 's Office said . ' '' 's . ' '' 's . 's . 's . 's . '



Epoch 1/1:  11%|█         | 18754/172148 [32:11<3:59:56, 10.65it/s, loss=4.1995]


After 200064 examples, Average Loss: 4.2623



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:07, 18.77it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:04, 18.99it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:30, 16.64it/s][A
                                                            [A


Validation Average Loss: 4.2313, Perplexity: 68.81

Context: Moscow

Generated text: Moscow 's government has been criticised by the government . ' '' 's statement . ' '' 's statement said . ' '' 'The . ' '' ' . ' '' ' and . 's . 's been a . 's .


Context: New York

Generated text: New York Mayor of London Mayor Michael Bloomberg said : 'We are delighted to be a very good idea . ' '' 's . ' '' 'We are now . ' '' 's . 's . ' . ' . 's


Context: A hurricane

Generated text: A hurricane in the region is now being built in the southern region of the city of <rare> , which is now ## miles north of the city . ' '' 's spokesman said . 's . 's . ' .



Epoch 1/1:  11%|█         | 18757/172148 [32:16<30:49:15,  1.38it/s, loss=4.1156]


Context: The President

Generated text: The President is now in the middle of the year . ' '' ' , the report said . ' '' 'The . ' '' ' . ' '' 'We 're not going to be a . 's . 's . 's ' .



Epoch 1/1:  12%|█▏        | 20317/172148 [34:53<3:52:30, 10.88it/s, loss=4.1541]


After 200064 examples, Average Loss: 4.2459



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:03, 19.09it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:00, 19.45it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.10it/s][A
                                                            [A


Validation Average Loss: 4.2161, Perplexity: 67.77

Context: Moscow

Generated text: Moscow 's office said it was `` a very difficult decision '' . '' 'We 're not going to be able to make sure that the country 's future . ' '' ' . 's . 's a `` . ' . 's


Context: New York

Generated text: New York : The ##-year-old was shot dead in the head and was shot dead in the head . ' '' 's office said . 'We 're not sure . ' he said . 's . 's . 's .


Context: A hurricane

Generated text: A hurricane in the northwest of the city of <rare> , which is believed to be a major destination for the country . ' '' 's <rare> . ' . ' . ' . 's . ' . ' . '



Epoch 1/1:  12%|█▏        | 20320/172148 [34:57<24:17:23,  1.74it/s, loss=4.1241]


Context: The President

Generated text: The President 's office said the two-year-old was a 'slight ' and ' a 'would of the '##s ' . ' '' ' . ' '' ' . 's . ' . 's . ' . '



Epoch 1/1:  13%|█▎        | 21880/172148 [37:35<4:55:00,  8.49it/s, loss=4.2967]


After 200064 examples, Average Loss: 4.2375



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:11, 18.39it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:05, 18.90it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:02, 19.23it/s][A
                                                            [A


Validation Average Loss: 4.1960, Perplexity: 66.42

Context: Moscow

Generated text: Moscow has been accused of killing the ##-year-old of the attack . ' '' 's attorney . ' '' . ' '' . ' . ' . ' . ' . ' . 's . ' . 's . ' . '


Context: New York

Generated text: New York City Mayor Mike P.J . ' '' 's attorney , `` I 'm not sure what the president is . '' '' 's office said . ' '' . ' '' 's . 's . 's . 's


Context: A hurricane

Generated text: A hurricane in the area is now being held in the eastern province of the capital of the country . ' '' said . 'We 're not sure . ' '' ' . ' he said . ' . ' . ' . ' . '



Epoch 1/1:  13%|█▎        | 21883/172148 [37:39<33:00:19,  1.26it/s, loss=4.0868]


Context: The President

Generated text: The President has been criticised for the `` serious and serious consequences of the violence . '' '' . ' '' said . ' '' . ' '' . ' '' . ' '' . ' '' . 's . ' . ' . 's . '



Epoch 1/1:  14%|█▎        | 23443/172148 [40:16<3:50:48, 10.74it/s, loss=4.1807]


After 200064 examples, Average Loss: 4.2268



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:19, 17.62it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:10, 18.43it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:05, 18.88it/s][A
                                                            [A


Validation Average Loss: 4.1884, Perplexity: 65.92

Context: Moscow

Generated text: Moscow 's government has been in the midst of a crisis . '' ' '' he said . ' '' 's statement . ' '' ' . ' '' 'The . ' '' ' he said . 's . 's . 's . '


Context: New York

Generated text: New York City 's Secret Service said the ##-year-old was a `` very good , '' and `` a great-grandfather . '' '' '' . '' ' '' . ' '' 's statement . 's . 's . '


Context: A hurricane

Generated text: A hurricane in the area is a very small area of the area . ' '' ' and the airline 's airline . ' '' 's statement said . ' '' ' . ' '' ' . ' . ' . ' . '



Epoch 1/1:  14%|█▎        | 23446/172148 [40:21<26:54:42,  1.53it/s, loss=4.1899]


Context: The President

Generated text: The President said the government 's decision to be taken to the Commons committee . ' '' 's report said . ' '' 's report . ' '' ' . ' '' ' . 'We . 's . 's . 's . '



Epoch 1/1:  15%|█▍        | 25007/172148 [42:58<3:53:42, 10.49it/s, loss=4.1775]


After 200064 examples, Average Loss: 4.2131



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:17, 17.80it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:08, 18.66it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:05, 18.94it/s][A
                                                            [A


Validation Average Loss: 4.1887, Perplexity: 65.94

Context: Moscow

Generated text: Moscow 's nuclear programme is a major issue of the region . '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' 's . ' '' 's . 's . 's . 's


Context: New York

Generated text: New York City police chiefs said the attack was `` a very serious incident '' . '' 'The . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' 's . ' . ' . 's . '


Context: A hurricane

Generated text: A hurricane in the area is a major area of the city 's city of <rare> , which is a major event . '' ' . ' '' ' . ' '' ' . ' . ' . ' . ' . ' . '



Epoch 1/1:  15%|█▍        | 25009/172148 [43:02<29:29:27,  1.39it/s, loss=4.1637]


Context: The President

Generated text: The President has been accused of a 'great ' and 's ' . ' '' ' . ' '' ' and said he was 'very concerned ' . ' '' ' . ' . ' . ' . ' . ' . ' . ' .



Epoch 1/1:  15%|█▌        | 26570/172148 [45:39<3:49:09, 10.59it/s, loss=4.2747]


After 200064 examples, Average Loss: 4.2053



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:05, 18.90it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:02, 19.21it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.16it/s][A
                                                            [A


Validation Average Loss: 4.1817, Perplexity: 65.48

Context: Moscow

Generated text: Moscow has been a major concern for the Russian military and the U.S. military . '' '' said . ' '' . ' '' . ' '' . ' '' 's . ' '' 's . 's . 's . ' . '


Context: New York

Generated text: New York City Mayor Bill Clinton said : `` We are very pleased to have a good job . '' 's decision . '' ' '' . ' '' ' . ' '' ' . ' '' 's . 's . 's . 's


Context: A hurricane

Generated text: A hurricane has been reportedly in the area , which has been in the north of the city 's capital , which has been in the region . ' '' said . ' '' . ' . ' and the . ' . ' . '



Epoch 1/1:  15%|█▌        | 26572/172148 [45:44<32:54:40,  1.23it/s, loss=4.2757]


Context: The President

Generated text: The President has said he has been `` a very good man . '' '' . ' '' ' and said he is sorry for the incident . ' '' ' . ' '' ' . ' '' 's . ' . 's . 's . '



Epoch 1/1:  16%|█▋        | 28132/172148 [48:22<3:51:02, 10.39it/s, loss=4.1158]


After 200064 examples, Average Loss: 4.1980



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:05, 18.88it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:08, 18.63it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:04, 19.06it/s][A
                                                            [A


Validation Average Loss: 4.1774, Perplexity: 65.19

Context: Moscow

Generated text: Moscow has been in the past few years , and has been in the past few years . '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' . 's . 's . 's . ' .


Context: New York

Generated text: New York Times : The couple 's mother , who is married to the couple , were married . 'Sapp . ' '' ' . ' '' ' . ' . ' . ' . ' . ' . 's . 's . 's


Context: A hurricane

Generated text: A hurricane , which is the first time the country is in the world , is now in the midst of a massive storm . '' 's spokesman . ' '' 's . ' . ' . 's . ' . 's



Epoch 1/1:  16%|█▋        | 28135/172148 [48:26<23:29:45,  1.70it/s, loss=4.1421]


Context: The President

Generated text: The President , who is in the midst of a new campaign , has been in the past few years . '' 's ##-year-old son of the British . ' '' 's . 's . 's . 's . 's



Epoch 1/1:  17%|█▋        | 29696/172148 [51:03<3:39:59, 10.79it/s, loss=4.0000]


After 200064 examples, Average Loss: 4.1884



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:16, 17.86it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:05, 18.88it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.08it/s][A
                                                            [A


Validation Average Loss: 4.1631, Perplexity: 64.27

Context: Moscow

Generated text: Moscow has been accused of being part of the attack . '' ' and said he was . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . ' . ' . ' . ' . '


Context: New York

Generated text: New York City 's first-ever-## win over the United States . '' 's ##-year-old son , who was the first to be the first of the ##th Century . 's . 's . 's . '


Context: A hurricane

Generated text: A hurricane in the middle of the day of the storm . ' '' , he said . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 'The . 's . 's . ' . '



Epoch 1/1:  17%|█▋        | 29698/172148 [51:08<33:17:40,  1.19it/s, loss=4.2216]


Context: The President

Generated text: The President of the U.S. Embassy in the U.S. Embassy in the U.S. Embassy in the U.S. Embassy in the U.S. District of Defense of the



Epoch 1/1:  18%|█▊        | 31258/172148 [53:44<3:36:03, 10.87it/s, loss=4.3244]


After 200064 examples, Average Loss: 4.1871



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<02:58, 19.63it/s][A
Evaluating:   0%|          | 4/3514 [00:00<02:59, 19.60it/s][A
Evaluating:   0%|          | 6/3514 [00:00<02:57, 19.76it/s][A
                                                            [A


Validation Average Loss: 4.1580, Perplexity: 63.94

Context: Moscow

Generated text: Moscow has been a major problem in the past . '' ' '' and has been a `` huge increase in the economy . '' '' ' . '' ' '' . ' '' ' . ' '' ' . ' . ' . ' . ' . ' .


Context: New York

Generated text: New York City 's ##-year-old son , who was a teacher at the time of the shooting . ' '' 's . ' '' ' . ' '' ' . ' '' . ' . ' . ' . ' . ' . '


Context: A hurricane

Generated text: A hurricane in the area is expected to be closed in the city of <rare> . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' . ' . ' . ' . ' .



Epoch 1/1:  18%|█▊        | 31261/172148 [53:48<22:27:49,  1.74it/s, loss=4.2839]


Context: The President

Generated text: The President 's office said the `` no-one '' was notified by the government 's decision . '' 's report . '' ' . '' ' . ' '' ' . '' 's . ' . ' . 's . ' .



Epoch 1/1:  19%|█▉        | 32822/172148 [56:25<4:32:09,  8.53it/s, loss=4.0758]


After 200064 examples, Average Loss: 4.1766



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:24, 13.29it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:36, 12.68it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:58, 11.76it/s][A
                                                            [A


Validation Average Loss: 4.1480, Perplexity: 63.31

Context: Moscow

Generated text: Moscow has been criticized by the U.S. and its nuclear weapons . '' 's report . ' '' said . ' '' . ' '' . ' '' 's . ' . 's . ' . ' . 's ' . '


Context: New York

Generated text: New York City : The couple were in the middle of the road and the couple were in the car . ' '' 's . ' '' ' . ' '' ' . ' '' ' . ' . 's . 's . 's . '


Context: A hurricane

Generated text: A hurricane in the region is expected to be closed in the southwest of the city , where the city is expected to be closed . '' 's ##-year-old . 'The . 's . ' . ' . ' .



Epoch 1/1:  19%|█▉        | 32824/172148 [56:30<42:09:04,  1.09s/it, loss=4.2418]


Context: The President

Generated text: The President 's office said the decision was `` a very good thing '' . '' 's report . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' 's . 's . 's . 's



Epoch 1/1:  20%|█▉        | 34384/172148 [59:07<3:34:31, 10.70it/s, loss=4.2542]


After 200064 examples, Average Loss: 4.1693



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:12, 18.26it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:06, 18.85it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.10it/s][A
                                                            [A


Validation Average Loss: 4.1454, Perplexity: 63.14

Context: Moscow

Generated text: Moscow has been working on the site since #### . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . 's . ' . ' . 's . ' .


Context: New York

Generated text: New York City : The ##-year-old , who is pictured with his wife , is pictured with her mother . ' '' . ' '' ' . ' . ' . 's . ' . ' . 's . '


Context: A hurricane

Generated text: A hurricane in the sky is expected to be the first of the year . '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' . ' . ' . ' . ' .



Epoch 1/1:  20%|█▉        | 34387/172148 [59:11<22:26:08,  1.71it/s, loss=4.1616]


Context: The President

Generated text: The President 's office said the two-day investigation was `` a very serious case '' . '' ' '' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . 's . 's . 's . 's



Epoch 1/1:  21%|██        | 35947/172148 [1:01:50<3:41:48, 10.23it/s, loss=4.1273]


After 200064 examples, Average Loss: 4.1646



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:01, 19.35it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:01, 19.36it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.15it/s][A
                                                            [A


Validation Average Loss: 4.1386, Perplexity: 62.71

Context: Moscow

Generated text: Moscow 's latest attempt to halt the attack . '' ' '' . ' '' ' I 'm not sure what 's going on . ' '' ' . ' '' ' . ' '' 's . ' . ' . 's . ' .


Context: New York

Generated text: New York City : The ##-year-old , who was born in #### , was born in the ####s , was born in the ####s . ' '' . ' '' ' . ' . ' . ' I . ' . ' . '


Context: A hurricane

Generated text: A hurricane in the region of the city of the city of the capital of the city of the city of the city of the city of the city of the capital . ' '' . ' '' . ' . ' . ' . ' . '



Epoch 1/1:  21%|██        | 35950/172148 [1:01:54<23:13:49,  1.63it/s, loss=4.1826]


Context: The President

Generated text: The President said the government was `` very disappointed '' . '' 'We 're not sure what the government would have done . ' '' ' . ' '' ' . ' '' ' . ' '' . '### 's decision to . '



Epoch 1/1:  22%|██▏       | 37511/172148 [1:04:31<3:27:18, 10.82it/s, loss=4.0491]


After 200064 examples, Average Loss: 4.1612



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:17, 17.80it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.30it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:04, 19.03it/s][A
                                                            [A


Validation Average Loss: 4.1351, Perplexity: 62.50

Context: Moscow

Generated text: Moscow has been a major blow to the Syrian government . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' `` The . 's . ' . 's . ' .


Context: New York

Generated text: New York City : The couple 's daughter , who was born in #### , was born in #### . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' . ' . ' . 's . 's . '


Context: A hurricane




Epoch 1/1:  22%|██▏       | 37513/172148 [1:04:36<28:23:41,  1.32it/s, loss=4.0702]


Context: The President

Generated text: The President 's decision to take the next steps to help the country 's military and the Palestinians . '' ' . ' '' ' . ' '' ' . ' . ' '' 's . ' . ' . 's . ' .



Epoch 1/1:  23%|██▎       | 39073/172148 [1:07:14<3:26:07, 10.76it/s, loss=4.1167]


After 200064 examples, Average Loss: 4.1552



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:07, 18.75it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:04, 19.06it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:02, 19.18it/s][A
                                                            [A


Validation Average Loss: 4.1172, Perplexity: 61.39

Context: Moscow

Generated text: Moscow has been accused of `` a very dangerous act '' in the past . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . 's . ' .


Context: New York

Generated text: New York 's <rare> , the ##-year-old , was arrested on suspicion of murder in the #### murder of his wife . ' '' . ' '' . ' '' . 's . 's . 's 's .


Context: A hurricane




Epoch 1/1:  23%|██▎       | 39076/172148 [1:07:17<21:23:30,  1.73it/s, loss=4.1353]


Context: The President

Generated text: The President 's office said the decision was `` a very significant step '' . ' '' . ' '' . ' '' 's statement . ' '' 's . ' '' 's . 's . 's `` The . 's . '



Epoch 1/1:  24%|██▎       | 40637/172148 [1:09:54<3:23:21, 10.78it/s, loss=4.0962]


After 200064 examples, Average Loss: 4.1490



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:06, 18.87it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:05, 18.93it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.16it/s][A
                                                            [A


Validation Average Loss: 4.1209, Perplexity: 61.61

Context: Moscow

Generated text: Moscow 's military force is the first time the country 's largest nuclear program has been in the world . '' 's ##-year-old . ' '' 's . ' '' said . ' IRA . 's . 's .


Context: New York

Generated text: New York City : The ##-year-old was arrested on suspicion of murdering his wife , ## , and a ##-year-old girl . 'The . ' I 'm not sure . ' . ' . ' . 's


Context: A hurricane

Generated text: A hurricane center is expected to be the first of the ##th century . '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' 's new . 's . 's . 's



Epoch 1/1:  24%|██▎       | 40639/172148 [1:10:00<35:06:09,  1.04it/s, loss=4.2189]


Context: The President

Generated text: The President 's office has been set up to the public to discuss the situation . '' 's report . ' '' ' . ' '' ' . ' '' ' . ' . ' . 's . 's . ' . 's . '



Epoch 1/1:  25%|██▍       | 42199/172148 [1:12:38<3:24:33, 10.59it/s, loss=4.0200]


After 200064 examples, Average Loss: 4.1474



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:06, 18.86it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:02, 19.20it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.15it/s][A
                                                            [A


Validation Average Loss: 4.1193, Perplexity: 61.51

Context: Moscow

Generated text: Moscow 's military junta , the U.S. government , said it was `` not a `` significant threat '' . '' '' . ' '' . ' '' ' . ' '' ' . 's . ' . 'sowed President Barack


Context: New York

Generated text: New York Times Square , which is the first to be seen in the ####s , is the first to be the first of the year . ' '' . ' '' ' . ' . ' . ' . ' . 's <rare> . '


Context: A hurricane




Epoch 1/1:  25%|██▍       | 42202/172148 [1:12:42<20:50:36,  1.73it/s, loss=4.1133]


Context: The President

Generated text: The President 's office said the decision was `` not to be made '' . '' '' . ' '' . ' '' ' I 'm not sure how much it 's been done . ' Ips . ' . 's . ' . '



Epoch 1/1:  25%|██▌       | 43763/172148 [1:15:20<3:45:37,  9.48it/s, loss=4.2313]


After 200064 examples, Average Loss: 4.1430



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:53, 15.04it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:48, 15.35it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:47, 15.43it/s][A
                                                            [A


Validation Average Loss: 4.1194, Perplexity: 61.52

Context: Moscow

Generated text: Moscow has been trying to reassert control of the country . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' . ' . ' . ' . ' . ' . ' .


Context: New York

Generated text: New York City Mayor Bill de Blasio said the `` very important thing '' would be a `` significant step '' . '' '' `` the `` <rare> '' . '' '' '' . '' '' . '' . '' . '' '' . '' ''


Context: A hurricane

Generated text: A hurricane surgeon , who was a member of the Royal Navy , said the plane was `` very close '' . '' ' . ' '' . ' '' ' . ' '' ' . ' '' ' . ' . ' . ' . '



Epoch 1/1:  25%|██▌       | 43765/172148 [1:15:25<31:15:17,  1.14it/s, loss=4.1949]


Context: The President

Generated text: The President 's office has been set up to the Senate 's parliamentary committee . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' . 's . ' . 's . ' .



Epoch 1/1:  26%|██▋       | 45325/172148 [1:18:02<3:16:03, 10.78it/s, loss=4.0878]


After 200064 examples, Average Loss: 4.1393



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:06, 18.86it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:09, 18.51it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:06, 18.77it/s][A
                                                            [A


Validation Average Loss: 4.1040, Perplexity: 60.59

Context: Moscow

Generated text: Moscow has been accused of trying to tackle the attack . ' '' 's statement . ' '' . ' '' ' . ' I 'm not sure what 's happening . ' . 's . 's . 's . ' . '


Context: New York

Generated text: New Yorkers 's first game of the game was the first time the game was to be the first of the season . ' '' . ' '' ' . ' I . ' . ' . ' . ' . 's . 's . '


Context: A hurricane




Epoch 1/1:  26%|██▋       | 45328/172148 [1:18:06<20:21:03,  1.73it/s, loss=4.1018]


Context: The President

Generated text: The President of the United States , which has been in the past , has been in the past . ' '' ' . ' '' ' . ' '' ' . ' . ' . ' . 's . 's . 's . ' . '



Epoch 1/1:  27%|██▋       | 46889/172148 [1:20:44<4:20:18,  8.02it/s, loss=4.2479]


After 200064 examples, Average Loss: 4.1348



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:33, 12.82it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:27, 13.14it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:24, 13.28it/s][A
                                                            [A


Validation Average Loss: 4.1088, Perplexity: 60.88

Context: Moscow

Generated text: Moscow has been accused of plotting to kill a terrorist organization . ' '' . ' '' . ' '' ' Ahmadinejad said . ' '' . ' '' . 's . ' . 's . ' . ' . '


Context: New York

Generated text: New York City : The ##-year-old was arrested on suspicion of murdering his wife , ## , and ##-year-old son . ' '' . ' '' . ' . ' . 's . 's . 's .


Context: A hurricane

Generated text: A hurricane in the area is expected to be seen in the area . ' '' , he said . ' '' . ' '' . ' '' . ' . ' . ' . ' '' ' . 's . ' . ' . ' .



Epoch 1/1:  27%|██▋       | 46891/172148 [1:20:48<34:43:20,  1.00it/s, loss=4.0530]


Context: The President

Generated text: The President 's office said the government would not allow the government to be able to provide a `` thorough investigation '' . '' ' . ' '' . ' '' . ' '' . ' '' . 's . ' . '## 's . '



Epoch 1/1:  28%|██▊       | 48451/172148 [1:23:26<3:11:48, 10.75it/s, loss=4.1523]


After 200064 examples, Average Loss: 4.1315



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:09, 18.58it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:05, 18.94it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:06, 18.78it/s][A
                                                            [A


Validation Average Loss: 4.0984, Perplexity: 60.24

Context: Moscow

Generated text: Moscow , which is the first time the country 's nuclear power plant is a .##-caliber-propelled grenades . ' '' . ' . ' . ' . ' . 's a <rares . 's most .


Context: New York

Generated text: New York City 's most expensive , but the most expensive hotel is a big , and it 's a bit of a bit of a bit of a bit of a bit of a bit of a bit of the world . 's . 's


Context: A hurricane




Epoch 1/1:  28%|██▊       | 48454/172148 [1:23:30<21:38:30,  1.59it/s, loss=4.1188]


Context: The President

Generated text: The President 's office said the decision was `` not to be made '' . '' '' . ' '' . ' '' 'The . ' '' ' I 'm not sure how the situation is going to be a `` . 's . 's



Epoch 1/1:  29%|██▉       | 50015/172148 [1:26:08<4:04:45,  8.32it/s, loss=4.1238]


After 200064 examples, Average Loss: 4.1272



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:06, 18.86it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:01, 19.30it/s][A
Evaluating:   0%|          | 6/3514 [00:00<02:59, 19.55it/s][A
                                                            [A


Validation Average Loss: 4.1020, Perplexity: 60.46

Context: Moscow

Generated text: Moscow has been accused of a 'little ' of ' a 'picious ' and 'pouring ' . ' '' ' . ' ' I 'm not sure . ' . 's . ' . ' . 's . ' .


Context: New York

Generated text: New York City : The ##-year-old , who was arrested in #### , was arrested in #### . ' '' 'The . ' '' ' I 'm not sure . ' '' . ' and I 's . 's . 's


Context: A hurricane




Epoch 1/1:  29%|██▉       | 50017/172148 [1:26:12<31:27:14,  1.08it/s, loss=4.2218]


Context: The President

Generated text: The President 's office said the decision to be made to the House of Representatives . '' 'The . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . ' .



Epoch 1/1:  30%|██▉       | 51577/172148 [1:28:49<3:09:40, 10.59it/s, loss=4.1736]


After 200064 examples, Average Loss: 4.1250



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:08, 18.58it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:02, 19.28it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:05, 18.90it/s][A
                                                            [A


Validation Average Loss: 4.1014, Perplexity: 60.43

Context: Moscow

Generated text: Moscow has a long-range missile strike against the separatists . '' ' . ' '' . ' '' . ' '' . ' '' ' Ahmadinejad , the statement said . 's . 's . ' . '


Context: New York

Generated text: New York City Police Chief Mark Duggan said : 'The victim was not injured . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's Daily . 's . 's . '


Context: A hurricane

Generated text: A hurricane center is expected to be seen in the city of <rare> , the state 's largest city in the city of <rare> . ' '' . ' . ' '' . ' . 's . ' . ' . '



Epoch 1/1:  30%|██▉       | 51580/172148 [1:28:54<21:55:35,  1.53it/s, loss=4.0461]


Context: The President

Generated text: The President 's office said the decision was `` not to be made '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 'The . 's . 's . 's . '



Epoch 1/1:  31%|███       | 53141/172148 [1:31:32<4:09:28,  7.95it/s, loss=4.1070]


After 200064 examples, Average Loss: 4.1202



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:28, 13.08it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:23, 13.34it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:04, 14.35it/s][A
                                                            [A


Validation Average Loss: 4.0970, Perplexity: 60.16

Context: Moscow

Generated text: Moscow has been a key part of the Ukraine crisis since #### . '' ' '' . ' '' 'The . ' '' ' I 'll be a 'fair . ' '' ' . ' . ' . ' . ' . ' . ' .


Context: New York

Generated text: New York City , which is the first time the country 's economy has been in the past year . ' '' ' . ' '' ' . ' '' . ' '' . ' '' . ' the . 's . 's 's most .


Context: A hurricane




Epoch 1/1:  31%|███       | 53143/172148 [1:31:36<31:20:17,  1.05it/s, loss=4.0425]


Context: The President

Generated text: The President 's office said the incident was `` not a matter of time '' . '' 'The . ' '' . ' '' . ' '' . ' '' ' I 'm not sure what happened . ' . ' . 's . ' .



Epoch 1/1:  32%|███▏      | 54704/172148 [1:34:12<3:02:56, 10.70it/s, loss=4.1689]


After 200064 examples, Average Loss: 4.1215



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:11, 18.30it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:17, 17.73it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:11, 18.36it/s][A
                                                            [A


Validation Average Loss: 4.0916, Perplexity: 59.84

Context: Moscow

Generated text: Moscow has said it has been `` a long-standing '' . '' ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . 's not


Context: New York

Generated text: New York 's `` The WHO '' is a `` <rare> '' , which is the first time in the United States , the United States and the United States . '' ] . '' ] . '' . '' 's . 'The .


Context: A hurricane




Epoch 1/1:  32%|███▏      | 54706/172148 [1:34:17<23:40:29,  1.38it/s, loss=4.1537]


Context: The President

Generated text: The President 's office said the two sides had been `` in the right direction '' . '' 'The . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . '



Epoch 1/1:  33%|███▎      | 56267/172148 [1:36:55<4:07:03,  7.82it/s, loss=4.0584]


After 200064 examples, Average Loss: 4.1145



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:34, 12.79it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:28, 13.09it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:18, 13.59it/s][A
                                                            [A


Validation Average Loss: 4.0888, Perplexity: 59.67

Context: Moscow

Generated text: Moscow has been criticised for its efforts to protect the country 's interests of the country . '' ' . ' '' ' . ' '' . ' '' . ' '' . ' '' . 's . ' . ' . ' . ' . '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the company had been working on the issue . ' '' . ' '' ' I 'm not sure if it 's not a bad thing . ' '' . ' '' . ' . 's . '


Context: A hurricane




Epoch 1/1:  33%|███▎      | 56269/172148 [1:37:00<36:33:44,  1.14s/it, loss=3.9570]


Context: The President

Generated text: The President 's office has been in the past , but the government has not yet commented on the allegations . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . 's . 's . '



Epoch 1/1:  34%|███▎      | 57829/172148 [1:39:38<3:03:50, 10.36it/s, loss=4.0232]


After 200064 examples, Average Loss: 4.1124



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:15, 17.92it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:08, 18.59it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:05, 18.91it/s][A
                                                            [A


Validation Average Loss: 4.0891, Perplexity: 59.69

Context: Moscow

Generated text: Moscow has been in the region of the country 's population . '' 's report . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' 's . 's . ' . ' . 's ' . '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said : ' I 'm not going to be a good man . ' '' . ' '' . ' '' ' . ' '' ' . ' '' . ' . ' . 's . 's . '


Context: A hurricane




Epoch 1/1:  34%|███▎      | 57832/172148 [1:39:43<19:01:03,  1.67it/s, loss=4.1818]


Context: The President

Generated text: The President 's office said the `` unacceptable '' of the `` unacceptable '' . '' `` . '' '' that the government 's `` is a `` <rare> '' . '' . '' . '' . 's . ' .



Epoch 1/1:  35%|███▍      | 59392/172148 [1:42:21<2:56:41, 10.64it/s, loss=4.2125]


After 200064 examples, Average Loss: 4.1108



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:15, 17.98it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:08, 18.60it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.13it/s][A
                                                            [A


Validation Average Loss: 4.0856, Perplexity: 59.48

Context: Moscow

Generated text: Moscow has been criticised by the government for failing to provide a safe and secure place to be a key member of the European Union . '' ' . ' '' ' . ' '' . ' . 's . ' . 's . ' . '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the `` very few '' was a `` big step '' . '' '' . ' '' . ' '' . ' '' . ' '' ' '' . ' `` . ' '' . ' '' . 's .


Context: A hurricane

Generated text: A hurricane in the area is expected to be in the area where the water is expected to be damaged . '' ' '' . ' '' ' . ' '' ' . ' '' ' . ' . ' . ' . ' . ' .



Epoch 1/1:  35%|███▍      | 59395/172148 [1:42:25<18:53:05,  1.66it/s, loss=4.0904]


Context: The President

Generated text: The President 's office said the `` very few '' was a `` very difficult '' . '' ' '' . ' '' 'The . ' '' ' . ' '' ' . ' '' 's . 's . 's . 's . '



Epoch 1/1:  35%|███▌      | 60956/172148 [1:45:03<2:58:40, 10.37it/s, loss=4.0565]


After 200064 examples, Average Loss: 4.1093



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:06, 18.79it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:07, 18.75it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:03, 19.10it/s][A
                                                            [A


Validation Average Loss: 4.0811, Perplexity: 59.21

Context: Moscow

Generated text: Moscow 's Foreign Minister Sergei Lavrov said the government had not yet been formally identified . '' . ' '' . ' '' . ' '' said the ##-year-old . ' `` . '' . '' . 's . '' . '


Context: New York

Generated text: New York City Police said the man was arrested and charged with a criminal investigation . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' Police Chief Robert Gibson said . 's . 's rider 's '


Context: A hurricane




Epoch 1/1:  35%|███▌      | 60958/172148 [1:45:08<22:18:37,  1.38it/s, loss=4.1219]


Context: The President

Generated text: The President 's office said the government had not been informed . '' 's statement . ' '' the report said . 'The . ' '' . ' '' . ' `` . ' '' . 's . 's . 's . 's



Epoch 1/1:  36%|███▋      | 62518/172148 [1:47:46<2:51:46, 10.64it/s, loss=4.1878]


After 200064 examples, Average Loss: 4.1035



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:34, 16.35it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:20, 17.55it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:10, 18.45it/s][A
                                                            [A


Validation Average Loss: 4.0814, Perplexity: 59.23

Context: Moscow

Generated text: Moscow has been accused of `` terrorist '' and `` a terrorist attack . '' '' . ' '' . ' '' 'The . ' '' ' . ' '' ' '' . ' '' 's . ' . ' . 's . ' .


Context: New York

Generated text: New York City Mayor Bill Clinton said the `` unprecedented '' of the `` <rare> '' and `` a `` <rare> '' . '' '' . '' '' . '' '' . '' . '' . '' '' . '' . ''


Context: A hurricane




Epoch 1/1:  36%|███▋      | 62521/172148 [1:47:50<17:58:56,  1.69it/s, loss=4.2966]


Context: The President

Generated text: The President 's office said the government had `` no idea '' . ' '' `` the `` <rare> '' . '' '' . ' '' . ' '' ' '' . ' '' 's `` The same . 's `` a `` a ``



Epoch 1/1:  37%|███▋      | 64081/172148 [1:50:28<2:48:40, 10.68it/s, loss=4.0990]


After 200064 examples, Average Loss: 4.1003



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:06, 18.86it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:03, 19.15it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:19, 17.58it/s][A
                                                            [A


Validation Average Loss: 4.0873, Perplexity: 59.58

Context: Moscow

Generated text: Moscow has been accused of failing to deal with the deal . ' '' ' . ' '' ' . ' '' ' . ' '' ' Ahmadinejad said . ' '' 's . ' . 's 's ' . ' .


Context: New York

Generated text: New York City , ## , was arrested in the capital of the city 's capital of the city 's capital , where the police were called to the scene . ' '' . ' '' . ' . ' . ' . 's . 's


Context: A hurricane

Generated text: A hurricane has been seen in the area , which is expected to be seen in the area . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' . ' . ' . ' . '



Epoch 1/1:  37%|███▋      | 64084/172148 [1:50:33<21:40:41,  1.38it/s, loss=4.2184]


Context: The President

Generated text: The President has been in the past two years , and has been a very good person . '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' 's . ' . 's . ' . '



Epoch 1/1:  38%|███▊      | 65644/172148 [1:53:11<2:44:09, 10.81it/s, loss=4.0811]


After 200064 examples, Average Loss: 4.0988



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:11, 18.30it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:05, 18.95it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:09, 18.51it/s][A
                                                            [A


Validation Average Loss: 4.0757, Perplexity: 58.89

Context: Moscow

Generated text: Moscow has been targeted by the rebels . '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . ' . ' .


Context: New York

Generated text: New York , the largest number of people in the world , said the . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . ' `` . 's ##-year-ren of .


Context: A hurricane




Epoch 1/1:  38%|███▊      | 65647/172148 [1:53:15<17:12:46,  1.72it/s, loss=3.9819]


Context: The President

Generated text: The President has been a part of the country 's political campaign . '' 's decision to be made . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . '### . 's '



Epoch 1/1:  39%|███▉      | 67208/172148 [1:55:53<3:40:40,  7.93it/s, loss=4.0752]


After 200064 examples, Average Loss: 4.0957



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:42, 12.42it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:38, 12.60it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:36, 12.68it/s][A
                                                            [A


Validation Average Loss: 4.0767, Perplexity: 58.95

Context: Moscow

Generated text: Moscow has been accused of killing the U.S. and its .###-### . ' '' . ' '' . ' '' . ' '' . ' . ' . ' . 's . ' . 's 's ' . '


Context: New York

Generated text: New York City : The ##-year-old was killed in the crash . ' '' 's ##-year-old son , who was killed in the crash . ' '' 's . 's . ' . ' . 's . '


Context: A hurricane




Epoch 1/1:  39%|███▉      | 67210/172148 [1:55:58<30:45:15,  1.06s/it, loss=4.1438]


Context: The President

Generated text: The President 's office said the decision was `` not to be made '' . ' '' . ' '' . ' '' . ' '' . ' '' 's . ' '' . ' '' . 's . 's . 's . 's



Epoch 1/1:  40%|███▉      | 68770/172148 [1:58:36<2:42:55, 10.58it/s, loss=4.2020]


After 200064 examples, Average Loss: 4.0973



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:42, 15.80it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:21, 17.41it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:15, 17.95it/s][A
                                                            [A


Validation Average Loss: 4.0736, Perplexity: 58.77

Context: Moscow

Generated text: Moscow has been accused of the attack , saying he was not a member of the .##-caliber pistol . ' '' ' . ' '' ' . ' '' ' . ' . 's . ' . 's 's ' . '


Context: New York

Generated text: New York City Mayor Bill de Blasio said the ##-year-old was `` a great dealer '' . '' '' . ' '' ' '' . ' '' 's ##-year-based . ' '' . 's Daily . '


Context: A hurricane




Epoch 1/1:  40%|███▉      | 68773/172148 [1:58:41<18:39:16,  1.54it/s, loss=4.0845]


Context: The President

Generated text: The President 's office has been working with the U.S. Department of Agriculture , which has a strong understanding of the U.S. and the United States . ' '' 's . 's . 's . 's . '



Epoch 1/1:  41%|████      | 70333/172148 [2:01:20<3:33:34,  7.95it/s, loss=4.0842]


After 200064 examples, Average Loss: 4.0930



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:10, 18.44it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:05, 18.88it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:08, 18.65it/s][A
                                                            [A


Validation Average Loss: 4.0613, Perplexity: 58.05

Context: Moscow

Generated text: Moscow has been a key part of the country 's nuclear programme . '' 's . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's a key part of the first . 's


Context: New York

Generated text: New York City , New York , and the New York Times , are not the first time the state has been in the state of the state . '' ' . ' '' . ' . ' . 's . 's . 's . ' .


Context: A hurricane




Epoch 1/1:  41%|████      | 70336/172148 [2:01:24<22:28:06,  1.26it/s, loss=4.0670]


Context: The President

Generated text: The President 's office said the government had `` no intention of harm '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . 's . '



Epoch 1/1:  42%|████▏     | 71896/172148 [2:04:02<2:35:02, 10.78it/s, loss=4.0568]


After 200064 examples, Average Loss: 4.0908



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:20, 17.50it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:15, 17.98it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:13, 18.10it/s][A
                                                            [A


Validation Average Loss: 4.0675, Perplexity: 58.41

Context: Moscow

Generated text: Moscow 's nuclear weapons program is not only to be used in the war . '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . 's . '


Context: New York

Generated text: New York City Police said the man was arrested and the man was arrested . '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's 's 's


Context: A hurricane

Generated text: A hurricane season is the first time the plane crashed into the sea . '' ' . ' '' , according to the National Weather Service . ' '' . ' . ' '' . ' '' . 's new . ' . 's



Epoch 1/1:  42%|████▏     | 71899/172148 [2:04:07<18:55:20,  1.47it/s, loss=4.1808]


Context: The President

Generated text: The President 's office said the government had `` no involvement '' . ' '' and `` we are not going to be a very good deal . '' '' . ' '' . ' '' . 's new . 's new . '' 's



Epoch 1/1:  43%|████▎     | 73460/172148 [2:06:45<2:36:10, 10.53it/s, loss=4.0999]


After 200064 examples, Average Loss: 4.0900



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:11, 18.32it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:08, 18.65it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:06, 18.78it/s][A
                                                            [A


Validation Average Loss: 4.0693, Perplexity: 58.52

Context: Moscow

Generated text: Moscow has been accused of failing to take place in the country 's capital . '' ' . ' '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's the way to . ' . '


Context: New York

Generated text: New York City , which is the first time in the world , is a .##-caliber pistol , and is a ##-year-old . ' '' . ' '' . ' . 's . 's new . ' . '


Context: A hurricane




Epoch 1/1:  43%|████▎     | 73462/172148 [2:06:49<18:50:08,  1.46it/s, loss=3.9817]


Context: The President

Generated text: The President 's office said the government had been `` inappropriate '' . ' '' . ' '' . ' '' ' `` The . ' '' . ' '' . ' '' . 's . 'Tard said . 's 's



Epoch 1/1:  44%|████▎     | 75023/172148 [2:09:28<3:09:41,  8.53it/s, loss=4.0911]


After 200064 examples, Average Loss: 4.0877



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:51, 12.05it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:50, 12.08it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:49, 12.10it/s][A
                                                            [A


Validation Average Loss: 4.0686, Perplexity: 58.48

Context: Moscow

Generated text: Moscow has been criticized for its efforts to protect the country 's economy . '' 's ##-year-old . ' '' . ' '' . ' '' . ' '' . 's . 's not . 's the first . '


Context: New York

Generated text: New York City , the first to be the first major winner of the year , is expected to be the first major winner . '' 's ##-year-old . ' '' . ' . ' . 's . 's ##-en '


Context: A hurricane




Epoch 1/1:  44%|████▎     | 75025/172148 [2:09:32<28:36:50,  1.06s/it, loss=4.1969]


Context: The President

Generated text: The President has been criticized for the way the government has been working with the United States . '' 's ##-year-old former President . ' '' . ' '' . ' . 's . ' . 't 's very close to



Epoch 1/1:  44%|████▍     | 76585/172148 [2:12:11<2:32:06, 10.47it/s, loss=4.0024]


After 200064 examples, Average Loss: 4.0856



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:18, 17.70it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.38it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:17, 17.74it/s][A
                                                            [A


Validation Average Loss: 4.0627, Perplexity: 58.13

Context: Moscow

Generated text: Moscow has been a long-time role in the search for the .###-year-old . ' '' . ' '' ' A-listed . ' '' . ' . ' . 's a ' . 's . ' . '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the company had been working with the company to help the company 's business . ' '' . ' '' . ' '' . ' '' . 's National . ' . 's Times . 's .


Context: A hurricane




Epoch 1/1:  44%|████▍     | 76588/172148 [2:12:16<20:20:45,  1.30it/s, loss=3.9910]


Context: The President

Generated text: The President 's office said the government had been working on the investigation . ' '' . ' '' ' I 'm sure he 's a very good person . ' '' . ' '' . 's . ' . 's . 's .



Epoch 1/1:  45%|████▌     | 78148/172148 [2:14:56<2:29:36, 10.47it/s, loss=4.0693]


After 200064 examples, Average Loss: 4.0848



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:16, 17.87it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:19, 17.63it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:16, 17.90it/s][A
                                                            [A


Validation Average Loss: 4.0603, Perplexity: 57.99

Context: Moscow

Generated text: Moscow has been accused of trying to block the .###-acre . ' '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' . ' '' 's . 's . 's out 's . ' .


Context: New York

Generated text: New York 's first-ever-of-three , the first person to be named , was aired in #### . ' '' . ' '' ' I 'm a 's . ' . ' . ' . ' . 's . '


Context: A hurricane




Epoch 1/1:  45%|████▌     | 78151/172148 [2:15:00<15:15:34,  1.71it/s, loss=4.1249]


Context: The President

Generated text: The President 's office said the government had `` no intention of the attack . '' '' . ' '' . ' '' . ' '' . ' '' ' . ' '' . ' '' . 's . 's . 's . 's .



Epoch 1/1:  46%|████▋     | 79712/172148 [2:17:39<3:08:10,  8.19it/s, loss=4.1105]


After 200064 examples, Average Loss: 4.0819



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:29, 13.02it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:32, 12.90it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:30, 12.96it/s][A
                                                            [A


Validation Average Loss: 4.0615, Perplexity: 58.06

Context: Moscow

Generated text: Moscow 's government has been accused of being a `` very dangerous '' and that the government has been working on the site . ' '' . ' '' ' . ' '' ' . ' '' 's . 's . 's . 't .


Context: New York

Generated text: New York City 's <rare> <rare> , a former U.S. Army , was arrested in the city of <rare> , in the United States . ' . ' . ' . ' . ' . ' . ' . '


Context: A hurricane

Generated text: A hurricane has been hit by a storm in the south of the city of <rare> , which is expected to be seen in the region . ' '' . ' . ' '' . ' . 's spokesman . 's .



Epoch 1/1:  46%|████▋     | 79714/172148 [2:17:44<27:23:52,  1.07s/it, loss=4.0976]


Context: The President

Generated text: The President 's office said the government would not be able to confirm the details of the alleged plot . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . 's . 's . 's



Epoch 1/1:  47%|████▋     | 81275/172148 [2:20:23<2:21:38, 10.69it/s, loss=4.0248]


After 200064 examples, Average Loss: 4.0828



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:32, 16.51it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:16, 17.90it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:09, 18.53it/s][A
                                                            [A


Validation Average Loss: 4.0516, Perplexity: 57.49

Context: Moscow

Generated text: Moscow has been accused of 'inappropriate ' and 'pilot ' of the 'sexual ' . ' '' ' and said it was a . ' . ' . ' . ' . ' . ' . ' . ' . '


Context: New York

Generated text: New York City Police said the ##-year-old was arrested and charged with attempted murder . '' 'The .##-caliber handgun . ' '' . ' '' . ' Ips said . 's . 's police . 's


Context: A hurricane




Epoch 1/1:  47%|████▋     | 81277/172148 [2:20:27<17:32:48,  1.44it/s, loss=4.1439]


Context: The President

Generated text: The President 's office said the ##-year-old was `` a very good man '' and he was `` a great friend of the world 's most important man . '' '' . '' '' . '' '' . 's . '' '' .



Epoch 1/1:  48%|████▊     | 82838/172148 [2:23:07<2:43:16,  9.12it/s, loss=4.0631]


After 200064 examples, Average Loss: 4.0785



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:17, 17.79it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:10, 18.39it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:10, 18.41it/s][A
                                                            [A


Validation Average Loss: 4.0605, Perplexity: 58.00

Context: Moscow

Generated text: Moscow has been a major blow to the United States since #### . ' '' 's release . ' '' ' . ' '' ' . ' . ' . ' . ' . ' . ' . 's . ' . ' . ' . ' .


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the company had been `` a great deal of the way '' . '' ' . ' '' . ' '' . ' '' 's report . ' . ' . ' . ' . 's . 's


Context: A hurricane




Epoch 1/1:  48%|████▊     | 82840/172148 [2:23:11<20:12:31,  1.23it/s, loss=4.1401]


Context: The President

Generated text: The President 's office said the government had `` notified the government of the United States . '' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' I-of . 's . 's first . '



Epoch 1/1:  49%|████▉     | 84401/172148 [2:25:49<2:15:38, 10.78it/s, loss=4.1898]


After 200064 examples, Average Loss: 4.0765



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:42, 15.80it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:19, 17.62it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:12, 18.22it/s][A
                                                            [A


Validation Average Loss: 4.0542, Perplexity: 57.64

Context: Moscow

Generated text: Moscow 's Foreign Ministry said it was `` a very sad day for the country 's terrorists '' . ' '' . ' '' . ' '' . ' '' . ' . ' . 's . 's . 's . 's .


Context: New York

Generated text: New York City : The ##-year-old has been charged with murder and has been charged with murder . ' '' . ' '' 's court heard . ' '' . ' . ' . 's Daily . 's . 's . '


Context: A hurricane




Epoch 1/1:  49%|████▉     | 84403/172148 [2:25:55<20:56:19,  1.16it/s, loss=3.9455]


Context: The President

Generated text: The President has said he has not been able to speak to the media . ' '' . ' '' . ' . ' . ' '' . ' . ' . ' '' . ' . ' . 's . 's been the case 's .



Epoch 1/1:  50%|████▉     | 85964/172148 [2:28:34<2:15:25, 10.61it/s, loss=4.0296]


After 200064 examples, Average Loss: 4.0758



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:16, 17.90it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:10, 18.39it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:09, 18.56it/s][A
                                                            [A


Validation Average Loss: 4.0539, Perplexity: 57.62

Context: Moscow

Generated text: Moscow has been accused of `` a terrorist attack '' in the past . ' '' and `` a very serious threat to the country . '' ' '' . ' '' . ' '' . ' . 's . ' . 's ' . ' .


Context: New York

Generated text: New York City 's presidential election is a major political party , and the Republican Party has been criticised for the `` unprecedented '' . ' '' . ' '' . ' '' . ' . 's . 's . 's


Context: A hurricane




Epoch 1/1:  50%|████▉     | 85966/172148 [2:28:38<16:31:26,  1.45it/s, loss=4.2023]


Context: The President

Generated text: The President 's office said the decision to `` get a lot of money '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' `` . ' '' . 's . '## 's . '



Epoch 1/1:  51%|█████     | 87527/172148 [2:31:17<2:12:20, 10.66it/s, loss=4.1174]


After 200064 examples, Average Loss: 4.0764



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:39, 16.01it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:20, 17.46it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:15, 17.97it/s][A
                                                            [A


Validation Average Loss: 4.0475, Perplexity: 57.25

Context: Moscow

Generated text: Moscow has been accused of a 'incredible ' and 'very . ' '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . 's 's


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the company had been working with the company to investigate the incident . '' 's report . ' '' . ' '' . ' '' . ' '' . 's Daily News . 's Daily . 's


Context: A hurricane




Epoch 1/1:  51%|█████     | 87529/172148 [2:31:22<20:19:47,  1.16it/s, loss=4.0637]


Context: The President

Generated text: The President has been criticised for the fact that the government has been in the past . '' ' . ' '' ' . ' '' . ' '' . ' '' . ' '' . ' `` The . 's . 's . ' . '



Epoch 1/1:  52%|█████▏    | 89090/172148 [2:34:01<2:13:22, 10.38it/s, loss=4.0814]


After 200064 examples, Average Loss: 4.0738



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:13, 18.15it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.37it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:23, 17.28it/s][A
                                                            [A


Validation Average Loss: 4.0478, Perplexity: 57.27

Context: Moscow

Generated text: Moscow 's missile missile strike was not immediately clear whether the plane had been delayed . '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . 's spokesman said . 's . ' . '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the company 's decision was `` a very important step forward '' . '' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's Times . 's


Context: A hurricane




Epoch 1/1:  52%|█████▏    | 89092/172148 [2:34:05<16:46:55,  1.37it/s, loss=4.1184]


Context: The President

Generated text: The President 's office said the government had been `` inappropriate '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's new . ' .



Epoch 1/1:  53%|█████▎    | 90653/172148 [2:36:45<2:10:36, 10.40it/s, loss=4.0586]


After 200064 examples, Average Loss: 4.0726



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:13, 18.10it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.35it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:10, 18.38it/s][A
                                                            [A


Validation Average Loss: 4.0399, Perplexity: 56.82

Context: Moscow

Generated text: Moscow has been a major threat to the country 's economy . '' ' . ' '' ' . ' '' ' I 'm not sure what 's happening . ' '' ' . 's . ' . 's new . ' . ' .


Context: New York

Generated text: New York City : The ##-year-old has been in the past two years . ' '' , and he 's now-manager of the United States . ' '' . ' . ' . ' . ' . 's . 's .


Context: A hurricane




Epoch 1/1:  53%|█████▎    | 90655/172148 [2:36:50<18:46:12,  1.21it/s, loss=4.0558]


Context: The President

Generated text: The President 's office said the `` unacceptable '' of the `` `` `` a very serious situation '' . '' '' . '' '' . ' '' . ' '' . ' '' . 's Aman . ' . 's <rares



Epoch 1/1:  54%|█████▎    | 92215/172148 [2:39:30<2:05:41, 10.60it/s, loss=4.0635]


After 200064 examples, Average Loss: 4.0703



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:19, 17.65it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.36it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:18, 17.70it/s][A
                                                            [A


Validation Average Loss: 4.0431, Perplexity: 57.00

Context: Moscow

Generated text: Moscow 's government has been accused of `` a long-running terrorist attack '' . ' '' . ' '' ' I 'm not sure what 's going on . ' '' . ' . 's . 's . 's . '


Context: New York

Generated text: New York City 's ##-year-old boy , who was a member of the public , said he was `` a very intelligent man . '' ' '' . ' '' . ' . ' . ' . ' . ' . ' . '


Context: A hurricane




Epoch 1/1:  54%|█████▎    | 92218/172148 [2:39:34<13:05:55,  1.70it/s, loss=4.0944]


Context: The President

Generated text: The President 's office said the `` unfortunate '' `` is the first time a few people are in the process of being a `` very difficult '' . '' '' . '' ' '' . 's I-barers . 's `` a



Epoch 1/1:  54%|█████▍    | 93778/172148 [2:42:13<2:03:38, 10.56it/s, loss=4.0649]


After 200064 examples, Average Loss: 4.0692



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:10, 18.42it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:08, 18.65it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:05, 18.90it/s][A
                                                            [A


Validation Average Loss: 4.0426, Perplexity: 56.97

Context: Moscow

Generated text: Moscow has been a major step towards the end of the war . '' ' . ' '' ' . ' '' ' . ' '' . ' '' . ' '' . ' . ' . 's . 'no a big stepd . ' . '


Context: New York

Generated text: New York City Police Chief Michael Gove said the decision was `` not to be made '' . '' ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' `` . 's Secret Service . 's '' .


Context: A hurricane




Epoch 1/1:  54%|█████▍    | 93781/172148 [2:42:19<15:40:01,  1.39it/s, loss=4.0785]


Context: The President

Generated text: The President 's office has been criticised by the government for failing to address the issue . '' ' . ' '' ' . ' '' . ' '' . ' '' . ' . ' . 's . 's . 's . 's



Epoch 1/1:  55%|█████▌    | 95341/172148 [2:44:58<2:00:36, 10.61it/s, loss=4.0387]


After 200064 examples, Average Loss: 4.0652



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:23, 17.23it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:34, 16.37it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:24, 17.15it/s][A
                                                            [A


Validation Average Loss: 4.0435, Perplexity: 57.02

Context: Moscow

Generated text: Moscow has been a key part of the European Union 's nuclear programme . '' 's report . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . 's not a key . 's key .


Context: New York

Generated text: New York City Mayor Bill de Blasio said the party was `` not a matter of time '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's Times , a


Context: A hurricane




Epoch 1/1:  55%|█████▌    | 95344/172148 [2:45:02<12:32:00,  1.70it/s, loss=4.1170]


Context: The President

Generated text: The President 's office said the government had `` no immediate comment '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's .###-age '



Epoch 1/1:  56%|█████▋    | 96905/172148 [2:47:43<2:06:20,  9.93it/s, loss=4.0682]


After 200064 examples, Average Loss: 4.0692



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:20, 17.50it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.29it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:15, 17.97it/s][A
                                                            [A


Validation Average Loss: 4.0382, Perplexity: 56.72

Context: Moscow

Generated text: Moscow has been accused of being a member of the military and the United States . '' ' . ' '' . ' '' . ' '' . ' . ' '' . ' . ' . 's . ' . ' a ' . ' . 's


Context: New York

Generated text: New York City Police said the incident was not immediately available to the police . ' '' . ' '' . ' '' . ' '' . ' . ' '' . ' . ' . ' . 's . 's . 's decision 's decision


Context: A hurricane




Epoch 1/1:  56%|█████▋    | 96907/172148 [2:47:47<15:12:30,  1.37it/s, loss=4.2387]


Context: The President

Generated text: The President 's office said the government was `` very disappointed '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . 's



Epoch 1/1:  57%|█████▋    | 98467/172148 [2:50:27<1:56:25, 10.55it/s, loss=3.9815]


After 200064 examples, Average Loss: 4.0672



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:18, 17.72it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:14, 18.03it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:13, 18.09it/s][A
                                                            [A


Validation Average Loss: 4.0358, Perplexity: 56.59

Context: Moscow

Generated text: Moscow has been a major issue in the region , with the U.S. government . '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . 'no . ' . 's ' .


Context: New York

Generated text: New York City 's most famous movie star , the first to be the first of the year 's film . ' '' . ' '' . ' '' . ' . ' . ' '' . ' . ' . ' . ' . 's .


Context: A hurricane




Epoch 1/1:  57%|█████▋    | 98470/172148 [2:50:32<12:42:35,  1.61it/s, loss=4.0315]


Context: The President

Generated text: The President 's office said the government had `` no intention of any wrongdoing '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . '



Epoch 1/1:  58%|█████▊    | 100031/172148 [2:53:12<1:54:00, 10.54it/s, loss=3.9795]


After 200064 examples, Average Loss: 4.0636



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:18, 17.69it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:10, 18.47it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:10, 18.44it/s][A
                                                            [A


Validation Average Loss: 4.0403, Perplexity: 56.84

Context: Moscow

Generated text: Moscow has been a key target for the United States . '' ' . ' '' ' . ' '' ' Aaron Hernandez said . ' '' . ' '' ' I 'd . ' . ' . ' and the way a key to .


Context: New York

Generated text: New York City 's most famous movie , and the most popular TV show , is the most popular feature in the world . ' '' . ' '' . ' '' . ' '' . ' `` . ' '' . ' . 's . 's


Context: A hurricane




Epoch 1/1:  58%|█████▊    | 100033/172148 [2:53:16<13:49:24,  1.45it/s, loss=4.0125]


Context: The President

Generated text: The President 's office said the government had notified the government . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . 's 's



Epoch 1/1:  59%|█████▉    | 101594/172148 [2:55:55<2:03:08,  9.55it/s, loss=4.0631]


After 200064 examples, Average Loss: 4.0624



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:11, 13.95it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:59, 14.66it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:54, 14.94it/s][A
                                                            [A


Validation Average Loss: 4.0348, Perplexity: 56.53

Context: Moscow

Generated text: Moscow has been trying to re-elect the U.S. government in the past . '' 's ##-year-old . ' '' ' . ' '' . ' . ' . 's a ' . ' . ' . ' . '


Context: New York

Generated text: New York City 's most popular TV show , which is the most popular feature of the film , is the most popular feature of the film . ' '' . ' '' . ' . ' . ' . ' . ' . ' . 's .


Context: A hurricane




Epoch 1/1:  59%|█████▉    | 101596/172148 [2:56:00<15:44:26,  1.25it/s, loss=3.9700]


Context: The President

Generated text: The President 's office said the government had been working on the issue . '' 's . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's . 's 's



Epoch 1/1:  60%|█████▉    | 103157/172148 [2:58:40<1:51:21, 10.32it/s, loss=4.1036]


After 200064 examples, Average Loss: 4.0635



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:19, 17.61it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.29it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:12, 18.19it/s][A
                                                            [A


Validation Average Loss: 4.0339, Perplexity: 56.48

Context: Moscow

Generated text: Moscow denies the allegations of the alleged attack , but the alleged attack was not a 'inous ' . ' '' . ' . ' . ' . ' '' . ' . ' . ' . ' . 's .##-


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the new government would not be able to get the vote . '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . ' '' . ' . 's Press


Context: A hurricane




Epoch 1/1:  60%|█████▉    | 103159/172148 [2:58:44<13:22:36,  1.43it/s, loss=4.0242]


Context: The President

Generated text: The President 's office said the government had been `` deeply concerned about the situation '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . ' . 's . ' . '



Epoch 1/1:  61%|██████    | 104720/172148 [3:01:25<1:48:58, 10.31it/s, loss=3.9921]


After 200064 examples, Average Loss: 4.0620



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:10, 18.39it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:14, 18.04it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:23, 17.22it/s][A
                                                            [A


Validation Average Loss: 4.0324, Perplexity: 56.40

Context: Moscow

Generated text: Moscow has been trying to reassure the United States that the U.S. has been in the region . '' ' . ' '' ' . ' '' ' . ' . ' . 's . ' . ' . ' . ' . '


Context: New York

Generated text: New York 's `` The FBI '' is the first person to be identified as a suspect . '' 'SJ . ' '' `` . '' ] . ' '' . ' '' 's . 's . '' 's . 's .


Context: A hurricane




Epoch 1/1:  61%|██████    | 104722/172148 [3:01:29<13:57:45,  1.34it/s, loss=4.1745]


Context: The President

Generated text: The President 's office said the government had `` been trying to establish the circumstances of the attack . '' '' . ' '' . ' '' ' `` . ' '' . ' '' . ' '' . 's . 's `` The Obes ''



Epoch 1/1:  62%|██████▏   | 106283/172148 [3:04:08<1:44:22, 10.52it/s, loss=4.0260]


After 200064 examples, Average Loss: 4.0600



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:16, 17.85it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:09, 18.56it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:09, 18.48it/s][A
                                                            [A


Validation Average Loss: 4.0258, Perplexity: 56.03

Context: Moscow

Generated text: Moscow has been accused of killing a ##-year-old woman and has been arrested in connection with the attack . ' '' . ' '' ' . ' '' ' . ' . ' . 's . ' . ' a ' . ' . '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the `` very difficult '' of the investigation was `` a very serious situation '' . '' ' '' . ' '' ' I 'm not sure . ' '' . ' '' . 's Times . 's


Context: A hurricane




Epoch 1/1:  62%|██████▏   | 106285/172148 [3:04:13<15:29:31,  1.18it/s, loss=4.0769]


Context: The President

Generated text: The President 's office said the government was `` deeply concerned '' and `` no doubt '' that the government 's decision was `` to be a priority '' . '' ' '' . ' '' . 's . 's .##-in . '



Epoch 1/1:  63%|██████▎   | 107845/172148 [3:06:55<1:41:59, 10.51it/s, loss=4.0794]


After 200064 examples, Average Loss: 4.0584



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:20, 17.50it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:15, 17.93it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:12, 18.26it/s][A
                                                            [A


Validation Average Loss: 4.0280, Perplexity: 56.15

Context: Moscow

Generated text: Moscow has been accused of targeting the country 's nuclear ambassador to the United States . '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . ' . 's a . ' .


Context: New York

Generated text: New York City Mayor Bill de Blasio said the company was `` not going to be a big deal for the first time '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's .


Context: A hurricane




Epoch 1/1:  63%|██████▎   | 107848/172148 [3:06:59<10:26:30,  1.71it/s, loss=3.9389]


Context: The President

Generated text: The President 's office said the government had `` no longer support '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's best . '



Epoch 1/1:  64%|██████▎   | 109409/172148 [3:09:39<2:10:54,  7.99it/s, loss=3.8662]


After 200064 examples, Average Loss: 4.0594



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:17, 13.61it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:24, 13.27it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:33, 12.81it/s][A
                                                            [A


Validation Average Loss: 4.0226, Perplexity: 55.85

Context: Moscow

Generated text: Moscow has been targeted by the Ukrainian government and the United States . '' ' . ' '' ' . ' '' ' . ' '' ' . ' '' ' : . ' '' 's . ' . ' a very important to be a .


Context: New York

Generated text: New York City Mayor Billing said the company had been `` committed to the workforce '' and `` a `` big deal '' . '' '' and `` a `` <rare> '' . '' '' . '' . '' . '' . '' . '' .


Context: A hurricane




Epoch 1/1:  64%|██████▎   | 109411/172148 [3:09:43<17:02:44,  1.02it/s, loss=4.0282]


Context: The President

Generated text: The President has been criticised for the way he is . '' ' he said . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' ' : . ' . 's said . 's 's out to be



Epoch 1/1:  64%|██████▍   | 110972/172148 [3:12:24<1:36:57, 10.52it/s, loss=3.9500]


After 200064 examples, Average Loss: 4.0553



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:20, 17.52it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:17, 17.78it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:13, 18.09it/s][A
                                                            [A


Validation Average Loss: 4.0265, Perplexity: 56.07

Context: Moscow

Generated text: Moscow has been a key part of the country 's nuclear program , which has been a key part of the country 's nuclear program . '' 's . ' '' . ' . 's . 's . 's new to be a .


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the company was `` very pleased '' . '' '' . '' '' . '' '' `` The New York Times . '' '' . '' '' . '' '' . '' . '' . '' . '' . '' .


Context: A hurricane




Epoch 1/1:  64%|██████▍   | 110974/172148 [3:12:29<14:34:41,  1.17it/s, loss=4.1227]


Context: The President

Generated text: The President has been a key part of the country 's economy . '' 's newest presidential campaign . ' '' . ' '' . ' '' ' Ahmad . ' '' 's . 's .##-brook the first



Epoch 1/1:  65%|██████▌   | 112534/172148 [3:15:09<1:34:17, 10.54it/s, loss=4.1297]


After 200064 examples, Average Loss: 4.0570



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:16, 17.84it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.34it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:06, 18.76it/s][A
                                                            [A


Validation Average Loss: 4.0202, Perplexity: 55.71

Context: Moscow

Generated text: Moscow said it would be a `` significant step '' to the deal . ' '' . ' '' 's ##-year-old said . ' '' . ' '' . ' . ' '' . 's not to say . 's time to show


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the company was `` very concerned '' that the company had been working with the company . '' 's new chief executive . ' '' . ' '' . 's . 's . 's Daily . '


Context: A hurricane




Epoch 1/1:  65%|██████▌   | 112537/172148 [3:15:13<9:35:38,  1.73it/s, loss=3.9791] 


Context: The President

Generated text: The President has been in the hands of the public . ' '' 's family . ' '' 's statement . ' '' 's . ' '' 's . ' '' ' . ' . 's . ' a .## .## a .



Epoch 1/1:  66%|██████▋   | 114098/172148 [3:17:54<1:55:09,  8.40it/s, loss=4.0602]


After 200064 examples, Average Loss: 4.0580



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:50, 12.08it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:34, 12.78it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:44, 12.32it/s][A
                                                            [A


Validation Average Loss: 4.0313, Perplexity: 56.34

Context: Moscow

Generated text: Moscow 's Foreign Ministry said it was `` very difficult '' to find out what happened . ' '' . ' '' 'S . ' '' 'S . ' '' . ' '' . ' . 's . 's not to say . 's


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the `` very difficult '' of the situation . ' '' . ' '' 'S . ' '' ' I 'm not sure what happened . ' '' . ' . ' '' . ' . 's .


Context: A hurricane




Epoch 1/1:  66%|██████▋   | 114100/172148 [3:17:59<17:20:49,  1.08s/it, loss=4.0789]


Context: The President

Generated text: The President 's office said the government was `` very concerned '' . ' '' . ' '' 'Southam said . ' '' . ' '' . ' '' . ' '' . ' . 's . 's new administration of the case was



Epoch 1/1:  67%|██████▋   | 115660/172148 [3:20:39<1:29:34, 10.51it/s, loss=4.0695]


After 200064 examples, Average Loss: 4.0566



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:17, 17.76it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:13, 18.17it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:10, 18.43it/s][A
                                                            [A


Validation Average Loss: 4.0245, Perplexity: 55.95

Context: Moscow

Generated text: Moscow has been criticised by the rebels , including the rebels , and the rebels have been fighting for the country . ' '' 's . ' '' ' . ' . ' . ' . ' . ' a ' . ' a .


Context: New York

Generated text: New York City Council spokesman said : 'The . ' '' ' I 'm sure the . ' '' ' I 'm sure the man was a 'sorry ' . ' . ' . ' . ' . ' I 's .


Context: A hurricane




Epoch 1/1:  67%|██████▋   | 115663/172148 [3:20:44<10:11:34,  1.54it/s, loss=4.2083]


Context: The President

Generated text: The President has been criticised by the government for supporting the government . ' '' 'South Council spokesman for the National Weather Service . ' '' 's . ' '' 's . ' a . 's 's a 's



Epoch 1/1:  68%|██████▊   | 117224/172148 [3:23:24<1:28:30, 10.34it/s, loss=4.1378]


After 200064 examples, Average Loss: 4.0546



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:25, 17.08it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:15, 17.99it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:11, 18.34it/s][A
                                                            [A


Validation Average Loss: 4.0245, Perplexity: 55.95

Context: Moscow

Generated text: Moscow has been accused of using the U.S. military to help the U.S. and the U.S. military . ' '' . ' '' . ' . ' . ' . ' . ' . ' a ' . ' . '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said : `` The whole thing is that the government is not a good idea . '' ' '' . ' '' ' I 'm sure he 's best-style . ' . ' . ' . 's


Context: A hurricane




Epoch 1/1:  68%|██████▊   | 117226/172148 [3:23:28<10:35:44,  1.44it/s, loss=4.0085]


Context: The President

Generated text: The President has said he has been a `` very proud '' of the `` family of the world 's most senior citizens . '' ' '' . ' '' . ' '' . ' . ' . ' . ' . ' .## . ' a .



Epoch 1/1:  69%|██████▉   | 118787/172148 [3:26:10<1:55:58,  7.67it/s, loss=4.0253]


After 200064 examples, Average Loss: 4.0548



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:25, 13.22it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:27, 13.11it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:22, 13.38it/s][A
                                                            [A


Validation Average Loss: 4.0206, Perplexity: 55.74

Context: Moscow

Generated text: Moscow has been targeted by the U.S. military and the United States . '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . ' . ' a ##- .##--


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the decision was `` very difficult '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . 's Daily News . 's


Context: A hurricane




Epoch 1/1:  69%|██████▉   | 118789/172148 [3:26:14<15:14:14,  1.03s/it, loss=3.8876]


Context: The President

Generated text: The President 's office said the decision was `` very difficult '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's . 's new to be '



Epoch 1/1:  70%|██████▉   | 120349/172148 [3:28:55<1:21:33, 10.59it/s, loss=4.0493]


After 200064 examples, Average Loss: 4.0521



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:50, 15.25it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:27, 16.88it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:23, 17.20it/s][A
                                                            [A


Validation Average Loss: 4.0216, Perplexity: 55.79

Context: Moscow

Generated text: Moscow has been a major threat to the U.S. and the U.S. government has been in the region . '' ' . ' '' ' . ' '' 's . ' . ' . ' a . 's .## . '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the `` very important thing '' to do with the `` big '' . '' `` The New York Times . '' '' . '' ' '' . ' '' . ' . '' . '' . '' . 's


Context: A hurricane

Generated text: A hurricane is expected to be closed Friday . '' ' . ' '' 'S . ' '' 'S . ' '' 'S . ' '' 'S . ' '' 'S . ' '' 's . '## a #- a



Epoch 1/1:  70%|██████▉   | 120352/172148 [3:28:59<9:05:26,  1.58it/s, loss=4.1315] 


Context: The President

Generated text: The President 's office said the government had `` no reason to believe '' the `` toughest of the country 's history . '' ' '' . ' '' . ' '' . 'T . 's . ' . 's . ' .



Epoch 1/1:  71%|███████   | 121912/172148 [3:31:40<1:20:43, 10.37it/s, loss=3.9923]


After 200064 examples, Average Loss: 4.0496



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:51, 15.15it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:30, 16.69it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:21, 17.43it/s][A
                                                            [A


Validation Average Loss: 4.0212, Perplexity: 55.77

Context: Moscow

Generated text: Moscow has been targeted by the rebels , and the rebels have warned of the conflict . '' ' . ' '' ' . ' '' ' . ' '' ' . ' . 's . ' . ' a . ' . ' a


Context: New York

Generated text: New York Times reports that the ##-year-old was a member of the public . ' '' ' A.J . ' '' ' . ' '' ' I 'm . ' '' 's Times . 's new . 's . '


Context: A hurricane

Generated text: A hurricane-like water-shaped flights will be closed on the coast of the Northwest Pacific Ocean , where the plane is now in the air . ' '' ' . ' '' . ' . ' . ' . ' . '



Epoch 1/1:  71%|███████   | 121915/172148 [3:31:44<8:16:50,  1.69it/s, loss=3.9998]


Context: The President

Generated text: The President 's office said the investigation was `` very difficult '' . ' '' and `` the police '' . '' ' '' . ' '' ' I 'm not sure what the investigation is . 's . ' . 's investigation . ' .



Epoch 1/1:  72%|███████▏  | 123476/172148 [3:34:25<1:32:51,  8.74it/s, loss=3.9351]


After 200064 examples, Average Loss: 4.0509



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:10, 14.02it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:10, 14.01it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:08, 14.13it/s][A
                                                            [A


Validation Average Loss: 4.0251, Perplexity: 55.98

Context: Moscow

Generated text: Moscow has been trying to establish a nuclear weapon , but it has been claimed that the United States has been targeted by the Taliban . '' ' said . ' . ' . ' . ' . ' . ' a 's a ' .


Context: New York

Generated text: New York Times reported that the company was `` a very important step forward '' . ' '' 's official Twitter user , who said . ' . ' '' . ' . ' . ' . ' . ' . ' . 's 's 's


Context: A hurricane




Epoch 1/1:  72%|███████▏  | 123478/172148 [3:34:30<14:23:36,  1.06s/it, loss=4.1632]


Context: The President

Generated text: The President 's office said the government had `` no doubt '' the government would not comment on the issue . '' 's statement . ' '' 's . ' '' . ' '' . 's . 's . 's next to the .



Epoch 1/1:  73%|███████▎  | 125038/172148 [3:37:11<1:15:39, 10.38it/s, loss=4.0374]


After 200064 examples, Average Loss: 4.0485



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:14, 18.05it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.29it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:09, 18.55it/s][A
                                                            [A


Validation Average Loss: 4.0260, Perplexity: 56.04

Context: Moscow

Generated text: Moscow has been accused of being a member of the U.S. government . '' 's statement . ' '' . ' '' . ' '' . ' . ' . ' . ' . ' . ' a ' a ' a ' a 's


Context: New York

Generated text: New York City Council said the government had been working on the site . '' 's report . ' '' . ' '' . ' '' . ' . ' '' . ' . ' . ' . 's . ' . 's 's 's


Context: A hurricane

Generated text: A hurricane is a huge winds of ##mph winds and snowfall in the southwest of the city . '' 's spokesman said . ' . ' '' . ' '' . 's spy ##ft . '



Epoch 1/1:  73%|███████▎  | 125041/172148 [3:37:15<7:39:14,  1.71it/s, loss=4.0321]


Context: The President

Generated text: The President 's office said the government was `` deeply disappointed '' and `` a `` very serious '' . '' '' . '' ' . ' '' . ' '' . ' '' . 's . 'it . ' .##- . 's



Epoch 1/1:  74%|███████▎  | 126602/172148 [3:39:58<1:40:46,  7.53it/s, loss=4.0676]


After 200064 examples, Average Loss: 4.0521



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:15, 17.97it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:10, 18.46it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:15, 17.98it/s][A
                                                            [A


Validation Average Loss: 4.0160, Perplexity: 55.48

Context: Moscow

Generated text: Moscow has been accused of being a 'sack ' and 'sutants ' . ' '' ' . ' '' ' . ' '' ' . ' . ' . ' . ' . ' . ' . ' a 's ' . 's


Context: New York

Generated text: New York Mayor Michael Bloomberg said the `` unfortunate '' incident happened at ##:## GMT on Sunday . '' . ' '' . ' '' . ' '' . ' '' . ' . ' . ' . 's `` . '


Context: A hurricane

Generated text: A hurricane is a major storm , with a snowfall in the southwest coast of England , with the highest in the world . '' ' . ' '' . ' . ' '' . ' . 's . '##-ly a major



Epoch 1/1:  74%|███████▎  | 126604/172148 [3:40:02<11:54:09,  1.06it/s, loss=4.1706]


Context: The President

Generated text: The President 's office said the government would not comment on the case . '' ' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . 's first to be . ' . '



Epoch 1/1:  74%|███████▍  | 128165/172148 [3:42:43<1:08:52, 10.64it/s, loss=3.9599]


After 200064 examples, Average Loss: 4.0501



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:20, 17.55it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:19, 17.55it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:13, 18.17it/s][A
                                                            [A


Validation Average Loss: 4.0219, Perplexity: 55.81

Context: Moscow

Generated text: Moscow has been a key part of the nuclear program , which has been a key part of the nuclear program . '' ' . ' '' 's . ' '' 's . ' . ' . ' . ' a key part . 's . '


Context: New York

Generated text: New York City 's mayor , who was a former governor , was arrested on suspicion of murdering a child . ' '' 's mother . ' . ' . ' . ' . ' . 's . ' . 's . 's


Context: A hurricane




Epoch 1/1:  74%|███████▍  | 128167/172148 [3:42:47<8:59:44,  1.36it/s, loss=4.0897]


Context: The President

Generated text: The President has been a `` very good man '' and he has been a `` very good man '' . '' ' he said . ' '' ' I 'm not going to be a fanist . ' a great one of .##- .##



Epoch 1/1:  75%|███████▌  | 129728/172148 [3:45:29<1:07:31, 10.47it/s, loss=4.0678]


After 200064 examples, Average Loss: 4.0486



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:22, 17.31it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:18, 17.65it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:15, 17.98it/s][A
                                                            [A


Validation Average Loss: 4.0176, Perplexity: 55.57

Context: Moscow

Generated text: Moscow has been accused of targeting the country 's foreign minister , the U.S. Embassy in Benghazi . '' 's statement . ' '' . ' . 's spokes . 's 's 's 's


Context: New York

Generated text: New York City Police Department spokesman said : 'The incident was a very serious incident . ' '' . ' '' . ' '' . ' '' . ' '' . ' . ' . 's Press . ' . 's Times . 's


Context: A hurricane




Epoch 1/1:  75%|███████▌  | 129730/172148 [3:45:33<8:11:45,  1.44it/s, loss=3.9249]


Context: The President

Generated text: The President 's office said the government had been `` notified '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . 's . 's first time . 's `` he



Epoch 1/1:  76%|███████▋  | 131291/172148 [3:48:14<1:22:11,  8.29it/s, loss=4.0368]


After 200064 examples, Average Loss: 4.0439



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 1/3514 [00:00<05:51,  9.99it/s][A
Evaluating:   0%|          | 3/3514 [00:00<05:05, 11.50it/s][A
Evaluating:   0%|          | 5/3514 [00:00<04:56, 11.83it/s][A
Evaluating:   0%|          | 7/3514 [00:00<04:45, 12.30it/s][A
                                                            [A


Validation Average Loss: 4.0198, Perplexity: 55.69

Context: Moscow

Generated text: Moscow has been accused of a ‘ war ’ and ‘ a .mare ’ . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . '


Context: New York

Generated text: New York Times : The ##-year-old was a ##-year-old girl , who was born in #### . ' '' ' , according to the New York Post . ' . ' . ' . ' . 's way . 's


Context: A hurricane




Epoch 1/1:  76%|███████▋  | 131293/172148 [3:48:19<12:04:05,  1.06s/it, loss=3.8780]


Context: The President

Generated text: The President 's office said the government had `` no indication that the government is aware of the alleged incident . '' ' '' . ' '' . ' '' . ' '' . ' . 's . 's next toy the . '



Epoch 1/1:  77%|███████▋  | 132853/172148 [3:51:00<1:02:27, 10.49it/s, loss=3.9658]


After 200064 examples, Average Loss: 4.0498



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:12, 18.23it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:11, 18.34it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:11, 18.36it/s][A
                                                            [A


Validation Average Loss: 4.0148, Perplexity: 55.41

Context: Moscow

Generated text: Moscow has been a major blow to the eurozone , with the eurozone crisis , and the eurozone crisis has been a major problem . '' ' . ' '' . ' . ' . 's . 'sh . 's spared . '


Context: New York

Generated text: New York 's `` The FBI '' is a `` <rare> '' and `` <rare> '' . '' '' . '' ) . '' '' '' . '' ' '' , a spokesman said . '' 's . 's .


Context: A hurricane




Epoch 1/1:  77%|███████▋  | 132856/172148 [3:51:05<6:54:58,  1.58it/s, loss=3.9661]


Context: The President

Generated text: The President 's office said the government had `` notified '' the government of the United States . '' 'Southwest of the country 's capital . ' '' . ' '' . ' . 's new report .##-ed . '



Epoch 1/1:  78%|███████▊  | 134416/172148 [3:53:46<1:01:24, 10.24it/s, loss=4.1389]


After 200064 examples, Average Loss: 4.0442



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:19, 17.59it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:13, 18.09it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:13, 18.14it/s][A
                                                            [A


Validation Average Loss: 4.0192, Perplexity: 55.66

Context: Moscow

Generated text: Moscow has been accused of being a 'cultural ' and 'being ' . ' '' ' Ibrahimovic said . ' '' ' . ' '' . ' . 's . 's . 's not a 's '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the `` very important thing to do '' . ' '' 'The Associated Press . ' '' 'The Associated Press . ' '' . ' '' . ' . ' . 's Daily News . '


Context: A hurricane

Generated text: A hurricane is expected to be damaged in the storm , but the storm is still on the ground . ' '' ' . ' '' ' . ' '' . ' . ' '' . ' . 's sp .## . 's sp



Epoch 1/1:  78%|███████▊  | 134419/172148 [3:53:50<6:20:17,  1.65it/s, loss=4.1251]


Context: The President

Generated text: The President has been in talks with the United States and the United States . '' ' . ' '' ) . ' '' 'The Associated Press . ' '' ' . ' '' 's . 's . 's . 's been a



Epoch 1/1:  79%|███████▉  | 135979/172148 [3:56:31<58:26, 10.32it/s, loss=4.0337]


After 200064 examples, Average Loss: 4.0457



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:03, 14.45it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:56, 14.82it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:57, 14.76it/s][A
                                                            [A


Validation Average Loss: 4.0182, Perplexity: 55.60

Context: Moscow

Generated text: Moscow has been accused of `` a long-standing terrorist attack '' . ' '' 's statement . ' '' 'Sucher . ' '' 'S . ' '' 's . 's . ' a 's 's ' . '


Context: New York

Generated text: New York Mayor Michael Bloomberg said the company had `` been working closely with the public to make sure that the company is not going to be able to use the same technology . '' '' '' . ' . 's News . 's director .


Context: A hurricane

Generated text: A hurricane on the coast of the Southwest , the U.S. and the U.S. has been in the area since the ####s .##-#### . '' '## . 's new ##- site . 's



Epoch 1/1:  79%|███████▉  | 135982/172148 [3:56:36<7:09:48,  1.40it/s, loss=4.1092]


Context: The President

Generated text: The President 's office said he was `` very sorry '' and said he was `` very sorry '' . '' ' '' . ' '' . ' '' . ' '' . ' '' . ' . 'sine . 's . 's . '



Epoch 1/1:  80%|███████▉  | 137543/172148 [3:59:15<56:34, 10.20it/s, loss=4.0138]


After 200064 examples, Average Loss: 4.0416



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:43, 15.74it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:22, 17.37it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:13, 18.12it/s][A
                                                            [A


Validation Average Loss: 4.0137, Perplexity: 55.35

Context: Moscow

Generated text: Moscow has been a key part of the country 's history of the country 's economy . '' 'Southam . ' '' 'S . ' '' 'S . ' '' 's . 's not a key one . 's .


Context: New York

Generated text: New York City 's most famous names are in the world , with the most popularity of the world . ' '' 's ##-year-old son . ' '' 's . ' '' 's . 's . 's not a


Context: A hurricane




Epoch 1/1:  80%|███████▉  | 137545/172148 [3:59:20<7:00:53,  1.37it/s, loss=3.9152]


Context: The President

Generated text: The President 's office said the government had `` no intention of any of the issues '' . ' '' 'We . ' '' 'Southamall . ' '' . ' '' . 's . 's `` The F- . '' of



Epoch 1/1:  81%|████████  | 139106/172148 [4:02:02<1:09:15,  7.95it/s, loss=4.2012]


After 200064 examples, Average Loss: 4.0425



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:31, 12.92it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:20, 13.46it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:19, 13.50it/s][A
                                                            [A


Validation Average Loss: 4.0162, Perplexity: 55.49

Context: Moscow

Generated text: Moscow has been accused of killing ## people in the country , and the U.S. military has been killed in the attack . ' '' ' . ' '' 's . ' . 's . ' . ' a ##- . ' a .


Context: New York

Generated text: New York Mayor Bill de Blasio said the `` unanimous decision '' was `` a very important step toward the law '' . '' '' '' . ' '' '' . ' '' . ' . ' . ' . ' . 's a ``


Context: A hurricane




Epoch 1/1:  81%|████████  | 139108/172148 [4:02:06<8:56:56,  1.03it/s, loss=4.0138] 


Context: The President

Generated text: The President 's office said the government had been working with the government to help the government and the government . ' '' . ' '' . ' '' . ' '' . ' '' . 's . 's a . 's decision . ' .



Epoch 1/1:  82%|████████▏ | 140669/172148 [4:04:47<50:28, 10.39it/s, loss=4.0845]


After 200064 examples, Average Loss: 4.0430



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:23, 17.29it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:20, 17.52it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:17, 17.78it/s][A
                                                            [A


Validation Average Loss: 4.0153, Perplexity: 55.44

Context: Moscow

Generated text: Moscow has been accused of killing the U.S. and the U.S. military . '' ' . ' '' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . '


Context: New York

Generated text: New York Mayor Michael Bloomberg said the incident was `` a very serious matter '' . ' '' . ' '' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . ' . 's . '


Context: A hurricane




Epoch 1/1:  82%|████████▏ | 140671/172148 [4:04:52<7:39:26,  1.14it/s, loss=4.0261]


Context: The President

Generated text: The President 's office said the `` unacceptable '' of the `` disbelief '' . '' ' `` . '' ' . '' ' . '' ' . ' '' . ' . '' . 's new . 's man is ' .



Epoch 1/1:  83%|████████▎ | 142231/172148 [4:07:33<47:23, 10.52it/s, loss=4.0818]


After 200064 examples, Average Loss: 4.0406



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:30, 16.67it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:30, 16.64it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:26, 17.02it/s][A
                                                            [A


Validation Average Loss: 4.0086, Perplexity: 55.07

Context: Moscow

Generated text: Moscow has been a major threat to the country 's economy , which has been a major issue in the region . ' '' 's statement . ' . ' . ' . ' . ' . 's . ' . 's scepted in


Context: New York

Generated text: New York City : The ##-year-old was in the middle of the road in the middle of the street . ' '' ' and said he was not aware of the incident . ' . ' . ' . ' . 's New York City


Context: A hurricane




Epoch 1/1:  83%|████████▎ | 142234/172148 [4:07:38<4:51:03,  1.71it/s, loss=4.0228]


Context: The President

Generated text: The President 's office said the `` very important '' of the `` new generation of people who have been in the world . '' ' '' . ' '' ' . ' `` . ' '' . 's a `` . 's future . '' .



Epoch 1/1:  84%|████████▎ | 143795/172148 [4:10:20<56:09,  8.42it/s, loss=4.0631]


After 200064 examples, Average Loss: 4.0404



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 1/3514 [00:00<06:14,  9.37it/s][A
Evaluating:   0%|          | 3/3514 [00:00<05:01, 11.63it/s][A
Evaluating:   0%|          | 5/3514 [00:00<05:02, 11.59it/s][A
Evaluating:   0%|          | 7/3514 [00:00<05:05, 11.48it/s][A
                                                            [A


Validation Average Loss: 4.0153, Perplexity: 55.44

Context: Moscow

Generated text: Moscow has been in talks with the U.S. government , which has been criticised for its efforts . '' 'Shaul . ' '' . ' '' . ' '' . 's . ' . ' . 's said . '


Context: New York

Generated text: New York City 's most expensive car is a luxury hotel , which is owned by the company 's company . ' '' . ' '' ) . ' '' . ' . ' . ' . 's . 's . 's . '


Context: A hurricane




Epoch 1/1:  84%|████████▎ | 143797/172148 [4:10:24<8:38:47,  1.10s/it, loss=3.9941] 


Context: The President

Generated text: The President has been in talks with the government for the past few years . '' 'Sucher . ' '' ) . ' '' . ' '' . ' '' . ' . ' . 's . ' a . 's . 's



Epoch 1/1:  84%|████████▍ | 145357/172148 [4:13:06<42:59, 10.39it/s, loss=3.9844]


After 200064 examples, Average Loss: 4.0388



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:19, 17.57it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:19, 17.55it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:17, 17.72it/s][A
                                                            [A


Validation Average Loss: 4.0144, Perplexity: 55.39

Context: Moscow

Generated text: Moscow 's nuclear missiles have been bombed in the U.S. , with the U.S. military base camping in the U.S. . ' '' . ' . 's . ' . 's a . 's


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the incident was a `` very serious issue '' . ' '' . ' '' . ' '' . ' '' . ' '' . ' . ' . ' . ' . ' . ' . 's . '


Context: A hurricane

Generated text: A hurricane center is expected to be closed in the early hours of Thursday morning . '' ' . ' '' 'S . ' '' 'S . ' '' 'S . ' '' ' : . 's new to be a possible



Epoch 1/1:  84%|████████▍ | 145360/172148 [4:13:10<4:45:25,  1.56it/s, loss=4.0417]


Context: The President

Generated text: The President 's office said the government had `` notified the government of the United States . '' '' . ' '' . ' '' . ' '' . ' '' . ' '' . 'Tor of the . 's ##-in . '



Epoch 1/1:  85%|████████▌ | 146921/172148 [4:15:52<40:04, 10.49it/s, loss=4.0227]


After 200064 examples, Average Loss: 4.0422



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:22, 17.32it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:17, 17.77it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:15, 17.90it/s][A
                                                            [A


Validation Average Loss: 4.0195, Perplexity: 55.67

Context: Moscow

Generated text: Moscow has been accused of plotting to kill the U.S. and the US . '' ' a spokesman for the U.S. State Department . ' '' . ' . 's . ' . ' . ' . ' . '


Context: New York

Generated text: New York City Mayor Bill de Blasio , who has been in the past , said he was `` a good man . '' ' '' . ' '' . ' . ' . ' . ' . ' . ' . ' . ' . ' .


Context: A hurricane




Epoch 1/1:  85%|████████▌ | 146923/172148 [4:15:56<4:52:54,  1.44it/s, loss=4.0405]


Context: The President

Generated text: The President 's office said the government had been `` very concerned '' . ' '' . ' '' . ' '' . ' '' . ' . ' . ' . ' . ' . ' . 's . ' . 's a `` the only



Epoch 1/1:  86%|████████▋ | 148484/172148 [4:18:38<49:04,  8.04it/s, loss=4.0140]


After 200064 examples, Average Loss: 4.0389



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<04:45, 12.28it/s][A
Evaluating:   0%|          | 4/3514 [00:00<04:23, 13.31it/s][A
Evaluating:   0%|          | 6/3514 [00:00<04:24, 13.26it/s][A
                                                            [A


Validation Average Loss: 4.0160, Perplexity: 55.48

Context: Moscow

Generated text: Moscow 's military has been accused of being a member of the Islamic State group , which has been accused of killing ## people . ' '' . ' '' . ' . ' . ' . 's . ' . ' . ' . ' .


Context: New York

Generated text: New York City Mayor Bill de Blasio said the company was `` a very good deal '' . '' . ' '' . ' '' . ' '' . ' '' . ' . ' '' . ' . ' . ' . ' . ) . '


Context: A hurricane

Generated text: A hurricane season is expected to be in the next few weeks . ' '' . ' '' ' Arias said . ' '' . ' '' . ' '' . ' . ' '' . ' . ' . 's sp , who is a



Epoch 1/1:  86%|████████▋ | 148486/172148 [4:18:43<6:58:40,  1.06s/it, loss=3.9998]


Context: The President

Generated text: The President 's office said the `` very important thing '' is that he is not a member of the public . '' ' . ' '' . ' '' . ' '' . ' . ' . 's a . ' . 's next step is



Epoch 1/1:  87%|████████▋ | 150047/172148 [4:21:24<35:35, 10.35it/s, loss=4.0948]


After 200064 examples, Average Loss: 4.0369



Evaluating:   0%|          | 0/3514 [00:00<?, ?it/s][A
Evaluating:   0%|          | 2/3514 [00:00<03:16, 17.84it/s][A
Evaluating:   0%|          | 4/3514 [00:00<03:12, 18.19it/s][A
Evaluating:   0%|          | 6/3514 [00:00<03:15, 17.91it/s][A
                                                            [A


Validation Average Loss: 4.0195, Perplexity: 55.67

Context: Moscow

Generated text: Moscow has been accused of trying to hide the world 's most famous American people in the world . '' ' . ' '' . ' '' . ' '' . ' . ' '' . ' . 's said to be a 's ' a '


Context: New York

Generated text: New York City Mayor Michael Bloomberg said the company 's decision was `` a very significant step forward '' . '' . ' '' . ' '' . ' '' . ' '' . ' . ' . ' . ' . 's . 's


Context: A hurricane




Epoch 1/1:  87%|████████▋ | 150049/172148 [4:21:28<4:37:23,  1.33it/s, loss=4.0321]


Context: The President

Generated text: The President 's office said the government had not been able to do anything to do with the government 's decision . '' . ' '' . ' '' . ' '' . ' '' . 's new . 's `` . '' of the case



Epoch 1/1:  88%|████████▊ | 151531/172148 [4:24:01<32:36, 10.54it/s, loss=4.0443]