In [11]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from tqdm import tqdm
import time

# Constants
# MAX_PIPES is the number of per-pipe rows in every fixed-size feature matrix
# and the divisor used to normalize pipe IDs and hop counts.
MAX_PIPES = 40  # For pipe1 through pipe39
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class PipeNetwork:
    """Tree of pipes with cached ancestry, descendants and pairwise hop counts.

    Pipes are registered with :meth:`add_pipe`; a parent ID of ``-1`` marks a
    pipe without a parent. Call :meth:`build_network_structure` once all
    pipes are added, then :meth:`get_network_features` to obtain the
    fixed-size feature matrix.
    """

    def __init__(self, max_pipes=MAX_PIPES):
        """
        Helper class to represent the pipe network structure.

        Args:
            max_pipes: Number of rows in the feature matrix; also used as the
                normalization divisor and as the "unreachable" distance.
        """
        self.max_pipes = max_pipes
        self.parent_map = {}  # Maps pipe_id -> parent_id
        self.child_map = {}   # Maps pipe_id -> list of child pipe_ids
        self.pipe_data = {}   # Maps pipe_id -> (length, radius, has_receiver)
        self.ancestors = {}   # Maps pipe_id -> list of ancestor pipe_ids
        self.descendants = {} # Maps pipe_id -> list of descendant pipe_ids
        self.distances = {}   # Maps (pipe_id1, pipe_id2) -> network distance

    def add_pipe(self, pipe_id, parent_id, length, radius, has_receiver):
        """Add a pipe to the network with its properties.

        A ``parent_id`` of ``-1`` means the pipe has no parent and is not
        recorded as anyone's child.
        """
        self.parent_map[pipe_id] = parent_id
        self.pipe_data[pipe_id] = (length, radius, has_receiver)

        # Update child_map
        if parent_id != -1:
            self.child_map.setdefault(parent_id, []).append(pipe_id)

    def build_network_structure(self):
        """Build additional network structure information after all pipes are added.

        Populates ``ancestors``, ``descendants`` and the symmetric
        ``distances`` table for every pair of registered pipes.

        Raises:
            ValueError: If the parent links contain a cycle.
        """
        pipe_ids = list(self.pipe_data)

        # Ancestors must be complete before distances are computed, because
        # the distance calculation reads self.ancestors.
        for pipe_id in pipe_ids:
            self.ancestors[pipe_id] = self._get_ancestors(pipe_id)

        for pipe_id in pipe_ids:
            self.descendants[pipe_id] = self._get_descendants(pipe_id)

        # Distances are symmetric, so only the upper triangle is computed.
        for i, first in enumerate(pipe_ids):
            for second in pipe_ids[i:]:
                distance = self._calculate_network_distance(first, second)
                self.distances[(first, second)] = distance
                self.distances[(second, first)] = distance

    def _get_ancestors(self, pipe_id):
        """Get all ancestors (parent, grandparent, etc.) of a pipe.

        The list is ordered nearest-first.

        Raises:
            ValueError: If following parent links revisits a pipe (a cycle),
                which would otherwise loop forever.
        """
        ancestors = []
        seen = {pipe_id}
        current = pipe_id
        while self.parent_map.get(current, -1) != -1:
            current = self.parent_map[current]
            if current in seen:
                raise ValueError(
                    f"Cycle detected in parent chain of pipe {pipe_id}"
                )
            seen.add(current)
            ancestors.append(current)
        return ancestors

    def _get_descendants(self, pipe_id):
        """Get all descendants of a pipe recursively.

        Direct children come first, followed by each child's own descendants.
        """
        children = self.child_map.get(pipe_id, [])
        descendants = list(children)
        for child in children:
            descendants.extend(self._get_descendants(child))
        return descendants

    def _calculate_network_distance(self, pipe_id1, pipe_id2):
        """
        Calculate the network distance (number of hops) between two pipes.

        A hop is one parent/child link, so a pipe is 0 hops from itself,
        1 hop from its parent and 2 hops from a sibling. Relies on
        ``self.ancestors`` having been populated. Returns ``self.max_pipes``
        when the two pipes share no ancestor.
        """
        if pipe_id1 == pipe_id2:
            return 0

        # Hops from pipe 2 up to itself and to each of its ancestors. The
        # ancestor lists are nearest-first, so position + 1 is the hop count.
        hops_from_second = {pipe_id2: 0}
        for depth, node in enumerate(self.ancestors.get(pipe_id2, []), start=1):
            hops_from_second.setdefault(node, depth)

        # Walk upward from pipe 1; the first node also on pipe 2's chain is
        # the nearest common ancestor (or one of the two pipes themselves).
        chain_from_first = [pipe_id1, *self.ancestors.get(pipe_id1, [])]
        for hops_from_first, node in enumerate(chain_from_first):
            if node in hops_from_second:
                return hops_from_first + hops_from_second[node]

        # No common ancestor found (should not happen in a tree)
        return self.max_pipes

    def _get_path_to_ancestor(self, pipe_id, ancestor_id):
        """Get the path from a pipe to one of its ancestors.

        The returned list starts with ``pipe_id`` and, when the ancestor is
        reached, ends with ``ancestor_id`` (both endpoints included), so the
        number of hops is ``len(path) - 1``.
        """
        path = []
        current = pipe_id
        while current != ancestor_id and self.parent_map.get(current, -1) != -1:
            path.append(current)
            current = self.parent_map[current]
        if current == ancestor_id:
            path.append(ancestor_id)
        return path

    def get_network_features(self, emitter_pipe_id=None):
        """
        Generate network-aware features for all pipes.

        Row ``pipe_id - 1`` holds, in order: normalized pipe ID, normalized
        parent ID (``-0.1`` when the parent ID is not positive), length,
        radius, receiver flag, and the normalized counts/distances for
        ancestors, descendants, root, emitter and nearest receiver.

        Args:
            emitter_pipe_id: Pipe containing the emitter, or ``None`` to leave
                the emitter-distance column at 0.

        Returns:
            numpy array of shape (max_pipes, feature_dim)
        """
        feature_dim = 10  # Adjust based on features below
        features = np.zeros((self.max_pipes, feature_dim), dtype=np.float32)

        # Receiver pipes do not change per row, so collect them once.
        receiver_ids = [
            other_id
            for other_id, (_, _, other_has_receiver) in self.pipe_data.items()
            if other_has_receiver
        ]

        for pipe_id, (length, radius, has_receiver) in self.pipe_data.items():
            # NOTE(review): rows are indexed by pipe_id - 1, so this guard
            # leaves the last row unused and a pipe_id of 0 would land in the
            # last row via index -1 -- confirm the intended ID range.
            if pipe_id >= self.max_pipes:
                continue

            # Base pipe features
            parent_id = self.parent_map.get(pipe_id, -1)

            # Network structural features
            num_ancestors = len(self.ancestors.get(pipe_id, []))
            num_descendants = len(self.descendants.get(pipe_id, []))

            # Distance to emitter if known
            distance_to_emitter = 0
            if emitter_pipe_id is not None:
                distance_to_emitter = self.distances.get(
                    (pipe_id, emitter_pipe_id), self.max_pipes
                )

            # Distance to root equals the number of ancestors above the pipe
            distance_to_root = num_ancestors

            # Shortest distance to a receiver, capped at max_pipes
            min_distance_to_receiver = min(
                [self.max_pipes]
                + [
                    self.distances.get((pipe_id, other_id), self.max_pipes)
                    for other_id in receiver_ids
                ]
            )

            # Normalize features
            features[pipe_id-1] = [
                pipe_id / self.max_pipes,                  # Pipe ID (normalized)
                parent_id / self.max_pipes if parent_id > 0 else -0.1,  # Parent ID (normalized)
                length,                                    # Length
                radius,                                    # Radius
                1.0 if has_receiver else 0.0,              # Has receiver flag
                num_ancestors / self.max_pipes,            # Normalized number of ancestors
                num_descendants / self.max_pipes,          # Normalized number of descendants
                distance_to_root / self.max_pipes,         # Normalized distance to root
                distance_to_emitter / self.max_pipes,      # Normalized distance to emitter (0 during inference)
                min_distance_to_receiver / self.max_pipes  # Normalized distance to nearest receiver
            ]

        return features

In [12]:
class EnhancedEmitterDataset(Dataset):
    """Simulation runs flattened into fixed-size feature/target tensors.

    Every sub-folder of ``data_dir`` whose name does not start with ``.`` is
    treated as one run. A run contributes one sample: the flattened
    per-pipe network features followed by the flattened per-pipe receiver
    features, and a 3-element target ``[pipe_id / MAX_PIPES, r, z]``.
    Runs that fail to parse are reported and skipped.
    """

    # Number of statistics kept from the second line of a receiver file.
    NUM_RECEIVER_STATS = 7
    # File-name pattern of receiver output files inside a pipe folder.
    RECEIVER_PATTERN = "#*-Ring type.txt"

    def __init__(self, data_dir, use_emitter_distance=True):
        """Scan ``data_dir`` and eagerly load every run into memory.

        Args:
            data_dir: Directory containing one sub-folder per simulation run.
            use_emitter_distance: When True (the default, matching earlier
                behaviour) the per-pipe "distance to emitter" feature is
                computed from the target emitter pipe.
                NOTE(review): that feature is derived from the label, so it
                leaks the target pipe into the inputs; pass False to build a
                dataset whose features are available at inference time.
        """
        self.data_dir = data_dir
        self.use_emitter_distance = use_emitter_distance
        self.run_folders = sorted(
            f for f in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, f)) and not f.startswith('.')
        )

        # Process all runs
        self.features = []
        self.targets = []
        self._process_runs()

    def _process_runs(self):
        """Load every run, then stack the samples into float32 tensors."""
        print(f"Processing {len(self.run_folders)} simulation runs...")

        for run_folder in tqdm(self.run_folders):
            # Best-effort loading: one malformed run must not abort the rest,
            # so the failure is reported and the run is left out.
            try:
                features, target = self._process_single_run(run_folder)
            except Exception as e:
                print(f"Error processing run folder {run_folder}: {str(e)}")
            else:
                self.features.append(features)
                self.targets.append(target)

        # Convert lists to tensors
        self.features = torch.tensor(np.array(self.features), dtype=torch.float32)
        self.targets = torch.tensor(np.array(self.targets), dtype=torch.float32)

    @staticmethod
    def _list_pipe_folders(run_path):
        """Return ``(pipe_id, folder_name)`` pairs for ``pipe<N>`` folders, sorted by N.

        Folders whose suffix is not a plain number are ignored instead of
        failing the whole run.
        """
        found = []
        for name in os.listdir(run_path):
            if not name.startswith('pipe'):
                continue
            suffix = name[len('pipe'):]
            if suffix.isdecimal() and os.path.isdir(os.path.join(run_path, name)):
                found.append((int(suffix), name))
        found.sort()
        return found

    @staticmethod
    def _read_pipe(pipe_path, has_receiver, network):
        """Register the pipe described by ``simulation_data.txt``, if present."""
        sim_data_path = os.path.join(pipe_path, "simulation_data.txt")
        if not os.path.exists(sim_data_path):
            return
        with open(sim_data_path, 'r') as f:
            fields = f.readline().split()
        # First line: pipe ID, parent ID, length, radius.
        network.add_pipe(
            int(fields[0]), int(fields[1]), float(fields[2]), float(fields[3]),
            has_receiver,
        )

    def _read_receiver(self, rec_file):
        """Parse one receiver file into ``(pipe_id, r, z, stats)``.

        ``stats`` always has ``NUM_RECEIVER_STATS`` entries: extra values are
        dropped and missing ones are zero-filled so every receiver row has
        the same width.
        """
        with open(rec_file, 'r') as f:
            location = f.readline().split()
            raw_stats = f.readline().strip().split(',')

        stats = [
            float(token)
            for token in raw_stats[:self.NUM_RECEIVER_STATS]
            if token.strip()
        ]
        stats.extend([0.0] * (self.NUM_RECEIVER_STATS - len(stats)))
        return int(location[0]), float(location[1]), float(location[2]), stats

    @staticmethod
    def _read_target(run_path):
        """Read ``targetOutput.txt`` as ``(emitter_pipe_id, r, z)``; z defaults to 0.0."""
        with open(os.path.join(run_path, "targetOutput.txt"), 'r') as f:
            fields = f.readline().split()
        emitter_z = float(fields[2]) if len(fields) > 2 else 0.0
        return int(fields[0]), float(fields[1]), emitter_z

    def _build_receiver_features(self, receiver_data):
        """Average the receivers of each pipe into a ``(MAX_PIPES, 10)`` matrix."""
        width = 3 + self.NUM_RECEIVER_STATS
        receiver_features = np.zeros((MAX_PIPES, width), dtype=np.float32)
        for pipe_id, receivers in receiver_data.items():
            # NOTE(review): rows are indexed by pipe_id - 1, so this guard
            # leaves the last row unused -- confirm the intended ID range.
            if pipe_id >= MAX_PIPES:
                continue

            # Average stats if multiple receivers in the same pipe
            avg_r = np.mean([r for r, _, _ in receivers])
            avg_z = np.mean([z for _, z, _ in receivers])
            avg_stats = np.mean([s for _, _, s in receivers], axis=0)

            receiver_features[pipe_id-1] = np.concatenate([
                [pipe_id / MAX_PIPES, avg_r, avg_z],
                avg_stats
            ])
        return receiver_features

    def _process_single_run(self, run_folder):
        """Turn one run folder into ``(combined_features, target)`` arrays.

        Raises whatever the underlying file parsing raises (missing target
        file, malformed numbers, ...); the caller decides how to handle it.
        """
        run_path = os.path.join(self.data_dir, run_folder)

        # Create pipe network
        network = PipeNetwork()
        receiver_data = {}

        for pipe_id, pipe_folder in self._list_pipe_folders(run_path):
            if pipe_id > MAX_PIPES:
                continue

            pipe_path = os.path.join(run_path, pipe_folder)

            # Escape the directory part so brackets in the path are not
            # interpreted as glob character classes.
            receiver_files = glob.glob(
                os.path.join(glob.escape(pipe_path), self.RECEIVER_PATTERN)
            )

            self._read_pipe(pipe_path, len(receiver_files) > 0, network)

            # Collect receiver data, keyed by the pipe ID stored in the file
            for rec_file in receiver_files:
                rec_pipe_id, r_coord, z_coord, stats = self._read_receiver(rec_file)
                receiver_data.setdefault(rec_pipe_id, []).append(
                    (r_coord, z_coord, stats)
                )

        # Build network structure once all pipes are added
        network.build_network_structure()

        # Read target emitter location
        emitter_pipe_id, emitter_r, emitter_z = self._read_target(run_path)

        # Get enhanced network-aware features
        network_features = network.get_network_features(
            emitter_pipe_id if self.use_emitter_distance else None
        )

        receiver_features = self._build_receiver_features(receiver_data)

        # Flatten and combine features: network block first, receiver block second
        combined_features = np.concatenate([
            network_features.flatten(),
            receiver_features.flatten(),
        ])

        # Create target vector with normalized pipe ID
        target = np.array([
            emitter_pipe_id / MAX_PIPES,  # Normalized pipe ID
            emitter_r,                    # r coordinate
            emitter_z                     # z coordinate
        ], dtype=np.float32)

        return combined_features, target

    def __len__(self):
        """Number of successfully loaded runs."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair at ``idx``."""
        return self.features[idx], self.targets[idx]

In [13]:
class TreeAwareEmitterModel(nn.Module):
    """Two-branch MLP predicting ``[pipe_id, r, z]`` from a flat feature vector.

    The first ``MAX_PIPES * 10`` input columns go through the network
    encoder and the remaining columns through the receiver encoder; the two
    256-wide encodings are concatenated, passed through shared layers, and
    read out by a 1-unit pipe head and a 2-unit position head.
    """

    def __init__(self, input_dim):
        """Build the layers.

        Args:
            input_dim: Width of the input vector. NOTE(review): the value is
                accepted but not used; both branches are sized from
                ``MAX_PIPES * 10``.
        """
        super().__init__()

        branch_width = MAX_PIPES * 10  # 10 features per pipe / per receiver

        # Separate branches for network structure and receiver data
        self.network_encoder = self._build_encoder(branch_width)
        self.receiver_encoder = self._build_encoder(branch_width)

        # Shared trunk; its input is the 256 + 256 concatenation of both branches
        trunk = [
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
        ]
        self.combined_layers = nn.Sequential(*trunk)

        # Output heads: one value for the pipe ID, two for (r, z)
        self.pipe_head = nn.Linear(64, 1)
        self.position_head = nn.Linear(64, 2)

    @staticmethod
    def _build_encoder(in_features):
        """Return the 512 -> 256 encoder stack shared by both branches."""
        return nn.Sequential(
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )

    def forward(self, x):
        """Map a batch of flat feature vectors to ``[pipe, r, z]`` predictions."""
        boundary = MAX_PIPES * 10

        # Encode the network block first, then the receiver block
        encoded_network = self.network_encoder(x[:, :boundary])
        encoded_receivers = self.receiver_encoder(x[:, boundary:])

        hidden = self.combined_layers(
            torch.cat((encoded_network, encoded_receivers), dim=1)
        )

        # Column 0 is the pipe prediction, columns 1-2 the position
        return torch.cat((self.pipe_head(hidden), self.position_head(hidden)), dim=1)

In [None]:
def custom_loss_function(pred, target, pipe_weight=5.0):
    """
    Weighted sum of two MSE terms, emphasising the pipe prediction.

    Column 0 of both tensors is the pipe ID (handled as a regression value);
    the remaining columns are the position (r, z).

    Args:
        pred: Model predictions [pipe_id, r, z]
        target: Ground truth targets [pipe_id, r, z]
        pipe_weight: Multiplier applied to the pipe-ID term

    Returns:
        Scalar tensor ``pipe_weight * mse(pipe) + mse(position)``.
    """
    mse = nn.functional.mse_loss

    pipe_term = mse(pred[:, :1], target[:, :1])
    position_term = mse(pred[:, 1:], target[:, 1:])

    weighted_pipe_term = pipe_weight * pipe_term
    return weighted_pipe_term + position_term

In [24]:
def train_and_evaluate(data_dir, epochs=500, batch_size=32, lr=0.001):
    """
    Train and evaluate the tree-aware emitter localization model.

    The data is split 70/15/15 into train/validation/test. After every epoch
    the model with the lowest validation loss so far is written to
    ``best_emitter_model.pt``; that checkpoint is reloaded for the final test
    evaluation. The loss curves are saved to ``emitter_training_loss.png``.

    Args:
        data_dir: Directory containing simulation runs
        epochs: Number of training epochs
        batch_size: Batch size for training
        lr: Learning rate

    Returns:
        Tuple ``(model, avg_test_loss, pipe_accuracy, avg_position_error)``
        where ``pipe_accuracy`` is a percentage.

    Raises:
        ValueError: If no run in ``data_dir`` could be loaded.
    """
    checkpoint_path = "best_emitter_model.pt"

    # Create dataset
    print("Creating datasets...")
    full_dataset = EnhancedEmitterDataset(data_dir)

    # Split into train, validation, test
    dataset_size = len(full_dataset)
    if dataset_size == 0:
        raise ValueError(f"No usable simulation runs found in {data_dir!r}")
    train_size = int(0.7 * dataset_size)
    val_size = int(0.15 * dataset_size)
    test_size = dataset_size - train_size - val_size

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        full_dataset, [train_size, val_size, test_size]
    )

    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    print(f"Training: {len(train_dataset)} samples, Validation: {len(val_dataset)} samples, Test: {len(test_dataset)} samples")

    # Calculate input dimension and initialize model
    sample_features, _ = full_dataset[0]
    input_dim = sample_features.shape[0]

    model = TreeAwareEmitterModel(input_dim).to(DEVICE)
    print(f"Model initialized with input dimension: {input_dim}")
    print(f"Training on: {DEVICE}")

    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    improved_since_log = False

    for epoch in range(1, epochs + 1):
        # Training phase
        model.train()
        epoch_loss = 0
        start_time = time.time()

        loop = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}", leave=False)
        for features, targets in loop:
            features, targets = features.to(DEVICE), targets.to(DEVICE)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(features)
            loss = custom_loss_function(outputs, targets)

            # Backward pass
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch average is per sample
            batch_loss = loss.item()
            epoch_loss += batch_loss * features.size(0)
            loop.set_postfix({"loss": batch_loss})

        avg_train_loss = epoch_loss / len(train_dataset)
        train_losses.append(avg_train_loss)

        # Validation phase
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for features, targets in val_loader:
                features, targets = features.to(DEVICE), targets.to(DEVICE)
                outputs = model(features)
                loss = custom_loss_function(outputs, targets)
                val_loss += loss.item() * features.size(0)

        # Tiny datasets can produce an empty validation split
        avg_val_loss = val_loss / max(len(val_dataset), 1)
        val_losses.append(avg_val_loss)

        # Checkpoint on every improving epoch, not only on logging epochs, so
        # a checkpoint always exists and the true best epoch is never missed.
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), checkpoint_path)
            improved_since_log = True

        if epoch % 10 == 0:
            if improved_since_log:
                print(f"New best model saved (loss: {best_val_loss:.6f})")
                improved_since_log = False

            elapsed = time.time() - start_time
            print(f"Epoch {epoch}/{epochs} | Train loss: {avg_train_loss:.6f} | Val loss: {avg_val_loss:.6f} | Time: {elapsed:.2f}s")

    # Load best model for testing; map_location keeps the load working when
    # the checkpoint was written on a different device.
    model.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE))

    # Test evaluation
    model.eval()
    test_loss = 0
    pipe_correct = 0
    position_errors = []

    with torch.no_grad():
        for features, targets in tqdm(test_loader, desc="Testing"):
            features, targets = features.to(DEVICE), targets.to(DEVICE)
            outputs = model(features)

            # Calculate test loss
            loss = custom_loss_function(outputs, targets, pipe_weight=1.0)  # Equal weighting for evaluation
            test_loss += loss.item() * features.size(0)

            # Convert normalized pipe IDs back to absolute IDs
            pred_pipe_ids = torch.round(outputs[:, 0] * MAX_PIPES).long()
            true_pipe_ids = torch.round(targets[:, 0] * MAX_PIPES).long()

            # Check pipe prediction accuracy
            pipe_correct += (pred_pipe_ids == true_pipe_ids).sum().item()

            # Euclidean error in the (r, z) plane
            position_error = torch.sqrt(
                (outputs[:, 1] - targets[:, 1])**2 + (outputs[:, 2] - targets[:, 2])**2
            )
            position_errors.extend(position_error.cpu().numpy())

    # Calculate metrics
    avg_test_loss = test_loss / len(test_dataset)
    pipe_accuracy = pipe_correct / len(test_dataset) * 100
    avg_position_error = np.mean(position_errors)

    print("\nTest Results:")
    print(f"Test Loss: {avg_test_loss:.6f}")
    print(f"Pipe Prediction Accuracy: {pipe_accuracy:.2f}%")
    print(f"Average Position Error: {avg_position_error:.6f} units")

    # Plot training history
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label="Training Loss")
    plt.plot(val_losses, label="Validation Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training and Validation Loss")
    plt.legend()
    plt.savefig("emitter_training_loss.png")
    plt.show()

    return model, avg_test_loss, pipe_accuracy, avg_position_error

if __name__ == "__main__":
    # The default points at the original author's local data folder. Set the
    # EMITTER_DATA_DIR environment variable to run against another location
    # without editing the file (sys.argv is avoided because notebook kernels
    # inject their own arguments).
    data_dir = os.environ.get(
        "EMITTER_DATA_DIR",
        "/Users/daghanerdonmez/Desktop/molecular-simulation-mlp/output-processing/Outputs_Copy",
    )
    train_and_evaluate(data_dir)

Creating datasets...
Processing 1000 simulation runs...


100%|██████████| 1000/1000 [00:07<00:00, 127.27it/s]


Training: 700 samples, Validation: 150 samples, Test: 150 samples
Model initialized with input dimension: 800
Training on: cpu


Epoch 1/500:  23%|██▎       | 5/22 [00:00<00:00, 48.37it/s, loss=0.203] 

tensor([ 0.0148,  0.2366, -0.0281], grad_fn=<SelectBackward0>)
tensor([ 6.5000e-01,  1.5300e-04, -1.1090e-03])
tensor([ 0.0748, -0.0817, -0.2065], grad_fn=<SelectBackward0>)
tensor([2.7500e-01, 9.3000e-05, 3.2020e-03])
tensor([ 0.2451,  0.1196, -0.2746], grad_fn=<SelectBackward0>)
tensor([4.5000e-01, 1.8900e-04, 4.3480e-03])
tensor([ 0.5742,  0.0644, -0.1905], grad_fn=<SelectBackward0>)
tensor([8.5000e-01, 2.7400e-04, 3.1270e-03])
tensor([ 1.5615,  0.4255, -0.0186], grad_fn=<SelectBackward0>)
tensor([ 9.5000e-01,  2.8500e-04, -2.4840e-03])
tensor([1.1220, 0.1303, 0.0313], grad_fn=<SelectBackward0>)
tensor([ 8.7500e-01,  2.8000e-04, -2.8680e-03])
tensor([ 0.9214,  0.2796, -0.0858], grad_fn=<SelectBackward0>)
tensor([ 6.5000e-01,  5.1000e-05, -2.2570e-03])
tensor([ 1.1825,  0.1444, -0.0113], grad_fn=<SelectBackward0>)
tensor([ 6.7500e-01,  3.5200e-04, -1.5010e-03])
tensor([ 0.6895, -0.0229, -0.1664], grad_fn=<SelectBackward0>)
tensor([3.7500e-01, 2.6900e-04, 4.4990e-03])
tensor([ 0.4178,

                                                                          

tensor([ 0.3308, -0.1547, -0.0194], grad_fn=<SelectBackward0>)
tensor([0.4250, 0.0004, 0.0043])
tensor([0.0173, 0.0274, 0.3116], grad_fn=<SelectBackward0>)
tensor([0.0750, 0.0002, 0.0040])
tensor([ 1.1137e+00,  4.5821e-05, -1.9249e-01], grad_fn=<SelectBackward0>)
tensor([8.0000e-01, 1.9300e-04, 7.9100e-04])
tensor([0.3449, 0.0870, 0.0368])
tensor([ 8.5000e-01,  1.0300e-04, -3.3150e-03])
tensor([ 0.1342,  0.0415, -0.0421])
tensor([1.5000e-01, 1.3500e-04, 4.3550e-03])
tensor([ 0.1952, -0.0129, -0.0165])
tensor([3.0000e-01, 1.5900e-04, 1.2170e-03])
tensor([ 0.1880,  0.0183, -0.0793])
tensor([ 0.3500,  0.0004, -0.0008])
tensor([0.1517, 0.0670, 0.0913])
tensor([ 0.0750,  0.0002, -0.0027])


                                                                          

tensor([ 0.6584, -0.0807, -0.0427], grad_fn=<SelectBackward0>)
tensor([7.5000e-01, 3.0100e-04, 3.3900e-04])
tensor([ 0.2412,  0.0563, -0.0048], grad_fn=<SelectBackward0>)
tensor([ 2.2500e-01,  1.8600e-04, -2.5080e-03])
tensor([ 0.5341, -0.0168, -0.0151], grad_fn=<SelectBackward0>)
tensor([ 5.2500e-01,  2.6100e-04, -3.2390e-03])
tensor([ 0.7000, -0.0620,  0.0594], grad_fn=<SelectBackward0>)
tensor([ 8.7500e-01,  2.8000e-04, -2.8680e-03])
tensor([0.7660, 0.0393, 0.0022], grad_fn=<SelectBackward0>)
tensor([7.0000e-01, 2.2800e-04, 3.7000e-03])
tensor([ 0.4821, -0.0464,  0.0580], grad_fn=<SelectBackward0>)
tensor([ 5.0000e-01,  4.3700e-04, -9.5000e-05])
tensor([1.3646, 0.0999, 0.1102], grad_fn=<SelectBackward0>)
tensor([ 9.7500e-01,  2.6300e-04, -1.9940e-03])
tensor([ 0.7704,  0.0678, -0.0832], grad_fn=<SelectBackward0>)
tensor([ 8.0000e-01,  3.6000e-04, -1.8880e-03])
tensor([ 0.3752,  0.0438, -0.0861], grad_fn=<SelectBackward0>)
tensor([0.3250, 0.0004, 0.0031])
tensor([0.6375, 0.0581, 0.07

Epoch 3/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0806]

tensor([0.1376, 0.0704, 0.0246], grad_fn=<SelectBackward0>)
tensor([ 7.5000e-02,  7.8000e-05, -4.1100e-04])
tensor([ 0.0900, -0.0084,  0.0082], grad_fn=<SelectBackward0>)
tensor([1.2500e-01, 5.9000e-05, 9.0400e-04])
tensor([ 0.6231,  0.1043, -0.0083], grad_fn=<SelectBackward0>)
tensor([ 5.2500e-01,  4.2200e-04, -4.6300e-04])
tensor([0.7199, 0.0140, 0.0292], grad_fn=<SelectBackward0>)
tensor([ 5.7500e-01,  3.2000e-04, -9.2700e-04])
tensor([ 0.6161,  0.0429, -0.0138], grad_fn=<SelectBackward0>)
tensor([7.0000e-01, 1.9100e-04, 4.2020e-03])


Epoch 3/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0546]

tensor([0.7429, 0.0997, 0.0254], grad_fn=<SelectBackward0>)
tensor([7.7500e-01, 4.2400e-04, 3.3800e-04])
tensor([ 0.8174,  0.1234, -0.0089], grad_fn=<SelectBackward0>)
tensor([ 8.0000e-01,  3.6000e-04, -1.8880e-03])
tensor([ 0.6582, -0.0366,  0.0850], grad_fn=<SelectBackward0>)
tensor([6.5000e-01, 3.5300e-04, 1.5910e-03])


                                                                          

tensor([ 0.7931,  0.0150, -0.0219], grad_fn=<SelectBackward0>)
tensor([9.0000e-01, 3.1800e-04, 2.7290e-03])
tensor([ 1.0348, -0.0379, -0.0045], grad_fn=<SelectBackward0>)
tensor([ 8.2500e-01,  1.8000e-04, -3.8030e-03])
tensor([ 0.5106, -0.0741, -0.0362], grad_fn=<SelectBackward0>)
tensor([ 6.0000e-01,  1.5000e-04, -4.4040e-03])
tensor([ 0.8308, -0.0079, -0.0604], grad_fn=<SelectBackward0>)
tensor([8.5000e-01, 2.8700e-04, 2.7440e-03])
tensor([ 0.3142, -0.0510, -0.0527], grad_fn=<SelectBackward0>)
tensor([3.5000e-01, 2.6800e-04, 3.5750e-03])
tensor([ 0.6571, -0.0732, -0.0138], grad_fn=<SelectBackward0>)
tensor([ 6.0000e-01,  1.6000e-04, -4.0510e-03])
tensor([ 0.4012,  0.0234, -0.0540], grad_fn=<SelectBackward0>)
tensor([ 3.5000e-01,  1.0300e-04, -1.0960e-03])
tensor([ 0.0495, -0.0842,  0.0010], grad_fn=<SelectBackward0>)
tensor([ 1.7500e-01,  1.2700e-04, -1.4920e-03])
tensor([ 0.8315, -0.0569,  0.0838], grad_fn=<SelectBackward0>)
tensor([9.0000e-01, 2.0400e-04, 4.0050e-03])
tensor([ 0.05

Epoch 4/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0252]

tensor([ 0.6908, -0.0535, -0.0010], grad_fn=<SelectBackward0>)
tensor([6.7500e-01, 1.5000e-04, 1.0000e-04])
tensor([ 0.9292,  0.0320, -0.0136], grad_fn=<SelectBackward0>)
tensor([8.2500e-01, 3.3800e-04, 3.1520e-03])
tensor([ 0.5426,  0.0064, -0.0177], grad_fn=<SelectBackward0>)
tensor([5.5000e-01, 1.5800e-04, 3.6290e-03])
tensor([ 0.1376, -0.0251,  0.0676], grad_fn=<SelectBackward0>)
tensor([ 5.0000e-02,  9.1000e-05, -1.6940e-03])
tensor([ 0.9452, -0.1116,  0.0674], grad_fn=<SelectBackward0>)
tensor([ 9.0000e-01,  2.0600e-04, -2.8410e-03])
tensor([ 0.6344, -0.0397,  0.0724], grad_fn=<SelectBackward0>)
tensor([6.7500e-01, 1.3100e-04, 1.4240e-03])
tensor([ 0.1941,  0.0269, -0.0061], grad_fn=<SelectBackward0>)
tensor([ 0.2000,  0.0004, -0.0025])
tensor([ 0.2169,  0.0467, -0.0106], grad_fn=<SelectBackward0>)
tensor([0.2250, 0.0004, 0.0022])


Epoch 4/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0324]

tensor([ 0.5156,  0.0444, -0.0038], grad_fn=<SelectBackward0>)
tensor([ 5.7500e-01,  1.9200e-04, -1.6970e-03])
tensor([ 0.8688, -0.0280,  0.0296], grad_fn=<SelectBackward0>)
tensor([9.2500e-01, 3.6700e-04, 3.4350e-03])
tensor([ 0.3400, -0.0170,  0.0312], grad_fn=<SelectBackward0>)
tensor([4.7500e-01, 2.2100e-04, 7.7700e-04])
tensor([ 0.3543, -0.0309,  0.0378], grad_fn=<SelectBackward0>)
tensor([ 3.7500e-01,  1.4700e-04, -4.2080e-03])
tensor([ 0.6550, -0.0145,  0.0083], grad_fn=<SelectBackward0>)
tensor([8.2500e-01, 1.8700e-04, 4.3000e-03])


Epoch 4/500:  73%|███████▎  | 16/22 [00:00<00:00, 151.64it/s, loss=0.0199]

tensor([ 0.7062,  0.0992, -0.0083], grad_fn=<SelectBackward0>)
tensor([8.0000e-01, 1.9300e-04, 7.9100e-04])
tensor([ 0.9878,  0.0064, -0.0460], grad_fn=<SelectBackward0>)
tensor([9.5000e-01, 6.2000e-05, 4.4300e-04])
tensor([ 0.4145,  0.0263, -0.0189], grad_fn=<SelectBackward0>)
tensor([4.5000e-01, 3.4600e-04, 2.4810e-03])


                                                                          

tensor([ 0.3305, -0.0197, -0.0504], grad_fn=<SelectBackward0>)
tensor([ 3.5000e-01,  2.9500e-04, -3.2650e-03])
tensor([6.7091e-01, 1.2434e-04, 5.8902e-02], grad_fn=<SelectBackward0>)
tensor([ 7.5000e-01,  1.7300e-04, -4.0240e-03])
tensor([0.8371, 0.0064, 0.0280], grad_fn=<SelectBackward0>)
tensor([ 8.7500e-01,  1.9500e-04, -2.2600e-04])
tensor([ 0.1277, -0.0307, -0.0047], grad_fn=<SelectBackward0>)
tensor([7.5000e-02, 6.0000e-05, 4.2210e-03])
tensor([ 0.3788, -0.0020, -0.0530], grad_fn=<SelectBackward0>)
tensor([3.2500e-01, 2.2000e-04, 8.5900e-04])
tensor([ 0.7998, -0.0020,  0.0505], grad_fn=<SelectBackward0>)
tensor([ 6.2500e-01,  3.1500e-04, -5.9600e-04])
tensor([ 0.7857, -0.0173, -0.0029])
tensor([ 8.5000e-01,  1.0300e-04, -3.3150e-03])
tensor([ 0.1141, -0.0181, -0.0069])
tensor([1.5000e-01, 1.3500e-04, 4.3550e-03])
tensor([ 0.2772,  0.0030, -0.0157])
tensor([3.0000e-01, 1.5900e-04, 1.2170e-03])
tensor([ 0.2665, -0.0026, -0.0308])
tensor([ 0.3500,  0.0004, -0.0008])
tensor([ 0.0990,

Epoch 5/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.014] 

tensor([ 0.5427, -0.0142,  0.0476], grad_fn=<SelectBackward0>)
tensor([ 5.7500e-01,  2.3700e-04, -3.3520e-03])
tensor([0.7772, 0.0481, 0.0359], grad_fn=<SelectBackward0>)
tensor([ 7.5000e-01,  9.0000e-05, -2.5380e-03])
tensor([ 0.8293,  0.0045, -0.0484], grad_fn=<SelectBackward0>)
tensor([ 9.5000e-01,  7.4000e-05, -1.1930e-03])
tensor([0.6675, 0.0380, 0.0445], grad_fn=<SelectBackward0>)
tensor([ 5.7500e-01,  2.2100e-04, -3.0690e-03])
tensor([ 0.6578, -0.0024,  0.0159], grad_fn=<SelectBackward0>)
tensor([ 6.2500e-01,  2.8100e-04, -1.3690e-03])
tensor([ 1.0243e-01, -3.6862e-02, -6.5282e-05], grad_fn=<SelectBackward0>)
tensor([ 0.1000,  0.0004, -0.0015])
tensor([0.4726, 0.0098, 0.0114], grad_fn=<SelectBackward0>)
tensor([ 4.0000e-01,  3.5200e-04, -4.8100e-04])


Epoch 5/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0387]

tensor([ 0.6625, -0.0440, -0.0377], grad_fn=<SelectBackward0>)
tensor([7.2500e-01, 1.0700e-04, 2.1790e-03])
tensor([ 0.1685, -0.0085, -0.0555], grad_fn=<SelectBackward0>)
tensor([ 0.2500,  0.0003, -0.0006])
tensor([ 0.7238, -0.0819, -0.0079], grad_fn=<SelectBackward0>)
tensor([6.7500e-01, 4.4700e-04, 1.2040e-03])
tensor([ 0.7135, -0.0113,  0.0076], grad_fn=<SelectBackward0>)
tensor([7.5000e-01, 6.9000e-05, 2.8610e-03])
tensor([ 0.8082, -0.0476, -0.0305], grad_fn=<SelectBackward0>)
tensor([ 8.5000e-01,  1.5900e-04, -2.5980e-03])
tensor([ 0.7733,  0.0679, -0.0284], grad_fn=<SelectBackward0>)
tensor([8.0000e-01, 2.1700e-04, 8.3300e-04])
tensor([ 0.1678, -0.0138,  0.0023], grad_fn=<SelectBackward0>)
tensor([ 0.2500,  0.0004, -0.0022])
tensor([ 0.2553, -0.0150,  0.0159], grad_fn=<SelectBackward0>)
tensor([ 3.0000e-01,  1.3400e-04, -3.9200e-03])


Epoch 5/500:  73%|███████▎  | 16/22 [00:00<00:00, 150.02it/s, loss=0.0273]

tensor([0.4830, 0.1296, 0.0539], grad_fn=<SelectBackward0>)
tensor([ 4.7500e-01,  4.2500e-04, -3.8000e-03])
tensor([ 0.2627,  0.0275, -0.0551], grad_fn=<SelectBackward0>)
tensor([ 2.7500e-01,  1.1500e-04, -4.1320e-03])
tensor([0.7011, 0.0242, 0.0572], grad_fn=<SelectBackward0>)
tensor([7.5000e-01, 7.3000e-05, 1.7900e-03])
tensor([0.1332, 0.0365, 0.0445], grad_fn=<SelectBackward0>)
tensor([ 0.1000,  0.0003, -0.0017])
tensor([ 0.9418, -0.0346, -0.0038], grad_fn=<SelectBackward0>)
tensor([9.2500e-01, 3.8600e-04, 3.7180e-03])


                                                                          

tensor([ 0.6884, -0.0183,  0.0125], grad_fn=<SelectBackward0>)
tensor([7.5000e-01, 2.0100e-04, 3.2180e-03])
tensor([ 0.9794, -0.0117, -0.0018], grad_fn=<SelectBackward0>)
tensor([9.7500e-01, 2.5800e-04, 2.9650e-03])
tensor([ 0.8226, -0.0630, -0.0190])
tensor([ 8.5000e-01,  1.0300e-04, -3.3150e-03])
tensor([0.1006, 0.0092, 0.0182])
tensor([1.5000e-01, 1.3500e-04, 4.3550e-03])
tensor([0.3259, 0.0462, 0.0238])
tensor([3.0000e-01, 1.5900e-04, 1.2170e-03])
tensor([ 0.3306,  0.0007, -0.0190])
tensor([ 0.3500,  0.0004, -0.0008])
tensor([ 0.0966, -0.0173, -0.0300])
tensor([ 0.0750,  0.0002, -0.0027])


Epoch 6/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0134]

tensor([0.3594, 0.0112, 0.0237], grad_fn=<SelectBackward0>)
tensor([ 3.2500e-01,  5.4000e-05, -2.4570e-03])
tensor([0.2753, 0.0027, 0.0255], grad_fn=<SelectBackward0>)
tensor([ 3.0000e-01,  1.3400e-04, -3.9200e-03])
tensor([0.8858, 0.0136, 0.0197], grad_fn=<SelectBackward0>)
tensor([ 9.7500e-01,  5.7000e-05, -4.3090e-03])
tensor([ 1.0702e+00, -8.9876e-05, -3.8334e-02], grad_fn=<SelectBackward0>)
tensor([9.0000e-01, 1.3400e-04, 1.2050e-03])
tensor([ 0.2917,  0.0183, -0.0208], grad_fn=<SelectBackward0>)
tensor([ 4.5000e-01,  7.4000e-05, -3.0400e-03])
tensor([0.8130, 0.0179, 0.0290], grad_fn=<SelectBackward0>)
tensor([7.7500e-01, 4.2400e-04, 3.3800e-04])
tensor([ 0.5889,  0.0346, -0.0012], grad_fn=<SelectBackward0>)
tensor([6.0000e-01, 3.8800e-04, 3.4260e-03])


Epoch 6/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0163]

tensor([ 0.2895, -0.0053,  0.1020], grad_fn=<SelectBackward0>)
tensor([ 3.0000e-01,  1.4000e-04, -2.3370e-03])
tensor([ 0.6094, -0.0243, -0.0460], grad_fn=<SelectBackward0>)
tensor([ 6.2500e-01,  1.0600e-04, -8.6400e-04])
tensor([ 0.1048, -0.0177, -0.0568], grad_fn=<SelectBackward0>)
tensor([0.1250, 0.0004, 0.0029])
tensor([ 0.0842,  0.0196, -0.0338], grad_fn=<SelectBackward0>)
tensor([0.0250, 0.0002, 0.0034])
tensor([ 0.5767, -0.0124,  0.0041], grad_fn=<SelectBackward0>)
tensor([ 5.5000e-01,  1.4500e-04, -3.8370e-03])
tensor([ 0.5555, -0.0075,  0.0055], grad_fn=<SelectBackward0>)
tensor([ 6.2500e-01,  1.8200e-04, -4.4500e-04])
tensor([0.3224, 0.0085, 0.0206], grad_fn=<SelectBackward0>)
tensor([ 0.3000,  0.0004, -0.0022])


                                                                           

tensor([ 0.8606,  0.0295, -0.0019], grad_fn=<SelectBackward0>)
tensor([8.7500e-01, 2.6800e-04, 2.2280e-03])
tensor([ 0.2430, -0.0161, -0.0414], grad_fn=<SelectBackward0>)
tensor([0.1750, 0.0004, 0.0004])
tensor([ 0.8852, -0.0143, -0.0067], grad_fn=<SelectBackward0>)
tensor([9.0000e-01, 1.5300e-04, 1.6050e-03])
tensor([ 0.2687, -0.0066, -0.0008], grad_fn=<SelectBackward0>)
tensor([0.2750, 0.0003, 0.0011])
tensor([ 0.6145,  0.0208, -0.0582], grad_fn=<SelectBackward0>)
tensor([7.0000e-01, 1.1100e-04, 1.1990e-03])
tensor([0.6560, 0.0008, 0.0177], grad_fn=<SelectBackward0>)
tensor([6.5000e-01, 1.3400e-04, 4.0950e-03])
tensor([ 0.4470, -0.0077,  0.0234], grad_fn=<SelectBackward0>)
tensor([ 5.2500e-01,  1.6700e-04, -1.3200e-03])
tensor([0.8909, 0.0401, 0.0720], grad_fn=<SelectBackward0>)
tensor([9.2500e-01, 1.3700e-04, 1.9450e-03])




tensor([ 0.8113, -0.0517, -0.0058])
tensor([ 8.5000e-01,  1.0300e-04, -3.3150e-03])
tensor([0.1032, 0.0187, 0.0134])
tensor([1.5000e-01, 1.3500e-04, 4.3550e-03])
tensor([0.3384, 0.0428, 0.0258])
tensor([3.0000e-01, 1.5900e-04, 1.2170e-03])
tensor([ 0.3437,  0.0008, -0.0155])
tensor([ 0.3500,  0.0004, -0.0008])
tensor([0.1066, 0.0171, 0.0046])
tensor([ 0.0750,  0.0002, -0.0027])


Epoch 7/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0259]

tensor([ 0.8924,  0.0310, -0.0274], grad_fn=<SelectBackward0>)
tensor([8.5000e-01, 2.7400e-04, 3.1270e-03])
tensor([0.6220, 0.0059, 0.0050], grad_fn=<SelectBackward0>)
tensor([6.5000e-01, 3.8300e-04, 1.3290e-03])
tensor([ 0.3757, -0.0040,  0.0354], grad_fn=<SelectBackward0>)
tensor([3.2500e-01, 7.1000e-05, 1.2590e-03])


Epoch 7/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0368]

tensor([ 0.6173,  0.0041, -0.0413], grad_fn=<SelectBackward0>)
tensor([6.7500e-01, 2.4200e-04, 2.9320e-03])
tensor([ 0.9536, -0.0173, -0.0104], grad_fn=<SelectBackward0>)
tensor([ 9.5000e-01,  2.6200e-04, -4.4790e-03])
tensor([0.3103, 0.0114, 0.0346], grad_fn=<SelectBackward0>)
tensor([2.2500e-01, 2.1900e-04, 4.4020e-03])
tensor([ 0.6031, -0.0530, -0.0132], grad_fn=<SelectBackward0>)
tensor([5.2500e-01, 1.9800e-04, 3.4610e-03])
tensor([ 0.7175, -0.0203, -0.0525], grad_fn=<SelectBackward0>)
tensor([7.2500e-01, 2.1000e-04, 1.7840e-03])
tensor([ 0.5850,  0.0207, -0.0219], grad_fn=<SelectBackward0>)
tensor([6.5000e-01, 4.4000e-04, 4.1280e-03])
tensor([ 0.2716, -0.0120, -0.0141], grad_fn=<SelectBackward0>)
tensor([0.2750, 0.0004, 0.0040])


Epoch 7/500:  59%|█████▉    | 13/22 [00:00<00:00, 122.81it/s, loss=0.00949]

tensor([0.2208, 0.0103, 0.0179], grad_fn=<SelectBackward0>)
tensor([0.3000, 0.0004, 0.0040])
tensor([0.3850, 0.0293, 0.0548], grad_fn=<SelectBackward0>)
tensor([3.7500e-01, 1.0100e-04, 5.3200e-04])
tensor([ 0.9187, -0.0269, -0.0593], grad_fn=<SelectBackward0>)
tensor([8.0000e-01, 1.4700e-04, 1.6100e-04])
tensor([0.4578, 0.1245, 0.0495], grad_fn=<SelectBackward0>)
tensor([0.4250, 0.0004, 0.0021])
tensor([ 0.6728, -0.0174, -0.0151], grad_fn=<SelectBackward0>)
tensor([6.7500e-01, 3.1400e-04, 8.7100e-04])
tensor([ 0.3193, -0.0181, -0.0262], grad_fn=<SelectBackward0>)
tensor([3.2500e-01, 2.2000e-04, 8.5900e-04])
tensor([ 0.3933, -0.0252, -0.0135], grad_fn=<SelectBackward0>)
tensor([ 4.0000e-01,  2.2200e-04, -3.6680e-03])
tensor([0.5249, 0.0207, 0.0085], grad_fn=<SelectBackward0>)
tensor([6.0000e-01, 2.0400e-04, 1.6010e-03])


                                                                           

tensor([ 0.3811, -0.0036, -0.0014], grad_fn=<SelectBackward0>)
tensor([ 4.0000e-01,  2.7400e-04, -3.7390e-03])
tensor([ 0.2445,  0.0672, -0.0010], grad_fn=<SelectBackward0>)
tensor([ 2.0000e-01,  1.5600e-04, -1.3590e-03])
tensor([ 0.2303, -0.0298, -0.0359], grad_fn=<SelectBackward0>)
tensor([ 2.5000e-01,  1.5400e-04, -1.2670e-03])
tensor([0.5690, 0.0118, 0.0092], grad_fn=<SelectBackward0>)
tensor([5.5000e-01, 4.2300e-04, 2.6440e-03])
tensor([ 0.7634, -0.0236, -0.0195])
tensor([ 8.5000e-01,  1.0300e-04, -3.3150e-03])
tensor([ 0.0998,  0.0132, -0.0041])
tensor([1.5000e-01, 1.3500e-04, 4.3550e-03])
tensor([0.3219, 0.0278, 0.0314])
tensor([3.0000e-01, 1.5900e-04, 1.2170e-03])
tensor([ 0.3403, -0.0290, -0.0514])
tensor([ 0.3500,  0.0004, -0.0008])
tensor([ 0.0585, -0.0210, -0.0276])
tensor([ 0.0750,  0.0002, -0.0027])


Epoch 8/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0105]

tensor([ 0.1837, -0.0185,  0.0010], grad_fn=<SelectBackward0>)
tensor([ 0.1750,  0.0002, -0.0008])
tensor([0.8460, 0.0053, 0.0183], grad_fn=<SelectBackward0>)
tensor([ 8.7500e-01,  2.6800e-04, -3.2320e-03])
tensor([ 0.8921, -0.0234,  0.0279], grad_fn=<SelectBackward0>)
tensor([9.0000e-01, 7.8000e-05, 2.5040e-03])
tensor([ 0.5553, -0.0370,  0.0248], grad_fn=<SelectBackward0>)
tensor([5.2500e-01, 3.3700e-04, 2.8270e-03])
tensor([ 0.4156, -0.0443, -0.0242], grad_fn=<SelectBackward0>)
tensor([ 4.0000e-01,  3.5200e-04, -4.8100e-04])


Epoch 8/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0208]

tensor([ 0.8762, -0.0117,  0.0279], grad_fn=<SelectBackward0>)
tensor([8.7500e-01, 1.7600e-04, 3.5970e-03])
tensor([ 0.3561,  0.0134, -0.0175], grad_fn=<SelectBackward0>)
tensor([0.3500, 0.0004, 0.0035])
tensor([ 0.8803,  0.0564, -0.0228], grad_fn=<SelectBackward0>)
tensor([ 9.5000e-01,  9.7000e-05, -1.4830e-03])
tensor([ 7.9672e-02, -5.9197e-05,  2.4455e-02], grad_fn=<SelectBackward0>)
tensor([0.1000, 0.0004, 0.0036])
tensor([ 0.7877, -0.0220, -0.0062], grad_fn=<SelectBackward0>)
tensor([ 7.2500e-01,  2.0500e-04, -3.9570e-03])


Epoch 8/500:  68%|██████▊   | 15/22 [00:00<00:00, 144.89it/s, loss=0.0166]

tensor([ 0.0839,  0.0253, -0.0570], grad_fn=<SelectBackward0>)
tensor([0.0250, 0.0004, 0.0035])
tensor([ 0.6113, -0.0198, -0.0098], grad_fn=<SelectBackward0>)
tensor([6.2500e-01, 3.9800e-04, 1.6330e-03])
tensor([ 0.5861, -0.0072, -0.0330], grad_fn=<SelectBackward0>)
tensor([ 5.5000e-01,  1.4500e-04, -3.8370e-03])
tensor([0.5054, 0.0118, 0.0079], grad_fn=<SelectBackward0>)
tensor([4.5000e-01, 3.2700e-04, 3.3130e-03])
tensor([ 0.3925, -0.0327, -0.0637], grad_fn=<SelectBackward0>)
tensor([0.4250, 0.0004, 0.0021])
tensor([-0.0676, -0.0005, -0.0710], grad_fn=<SelectBackward0>)
tensor([0.0250, 0.0003, 0.0006])
tensor([ 0.2132, -0.0300, -0.0285], grad_fn=<SelectBackward0>)
tensor([1.7500e-01, 5.9000e-05, 4.3780e-03])


                                                                          

tensor([ 0.2191,  0.0263, -0.0301], grad_fn=<SelectBackward0>)
tensor([ 0.2250,  0.0004, -0.0012])
tensor([ 0.5733, -0.0059,  0.0017], grad_fn=<SelectBackward0>)
tensor([7.2500e-01, 1.7600e-04, 4.1510e-03])
tensor([ 0.3454, -0.0116,  0.0274], grad_fn=<SelectBackward0>)
tensor([3.5000e-01, 2.5900e-04, 3.1010e-03])
tensor([0.9116, 0.0371, 0.0295], grad_fn=<SelectBackward0>)
tensor([ 9.2500e-01,  1.4300e-04, -2.1440e-03])
tensor([ 6.9149e-01, -2.6974e-02,  6.7139e-04], grad_fn=<SelectBackward0>)
tensor([ 6.7500e-01,  3.1100e-04, -4.2020e-03])
tensor([0.8993, 0.0047, 0.0059])
tensor([ 8.5000e-01,  1.0300e-04, -3.3150e-03])
tensor([0.0882, 0.0015, 0.0086])
tensor([1.5000e-01, 1.3500e-04, 4.3550e-03])
tensor([0.2394, 0.0124, 0.0134])
tensor([3.0000e-01, 1.5900e-04, 1.2170e-03])
tensor([ 0.2925,  0.0073, -0.0184])
tensor([ 0.3500,  0.0004, -0.0008])
tensor([ 0.0558, -0.0090, -0.0201])
tensor([ 0.0750,  0.0002, -0.0027])


Epoch 9/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0114]

tensor([0.8423, 0.0667, 0.0480], grad_fn=<SelectBackward0>)
tensor([ 8.2500e-01,  2.4200e-04, -3.4680e-03])
tensor([0.9475, 0.0250, 0.0359], grad_fn=<SelectBackward0>)
tensor([8.7500e-01, 5.2000e-05, 1.0080e-03])


Epoch 9/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.00964]

tensor([ 0.8043,  0.0366, -0.0017], grad_fn=<SelectBackward0>)
tensor([8.5000e-01, 4.3700e-04, 1.3110e-03])
tensor([6.8061e-01, 2.4642e-03, 1.3544e-04], grad_fn=<SelectBackward0>)
tensor([7.0000e-01, 3.4700e-04, 3.2390e-03])
tensor([ 0.9128, -0.0086,  0.0122], grad_fn=<SelectBackward0>)
tensor([ 8.7500e-01,  4.3700e-04, -1.6100e-04])
tensor([0.3428, 0.0028, 0.0061], grad_fn=<SelectBackward0>)
tensor([ 0.3000,  0.0004, -0.0022])


Epoch 9/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0126] 

tensor([ 0.2038,  0.0228, -0.0247], grad_fn=<SelectBackward0>)
tensor([ 0.2000,  0.0004, -0.0009])
tensor([ 0.0997, -0.0096,  0.0176], grad_fn=<SelectBackward0>)
tensor([7.5000e-02, 6.0000e-05, 4.2210e-03])
tensor([ 0.0876, -0.0158,  0.0111], grad_fn=<SelectBackward0>)
tensor([0.0500, 0.0004, 0.0037])
tensor([0.1673, 0.0408, 0.0363], grad_fn=<SelectBackward0>)
tensor([0.1500, 0.0003, 0.0032])
tensor([ 0.4248, -0.0168,  0.0177], grad_fn=<SelectBackward0>)
tensor([ 5.2500e-01,  4.2200e-04, -4.6300e-04])


Epoch 9/500:  68%|██████▊   | 15/22 [00:00<00:00, 143.19it/s, loss=0.0139]

tensor([0.1060, 0.0361, 0.0365], grad_fn=<SelectBackward0>)
tensor([1.5000e-01, 6.5000e-05, 3.8150e-03])
tensor([0.1878, 0.0543, 0.0499], grad_fn=<SelectBackward0>)
tensor([1.5000e-01, 7.2000e-05, 5.1200e-04])
tensor([ 0.9660, -0.0420, -0.0311], grad_fn=<SelectBackward0>)
tensor([9.7500e-01, 2.6900e-04, 3.1370e-03])
tensor([ 0.1539,  0.0072, -0.0421], grad_fn=<SelectBackward0>)
tensor([2.0000e-01, 1.5700e-04, 1.0630e-03])
tensor([ 5.1482e-01, -2.2174e-04, -9.1151e-02], grad_fn=<SelectBackward0>)
tensor([ 5.7500e-01,  4.0800e-04, -1.2700e-03])


                                                                          

tensor([ 0.7176, -0.0216, -0.0165], grad_fn=<SelectBackward0>)
tensor([ 7.2500e-01,  2.4700e-04, -3.2900e-04])
tensor([0.5686, 0.0471, 0.0705], grad_fn=<SelectBackward0>)
tensor([6.0000e-01, 3.8800e-04, 3.4260e-03])
tensor([ 0.4164, -0.0494,  0.0138], grad_fn=<SelectBackward0>)
tensor([ 0.3750,  0.0004, -0.0026])
tensor([0.2109, 0.0251, 0.0021], grad_fn=<SelectBackward0>)
tensor([ 0.2250,  0.0004, -0.0038])
tensor([ 0.9426, -0.0219,  0.0959], grad_fn=<SelectBackward0>)
tensor([9.0000e-01, 1.8600e-04, 2.3810e-03])
tensor([ 0.2522,  0.0033, -0.0318], grad_fn=<SelectBackward0>)
tensor([ 2.7500e-01,  8.2000e-05, -2.7010e-03])
tensor([ 0.7457,  0.0128, -0.0348])
tensor([ 8.5000e-01,  1.0300e-04, -3.3150e-03])
tensor([0.1269, 0.0077, 0.0039])
tensor([1.5000e-01, 1.3500e-04, 4.3550e-03])
tensor([ 0.2315,  0.0062, -0.0020])
tensor([3.0000e-01, 1.5900e-04, 1.2170e-03])
tensor([ 0.3238, -0.0196, -0.0364])
tensor([ 0.3500,  0.0004, -0.0008])
tensor([ 0.0779,  0.0122, -0.0370])
tensor([ 0.0750,  0

Epoch 10/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0161]

tensor([0.1584, 0.0125, 0.0365], grad_fn=<SelectBackward0>)
tensor([ 1.5000e-01,  1.1300e-04, -3.3580e-03])
tensor([8.3131e-01, 1.8608e-04, 4.3719e-03], grad_fn=<SelectBackward0>)
tensor([ 8.5000e-01,  1.5900e-04, -2.5980e-03])
tensor([ 5.1437e-01,  1.7963e-02, -7.6890e-06], grad_fn=<SelectBackward0>)
tensor([ 5.2500e-01,  7.6000e-05, -4.4300e-03])
tensor([ 0.6469, -0.0274, -0.0363], grad_fn=<SelectBackward0>)
tensor([ 6.2500e-01,  4.2000e-04, -1.8790e-03])
tensor([0.6433, 0.0024, 0.0273], grad_fn=<SelectBackward0>)
tensor([6.2500e-01, 2.1100e-04, 1.2430e-03])
tensor([0.1716, 0.0398, 0.0354], grad_fn=<SelectBackward0>)
tensor([ 0.1500,  0.0004, -0.0034])


Epoch 10/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.016] 

tensor([ 0.0364, -0.0352,  0.1058], grad_fn=<SelectBackward0>)
tensor([2.5000e-02, 4.4800e-04, 9.1000e-05])
tensor([0.0791, 0.0395, 0.0182], grad_fn=<SelectBackward0>)
tensor([0.1750, 0.0003, 0.0003])


Epoch 10/500:   0%|          | 0/22 [00:00<?, ?it/s, loss=0.0118]

tensor([0.9624, 0.0169, 0.0233], grad_fn=<SelectBackward0>)
tensor([9.7500e-01, 8.1000e-05, 3.5490e-03])
tensor([0.3851, 0.0216, 0.0247], grad_fn=<SelectBackward0>)
tensor([3.2500e-01, 7.1000e-05, 1.2590e-03])
tensor([0.2302, 0.0191, 0.0867], grad_fn=<SelectBackward0>)
tensor([ 3.0000e-01,  1.3400e-04, -3.9200e-03])
tensor([ 0.7020, -0.0402, -0.0162], grad_fn=<SelectBackward0>)
tensor([7.5000e-01, 7.3000e-05, 1.7900e-03])


Epoch 10/500:  64%|██████▎   | 14/22 [00:00<00:00, 129.57it/s, loss=0.0105]

tensor([ 0.4272, -0.0243,  0.0065], grad_fn=<SelectBackward0>)
tensor([ 4.2500e-01,  3.3300e-04, -2.3220e-03])
tensor([0.2182, 0.0076, 0.0166], grad_fn=<SelectBackward0>)
tensor([2.2500e-01, 7.3000e-05, 3.4120e-03])
tensor([-0.0031, -0.0071, -0.0235], grad_fn=<SelectBackward0>)
tensor([ 0.0500,  0.0004, -0.0003])
tensor([ 3.8755e-01, -2.2647e-02,  1.5231e-04], grad_fn=<SelectBackward0>)
tensor([4.2500e-01, 3.5100e-04, 2.2600e-03])


Epoch 10/500:  64%|██████▎   | 14/22 [00:00<00:00, 129.57it/s, loss=0.0278]

tensor([0.9612, 0.0019, 0.0196], grad_fn=<SelectBackward0>)
tensor([ 9.7500e-01,  6.3000e-05, -2.1470e-03])
tensor([ 0.0748, -0.0078, -0.0361], grad_fn=<SelectBackward0>)
tensor([ 1.5000e-01,  7.5000e-05, -2.7210e-03])
tensor([ 0.5033,  0.0418, -0.0089], grad_fn=<SelectBackward0>)
tensor([ 5.2500e-01,  3.1200e-04, -4.1710e-03])
tensor([ 0.4609, -0.0121,  0.0035], grad_fn=<SelectBackward0>)
tensor([4.2500e-01, 4.0300e-04, 2.2430e-03])


                                                                           

tensor([ 0.5502, -0.0362, -0.0274], grad_fn=<SelectBackward0>)
tensor([5.2500e-01, 1.5900e-04, 3.5200e-04])
tensor([ 0.0479, -0.0151, -0.0407], grad_fn=<SelectBackward0>)
tensor([ 5.0000e-02,  9.1000e-05, -1.6940e-03])
tensor([0.8258, 0.0383, 0.0381])
tensor([ 8.5000e-01,  1.0300e-04, -3.3150e-03])
tensor([ 0.1228, -0.0041, -0.0240])
tensor([1.5000e-01, 1.3500e-04, 4.3550e-03])
tensor([ 0.2670, -0.0019,  0.0281])
tensor([3.0000e-01, 1.5900e-04, 1.2170e-03])
tensor([ 0.3301, -0.0188, -0.0440])
tensor([ 0.3500,  0.0004, -0.0008])


KeyboardInterrupt: 