In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.nn as nn
from tqdm import tqdm  # Import tqdm for progress bar

# Define a transform to normalize the data
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    
])

# Load the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Convert datasets to DataLoader for batching
train_loader = DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

# Extract the data and labels
trainX, trainy = next(iter(train_loader))
testX, testy = next(iter(test_loader))

# Convert labels to numpy arrays (if needed)
trainy = trainy.numpy()
testy = testy.numpy()

# Function to sample a batch
def sample_batch(batch_size, device):
    # Randomly select indices
    indices = torch.randperm(trainX.shape[0])[:batch_size]
    
    # Get the batch of data
    data = trainX[indices].to(device)
    
    # Interpolate the data to 32x32
    data = F.interpolate(data, size=(32, 32), mode='bilinear', align_corners=False)
    
    return data

In [2]:
class DiffusionModel:
    def __init__(self, T: int, model: nn.Module, device: str):
        self.T = T
        self.function_approximator = model  # Corrected variable name
        self.device = device

        self.beta = torch.linspace(0.0001, 0.02, T).to(device)
        self.alpha = 1 - self.beta
        self.alpha_bar = torch.cumprod(self.alpha, dim=0)  # Corrected variable name

    def training(self, batch_size, optimizer):
        x0 = sample_batch(batch_size, self.device)  # Clean Images
        eps = torch.rand_like(x0)  # Noise

        # Corrected: Use torch.long instead of torch.long()
        t = torch.randint(0, self.T, (batch_size,), device=self.device, dtype=torch.long)  # Random timesteps

        # Take one gradient descent step
        alpha_bar_t = self.alpha_bar[t].unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)  # Corrected indexing
        eps_predicted = self.function_approximator(
            torch.sqrt(alpha_bar_t) * x0 + torch.sqrt(1 - alpha_bar_t) * eps, t)
        loss = nn.functional.mse_loss(eps, eps_predicted)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        return loss.item()

    @torch.no_grad()
    def sampling(self, n_samples=1, image_channels=1, img_size=(32, 32), use_tqdm=True):
        x = torch.randn(n_samples, image_channels, img_size[0], img_size[1], device=self.device)  # Random sample

        progress_bar = tqdm if use_tqdm else lambda x: x
        for t in progress_bar(range(self.T, 0, -1)):
            z = torch.randn_like(x) if t > 1 else torch.zeros_like(x)
            t_tensor = torch.ones(n_samples, dtype=torch.long, device=self.device) * t  # Corrected variable name

            beta_t = self.beta[t_tensor - 1].unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
            alpha_t = self.alpha[t_tensor - 1].unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
            alpha_bar_t = self.alpha_bar[t_tensor - 1].unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)

            mean = 1 / torch.sqrt(alpha_t) * (x - ((1 - alpha_t) / torch.sqrt(
                1 - alpha_bar_t)) * self.function_approximator(x, t_tensor - 1))
            sigma = torch.sqrt(beta_t)
            x = mean + sigma * z
        return x


In [4]:
# Define the UNet model (assuming it's defined in unet.py)
from unet import UNet

device = "cuda" if torch.cuda.is_available() else "cpu"
batch_size = 64
model = UNet().to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
diffusion_model = DiffusionModel(1000, model, device)

# Training
for epoch in tqdm(range(40000)):
    loss = diffusion_model.training(batch_size, optimizer)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss}")

  0%|          | 1/40000 [00:00<6:03:25,  1.83it/s]

Epoch 0, Loss: 0.30997204780578613


  0%|          | 101/40000 [00:29<3:09:31,  3.51it/s]

Epoch 100, Loss: 0.022970618680119514


  1%|          | 201/40000 [00:57<3:09:13,  3.51it/s]

Epoch 200, Loss: 0.01759479194879532


  1%|          | 301/40000 [01:26<3:09:36,  3.49it/s]

Epoch 300, Loss: 0.012081791646778584


  1%|          | 401/40000 [01:54<3:08:12,  3.51it/s]

Epoch 400, Loss: 0.013527114875614643


  1%|▏         | 501/40000 [02:23<3:07:55,  3.50it/s]

Epoch 500, Loss: 0.008489023894071579


  2%|▏         | 601/40000 [02:51<3:07:21,  3.50it/s]

Epoch 600, Loss: 0.005259196739643812


  2%|▏         | 701/40000 [03:20<3:08:10,  3.48it/s]

Epoch 700, Loss: 0.00896139070391655


  2%|▏         | 801/40000 [03:49<3:06:18,  3.51it/s]

Epoch 800, Loss: 0.006065023131668568


  2%|▏         | 901/40000 [04:17<3:05:38,  3.51it/s]

Epoch 900, Loss: 0.004726409912109375


  3%|▎         | 1001/40000 [04:46<3:04:54,  3.52it/s]

Epoch 1000, Loss: 0.005042865872383118


  3%|▎         | 1101/40000 [05:14<3:05:08,  3.50it/s]

Epoch 1100, Loss: 0.004475055728107691


  3%|▎         | 1201/40000 [05:43<3:04:23,  3.51it/s]

Epoch 1200, Loss: 0.004907011985778809


  3%|▎         | 1301/40000 [06:11<3:04:00,  3.51it/s]

Epoch 1300, Loss: 0.005500252358615398


  4%|▎         | 1401/40000 [06:40<3:03:24,  3.51it/s]

Epoch 1400, Loss: 0.005866673309355974


  4%|▍         | 1501/40000 [07:08<3:03:25,  3.50it/s]

Epoch 1500, Loss: 0.004579097498208284


  4%|▍         | 1601/40000 [07:37<3:03:29,  3.49it/s]

Epoch 1600, Loss: 0.003147333860397339


  4%|▍         | 1701/40000 [08:05<3:02:17,  3.50it/s]

Epoch 1700, Loss: 0.003612529020756483


  5%|▍         | 1801/40000 [08:34<3:01:34,  3.51it/s]

Epoch 1800, Loss: 0.003053942695260048


  5%|▍         | 1901/40000 [09:02<3:01:11,  3.50it/s]

Epoch 1900, Loss: 0.002923821099102497


  5%|▌         | 2001/40000 [09:31<3:01:56,  3.48it/s]

Epoch 2000, Loss: 0.003610360436141491


  5%|▌         | 2101/40000 [10:00<3:00:09,  3.51it/s]

Epoch 2100, Loss: 0.002707339823246002


  6%|▌         | 2201/40000 [10:28<2:59:51,  3.50it/s]

Epoch 2200, Loss: 0.004389435052871704


  6%|▌         | 2301/40000 [10:57<2:59:09,  3.51it/s]

Epoch 2300, Loss: 0.0038538845255970955


  6%|▌         | 2401/40000 [11:25<2:59:49,  3.48it/s]

Epoch 2400, Loss: 0.0025737956166267395


  6%|▋         | 2501/40000 [11:54<2:58:21,  3.50it/s]

Epoch 2500, Loss: 0.002858712337911129


  7%|▋         | 2601/40000 [12:22<2:57:28,  3.51it/s]

Epoch 2600, Loss: 0.0026122734416276217


  7%|▋         | 2701/40000 [12:51<2:57:10,  3.51it/s]

Epoch 2700, Loss: 0.0033438419923186302


  7%|▋         | 2801/40000 [13:19<2:57:49,  3.49it/s]

Epoch 2800, Loss: 0.003498705802485347


  7%|▋         | 2901/40000 [13:48<2:56:20,  3.51it/s]

Epoch 2900, Loss: 0.0021552471444010735


  8%|▊         | 3001/40000 [14:17<2:55:45,  3.51it/s]

Epoch 3000, Loss: 0.003231316339224577


  8%|▊         | 3101/40000 [14:45<2:55:29,  3.50it/s]

Epoch 3100, Loss: 0.002885356778278947


  8%|▊         | 3201/40000 [15:14<2:55:28,  3.50it/s]

Epoch 3200, Loss: 0.003639606758952141


  8%|▊         | 3301/40000 [15:42<2:55:21,  3.49it/s]

Epoch 3300, Loss: 0.0023367328103631735


  9%|▊         | 3401/40000 [16:11<2:54:07,  3.50it/s]

Epoch 3400, Loss: 0.003985877148807049


  9%|▉         | 3501/40000 [16:39<2:53:24,  3.51it/s]

Epoch 3500, Loss: 0.0025132393930107355


  9%|▉         | 3601/40000 [17:08<2:53:07,  3.50it/s]

Epoch 3600, Loss: 0.002697772579267621


  9%|▉         | 3701/40000 [17:37<2:53:36,  3.48it/s]

Epoch 3700, Loss: 0.0022434222046285868


 10%|▉         | 3801/40000 [18:05<2:51:54,  3.51it/s]

Epoch 3800, Loss: 0.0025843100156635046


 10%|▉         | 3901/40000 [18:34<2:51:31,  3.51it/s]

Epoch 3900, Loss: 0.0021087881177663803


 10%|█         | 4001/40000 [19:02<2:51:04,  3.51it/s]

Epoch 4000, Loss: 0.0021203760989010334


 10%|█         | 4101/40000 [19:31<2:51:33,  3.49it/s]

Epoch 4100, Loss: 0.002163032768294215


 11%|█         | 4201/40000 [19:59<2:50:30,  3.50it/s]

Epoch 4200, Loss: 0.003541953396052122


 11%|█         | 4301/40000 [20:28<2:49:39,  3.51it/s]

Epoch 4300, Loss: 0.0029879040084779263


 11%|█         | 4401/40000 [20:56<2:49:25,  3.50it/s]

Epoch 4400, Loss: 0.002650376409292221


 11%|█▏        | 4501/40000 [21:25<2:49:45,  3.49it/s]

Epoch 4500, Loss: 0.002949169371277094


 12%|█▏        | 4601/40000 [21:54<2:48:18,  3.51it/s]

Epoch 4600, Loss: 0.0027883690781891346


 12%|█▏        | 4701/40000 [22:22<2:47:44,  3.51it/s]

Epoch 4700, Loss: 0.003151970449835062


 12%|█▏        | 4801/40000 [22:51<2:47:14,  3.51it/s]

Epoch 4800, Loss: 0.0030222423374652863


 12%|█▏        | 4901/40000 [23:19<2:47:46,  3.49it/s]

Epoch 4900, Loss: 0.0033233067952096462


 13%|█▎        | 5001/40000 [23:48<2:46:23,  3.51it/s]

Epoch 5000, Loss: 0.0024319656658917665


 13%|█▎        | 5101/40000 [24:16<2:45:35,  3.51it/s]

Epoch 5100, Loss: 0.00227846996858716


 13%|█▎        | 5201/40000 [24:45<2:45:17,  3.51it/s]

Epoch 5200, Loss: 0.0017919789534062147


 13%|█▎        | 5301/40000 [25:13<2:45:14,  3.50it/s]

Epoch 5300, Loss: 0.002134158043190837


 14%|█▎        | 5401/40000 [25:42<2:45:38,  3.48it/s]

Epoch 5400, Loss: 0.0020773899741470814


 14%|█▍        | 5501/40000 [26:11<2:43:49,  3.51it/s]

Epoch 5500, Loss: 0.002483947668224573


 14%|█▍        | 5601/40000 [26:39<2:43:29,  3.51it/s]

Epoch 5600, Loss: 0.0019916933961212635


 14%|█▍        | 5701/40000 [27:08<2:43:04,  3.51it/s]

Epoch 5700, Loss: 0.003265746869146824


 15%|█▍        | 5801/40000 [27:36<2:43:37,  3.48it/s]

Epoch 5800, Loss: 0.0015413683140650392


 15%|█▍        | 5901/40000 [28:05<2:42:15,  3.50it/s]

Epoch 5900, Loss: 0.0017008241266012192


 15%|█▌        | 6001/40000 [28:33<2:41:33,  3.51it/s]

Epoch 6000, Loss: 0.002383158542215824


 15%|█▌        | 6101/40000 [29:02<2:40:59,  3.51it/s]

Epoch 6100, Loss: 0.002503438387066126


 16%|█▌        | 6201/40000 [29:30<2:41:20,  3.49it/s]

Epoch 6200, Loss: 0.00163079216144979


 16%|█▌        | 6301/40000 [29:59<2:40:10,  3.51it/s]

Epoch 6300, Loss: 0.001914815860800445


 16%|█▌        | 6401/40000 [30:28<2:39:43,  3.51it/s]

Epoch 6400, Loss: 0.0021953447721898556


 16%|█▋        | 6501/40000 [30:56<2:39:14,  3.51it/s]

Epoch 6500, Loss: 0.0016103899106383324


 17%|█▋        | 6601/40000 [31:25<2:39:56,  3.48it/s]

Epoch 6600, Loss: 0.0023317502345889807


 17%|█▋        | 6701/40000 [31:53<2:38:20,  3.50it/s]

Epoch 6700, Loss: 0.001722351647913456


 17%|█▋        | 6801/40000 [32:22<2:37:39,  3.51it/s]

Epoch 6800, Loss: 0.002320373198017478


 17%|█▋        | 6901/40000 [32:50<2:37:20,  3.51it/s]

Epoch 6900, Loss: 0.0024949645157903433


 18%|█▊        | 7001/40000 [33:19<2:37:35,  3.49it/s]

Epoch 7000, Loss: 0.0021484503522515297


 18%|█▊        | 7101/40000 [33:48<2:36:19,  3.51it/s]

Epoch 7100, Loss: 0.0024776607751846313


 18%|█▊        | 7201/40000 [34:16<2:35:56,  3.51it/s]

Epoch 7200, Loss: 0.001847767038270831


 18%|█▊        | 7301/40000 [34:45<2:35:18,  3.51it/s]

Epoch 7300, Loss: 0.003087929915636778


 19%|█▊        | 7401/40000 [35:13<2:34:54,  3.51it/s]

Epoch 7400, Loss: 0.0019145696423947811


 19%|█▉        | 7501/40000 [35:42<2:35:21,  3.49it/s]

Epoch 7500, Loss: 0.0026008691638708115


 19%|█▉        | 7601/40000 [36:10<2:33:58,  3.51it/s]

Epoch 7600, Loss: 0.0013678737450391054


 19%|█▉        | 7701/40000 [36:39<2:33:26,  3.51it/s]

Epoch 7700, Loss: 0.0012887767516076565


 20%|█▉        | 7801/40000 [37:07<2:32:53,  3.51it/s]

Epoch 7800, Loss: 0.0014804329257458448


 20%|█▉        | 7901/40000 [37:36<2:33:36,  3.48it/s]

Epoch 7900, Loss: 0.002089393325150013


 20%|██        | 8001/40000 [38:04<2:31:56,  3.51it/s]

Epoch 8000, Loss: 0.0028217611834406853


 20%|██        | 8101/40000 [38:33<2:31:26,  3.51it/s]

Epoch 8100, Loss: 0.001777334720827639


 21%|██        | 8201/40000 [39:01<2:31:19,  3.50it/s]

Epoch 8200, Loss: 0.0012659095227718353


 21%|██        | 8301/40000 [39:30<2:31:47,  3.48it/s]

Epoch 8300, Loss: 0.002241535112261772


 21%|██        | 8401/40000 [39:59<2:30:04,  3.51it/s]

Epoch 8400, Loss: 0.0016264839796349406


 21%|██▏       | 8501/40000 [40:27<2:29:33,  3.51it/s]

Epoch 8500, Loss: 0.002018118277192116


 22%|██▏       | 8601/40000 [40:56<2:29:15,  3.51it/s]

Epoch 8600, Loss: 0.001452352269552648


 22%|██▏       | 8701/40000 [41:24<2:29:35,  3.49it/s]

Epoch 8700, Loss: 0.0017675074050202966


 22%|██▏       | 8801/40000 [41:53<2:28:20,  3.51it/s]

Epoch 8800, Loss: 0.0018121080938726664


 22%|██▏       | 8901/40000 [42:21<2:27:39,  3.51it/s]

Epoch 8900, Loss: 0.002235447522252798


 23%|██▎       | 9001/40000 [42:50<2:27:15,  3.51it/s]

Epoch 9000, Loss: 0.0020265195053070784


 23%|██▎       | 9101/40000 [43:18<2:27:27,  3.49it/s]

Epoch 9100, Loss: 0.0020784297958016396


 23%|██▎       | 9201/40000 [43:47<2:26:22,  3.51it/s]

Epoch 9200, Loss: 0.0017607774352654815


 23%|██▎       | 9301/40000 [44:16<2:25:51,  3.51it/s]

Epoch 9300, Loss: 0.0018873726949095726


 24%|██▎       | 9401/40000 [44:44<2:25:27,  3.51it/s]

Epoch 9400, Loss: 0.002542879432439804


 24%|██▍       | 9501/40000 [45:13<2:24:53,  3.51it/s]

Epoch 9500, Loss: 0.0016013466520234942


 24%|██▍       | 9601/40000 [45:41<2:25:23,  3.48it/s]

Epoch 9600, Loss: 0.0021117727737873793


 24%|██▍       | 9701/40000 [46:10<2:23:53,  3.51it/s]

Epoch 9700, Loss: 0.002490617800503969


 25%|██▍       | 9801/40000 [46:38<2:23:43,  3.50it/s]

Epoch 9800, Loss: 0.0016099221538752317


 25%|██▍       | 9901/40000 [47:07<2:23:14,  3.50it/s]

Epoch 9900, Loss: 0.0011326964013278484


 25%|██▌       | 10001/40000 [47:36<2:23:15,  3.49it/s]

Epoch 10000, Loss: 0.0016105282120406628


 25%|██▌       | 10101/40000 [48:04<2:22:17,  3.50it/s]

Epoch 10100, Loss: 0.0021082069724798203


 26%|██▌       | 10201/40000 [48:33<2:21:26,  3.51it/s]

Epoch 10200, Loss: 0.0019859408494085073


 26%|██▌       | 10301/40000 [49:01<2:21:08,  3.51it/s]

Epoch 10300, Loss: 0.0028672614134848118


 26%|██▌       | 10401/40000 [49:30<2:21:35,  3.48it/s]

Epoch 10400, Loss: 0.0015221042558550835


 26%|██▋       | 10501/40000 [49:58<2:20:18,  3.50it/s]

Epoch 10500, Loss: 0.0022600055672228336


 27%|██▋       | 10601/40000 [50:27<2:19:40,  3.51it/s]

Epoch 10600, Loss: 0.0020698122680187225


 27%|██▋       | 10701/40000 [50:55<2:19:13,  3.51it/s]

Epoch 10700, Loss: 0.0013610003516077995


 27%|██▋       | 10801/40000 [51:24<2:19:38,  3.48it/s]

Epoch 10800, Loss: 0.002159397816285491


 27%|██▋       | 10901/40000 [51:53<2:18:19,  3.51it/s]

Epoch 10900, Loss: 0.0018706070259213448


 28%|██▊       | 11001/40000 [52:21<2:18:01,  3.50it/s]

Epoch 11000, Loss: 0.001463482272811234


 28%|██▊       | 11101/40000 [52:50<2:17:26,  3.50it/s]

Epoch 11100, Loss: 0.0020529264584183693


 28%|██▊       | 11201/40000 [53:18<2:17:49,  3.48it/s]

Epoch 11200, Loss: 0.0016801069723442197


 28%|██▊       | 11301/40000 [53:47<2:16:24,  3.51it/s]

Epoch 11300, Loss: 0.0020073391497135162


 29%|██▊       | 11401/40000 [54:15<2:15:49,  3.51it/s]

Epoch 11400, Loss: 0.0016806446947157383


 29%|██▉       | 11501/40000 [54:44<2:15:24,  3.51it/s]

Epoch 11500, Loss: 0.0015777882654219866


 29%|██▉       | 11601/40000 [55:12<2:14:48,  3.51it/s]

Epoch 11600, Loss: 0.00160171277821064


 29%|██▉       | 11701/40000 [55:41<2:15:16,  3.49it/s]

Epoch 11700, Loss: 0.0021042623557150364


 30%|██▉       | 11801/40000 [56:10<2:13:55,  3.51it/s]

Epoch 11800, Loss: 0.0014493903145194054


 30%|██▉       | 11901/40000 [56:38<2:13:31,  3.51it/s]

Epoch 11900, Loss: 0.0019769002683460712


 30%|███       | 12001/40000 [57:07<2:13:13,  3.50it/s]

Epoch 12000, Loss: 0.0016242197016254067


 30%|███       | 12101/40000 [57:35<2:13:25,  3.48it/s]

Epoch 12100, Loss: 0.001468147151172161


 31%|███       | 12201/40000 [58:04<2:12:08,  3.51it/s]

Epoch 12200, Loss: 0.0019911304116249084


 31%|███       | 12301/40000 [58:32<2:11:56,  3.50it/s]

Epoch 12300, Loss: 0.0011070300824940205


 31%|███       | 12401/40000 [59:01<2:11:04,  3.51it/s]

Epoch 12400, Loss: 0.001181868021376431


 31%|███▏      | 12501/40000 [59:29<2:11:26,  3.49it/s]

Epoch 12500, Loss: 0.0015809220494702458


 32%|███▏      | 12601/40000 [59:58<2:10:08,  3.51it/s]

Epoch 12600, Loss: 0.001359971472993493


 32%|███▏      | 12701/40000 [1:00:27<2:09:39,  3.51it/s]

Epoch 12700, Loss: 0.0016481275670230389


 32%|███▏      | 12801/40000 [1:00:55<2:09:14,  3.51it/s]

Epoch 12800, Loss: 0.001562468591146171


 32%|███▏      | 12901/40000 [1:01:24<2:09:40,  3.48it/s]

Epoch 12900, Loss: 0.0019229301251471043


 33%|███▎      | 13001/40000 [1:01:52<2:08:13,  3.51it/s]

Epoch 13000, Loss: 0.0014918068191036582


 33%|███▎      | 13101/40000 [1:02:21<2:07:49,  3.51it/s]

Epoch 13100, Loss: 0.0016964514506980777


 33%|███▎      | 13201/40000 [1:02:49<2:07:29,  3.50it/s]

Epoch 13200, Loss: 0.0013316739350557327


 33%|███▎      | 13301/40000 [1:03:18<2:07:46,  3.48it/s]

Epoch 13300, Loss: 0.0015424243174493313


 34%|███▎      | 13401/40000 [1:03:47<2:06:23,  3.51it/s]

Epoch 13400, Loss: 0.0016061203787103295


 34%|███▍      | 13501/40000 [1:04:15<2:05:54,  3.51it/s]

Epoch 13500, Loss: 0.0017446568235754967


 34%|███▍      | 13601/40000 [1:04:44<2:05:27,  3.51it/s]

Epoch 13600, Loss: 0.002020919695496559


 34%|███▍      | 13701/40000 [1:05:12<2:04:59,  3.51it/s]

Epoch 13700, Loss: 0.0022150021977722645


 35%|███▍      | 13801/40000 [1:05:41<2:05:22,  3.48it/s]

Epoch 13800, Loss: 0.0011094518704339862


 35%|███▍      | 13901/40000 [1:06:09<2:03:57,  3.51it/s]

Epoch 13900, Loss: 0.002379655372351408


 35%|███▌      | 14001/40000 [1:06:38<2:03:36,  3.51it/s]

Epoch 14000, Loss: 0.0019288661424070597


 35%|███▌      | 14101/40000 [1:07:06<2:03:06,  3.51it/s]

Epoch 14100, Loss: 0.0011181578738614917


 36%|███▌      | 14201/40000 [1:07:35<2:03:30,  3.48it/s]

Epoch 14200, Loss: 0.0011027480941265821


 36%|███▌      | 14301/40000 [1:08:03<2:02:03,  3.51it/s]

Epoch 14300, Loss: 0.0013538163620978594


 36%|███▌      | 14401/40000 [1:08:32<2:01:34,  3.51it/s]

Epoch 14400, Loss: 0.0015446116449311376


 36%|███▋      | 14501/40000 [1:09:00<2:01:04,  3.51it/s]

Epoch 14500, Loss: 0.0011649143416434526


 37%|███▋      | 14601/40000 [1:09:29<2:01:36,  3.48it/s]

Epoch 14600, Loss: 0.0019972759764641523


 37%|███▋      | 14701/40000 [1:09:58<2:00:14,  3.51it/s]

Epoch 14700, Loss: 0.0013042811769992113


 37%|███▋      | 14801/40000 [1:10:26<1:59:38,  3.51it/s]

Epoch 14800, Loss: 0.0018257144838571548


 37%|███▋      | 14901/40000 [1:10:55<1:59:18,  3.51it/s]

Epoch 14900, Loss: 0.0019003708148375154


 38%|███▊      | 15001/40000 [1:11:23<1:59:21,  3.49it/s]

Epoch 15000, Loss: 0.0024001500569283962


 38%|███▊      | 15101/40000 [1:11:52<1:58:27,  3.50it/s]

Epoch 15100, Loss: 0.0014106174930930138


 38%|███▊      | 15201/40000 [1:12:20<1:57:42,  3.51it/s]

Epoch 15200, Loss: 0.001364637166261673


 38%|███▊      | 15301/40000 [1:12:49<1:57:25,  3.51it/s]

Epoch 15300, Loss: 0.0015390547923743725


 39%|███▊      | 15401/40000 [1:13:17<1:57:45,  3.48it/s]

Epoch 15400, Loss: 0.0018738892395049334


 39%|███▉      | 15501/40000 [1:13:46<1:56:29,  3.51it/s]

Epoch 15500, Loss: 0.0016284891171380877


 39%|███▉      | 15601/40000 [1:14:15<1:55:51,  3.51it/s]

Epoch 15600, Loss: 0.002532106591388583


 39%|███▉      | 15701/40000 [1:14:43<1:55:26,  3.51it/s]

Epoch 15700, Loss: 0.0014944340800866485


 40%|███▉      | 15801/40000 [1:15:12<1:55:07,  3.50it/s]

Epoch 15800, Loss: 0.0016343883471563458


 40%|███▉      | 15901/40000 [1:15:40<1:55:21,  3.48it/s]

Epoch 15900, Loss: 0.0011564225424081087


 40%|████      | 16001/40000 [1:16:09<1:54:02,  3.51it/s]

Epoch 16000, Loss: 0.0013674604706466198


 40%|████      | 16101/40000 [1:16:37<1:53:38,  3.50it/s]

Epoch 16100, Loss: 0.001959521323442459


 41%|████      | 16201/40000 [1:17:06<1:53:10,  3.50it/s]

Epoch 16200, Loss: 0.001439843443222344


 41%|████      | 16301/40000 [1:17:35<1:53:15,  3.49it/s]

Epoch 16300, Loss: 0.0008899945532903075


 41%|████      | 16401/40000 [1:18:03<1:52:16,  3.50it/s]

Epoch 16400, Loss: 0.00159858213737607


 41%|████▏     | 16501/40000 [1:18:32<1:51:40,  3.51it/s]

Epoch 16500, Loss: 0.0011252481490373611


 42%|████▏     | 16601/40000 [1:19:00<1:51:09,  3.51it/s]

Epoch 16600, Loss: 0.0011198313441127539


 42%|████▏     | 16701/40000 [1:19:29<1:51:36,  3.48it/s]

Epoch 16700, Loss: 0.0016673016361892223


 42%|████▏     | 16801/40000 [1:19:57<1:50:11,  3.51it/s]

Epoch 16800, Loss: 0.0021061147563159466


 42%|████▏     | 16901/40000 [1:20:26<1:49:42,  3.51it/s]

Epoch 16900, Loss: 0.001559103257022798


 43%|████▎     | 17001/40000 [1:20:54<1:49:15,  3.51it/s]

Epoch 17000, Loss: 0.0019121146760880947


 43%|████▎     | 17101/40000 [1:21:23<1:49:36,  3.48it/s]

Epoch 17100, Loss: 0.0014566443860530853


 43%|████▎     | 17201/40000 [1:21:52<1:48:17,  3.51it/s]

Epoch 17200, Loss: 0.0018234821036458015


 43%|████▎     | 17301/40000 [1:22:20<1:48:06,  3.50it/s]

Epoch 17300, Loss: 0.001242285594344139


 44%|████▎     | 17401/40000 [1:22:49<1:47:16,  3.51it/s]

Epoch 17400, Loss: 0.0016564044635742903


 44%|████▍     | 17501/40000 [1:23:17<1:47:29,  3.49it/s]

Epoch 17500, Loss: 0.0013110239524394274


 44%|████▍     | 17601/40000 [1:23:46<1:46:21,  3.51it/s]

Epoch 17600, Loss: 0.0011362425284460187


 44%|████▍     | 17701/40000 [1:24:14<1:45:59,  3.51it/s]

Epoch 17700, Loss: 0.001255167881026864


 45%|████▍     | 17801/40000 [1:24:43<1:45:25,  3.51it/s]

Epoch 17800, Loss: 0.0014657019637525082


 45%|████▍     | 17901/40000 [1:25:29<1:45:41,  3.49it/s]

Epoch 17900, Loss: 0.001977176871150732


 45%|████▌     | 18001/40000 [1:25:58<1:44:30,  3.51it/s]

Epoch 18000, Loss: 0.0014464861014857888


 45%|████▌     | 18101/40000 [1:26:26<1:44:02,  3.51it/s]

Epoch 18100, Loss: 0.001149771735072136


 46%|████▌     | 18201/40000 [1:26:55<1:43:44,  3.50it/s]

Epoch 18200, Loss: 0.0010173311457037926


 46%|████▌     | 18301/40000 [1:27:23<1:43:47,  3.48it/s]

Epoch 18300, Loss: 0.0016957720508798957


 46%|████▌     | 18401/40000 [1:27:52<1:42:40,  3.51it/s]

Epoch 18400, Loss: 0.0014006143901497126


 46%|████▋     | 18501/40000 [1:28:21<1:42:05,  3.51it/s]

Epoch 18500, Loss: 0.001171874231658876


 47%|████▋     | 18601/40000 [1:28:49<1:41:44,  3.51it/s]

Epoch 18600, Loss: 0.0016418431187048554


 47%|████▋     | 18701/40000 [1:29:18<1:41:43,  3.49it/s]

Epoch 18700, Loss: 0.0019392493413761258


 47%|████▋     | 18801/40000 [1:29:46<1:40:43,  3.51it/s]

Epoch 18800, Loss: 0.001588213606737554


 47%|████▋     | 18901/40000 [1:30:15<1:40:14,  3.51it/s]

Epoch 18900, Loss: 0.0019378821598365903


 48%|████▊     | 19001/40000 [1:30:43<1:39:54,  3.50it/s]

Epoch 19000, Loss: 0.00087659468408674


 48%|████▊     | 19101/40000 [1:31:12<1:39:17,  3.51it/s]

Epoch 19100, Loss: 0.0016342353774234653


 48%|████▊     | 19201/40000 [1:31:41<1:39:32,  3.48it/s]

Epoch 19200, Loss: 0.0021289438009262085


 48%|████▊     | 19301/40000 [1:32:09<1:38:24,  3.51it/s]

Epoch 19300, Loss: 0.0011978578986600041


 49%|████▊     | 19401/40000 [1:32:38<1:37:56,  3.51it/s]

Epoch 19400, Loss: 0.0015903245657682419


 49%|████▉     | 19501/40000 [1:33:06<1:37:18,  3.51it/s]

Epoch 19500, Loss: 0.0017769646365195513


 49%|████▉     | 19601/40000 [1:33:35<1:37:45,  3.48it/s]

Epoch 19600, Loss: 0.0015049346257001162


 49%|████▉     | 19701/40000 [1:34:03<1:36:26,  3.51it/s]

Epoch 19700, Loss: 0.001630238490179181


 50%|████▉     | 19801/40000 [1:34:32<1:35:53,  3.51it/s]

Epoch 19800, Loss: 0.001664498122408986


 50%|████▉     | 19901/40000 [1:35:00<1:35:32,  3.51it/s]

Epoch 19900, Loss: 0.0020038369111716747


 50%|█████     | 20001/40000 [1:35:29<1:35:42,  3.48it/s]

Epoch 20000, Loss: 0.0017194298561662436


 50%|█████     | 20101/40000 [1:35:58<1:34:28,  3.51it/s]

Epoch 20100, Loss: 0.0009411947103217244


 51%|█████     | 20201/40000 [1:36:26<1:34:11,  3.50it/s]

Epoch 20200, Loss: 0.0012269330909475684


 51%|█████     | 20301/40000 [1:36:55<1:33:39,  3.51it/s]

Epoch 20300, Loss: 0.002232104539871216


 51%|█████     | 20401/40000 [1:37:23<1:33:32,  3.49it/s]

Epoch 20400, Loss: 0.001234214985743165


 51%|█████▏    | 20501/40000 [1:37:52<1:32:39,  3.51it/s]

Epoch 20500, Loss: 0.0013802186585962772


 52%|█████▏    | 20601/40000 [1:38:20<1:32:09,  3.51it/s]

Epoch 20600, Loss: 0.0019173887558281422


 52%|█████▏    | 20701/40000 [1:38:49<1:31:41,  3.51it/s]

Epoch 20700, Loss: 0.001469723880290985


 52%|█████▏    | 20801/40000 [1:39:17<1:31:40,  3.49it/s]

Epoch 20800, Loss: 0.001497363904491067


 52%|█████▏    | 20901/40000 [1:39:46<1:30:44,  3.51it/s]

Epoch 20900, Loss: 0.0016300240531563759


 53%|█████▎    | 21001/40000 [1:40:15<1:30:14,  3.51it/s]

Epoch 21000, Loss: 0.0014461041428148746


 53%|█████▎    | 21101/40000 [1:40:43<1:29:48,  3.51it/s]

Epoch 21100, Loss: 0.0017628025962039828


 53%|█████▎    | 21201/40000 [1:41:12<1:29:14,  3.51it/s]

Epoch 21200, Loss: 0.0018165516667068005


 53%|█████▎    | 21301/40000 [1:41:40<1:29:28,  3.48it/s]

Epoch 21300, Loss: 0.0009912471286952496


 54%|█████▎    | 21401/40000 [1:42:09<1:28:23,  3.51it/s]

Epoch 21400, Loss: 0.0013434800785034895


 54%|█████▍    | 21501/40000 [1:42:37<1:27:57,  3.51it/s]

Epoch 21500, Loss: 0.0021807961165905


 54%|█████▍    | 21601/40000 [1:43:06<1:27:24,  3.51it/s]

Epoch 21600, Loss: 0.0019504063529893756


 54%|█████▍    | 21701/40000 [1:43:34<1:27:30,  3.49it/s]

Epoch 21700, Loss: 0.0017376539763063192


 55%|█████▍    | 21801/40000 [1:44:03<1:26:22,  3.51it/s]

Epoch 21800, Loss: 0.001111822435632348


 55%|█████▍    | 21901/40000 [1:44:32<1:25:58,  3.51it/s]

Epoch 21900, Loss: 0.0013191411271691322


 55%|█████▌    | 22001/40000 [1:45:00<1:25:29,  3.51it/s]

Epoch 22000, Loss: 0.0014405969996005297


 55%|█████▌    | 22101/40000 [1:45:29<1:25:46,  3.48it/s]

Epoch 22100, Loss: 0.0012275897897779942


 56%|█████▌    | 22201/40000 [1:45:57<1:24:46,  3.50it/s]

Epoch 22200, Loss: 0.0017498049419373274


 56%|█████▌    | 22301/40000 [1:46:26<1:24:04,  3.51it/s]

Epoch 22300, Loss: 0.001754317432641983


 56%|█████▌    | 22401/40000 [1:46:54<1:23:40,  3.51it/s]

Epoch 22400, Loss: 0.00121794524602592


 56%|█████▋    | 22501/40000 [1:47:23<1:23:40,  3.49it/s]

Epoch 22500, Loss: 0.0018497875425964594


 57%|█████▋    | 22601/40000 [1:47:51<1:22:46,  3.50it/s]

Epoch 22600, Loss: 0.0014052367769181728


 57%|█████▋    | 22701/40000 [1:48:20<1:22:12,  3.51it/s]

Epoch 22700, Loss: 0.001413899939507246


 57%|█████▋    | 22801/40000 [1:48:48<1:21:41,  3.51it/s]

Epoch 22800, Loss: 0.001681218622252345


 57%|█████▋    | 22901/40000 [1:49:17<1:21:55,  3.48it/s]

Epoch 22900, Loss: 0.0013219816610217094


 58%|█████▊    | 23001/40000 [1:49:46<1:20:44,  3.51it/s]

Epoch 23000, Loss: 0.0015097132418304682


 58%|█████▊    | 23101/40000 [1:50:14<1:20:10,  3.51it/s]

Epoch 23100, Loss: 0.0012888398487120867


 58%|█████▊    | 23201/40000 [1:50:43<1:19:55,  3.50it/s]

Epoch 23200, Loss: 0.0017160703428089619


 58%|█████▊    | 23301/40000 [1:51:11<1:19:20,  3.51it/s]

Epoch 23300, Loss: 0.0025289456825703382


 59%|█████▊    | 23401/40000 [1:51:40<1:19:35,  3.48it/s]

Epoch 23400, Loss: 0.0015932517126202583


 59%|█████▉    | 23501/40000 [1:52:09<1:18:26,  3.51it/s]

Epoch 23500, Loss: 0.0016237417003139853


 59%|█████▉    | 23601/40000 [1:52:37<1:17:51,  3.51it/s]

Epoch 23600, Loss: 0.0012928396463394165


 59%|█████▉    | 23701/40000 [1:53:06<1:17:28,  3.51it/s]

Epoch 23700, Loss: 0.0014347682008519769


 60%|█████▉    | 23801/40000 [1:53:34<1:17:21,  3.49it/s]

Epoch 23800, Loss: 0.0015286754351109266


 60%|█████▉    | 23901/40000 [1:54:03<1:16:29,  3.51it/s]

Epoch 23900, Loss: 0.0020624054595828056


 60%|██████    | 24001/40000 [1:54:31<1:16:11,  3.50it/s]

Epoch 24000, Loss: 0.0015200329944491386


 60%|██████    | 24101/40000 [1:55:00<1:15:28,  3.51it/s]

Epoch 24100, Loss: 0.0017662255559116602


 61%|██████    | 24201/40000 [1:55:28<1:15:32,  3.49it/s]

Epoch 24200, Loss: 0.001342378556728363


 61%|██████    | 24301/40000 [1:55:57<1:14:33,  3.51it/s]

Epoch 24300, Loss: 0.0009608555119484663


 61%|██████    | 24401/40000 [1:56:25<1:14:05,  3.51it/s]

Epoch 24400, Loss: 0.0010934550082311034


 61%|██████▏   | 24501/40000 [1:56:54<1:13:41,  3.51it/s]

Epoch 24500, Loss: 0.0014232499524950981


 62%|██████▏   | 24601/40000 [1:57:23<1:13:59,  3.47it/s]

Epoch 24600, Loss: 0.0017176634864881635


 62%|██████▏   | 24701/40000 [1:57:51<1:12:43,  3.51it/s]

Epoch 24700, Loss: 0.0013791244709864259


 62%|██████▏   | 24801/40000 [1:58:20<1:12:17,  3.50it/s]

Epoch 24800, Loss: 0.0017219171859323978


 62%|██████▏   | 24901/40000 [1:58:48<1:11:40,  3.51it/s]

Epoch 24900, Loss: 0.0012712657917290926


 63%|██████▎   | 25001/40000 [1:59:17<1:11:44,  3.48it/s]

Epoch 25000, Loss: 0.0018554581329226494


 63%|██████▎   | 25101/40000 [1:59:45<1:10:53,  3.50it/s]

Epoch 25100, Loss: 0.0011229143710806966


 63%|██████▎   | 25201/40000 [2:00:14<1:10:16,  3.51it/s]

Epoch 25200, Loss: 0.0011248687515035272


 63%|██████▎   | 25301/40000 [2:00:43<1:09:50,  3.51it/s]

Epoch 25300, Loss: 0.001408211886882782


 64%|██████▎   | 25401/40000 [2:01:11<1:09:18,  3.51it/s]

Epoch 25400, Loss: 0.0014123613946139812


 64%|██████▍   | 25501/40000 [2:01:40<1:09:20,  3.49it/s]

Epoch 25500, Loss: 0.0008618033025413752


 64%|██████▍   | 25601/40000 [2:02:08<1:08:28,  3.51it/s]

Epoch 25600, Loss: 0.0014675103593617678


 64%|██████▍   | 25701/40000 [2:02:37<1:07:54,  3.51it/s]

Epoch 25700, Loss: 0.0012969229137524962


 65%|██████▍   | 25801/40000 [2:03:05<1:07:34,  3.50it/s]

Epoch 25800, Loss: 0.0011701751500368118


 65%|██████▍   | 25901/40000 [2:03:34<1:07:35,  3.48it/s]

Epoch 25900, Loss: 0.0017362774815410376


 65%|██████▌   | 26001/40000 [2:04:02<1:06:32,  3.51it/s]

Epoch 26000, Loss: 0.001623782329261303


 65%|██████▌   | 26101/40000 [2:04:31<1:06:02,  3.51it/s]

Epoch 26100, Loss: 0.0016018599271774292


 66%|██████▌   | 26201/40000 [2:04:59<1:05:34,  3.51it/s]

Epoch 26200, Loss: 0.001683202339336276


 66%|██████▌   | 26301/40000 [2:05:28<1:05:33,  3.48it/s]

Epoch 26300, Loss: 0.001928840298205614


 66%|██████▌   | 26401/40000 [2:05:57<1:04:35,  3.51it/s]

Epoch 26400, Loss: 0.001730951713398099


 66%|██████▋   | 26501/40000 [2:06:25<1:04:23,  3.49it/s]

Epoch 26500, Loss: 0.0014377039624378085


 67%|██████▋   | 26601/40000 [2:06:54<1:03:41,  3.51it/s]

Epoch 26600, Loss: 0.001632786588743329


 67%|██████▋   | 26701/40000 [2:07:22<1:03:28,  3.49it/s]

Epoch 26700, Loss: 0.0012742679100483656


 67%|██████▋   | 26801/40000 [2:07:51<1:02:49,  3.50it/s]

Epoch 26800, Loss: 0.0010870214318856597


 67%|██████▋   | 26901/40000 [2:08:19<1:02:13,  3.51it/s]

Epoch 26900, Loss: 0.0017616280820220709


 68%|██████▊   | 27001/40000 [2:08:48<1:01:46,  3.51it/s]

Epoch 27000, Loss: 0.0012326310388743877


 68%|██████▊   | 27101/40000 [2:09:16<1:01:43,  3.48it/s]

Epoch 27100, Loss: 0.001370725454762578


 68%|██████▊   | 27201/40000 [2:09:45<1:00:51,  3.51it/s]

Epoch 27200, Loss: 0.0015583857893943787


 68%|██████▊   | 27301/40000 [2:10:14<1:00:21,  3.51it/s]

Epoch 27300, Loss: 0.0016452440759167075


 69%|██████▊   | 27401/40000 [2:10:42<59:52,  3.51it/s]  

Epoch 27400, Loss: 0.0016721077263355255


 69%|██████▉   | 27501/40000 [2:11:11<59:28,  3.50it/s]  

Epoch 27500, Loss: 0.001412430196069181


 69%|██████▉   | 27601/40000 [2:11:39<59:10,  3.49it/s]

Epoch 27600, Loss: 0.0010402778862044215


 69%|██████▉   | 27701/40000 [2:12:08<58:22,  3.51it/s]

Epoch 27700, Loss: 0.0010483193909749389


 70%|██████▉   | 27801/40000 [2:12:36<57:58,  3.51it/s]

Epoch 27800, Loss: 0.0013903817161917686


 70%|██████▉   | 27901/40000 [2:13:05<57:29,  3.51it/s]

Epoch 27900, Loss: 0.0012101340107619762


 70%|███████   | 28001/40000 [2:13:34<57:28,  3.48it/s]

Epoch 28000, Loss: 0.001116740982979536


 70%|███████   | 28101/40000 [2:14:02<56:35,  3.50it/s]

Epoch 28100, Loss: 0.001261100172996521


 71%|███████   | 28201/40000 [2:14:31<56:02,  3.51it/s]

Epoch 28200, Loss: 0.0011352503206580877


 71%|███████   | 28301/40000 [2:14:59<55:34,  3.51it/s]

Epoch 28300, Loss: 0.0010653999634087086


 71%|███████   | 28401/40000 [2:15:28<55:32,  3.48it/s]

Epoch 28400, Loss: 0.0016271043568849564


 71%|███████▏  | 28501/40000 [2:15:56<54:37,  3.51it/s]

Epoch 28500, Loss: 0.001850765896961093


 72%|███████▏  | 28601/40000 [2:16:25<54:12,  3.50it/s]

Epoch 28600, Loss: 0.0010478482581675053


 72%|███████▏  | 28701/40000 [2:16:53<53:36,  3.51it/s]

Epoch 28700, Loss: 0.001244107261300087


 72%|███████▏  | 28801/40000 [2:17:22<53:35,  3.48it/s]

Epoch 28800, Loss: 0.0016508633270859718


 72%|███████▏  | 28901/40000 [2:17:51<52:43,  3.51it/s]

Epoch 28900, Loss: 0.0012840896379202604


 73%|███████▎  | 29001/40000 [2:18:19<52:12,  3.51it/s]

Epoch 29000, Loss: 0.0012711008312180638


 73%|███████▎  | 29101/40000 [2:18:48<51:46,  3.51it/s]

Epoch 29100, Loss: 0.001387502532452345


 73%|███████▎  | 29201/40000 [2:19:16<51:42,  3.48it/s]

Epoch 29200, Loss: 0.001426898641511798


 73%|███████▎  | 29301/40000 [2:19:45<50:58,  3.50it/s]

Epoch 29300, Loss: 0.0014837052440270782


 74%|███████▎  | 29401/40000 [2:20:13<50:22,  3.51it/s]

Epoch 29400, Loss: 0.0015714067267253995


 74%|███████▍  | 29501/40000 [2:20:42<49:55,  3.51it/s]

Epoch 29500, Loss: 0.0012673735618591309


 74%|███████▍  | 29601/40000 [2:21:10<49:28,  3.50it/s]

Epoch 29600, Loss: 0.0011882976396009326


 74%|███████▍  | 29701/40000 [2:21:39<49:15,  3.48it/s]

Epoch 29700, Loss: 0.0016979804495349526


 75%|███████▍  | 29801/40000 [2:22:08<48:26,  3.51it/s]

Epoch 29800, Loss: 0.0011292092967778444


 75%|███████▍  | 29901/40000 [2:22:36<47:59,  3.51it/s]

Epoch 29900, Loss: 0.001442889217287302


 75%|███████▌  | 30001/40000 [2:23:05<47:34,  3.50it/s]

Epoch 30000, Loss: 0.0012223133817315102


 75%|███████▌  | 30101/40000 [2:23:33<47:23,  3.48it/s]

Epoch 30100, Loss: 0.0017657033167779446


 76%|███████▌  | 30201/40000 [2:24:02<46:35,  3.51it/s]

Epoch 30200, Loss: 0.0015146845253184438


 76%|███████▌  | 30301/40000 [2:24:30<46:03,  3.51it/s]

Epoch 30300, Loss: 0.0020975719671696424


 76%|███████▌  | 30401/40000 [2:24:59<45:36,  3.51it/s]

Epoch 30400, Loss: 0.0014402521774172783


 76%|███████▋  | 30501/40000 [2:25:28<45:32,  3.48it/s]

Epoch 30500, Loss: 0.001585159683600068


 77%|███████▋  | 30601/40000 [2:25:56<44:41,  3.51it/s]

Epoch 30600, Loss: 0.0011656561400741339


 77%|███████▋  | 30701/40000 [2:26:25<44:13,  3.50it/s]

Epoch 30700, Loss: 0.0009681650553829968


 77%|███████▋  | 30801/40000 [2:26:53<43:43,  3.51it/s]

Epoch 30800, Loss: 0.001540267956443131


 77%|███████▋  | 30901/40000 [2:27:22<43:31,  3.48it/s]

Epoch 30900, Loss: 0.0014191002119332552


 78%|███████▊  | 31001/40000 [2:27:50<42:49,  3.50it/s]

Epoch 31000, Loss: 0.0012698534410446882


 78%|███████▊  | 31101/40000 [2:28:19<42:17,  3.51it/s]

Epoch 31100, Loss: 0.0013097121845930815


 78%|███████▊  | 31201/40000 [2:28:47<41:48,  3.51it/s]

Epoch 31200, Loss: 0.0018194399308413267


 78%|███████▊  | 31301/40000 [2:29:16<41:42,  3.48it/s]

Epoch 31300, Loss: 0.0012336665531620383


 79%|███████▊  | 31401/40000 [2:29:45<40:53,  3.50it/s]

Epoch 31400, Loss: 0.001629121950827539


 79%|███████▉  | 31501/40000 [2:30:13<40:22,  3.51it/s]

Epoch 31500, Loss: 0.0012562323827296495


 79%|███████▉  | 31601/40000 [2:30:42<39:53,  3.51it/s]

Epoch 31600, Loss: 0.0012147699017077684


 79%|███████▉  | 31701/40000 [2:31:10<39:26,  3.51it/s]

Epoch 31700, Loss: 0.000841253437101841


 80%|███████▉  | 31801/40000 [2:31:39<39:13,  3.48it/s]

Epoch 31800, Loss: 0.001432770979590714


 80%|███████▉  | 31901/40000 [2:32:07<38:29,  3.51it/s]

Epoch 31900, Loss: 0.0008838865323923528


 80%|████████  | 32001/40000 [2:32:36<38:02,  3.50it/s]

Epoch 32000, Loss: 0.0015121845062822104


 80%|████████  | 32101/40000 [2:33:04<37:33,  3.50it/s]

Epoch 32100, Loss: 0.001114609302021563


 81%|████████  | 32201/40000 [2:33:33<37:17,  3.49it/s]

Epoch 32200, Loss: 0.0014728684909641743


 81%|████████  | 32301/40000 [2:34:02<36:35,  3.51it/s]

Epoch 32300, Loss: 0.0010139744263142347


 81%|████████  | 32401/40000 [2:34:30<36:05,  3.51it/s]

Epoch 32400, Loss: 0.0013830821262672544


 81%|████████▏ | 32501/40000 [2:34:59<35:40,  3.50it/s]

Epoch 32500, Loss: 0.0014542283024638891


 82%|████████▏ | 32601/40000 [2:35:27<35:21,  3.49it/s]

Epoch 32600, Loss: 0.001602798467501998


 82%|████████▏ | 32701/40000 [2:35:56<34:39,  3.51it/s]

Epoch 32700, Loss: 0.0007875065202824771


 82%|████████▏ | 32801/40000 [2:36:24<34:11,  3.51it/s]

Epoch 32800, Loss: 0.0012268469436094165


 82%|████████▏ | 32901/40000 [2:36:53<33:46,  3.50it/s]

Epoch 32900, Loss: 0.0018822369165718555


 83%|████████▎ | 33001/40000 [2:37:22<33:31,  3.48it/s]

Epoch 33000, Loss: 0.0015613995492458344


 83%|████████▎ | 33101/40000 [2:37:50<32:46,  3.51it/s]

Epoch 33100, Loss: 0.0012232912704348564


 83%|████████▎ | 33201/40000 [2:38:19<32:19,  3.51it/s]

Epoch 33200, Loss: 0.0012543258490040898


 83%|████████▎ | 33301/40000 [2:38:47<31:51,  3.50it/s]

Epoch 33300, Loss: 0.0009532049298286438


 84%|████████▎ | 33401/40000 [2:39:16<31:33,  3.49it/s]

Epoch 33400, Loss: 0.0010908859549090266


 84%|████████▍ | 33501/40000 [2:39:44<30:54,  3.50it/s]

Epoch 33500, Loss: 0.0009599651675671339


 84%|████████▍ | 33601/40000 [2:40:13<30:25,  3.51it/s]

Epoch 33600, Loss: 0.001998503925278783


 84%|████████▍ | 33701/40000 [2:40:41<29:56,  3.51it/s]

Epoch 33700, Loss: 0.0016839811578392982


 85%|████████▍ | 33801/40000 [2:41:10<29:29,  3.50it/s]

Epoch 33800, Loss: 0.001217832788825035


 85%|████████▍ | 33901/40000 [2:41:39<29:08,  3.49it/s]

Epoch 33900, Loss: 0.0013394580455496907


 85%|████████▌ | 34001/40000 [2:42:07<28:30,  3.51it/s]

Epoch 34000, Loss: 0.0009354219073429704


 85%|████████▌ | 34101/40000 [2:42:36<28:03,  3.50it/s]

Epoch 34100, Loss: 0.0011846839915961027


 86%|████████▌ | 34201/40000 [2:43:04<27:30,  3.51it/s]

Epoch 34200, Loss: 0.0016106530092656612


 86%|████████▌ | 34301/40000 [2:43:33<27:14,  3.49it/s]

Epoch 34300, Loss: 0.0011399290524423122


 86%|████████▌ | 34401/40000 [2:44:01<26:37,  3.50it/s]

Epoch 34400, Loss: 0.0017662073951214552


 86%|████████▋ | 34501/40000 [2:44:30<26:06,  3.51it/s]

Epoch 34500, Loss: 0.0011703789932653308


 87%|████████▋ | 34601/40000 [2:44:58<25:39,  3.51it/s]

Epoch 34600, Loss: 0.002106302883476019


 87%|████████▋ | 34701/40000 [2:45:27<25:21,  3.48it/s]

Epoch 34700, Loss: 0.0015594724100083113


 87%|████████▋ | 34801/40000 [2:45:56<24:44,  3.50it/s]

Epoch 34800, Loss: 0.0011049921158701181


 87%|████████▋ | 34901/40000 [2:46:24<24:13,  3.51it/s]

Epoch 34900, Loss: 0.0014111188938841224


 88%|████████▊ | 35001/40000 [2:46:53<23:44,  3.51it/s]

Epoch 35000, Loss: 0.001623394200578332


 88%|████████▊ | 35101/40000 [2:47:21<23:27,  3.48it/s]

Epoch 35100, Loss: 0.0014344388619065285


 88%|████████▊ | 35201/40000 [2:47:50<22:47,  3.51it/s]

Epoch 35200, Loss: 0.0012430158676579595


 88%|████████▊ | 35301/40000 [2:48:18<22:19,  3.51it/s]

Epoch 35300, Loss: 0.0014981987187638879


 89%|████████▊ | 35401/40000 [2:48:47<21:51,  3.51it/s]

Epoch 35400, Loss: 0.0011741109192371368


 89%|████████▉ | 35501/40000 [2:49:15<21:31,  3.48it/s]

Epoch 35500, Loss: 0.0016239164397120476


 89%|████████▉ | 35601/40000 [2:49:44<20:56,  3.50it/s]

Epoch 35600, Loss: 0.001234320574440062


 89%|████████▉ | 35701/40000 [2:50:13<20:27,  3.50it/s]

Epoch 35700, Loss: 0.0019446162041276693


 90%|████████▉ | 35801/40000 [2:50:41<19:58,  3.50it/s]

Epoch 35800, Loss: 0.0011714780703186989


 90%|████████▉ | 35901/40000 [2:51:10<19:28,  3.51it/s]

Epoch 35900, Loss: 0.0010799517622217536


 90%|█████████ | 36001/40000 [2:51:38<19:08,  3.48it/s]

Epoch 36000, Loss: 0.001207502675242722


 90%|█████████ | 36101/40000 [2:52:07<18:31,  3.51it/s]

Epoch 36100, Loss: 0.0010658260434865952


 91%|█████████ | 36201/40000 [2:52:35<18:03,  3.51it/s]

Epoch 36200, Loss: 0.0007942215306684375


 91%|█████████ | 36301/40000 [2:53:04<17:33,  3.51it/s]

Epoch 36300, Loss: 0.001412203535437584


 91%|█████████ | 36401/40000 [2:53:33<17:11,  3.49it/s]

Epoch 36400, Loss: 0.001108946860767901


 91%|█████████▏| 36501/40000 [2:54:01<16:37,  3.51it/s]

Epoch 36500, Loss: 0.001485869288444519


 92%|█████████▏| 36601/40000 [2:54:30<16:09,  3.50it/s]

Epoch 36600, Loss: 0.0011716519948095083


 92%|█████████▏| 36701/40000 [2:54:58<15:41,  3.50it/s]

Epoch 36700, Loss: 0.0013166784774512053


 92%|█████████▏| 36801/40000 [2:55:27<15:17,  3.49it/s]

Epoch 36800, Loss: 0.0019288333132863045


 92%|█████████▏| 36901/40000 [2:55:55<14:42,  3.51it/s]

Epoch 36900, Loss: 0.001166755799204111


 93%|█████████▎| 37001/40000 [2:56:24<14:15,  3.51it/s]

Epoch 37000, Loss: 0.0017690564272925258


 93%|█████████▎| 37101/40000 [2:56:52<13:46,  3.51it/s]

Epoch 37100, Loss: 0.001145312562584877


 93%|█████████▎| 37201/40000 [2:57:21<13:23,  3.48it/s]

Epoch 37200, Loss: 0.0011369786225259304


 93%|█████████▎| 37301/40000 [2:57:50<12:50,  3.50it/s]

Epoch 37300, Loss: 0.0019752911757677794


 94%|█████████▎| 37401/40000 [2:58:18<12:20,  3.51it/s]

Epoch 37400, Loss: 0.0012415824458003044


 94%|█████████▍| 37501/40000 [2:58:47<11:53,  3.50it/s]

Epoch 37500, Loss: 0.0014331159181892872


 94%|█████████▍| 37601/40000 [2:59:15<11:27,  3.49it/s]

Epoch 37600, Loss: 0.001511879963800311


 94%|█████████▍| 37701/40000 [2:59:44<10:57,  3.50it/s]

Epoch 37700, Loss: 0.0010212261695414782


 95%|█████████▍| 37801/40000 [3:00:12<10:26,  3.51it/s]

Epoch 37800, Loss: 0.0011308606481179595


 95%|█████████▍| 37901/40000 [3:00:41<09:58,  3.51it/s]

Epoch 37900, Loss: 0.0012273875763639808


 95%|█████████▌| 38001/40000 [3:01:09<09:31,  3.50it/s]

Epoch 38000, Loss: 0.0015081694582477212


 95%|█████████▌| 38101/40000 [3:01:38<09:06,  3.48it/s]

Epoch 38100, Loss: 0.0015953618567436934


 96%|█████████▌| 38201/40000 [3:02:07<08:32,  3.51it/s]

Epoch 38200, Loss: 0.0012924851616844535


 96%|█████████▌| 38301/40000 [3:02:35<08:04,  3.51it/s]

Epoch 38300, Loss: 0.0015629904810339212


 96%|█████████▌| 38401/40000 [3:03:04<07:36,  3.50it/s]

Epoch 38400, Loss: 0.0011895903153344989


 96%|█████████▋| 38501/40000 [3:03:32<07:10,  3.48it/s]

Epoch 38500, Loss: 0.0012623353395611048


 97%|█████████▋| 38601/40000 [3:04:01<06:39,  3.51it/s]

Epoch 38600, Loss: 0.0010959552600979805


 97%|█████████▋| 38701/40000 [3:04:29<06:10,  3.51it/s]

Epoch 38700, Loss: 0.001598544418811798


 97%|█████████▋| 38801/40000 [3:04:58<05:42,  3.50it/s]

Epoch 38800, Loss: 0.0010437096934765577


 97%|█████████▋| 38901/40000 [3:05:26<05:15,  3.48it/s]

Epoch 38900, Loss: 0.0016826551873236895


 98%|█████████▊| 39001/40000 [3:05:55<04:44,  3.51it/s]

Epoch 39000, Loss: 0.0009506857022643089


 98%|█████████▊| 39101/40000 [3:06:24<04:16,  3.51it/s]

Epoch 39100, Loss: 0.0013219350948929787


 98%|█████████▊| 39201/40000 [3:06:52<03:48,  3.50it/s]

Epoch 39200, Loss: 0.0013874988071620464


 98%|█████████▊| 39301/40000 [3:07:21<03:20,  3.49it/s]

Epoch 39300, Loss: 0.0010918080806732178


 99%|█████████▊| 39401/40000 [3:07:49<02:50,  3.51it/s]

Epoch 39400, Loss: 0.0010769926011562347


 99%|█████████▉| 39501/40000 [3:08:18<02:22,  3.51it/s]

Epoch 39500, Loss: 0.0020923875272274017


 99%|█████████▉| 39601/40000 [3:08:46<01:53,  3.51it/s]

Epoch 39600, Loss: 0.0019639371894299984


 99%|█████████▉| 39701/40000 [3:09:15<01:25,  3.49it/s]

Epoch 39700, Loss: 0.0012052500387653708


100%|█████████▉| 39801/40000 [3:09:44<00:56,  3.49it/s]

Epoch 39800, Loss: 0.0009109395905397832


100%|█████████▉| 39901/40000 [3:10:12<00:28,  3.50it/s]

Epoch 39900, Loss: 0.001198838814161718


100%|██████████| 40000/40000 [3:10:40<00:00,  3.50it/s]
