<a href="https://colab.research.google.com/github/bhaweshiitk/6.869_project/blob/main/dropout_model_6_869_project_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
use_gdrive = False

In [2]:
import os
from tqdm import tqdm

In [3]:
! mkdir ~/.kaggle

In [4]:
! cd ~/.kaggle

In [5]:
# First download Kaggle.json from kaggle containing your credentials.
! cp kaggle.json ~/.kaggle/

In [6]:
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d kmader/rsna-bone-age

Downloading rsna-bone-age.zip to /content
100% 9.28G/9.29G [02:52<00:00, 58.6MB/s]
100% 9.29G/9.29G [02:52<00:00, 57.7MB/s]


In [7]:
!mkdir /content/data
!unzip /content/rsna-bone-age.zip -d /content/data

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4264.png  
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4265.png  
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4266.png  
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4268.png  
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4269.png  
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4270.png  
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4271.png  
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4272.png  
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4273.png  
  inflating: /content/data/boneage-training-dataset/boneage-training-dataset/4275.png  
  inflating: /content/data/boneage-training-dataset/bon

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
# You might not have tqdm, which gives you nice progress bars
!pip install tqdm
from tqdm.notebook import tqdm
import os
import copy
import pandas as pd
import PIL 
from google.colab import files  
# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print("Using the GPU!")
else:
    print("WARNING: Could not find GPU! Using CPU only")
    print("You may want to try to use the GPU in Google Colab by clicking in:")
    print("Runtime > Change Runtime type > Hardware accelerator > GPU.")

Using the GPU!


In [9]:
class ImageModel(torch.nn.Module):

  def __init__(self, pretrained=True):
    super(ImageModel, self).__init__()
    model_ft = models.resnet18(pretrained=pretrained)
    num_ftrs = model_ft.fc.in_features
    # model_ft.fc = nn.Sequential(nn.Linear(512, 1))
    model_ft.fc = nn.Sequential(nn.Linear(512, 256), nn.Dropout(p=0.2), # Change dropout here
                             nn.ReLU(), nn.Linear(256, 64), nn.Dropout(p=0.2),
                             nn.ReLU(), nn.Linear(64, 1))
    self.model = model_ft # Vggnet, MSE, Pinball loss
    
    # self.input_size = (224, 224)

  def forward(self, x):
    return self.model(x)

In [10]:
from torchvision.transforms.functional import to_grayscale

def get_image_transforms():
    # How to transform the image when you are loading them.
    # you'll likely want to mess with the transforms on the training set.
    
    # we convert the image to a [C,H,W] tensor, then normalize it to values with a given mean/stdev. These normalization constants
    # are derived from aggregating lots of data and happen to produce better results.
    transform = transforms.Compose([
            transforms.Resize(image_size, interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.Grayscale(num_output_channels=3),
            transforms.RandomRotation(degrees=(-10, 10), expand=False),
            transforms.RandomAffine(degrees=(-10, 10), scale=(0.8,1.2)), 
	          transforms.RandomHorizontalFlip(p=0.15),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.1871, 0.1871, 0.1871],std=[0.1401, 0.1401, 0.1401])
            
        ])
    return transform

# import glob
from PIL import Image

class ImageDataset(torch.utils.data.Dataset):

  def __init__(self, df, image_dir):
    self.image_paths = []
    self.bone_ages = []
    self.image_ids = []
    for image_id, boneage in zip(df['id'].values, df['boneage'].values):
      self.image_paths.append(f"{image_dir}/{image_id}.png")
      self.bone_ages.append(boneage)
      self.image_ids.append(image_id)
    self.transform = get_image_transforms()
  
  def __len__(self):
    return len(self.image_paths)
  
  def __getitem__(self, idx):
    filepath = self.image_paths[idx]
    boneage = self.bone_ages[idx]
    image_id = self.image_ids[idx]
    image = Image.open(filepath)
    image = self.transform(image)

    return {'image': image, 'age': boneage, 'id':image_id}

In [11]:
def make_optimizer(model, learning_rate, print_parameters=False):
    # Get all the parameters
    params_to_update = list(model.parameters())
    if print_parameters:
      print("Params to learn:")
      for name, param in model.named_parameters():
          if param.requires_grad == True:
              print("\t",name)

 
    optimizer = optim.SGD(params_to_update, lr=learning_rate, momentum = 0.9)
    return optimizer

def get_loss():
    # Create an instance of the loss function
    # criterion = nn.CrossEntropyLoss()
    criterion = nn.functional.mse_loss
    return criterion

In [12]:

def train_model(model, dataloaders, criterion, optimizer, save_dir = None, save_all_epochs=False, num_epochs=25):
    '''
    model: The NN to train
    dataloaders: A dictionary containing at least the keys 
                 'train','val' that maps to Pytorch data loaders for the dataset
    criterion: The Loss function
    optimizer: The algorithm to update weights 
               (Variations on gradient descent)
    num_epochs: How many epochs to train for
    save_dir: Where to save the best model weights that are found, 
              as they are found. Will save to save_dir/weights_best.pt
              Using None will not write anything to disk
    save_all_epochs: Whether to save weights for ALL epochs, not just the best
                     validation error epoch. Will save to save_dir/weights_e{#}.pt
    '''
    since = time.time()

    val_acc_history = []
    train_acc_history = []
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            # running_corrects = 0

            # Iterate over data.
            # TQDM has nice progress bars
            for batch in tqdm(dataloaders[phase]):
                image = batch['image'].to(device)
                age = batch['age'].to(device)
                # inputs = inputs.to(device)
                # labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(image)
                    loss = criterion(outputs.squeeze(), age.to(torch.float32), reduction='mean')
                    print(loss)
                    # torch.max outputs the maximum value, and its index
                    # Since the input is batched, we take the max along axis 1
                    # (the meaningful outputs)
                    # _, preds = torch.max(outputs, 1)

                    # backprop + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * image.size(0)
                # running_corrects += torch.sum(preds == age.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            # epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            

            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

            # deep copy the model
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'train':
                train_acc_history.append(epoch_loss)
            if phase == 'val':
                val_acc_history.append(epoch_loss)
            if save_all_epochs:
                torch.save(model.state_dict(), os.path.join(save_dir, f'weights_{epoch}.pt'))

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val loss: {:4f}'.format(best_loss))

    # save and load best model weights
    torch.save(best_model_wts, os.path.join(save_dir, 'weights_best_val_acc.pt'))
    torch.save(model.state_dict(), os.path.join(save_dir, 'weights_last.pt'.format(epoch)))
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history

In [13]:
from pathlib import Path
data_dir = "./data"
models_dir = data_dir + '/models'
Path(models_dir).mkdir(parents=True, exist_ok=True)
save_dir = models_dir + '/trained_model_1'
os.makedirs(save_dir, exist_ok=True)

In [14]:
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet]
# You can add your own, or modify these however you wish!
# model_name = "vgg"###Your own###

# Number of classes in the dataset, normal, benign, malignant
# num_classes = 3

# Batch size for training (change depending on how much memory you have)
batch_size = 32

# Shuffle the input data?
shuffle_datasets = True

# Number of epochs to train for 
num_epochs = 60 ###Your own###

# Learning rate
learning_rate = 0.00005 ###Your own###

### IO
# Path to a model file to use to start weights at
resume_from = None

# Save all epochs so that you can select the model from a particular epoch
save_all_epochs = False

# Whether to use early stopping (load the model with best accuracy), or not
early_stopping = True

# Dataset directory
image_dir = "/content/data/boneage-training-dataset/boneage-training-dataset"
dataframe_path = "/content/data/boneage-training-dataset.csv"

# Image size
image_size = (224, 224)

# # Directory to save weights to
save_dir = models_dir + '/trained_model_1'
os.makedirs(save_dir, exist_ok=True)


In [15]:
import pandas as pd


df = pd.read_csv(dataframe_path)
df = df.sample(frac=1)
train_df = df.iloc[:6000]
val_df = df.iloc[6000:7000]
test_df = df.iloc[7000:]


In [None]:
# ids_train = list(train_df.id)
# ids_test = list(test_df.id)
# ids_val = list(val_df.id)
# len(ids_train), len(ids_test), len(ids_val)

In [None]:
# import pickle
# # with open('ids_train.pkl', 'wb') as f:
# #   pickle.dump([ids_train, ids_test, ids_val], f)
# with open('ids_train.pkl', 'rb') as f:
#   id_list = pickle.load(f)
# train_df = df[df.id.isin(id_list[0])]
# test_df = df[df.id.isin(id_list[1])]
# val_df = df[df.id.isin(id_list[2])]

In [16]:
train_dataset = ImageDataset(train_df, image_dir)
val_dataset = ImageDataset(val_df, image_dir)
test_dataset = ImageDataset(test_df, image_dir)
len(train_dataset), len(test_dataset), len(val_dataset)

(6000, 5611, 1000)

In [17]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
dataloader = {"train": train_dataloader, "val": val_dataloader, "test": test_dataloader}

In [18]:
# count = 0
# for batch in dataloaders['boneage-test-dataset']:
#   # print(batch)
#   count += batch[0].shape[0]

In [19]:
model = ImageModel(pretrained=False)
model.to(device)

ImageModel(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_ru

In [None]:
# model_1, input_size = initialize_model(model_name = model_name, num_classes = num_classes, resume_from=resume_from, use_pretrained=pretrained)
# dataloaders = get_dataloaders(mias_dataset_dir, input_size, batch_size, shuffle_datasets)
criterion = get_loss()

# Move the model to the gpu if needed
# model_1 = model.to(device)

optimizer_1 = make_optimizer(model, learning_rate=learning_rate)

# Train the model!
trained_model_1, validation_history_1, train_history_1 = train_model(model=model, 
                                                                     dataloaders=dataloader, 
                                                                     criterion=criterion, 
                                                                     optimizer=optimizer_1,
                                                                     save_dir=save_dir, 
                                                                     save_all_epochs=save_all_epochs, 
                                                                     num_epochs=num_epochs)
del model, optimizer_1, trained_model_1

Epoch 1/20
----------


  0%|          | 0/188 [00:00<?, ?it/s]

tensor(485.7150, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(590.4205, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(601.6787, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(803.6199, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(357.8937, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(659.9370, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(672.0325, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(779.6741, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(671.9200, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(637.1882, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(772.5233, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(403.1878, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(943.0424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(373.6034, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(523.7856, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(753.2164, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(7

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(2209.2505, device='cuda:0')
tensor(1956.5753, device='cuda:0')
tensor(668.3201, device='cuda:0')
tensor(635.8358, device='cuda:0')
tensor(841.7213, device='cuda:0')
tensor(602.6553, device='cuda:0')
tensor(844.3104, device='cuda:0')
tensor(980.6354, device='cuda:0')
tensor(752.9679, device='cuda:0')
tensor(465.0081, device='cuda:0')
tensor(1078.5156, device='cuda:0')
tensor(862.0917, device='cuda:0')
tensor(929.4397, device='cuda:0')
tensor(1001.3746, device='cuda:0')
tensor(1312.5728, device='cuda:0')
tensor(796.2592, device='cuda:0')
tensor(869.1938, device='cuda:0')
tensor(972.5599, device='cuda:0')
tensor(594.3924, device='cuda:0')
tensor(857.3872, device='cuda:0')
tensor(536.8303, device='cuda:0')
tensor(473.7140, device='cuda:0')
tensor(1038.1497, device='cuda:0')
tensor(592.1258, device='cuda:0')
tensor(1399.1545, device='cuda:0')
tensor(1139.0858, device='cuda:0')
tensor(810.5721, device='cuda:0')
tensor(750.5706, device='cuda:0')
tensor(726.0801, device='cuda:0')
tensor

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(1092.8225, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1108.7358, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(593.7417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(865.3090, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(688.5325, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(431.8972, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(563.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(747.4224, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(873.0513, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(547.0251, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(716.0370, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(588.3993, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(609.9797, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(328.2374, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(784.9824, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(730.5695, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(7622.4014, device='cuda:0')
tensor(3223.3188, device='cuda:0')
tensor(1160.6467, device='cuda:0')
tensor(1485.9077, device='cuda:0')
tensor(1524.8274, device='cuda:0')
tensor(799.7698, device='cuda:0')
tensor(1781.2347, device='cuda:0')
tensor(1777.2513, device='cuda:0')
tensor(1018.0286, device='cuda:0')
tensor(1285.6367, device='cuda:0')
tensor(1844.4482, device='cuda:0')
tensor(1399.8917, device='cuda:0')
tensor(1515.6650, device='cuda:0')
tensor(2117.8164, device='cuda:0')
tensor(2040.8171, device='cuda:0')
tensor(1399.6111, device='cuda:0')
tensor(1625.6210, device='cuda:0')
tensor(2090.9307, device='cuda:0')
tensor(1327.4584, device='cuda:0')
tensor(1517.7445, device='cuda:0')
tensor(1044.8726, device='cuda:0')
tensor(1106.2052, device='cuda:0')
tensor(1920.9557, device='cuda:0')
tensor(1283.2642, device='cuda:0')
tensor(2081.2639, device='cuda:0')
tensor(2113.6260, device='cuda:0')
tensor(1648.7013, device='cuda:0')
tensor(1288.3136, device='cuda:0')
tensor(1522.7072, dev

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(413.2243, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(677.7946, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(611.7733, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(475.5352, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(731.4135, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(694.9066, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(432.1163, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(883.3817, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(852.5846, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(840.8398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(552.4362, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(865.4750, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(776.3097, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(840.0082, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(770.3018, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(751.6979, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(1668.1906, device='cuda:0')
tensor(784.1608, device='cuda:0')
tensor(648.3524, device='cuda:0')
tensor(819.3823, device='cuda:0')
tensor(610.1532, device='cuda:0')
tensor(929.5074, device='cuda:0')
tensor(538.2639, device='cuda:0')
tensor(439.1296, device='cuda:0')
tensor(465.1387, device='cuda:0')
tensor(378.5724, device='cuda:0')
tensor(452.7823, device='cuda:0')
tensor(552.4460, device='cuda:0')
tensor(422.4191, device='cuda:0')
tensor(478.5702, device='cuda:0')
tensor(352.5464, device='cuda:0')
tensor(354.6708, device='cuda:0')
tensor(519.8527, device='cuda:0')
tensor(482.0282, device='cuda:0')
tensor(515.4612, device='cuda:0')
tensor(324.4727, device='cuda:0')
tensor(402.2593, device='cuda:0')
tensor(336.7810, device='cuda:0')
tensor(416.1153, device='cuda:0')
tensor(371.7334, device='cuda:0')
tensor(661.5190, device='cuda:0')
tensor(429.4545, device='cuda:0')
tensor(408.4918, device='cuda:0')
tensor(617.4304, device='cuda:0')
tensor(695.3835, device='cuda:0')
tensor(471.30

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(545.2899, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(711.5064, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(613.4589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(437.4561, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(794.6086, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(785.1316, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(470.8938, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(950.3425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(789.5467, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(389.2733, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(722.2048, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(667.6734, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(603.0209, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(771.7395, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(910.1071, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(469.0659, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(6

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(2646.3787, device='cuda:0')
tensor(1223.7014, device='cuda:0')
tensor(1112.9458, device='cuda:0')
tensor(1270.2223, device='cuda:0')
tensor(812.5169, device='cuda:0')
tensor(1708.3884, device='cuda:0')
tensor(709.9615, device='cuda:0')
tensor(712.1427, device='cuda:0')
tensor(513.8000, device='cuda:0')
tensor(563.9261, device='cuda:0')
tensor(555.3395, device='cuda:0')
tensor(832.9300, device='cuda:0')
tensor(418.9029, device='cuda:0')
tensor(683.6247, device='cuda:0')
tensor(504.0028, device='cuda:0')
tensor(473.0517, device='cuda:0')
tensor(658.2523, device='cuda:0')
tensor(523.2798, device='cuda:0')
tensor(754.3153, device='cuda:0')
tensor(612.7119, device='cuda:0')
tensor(762.8835, device='cuda:0')
tensor(443.5186, device='cuda:0')
tensor(539.9506, device='cuda:0')
tensor(517.4756, device='cuda:0')
tensor(525.7015, device='cuda:0')
tensor(646.9028, device='cuda:0')
tensor(406.4738, device='cuda:0')
tensor(860.5731, device='cuda:0')
tensor(1026.0862, device='cuda:0')
tensor(7

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(751.4079, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(600.9031, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(560.1335, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(521.8080, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(420.3116, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(478.0213, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(818.9036, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(737.2116, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1196.8053, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(688.7274, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(642.9535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(706.2284, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(611.9633, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(684.8873, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(662.0355, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(403.6059, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(1470.9501, device='cuda:0')
tensor(514.5150, device='cuda:0')
tensor(553.0597, device='cuda:0')
tensor(699.3420, device='cuda:0')
tensor(529.4133, device='cuda:0')
tensor(811.2139, device='cuda:0')
tensor(464.1868, device='cuda:0')
tensor(477.2085, device='cuda:0')
tensor(259.1265, device='cuda:0')
tensor(373.4510, device='cuda:0')
tensor(686.6162, device='cuda:0')
tensor(621.6097, device='cuda:0')
tensor(398.1490, device='cuda:0')
tensor(393.5947, device='cuda:0')
tensor(387.9916, device='cuda:0')
tensor(307.7677, device='cuda:0')
tensor(472.4731, device='cuda:0')
tensor(607.1508, device='cuda:0')
tensor(324.4927, device='cuda:0')
tensor(422.3179, device='cuda:0')
tensor(516.6617, device='cuda:0')
tensor(294.4986, device='cuda:0')
tensor(415.1376, device='cuda:0')
tensor(379.8698, device='cuda:0')
tensor(634.7877, device='cuda:0')
tensor(512.0881, device='cuda:0')
tensor(371.1782, device='cuda:0')
tensor(467.3345, device='cuda:0')
tensor(529.4745, device='cuda:0')
tensor(554.59

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(751.5050, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(680.0162, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(728.2444, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(393.9962, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(553.2882, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(881.3723, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(763.1702, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(974.8693, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(390.9706, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(625.8666, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(467.8118, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.5900, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(656.6282, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(465.1991, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(375.1377, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(471.3228, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(801.9044, device='cuda:0')
tensor(392.2620, device='cuda:0')
tensor(467.8840, device='cuda:0')
tensor(572.2905, device='cuda:0')
tensor(334.5321, device='cuda:0')
tensor(607.2800, device='cuda:0')
tensor(278.4655, device='cuda:0')
tensor(451.6837, device='cuda:0')
tensor(221.4886, device='cuda:0')
tensor(275.5912, device='cuda:0')
tensor(472.7562, device='cuda:0')
tensor(436.8513, device='cuda:0')
tensor(310.7085, device='cuda:0')
tensor(366.6690, device='cuda:0')
tensor(481.5143, device='cuda:0')
tensor(272.0594, device='cuda:0')
tensor(464.1030, device='cuda:0')
tensor(472.1583, device='cuda:0')
tensor(298.3282, device='cuda:0')
tensor(388.8302, device='cuda:0')
tensor(264.8753, device='cuda:0')
tensor(274.8329, device='cuda:0')
tensor(371.0855, device='cuda:0')
tensor(207.8875, device='cuda:0')
tensor(613.7177, device='cuda:0')
tensor(449.2830, device='cuda:0')
tensor(287.0883, device='cuda:0')
tensor(342.8262, device='cuda:0')
tensor(460.3700, device='cuda:0')
tensor(338.782

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(830.8978, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(507.7073, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(327.1681, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(735.7283, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(605.9496, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(601.4604, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(536.3490, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(345.5708, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(688.4233, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(712.5430, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(511.8760, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(645.9078, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(608.3271, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(664.7707, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(802.4910, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.7516, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(7

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(3579.9929, device='cuda:0')
tensor(766.2780, device='cuda:0')
tensor(751.9341, device='cuda:0')
tensor(1178.1323, device='cuda:0')
tensor(1114.8251, device='cuda:0')
tensor(1483.2407, device='cuda:0')
tensor(1120.2953, device='cuda:0')
tensor(1272.1932, device='cuda:0')
tensor(1723.1301, device='cuda:0')
tensor(1299.1881, device='cuda:0')
tensor(1365.7275, device='cuda:0')
tensor(1432.3994, device='cuda:0')
tensor(1129.2760, device='cuda:0')
tensor(1618.1105, device='cuda:0')
tensor(1094.2106, device='cuda:0')
tensor(1232.4780, device='cuda:0')
tensor(1189.6758, device='cuda:0')
tensor(1237.0461, device='cuda:0')
tensor(1677.5162, device='cuda:0')
tensor(1037.7324, device='cuda:0')
tensor(1309.3303, device='cuda:0')
tensor(1439.2869, device='cuda:0')
tensor(1216.1437, device='cuda:0')
tensor(1330.9614, device='cuda:0')
tensor(1333.2151, device='cuda:0')
tensor(1249.3788, device='cuda:0')
tensor(1176.3564, device='cuda:0')
tensor(1735.9348, device='cuda:0')
tensor(1931.6847, devi

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(557.6080, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(975.8004, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(469.2052, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1116.8715, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(510.5941, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(303.6421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(466.6161, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(743.1861, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(811.6287, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(561.5742, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(643.1849, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(388.6241, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(764.6502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(479.8445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(346.5379, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(306.9552, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(1130.9282, device='cuda:0')
tensor(442.2305, device='cuda:0')
tensor(431.0505, device='cuda:0')
tensor(486.4566, device='cuda:0')
tensor(384.8137, device='cuda:0')
tensor(391.2020, device='cuda:0')
tensor(341.8858, device='cuda:0')
tensor(516.0870, device='cuda:0')
tensor(297.3530, device='cuda:0')
tensor(333.8061, device='cuda:0')
tensor(496.8480, device='cuda:0')
tensor(415.5269, device='cuda:0')
tensor(339.3787, device='cuda:0')
tensor(327.1478, device='cuda:0')
tensor(439.6043, device='cuda:0')
tensor(246.4051, device='cuda:0')
tensor(509.2045, device='cuda:0')
tensor(385.5716, device='cuda:0')
tensor(244.9749, device='cuda:0')
tensor(323.4827, device='cuda:0')
tensor(265.1730, device='cuda:0')
tensor(231.8722, device='cuda:0')
tensor(448.1825, device='cuda:0')
tensor(287.3116, device='cuda:0')
tensor(576.1297, device='cuda:0')
tensor(524.3439, device='cuda:0')
tensor(308.5778, device='cuda:0')
tensor(449.6543, device='cuda:0')
tensor(492.5474, device='cuda:0')
tensor(383.32

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(556.3306, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(881.9718, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(516.3396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(544.7797, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(450.3319, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(473.8141, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(507.6637, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(376.6282, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(570.4115, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(435.0957, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(584.6447, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(749.3208, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(758.5656, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(449.1829, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(586.3069, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(898.3699, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(1040.6660, device='cuda:0')
tensor(427.5147, device='cuda:0')
tensor(613.6866, device='cuda:0')
tensor(584.4913, device='cuda:0')
tensor(513.3408, device='cuda:0')
tensor(550.8066, device='cuda:0')
tensor(393.5978, device='cuda:0')
tensor(678.1610, device='cuda:0')
tensor(208.0377, device='cuda:0')
tensor(210.8614, device='cuda:0')
tensor(583.7856, device='cuda:0')
tensor(454.5326, device='cuda:0')
tensor(343.8773, device='cuda:0')
tensor(362.9073, device='cuda:0')
tensor(637.4550, device='cuda:0')
tensor(359.4958, device='cuda:0')
tensor(499.7792, device='cuda:0')
tensor(637.7666, device='cuda:0')
tensor(297.1042, device='cuda:0')
tensor(391.9011, device='cuda:0')
tensor(380.5034, device='cuda:0')
tensor(172.4595, device='cuda:0')
tensor(532.2578, device='cuda:0')
tensor(223.9754, device='cuda:0')
tensor(653.2048, device='cuda:0')
tensor(466.2027, device='cuda:0')
tensor(434.9046, device='cuda:0')
tensor(372.4800, device='cuda:0')
tensor(292.6947, device='cuda:0')
tensor(473.12

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(588.9507, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(374.9672, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(788.8534, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(689.2949, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(813.1445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(517.9724, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(365.4881, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(602.5147, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(572.6038, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(526.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.1660, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(518.6703, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(577.7089, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(315.7047, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(664.0935, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(764.6946, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(1141.7759, device='cuda:0')
tensor(851.5790, device='cuda:0')
tensor(888.4897, device='cuda:0')
tensor(863.4982, device='cuda:0')
tensor(547.3110, device='cuda:0')
tensor(318.4387, device='cuda:0')
tensor(650.3050, device='cuda:0')
tensor(745.0802, device='cuda:0')
tensor(419.0567, device='cuda:0')
tensor(362.0685, device='cuda:0')
tensor(866.0641, device='cuda:0')
tensor(726.6804, device='cuda:0')
tensor(459.5298, device='cuda:0')
tensor(367.3936, device='cuda:0')
tensor(866.4463, device='cuda:0')
tensor(449.8936, device='cuda:0')
tensor(646.8842, device='cuda:0')
tensor(770.4170, device='cuda:0')
tensor(447.4648, device='cuda:0')
tensor(575.9302, device='cuda:0')
tensor(379.6845, device='cuda:0')
tensor(336.7500, device='cuda:0')
tensor(551.5571, device='cuda:0')
tensor(398.8508, device='cuda:0')
tensor(795.3589, device='cuda:0')
tensor(694.5685, device='cuda:0')
tensor(571.3962, device='cuda:0')
tensor(533.2260, device='cuda:0')
tensor(502.9681, device='cuda:0')
tensor(603.63

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(771.3301, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(485.0477, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(261.0678, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(722.1483, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(685.2702, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(974.4596, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(539.4430, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(804.1125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(320.1702, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(643.8400, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(349.9575, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1081.9714, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(211.4191, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(348.0266, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(616.3369, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(469.9611, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(952.7700, device='cuda:0')
tensor(668.6642, device='cuda:0')
tensor(415.1064, device='cuda:0')
tensor(385.5947, device='cuda:0')
tensor(322.5779, device='cuda:0')
tensor(417.8036, device='cuda:0')
tensor(583.3918, device='cuda:0')
tensor(596.6771, device='cuda:0')
tensor(342.1904, device='cuda:0')
tensor(292.0874, device='cuda:0')
tensor(852.1335, device='cuda:0')
tensor(457.2632, device='cuda:0')
tensor(453.1548, device='cuda:0')
tensor(564.7635, device='cuda:0')
tensor(915.9622, device='cuda:0')
tensor(434.8052, device='cuda:0')
tensor(507.0309, device='cuda:0')
tensor(938.2267, device='cuda:0')
tensor(387.4293, device='cuda:0')
tensor(462.2465, device='cuda:0')
tensor(250.7735, device='cuda:0')
tensor(388.5721, device='cuda:0')
tensor(740.3113, device='cuda:0')
tensor(384.4163, device='cuda:0')
tensor(771.7861, device='cuda:0')
tensor(708.5153, device='cuda:0')
tensor(547.9277, device='cuda:0')
tensor(452.7785, device='cuda:0')
tensor(412.6131, device='cuda:0')
tensor(730.236

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(428.5521, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(575.7151, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(846.1846, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(271.2177, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(499.3027, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(500.7391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(723.1653, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(460.0084, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(634.6071, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(480.0413, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(484.4884, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(482.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(485.6413, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(648.7944, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(496.8633, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(482.0900, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(6

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(2079.9431, device='cuda:0')
tensor(463.8480, device='cuda:0')
tensor(394.1719, device='cuda:0')
tensor(700.0889, device='cuda:0')
tensor(574.3701, device='cuda:0')
tensor(856.5127, device='cuda:0')
tensor(461.9484, device='cuda:0')
tensor(529.0839, device='cuda:0')
tensor(455.2088, device='cuda:0')
tensor(451.7502, device='cuda:0')
tensor(501.7073, device='cuda:0')
tensor(758.4053, device='cuda:0')
tensor(398.4674, device='cuda:0')
tensor(437.9870, device='cuda:0')
tensor(445.6780, device='cuda:0')
tensor(307.7224, device='cuda:0')
tensor(335.8547, device='cuda:0')
tensor(415.0966, device='cuda:0')
tensor(524.8562, device='cuda:0')
tensor(412.4939, device='cuda:0')
tensor(247.1262, device='cuda:0')
tensor(396.5015, device='cuda:0')
tensor(445.3678, device='cuda:0')
tensor(358.3302, device='cuda:0')
tensor(681.2811, device='cuda:0')
tensor(329.6442, device='cuda:0')
tensor(384.4880, device='cuda:0')
tensor(722.4106, device='cuda:0')
tensor(1039.7212, device='cuda:0')
tensor(310.7

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(372.7879, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(597.9687, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(370.4055, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(409.3068, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(523.7100, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(594.8851, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(603.7261, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(788.5103, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(194.6835, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(783.1144, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(300.8557, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(854.4576, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(323.2791, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(670.8651, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(616.8989, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(548.8209, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(1693.9784, device='cuda:0')
tensor(1541.5430, device='cuda:0')
tensor(411.0166, device='cuda:0')
tensor(412.8377, device='cuda:0')
tensor(653.7889, device='cuda:0')
tensor(610.3450, device='cuda:0')
tensor(681.6840, device='cuda:0')
tensor(912.6158, device='cuda:0')
tensor(377.9668, device='cuda:0')
tensor(396.0560, device='cuda:0')
tensor(839.2154, device='cuda:0')
tensor(631.1416, device='cuda:0')
tensor(692.3263, device='cuda:0')
tensor(807.3137, device='cuda:0')
tensor(970.5000, device='cuda:0')
tensor(711.9245, device='cuda:0')
tensor(817.2568, device='cuda:0')
tensor(856.7526, device='cuda:0')
tensor(381.5738, device='cuda:0')
tensor(710.1708, device='cuda:0')
tensor(416.2599, device='cuda:0')
tensor(522.4961, device='cuda:0')
tensor(806.2626, device='cuda:0')
tensor(544.4017, device='cuda:0')
tensor(1238.0156, device='cuda:0')
tensor(860.7125, device='cuda:0')
tensor(746.2072, device='cuda:0')
tensor(419.7642, device='cuda:0')
tensor(560.8601, device='cuda:0')
tensor(747.

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(393.5229, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(467.8184, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(468.3943, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(351.1749, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(467.6361, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(389.9116, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(465.3654, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(625.3738, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(322.2873, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(714.3267, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(426.9567, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(527.4957, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(456.7724, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(357.9052, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(582.7483, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(411.6675, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(6

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(1373.5261, device='cuda:0')
tensor(479.3589, device='cuda:0')
tensor(338.3004, device='cuda:0')
tensor(575.4996, device='cuda:0')
tensor(391.2397, device='cuda:0')
tensor(388.2808, device='cuda:0')
tensor(434.1644, device='cuda:0')
tensor(292.4698, device='cuda:0')
tensor(289.3738, device='cuda:0')
tensor(311.0880, device='cuda:0')
tensor(410.4089, device='cuda:0')
tensor(471.3991, device='cuda:0')
tensor(288.9565, device='cuda:0')
tensor(239.0346, device='cuda:0')
tensor(269.0986, device='cuda:0')
tensor(248.9664, device='cuda:0')
tensor(161.5289, device='cuda:0')
tensor(354.7503, device='cuda:0')
tensor(305.6648, device='cuda:0')
tensor(228.5479, device='cuda:0')
tensor(331.6763, device='cuda:0')
tensor(259.7728, device='cuda:0')
tensor(349.2570, device='cuda:0')
tensor(199.1184, device='cuda:0')
tensor(533.8347, device='cuda:0')
tensor(313.8047, device='cuda:0')
tensor(269.2650, device='cuda:0')
tensor(462.6309, device='cuda:0')
tensor(596.4138, device='cuda:0')
tensor(340.40

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(359.2526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(345.8386, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(247.0005, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(584.8239, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(565.8255, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(372.8601, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.6661, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(795.9707, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(573.3419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(649.5082, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(484.6697, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(422.8917, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(429.2143, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(614.1329, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(554.3428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1130.9396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(593.1486, device='cuda:0')
tensor(547.6540, device='cuda:0')
tensor(586.9241, device='cuda:0')
tensor(430.7138, device='cuda:0')
tensor(402.0117, device='cuda:0')
tensor(304.6794, device='cuda:0')
tensor(321.1999, device='cuda:0')
tensor(317.8998, device='cuda:0')
tensor(215.3744, device='cuda:0')
tensor(274.5817, device='cuda:0')
tensor(281.0613, device='cuda:0')
tensor(241.3836, device='cuda:0')
tensor(279.6647, device='cuda:0')
tensor(268.2380, device='cuda:0')
tensor(384.0065, device='cuda:0')
tensor(207.4618, device='cuda:0')
tensor(308.2643, device='cuda:0')
tensor(429.9532, device='cuda:0')
tensor(152.1632, device='cuda:0')
tensor(270.2023, device='cuda:0')
tensor(237.6384, device='cuda:0')
tensor(239.9242, device='cuda:0')
tensor(266.6920, device='cuda:0')
tensor(230.5348, device='cuda:0')
tensor(543.8607, device='cuda:0')
tensor(350.6364, device='cuda:0')
tensor(199.8451, device='cuda:0')
tensor(335.5269, device='cuda:0')
tensor(397.1608, device='cuda:0')
tensor(432.802

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(554.6193, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(424.2797, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(597.1182, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(574.4979, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(634.3856, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(495.1862, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(626.0217, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(494.5822, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(349.9449, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(461.9002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(401.5776, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(783.2832, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(511.0701, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(543.3931, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(467.0743, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(377.1812, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(2946.3574, device='cuda:0')
tensor(540.5236, device='cuda:0')
tensor(398.5631, device='cuda:0')
tensor(630.7726, device='cuda:0')
tensor(505.7562, device='cuda:0')
tensor(890.2014, device='cuda:0')
tensor(647.2578, device='cuda:0')
tensor(497.6176, device='cuda:0')
tensor(625.0056, device='cuda:0')
tensor(520.5922, device='cuda:0')
tensor(789.7090, device='cuda:0')
tensor(1198.2983, device='cuda:0')
tensor(464.2877, device='cuda:0')
tensor(576.9922, device='cuda:0')
tensor(631.9988, device='cuda:0')
tensor(464.7750, device='cuda:0')
tensor(476.1512, device='cuda:0')
tensor(611.5568, device='cuda:0')
tensor(769.8626, device='cuda:0')
tensor(656.3402, device='cuda:0')
tensor(489.8975, device='cuda:0')
tensor(742.2633, device='cuda:0')
tensor(574.6431, device='cuda:0')
tensor(602.0527, device='cuda:0')
tensor(908.5244, device='cuda:0')
tensor(424.7654, device='cuda:0')
tensor(437.8798, device='cuda:0')
tensor(984.9477, device='cuda:0')
tensor(1264.0579, device='cuda:0')
tensor(535.

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(532.2509, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(311.0307, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(415.6509, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(387.6821, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(468.7414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(583.9935, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(445.8667, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(692.5863, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(335.5324, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(364.3881, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(472.9764, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(532.6205, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(390.2920, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(682.6836, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(536.6857, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(463.1003, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(887.7639, device='cuda:0')
tensor(418.5897, device='cuda:0')
tensor(327.3000, device='cuda:0')
tensor(290.4290, device='cuda:0')
tensor(275.0319, device='cuda:0')
tensor(259.7976, device='cuda:0')
tensor(544.7998, device='cuda:0')
tensor(599.9069, device='cuda:0')
tensor(267.8862, device='cuda:0')
tensor(343.3885, device='cuda:0')
tensor(628.1689, device='cuda:0')
tensor(499.0720, device='cuda:0')
tensor(390.4484, device='cuda:0')
tensor(601.0951, device='cuda:0')
tensor(604.4701, device='cuda:0')
tensor(396.5438, device='cuda:0')
tensor(526.0503, device='cuda:0')
tensor(618.5471, device='cuda:0')
tensor(376.3130, device='cuda:0')
tensor(391.3440, device='cuda:0')
tensor(302.9597, device='cuda:0')
tensor(271.2913, device='cuda:0')
tensor(557.4131, device='cuda:0')
tensor(330.7887, device='cuda:0')
tensor(722.6699, device='cuda:0')
tensor(661.8873, device='cuda:0')
tensor(361.2108, device='cuda:0')
tensor(337.3028, device='cuda:0')
tensor(360.7141, device='cuda:0')
tensor(493.283

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(409.5901, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(468.3544, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(478.8525, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(486.3220, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(666.8962, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(365.9414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(454.5773, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(692.6019, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(394.3279, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(415.0654, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(546.7535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(491.1233, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(789.6794, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(411.0772, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(649.3066, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(789.9181, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(7

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(575.1151, device='cuda:0')
tensor(329.9257, device='cuda:0')
tensor(346.4813, device='cuda:0')
tensor(388.2867, device='cuda:0')
tensor(257.5529, device='cuda:0')
tensor(406.0024, device='cuda:0')
tensor(357.5222, device='cuda:0')
tensor(352.2145, device='cuda:0')
tensor(338.4632, device='cuda:0')
tensor(323.8478, device='cuda:0')
tensor(357.6205, device='cuda:0')
tensor(465.0020, device='cuda:0')
tensor(366.4435, device='cuda:0')
tensor(366.6667, device='cuda:0')
tensor(276.8474, device='cuda:0')
tensor(229.2959, device='cuda:0')
tensor(253.7720, device='cuda:0')
tensor(279.6785, device='cuda:0')
tensor(321.4595, device='cuda:0')
tensor(241.7177, device='cuda:0')
tensor(300.2667, device='cuda:0')
tensor(268.7307, device='cuda:0')
tensor(334.7488, device='cuda:0')
tensor(279.0021, device='cuda:0')
tensor(574.8096, device='cuda:0')
tensor(346.5092, device='cuda:0')
tensor(348.6994, device='cuda:0')
tensor(417.9161, device='cuda:0')
tensor(605.5022, device='cuda:0')
tensor(291.286

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(423.3809, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(345.4298, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(443.6070, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(598.7456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(549.7726, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(455.3999, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(404.2120, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(433.0168, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(695.2780, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(511.3846, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(392.5201, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(330.5287, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(400.3895, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(354.0126, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.6962, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(532.3494, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(1239.3401, device='cuda:0')
tensor(510.5858, device='cuda:0')
tensor(735.7507, device='cuda:0')
tensor(782.0267, device='cuda:0')
tensor(575.2018, device='cuda:0')
tensor(874.3915, device='cuda:0')
tensor(321.3372, device='cuda:0')
tensor(368.3624, device='cuda:0')
tensor(185.3423, device='cuda:0')
tensor(326.1389, device='cuda:0')
tensor(475.4730, device='cuda:0')
tensor(554.9232, device='cuda:0')
tensor(258.7485, device='cuda:0')
tensor(283.5959, device='cuda:0')
tensor(274.5439, device='cuda:0')
tensor(333.9351, device='cuda:0')
tensor(391.1228, device='cuda:0')
tensor(363.0359, device='cuda:0')
tensor(313.7503, device='cuda:0')
tensor(235.8362, device='cuda:0')
tensor(423.9695, device='cuda:0')
tensor(305.1800, device='cuda:0')
tensor(405.7095, device='cuda:0')
tensor(269.4347, device='cuda:0')
tensor(480.7361, device='cuda:0')
tensor(440.0251, device='cuda:0')
tensor(198.3487, device='cuda:0')
tensor(434.7961, device='cuda:0')
tensor(581.2252, device='cuda:0')
tensor(313.20

  0%|          | 0/188 [00:00<?, ?it/s]

tensor(715.2408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(305.5605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(433.0699, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(560.7145, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(407.6928, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(624.1744, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(376.1104, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(547.3158, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.3271, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(296.7914, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(329.6934, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(289.7300, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(361.8477, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(605.5015, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(353.5480, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(499.2801, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

  0%|          | 0/32 [00:00<?, ?it/s]

tensor(758.6187, device='cuda:0')
tensor(656.5195, device='cuda:0')
tensor(427.0017, device='cuda:0')
tensor(457.4674, device='cuda:0')
tensor(325.6287, device='cuda:0')
tensor(420.4635, device='cuda:0')
tensor(328.1499, device='cuda:0')
tensor(353.3926, device='cuda:0')
tensor(247.4022, device='cuda:0')
tensor(270.9248, device='cuda:0')
tensor(374.0406, device='cuda:0')
tensor(372.1169, device='cuda:0')
tensor(240.3438, device='cuda:0')
tensor(257.2220, device='cuda:0')
tensor(262.2682, device='cuda:0')
tensor(184.5726, device='cuda:0')
tensor(269.8996, device='cuda:0')
tensor(296.4832, device='cuda:0')
tensor(226.1192, device='cuda:0')
tensor(223.3875, device='cuda:0')
tensor(245.1852, device='cuda:0')
tensor(164.1450, device='cuda:0')
tensor(247.5465, device='cuda:0')
tensor(222.7758, device='cuda:0')
tensor(512.6537, device='cuda:0')
tensor(359.1945, device='cuda:0')
tensor(195.0766, device='cuda:0')
tensor(292.0749, device='cuda:0')
tensor(218.6568, device='cuda:0')
tensor(326.718

In [None]:
# Load your final model, that we will use for the rest of the PSET.
if early_stopping:
  weights_file = save_dir + '/weights_best_val_acc.pt'
else:
  weights_file = save_dir + '/weights_last.pt'
model = ImageModel()
model = model.to(device)
model.load_state_dict(torch.load(weights_file))

<All keys matched successfully>

In [None]:
def evaluate(model, dataloader, device):

    '''
    A function to return the predicted age and true age (for evaluation). 
    
    Args: 
        model (torch.nn.Module): model 
        dataloader (torch.utils.data.Dataloader): DataLoader object for the test data
        device (str): Your device
        
    Returns: 
        (np.array, np.array): true age, predicted age and mean loss
    '''
    pred_list = []
    y_list = []
    mean_loss = []
    with torch.no_grad():
        model.eval()                
        for batch in tqdm(dataloader):
            epoch_loss = []
            image = batch['image'].to(device)
            age = batch['age'].to(device)
            pred = model(image).squeeze()
            mean_loss.append((pred-age).pow(2).mean().cpu())
            pred_list.extend(list(pred.cpu().numpy()))
            y_list.extend(list(age.cpu().numpy()))
            
            # evaluate your model here
    return y_list, pred_list, np.array(mean_loss).mean()

In [None]:
y_list, pred_list, loss = evaluate(model, test_dataloader, device)
print(loss)

  0%|          | 0/176 [00:00<?, ?it/s]

324.86905


In [None]:
# Dropout  = 0.25, test_mse = 324.82, num_epochs = 45
# Dropout = 0.3, test_mse = 364.57, num_epochs = 55

In [None]:
def apply_dropout(m):
    if type(m) == nn.Dropout:
        m.train()

model.eval()

model.apply(apply_dropout)

In [None]:
y_list_list = []
pred_list_list = []
loss_list = []
model.eval()
# generate uncertainty interval for dropout. Change to 200 if you have more time. Takes 5 hours to run
# on colab. Another approach is to run in several iteration and keep saving the result offline and later
# merging the predictions. Need to make sure, every time dataloader is run in same order, else can't 
# merge results. For this, we need to make sure that data from kaggle is divided into train and test and validation
# set in same way everytime we run the notebook. To achieve this, I saved the ids which went into each of the sets
# and split the data always in same way.
for i in tqdm(range(25)):
  y_list, pred_list, loss = evaluate(model, test_dataloader, device)
  y_list_list.append(y_list)
  pred_list_list.append(pred_list)
  loss_list.append(loss)
  print(loss_list[i])

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/176 [00:00<?, ?it/s]

323.1596


  0%|          | 0/176 [00:00<?, ?it/s]

319.846


  0%|          | 0/176 [00:00<?, ?it/s]

322.33154


  0%|          | 0/176 [00:00<?, ?it/s]

322.49188


  0%|          | 0/176 [00:00<?, ?it/s]

321.84024


  0%|          | 0/176 [00:00<?, ?it/s]

322.50818


  0%|          | 0/176 [00:00<?, ?it/s]

328.1156


  0%|          | 0/176 [00:00<?, ?it/s]

328.71115


  0%|          | 0/176 [00:00<?, ?it/s]

324.13034


  0%|          | 0/176 [00:00<?, ?it/s]

326.12558


  0%|          | 0/176 [00:00<?, ?it/s]

318.58948


  0%|          | 0/176 [00:00<?, ?it/s]

324.7638


  0%|          | 0/176 [00:00<?, ?it/s]

330.81625


  0%|          | 0/176 [00:00<?, ?it/s]

322.37997


  0%|          | 0/176 [00:00<?, ?it/s]

320.58704


  0%|          | 0/176 [00:00<?, ?it/s]

325.08524


  0%|          | 0/176 [00:00<?, ?it/s]

324.64685


  0%|          | 0/176 [00:00<?, ?it/s]

316.7707


  0%|          | 0/176 [00:00<?, ?it/s]

322.73825


  0%|          | 0/176 [00:00<?, ?it/s]

322.8622


  0%|          | 0/176 [00:00<?, ?it/s]

322.9717


  0%|          | 0/176 [00:00<?, ?it/s]

324.2327


  0%|          | 0/176 [00:00<?, ?it/s]

320.44446


  0%|          | 0/176 [00:00<?, ?it/s]

319.74423


  0%|          | 0/176 [00:00<?, ?it/s]

321.54666


In [None]:
data_age = []
id_list = []
for data in test_dataloader:
  data_age.extend(list(data['age'].cpu().numpy()))
  id_list.extend(list(data['id'].cpu().numpy()))

In [None]:
import pickle
import numpy as np

In [None]:
with open('dropout_25_25_2.pkl', 'wb') as f:
  pickle.dump({"pred":pred_list_list, "age": y_list_list, "id": id_list}, f)
files.download('dropout_25_25_2.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import files

In [None]:
# final_dict["pred"] = np.concatenate((np.array(dict_new_1["pred"]), np.array(dict_new_2["pred"]),
#                 ), axis = 0)
# final_dict["age"] = dict_new_2["age"]
# final_dict["id"] = dict_new_2["id"]
# with open('dropout_25_200.pkl', 'wb') as f:
#   pickle.dump(final_dict, f)
# files.download('dropout_25_200.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Saved datafile of 200 runs on .3 dropout.
with open('dropout_30_200.pkl', 'rb') as f:
   final_dict = pickle.load(f)


In [None]:
def build_quantile(array, quantile_value):
  """
  Builds quantile for an n_vals X n_sample size array for given quantile value (lower_quantile, upper_quantile)
  Returns 2 X n_sample quantiles
  """
  array = np.sort(array, axis = 0)
  length = array.shape[0]
  indices = (round(quantile_value[0]*length)-1, round(quantile_value[1]*length)-1)
  
  return array[indices, :]

def get_coverage(quantile_array, target_array):
  """
  Returns coverage for a quantile array and true value array input
  
  """
  return np.mean(np.logical_and((quantile_array[0, :]<=target_array),(quantile_array[1, :]>=target_array)))

def get_quantile_width(quantile_array):
  return np.mean(quantile_array[1]-quantile_array[0])

def get_qhat(quantile_array, target_array, alpha):
  n = len(target_array)
  arr = np.zeros(quantile_array.shape)
  arr[0, :] = np.array(quantile_array[0, :] - target_array)
  arr[1, :] = np.array(target_array - quantile_array[1, :])                  
  
  arr = arr.max(axis=0)
  return np.quantile(arr, np.ceil((n+1)*(1-alpha))/n)

def conformilize_quantiles(quantile_array, qhat):
  arr = np.zeros(quantile_array.shape)
  arr[0, :] = np.array(quantile_array[0, :] - qhat)
  arr[1, :] = np.array(quantile_array[1, :] + qhat)
  return arr

In [None]:
def generate_conformal_sets(quantile_array, target, alpha,
                            caliberation_size = 500, n_simulation=10000):
  conformal_coverage = []
  quantile_width = []
  naive_coverage = []
  qhat_list = []
  for i in range(n_simulation):
    indices = np.arange(len(target))
    np.random.shuffle(indices)
    target = np.array(target)[indices]
    quantile_array = quantile_array[:, indices]
    caliberation_quantiles = quantile_array[:, 0:caliberation_size]
    test_quantiles = quantile_array[:, caliberation_size:]
    caliberation_target = target[0:caliberation_size]
    test_target = target[caliberation_size:]
    naive_coverage.append(get_coverage(test_quantiles, test_target))
    qhat = get_qhat(caliberation_quantiles, caliberation_target, alpha = alpha)
    qhat_list.append(qhat)
    conformal_quantiles = conformilize_quantiles(test_quantiles, qhat)
    conformal_coverage.append(get_coverage(conformal_quantiles, test_target))
    quantile_width.append(get_quantile_width(conformal_quantiles))
  mean_qhat = round(np.mean(np.abs(np.array(qhat_list))), 3)
  std_qhat = round(np.std(np.abs(np.array(qhat_list))), 3)
  print("Mean and std for qhat for alpha {} : {} and {} resp.".format(alpha, mean_qhat, std_qhat))
  return quantile_width, conformal_coverage, naive_coverage


In [None]:
def plotting_func(quantile_width, naive_coverage, conformal_coverage, bins = 20):
  figure, axis = plt.subplots(1, 3, figsize=(16,4))
 
  axis[0].hist(quantile_width, bins = bins)
  axis[0].set_title("Conformal Quantile Width")
    
  axis[1].hist(naive_coverage, bins = bins)
  axis[1].set_title("Naive Dropout Coverage")
    
  axis[2].hist(conformal_coverage, bins = bins)
  axis[2].set_title("Conformalized Coverage")
  plt.show()
  print("The Quantile width is {} and std is {}".format
      (round(np.array(quantile_width).mean(), 3), round(np.array(quantile_width).std(), 3)))
  print("The Mean Naive coverage is {} with a std of {}".format(
      round(np.array(naive_coverage).mean(), 3), round(np.array(naive_coverage).std(), 3)))
  print("The Mean Conformal coverage is {} with a std of {}".format(
      round(np.array(conformal_coverage).mean(), 3), round(np.array(conformal_coverage).std(), 3)))
  
  print("")

In [None]:
def build_df(final_dict, df, alpha):
  dropout_quantiles = build_quantile(final_dict["pred"], (alpha/2, 1-alpha/2))
  pred_df = pd.DataFrame(data=(dropout_quantiles.transpose()
                              ), columns=["quantiles_lower", "quantiles_upper"])
  pred_df["id"] = final_dict["id"]
  pred_df["age"] = final_dict["age"]
  pred_df = pred_df.merge(df[["id", "male"]], how = "inner", on = ["id"])
  pred_df = pred_df.set_index(["id"])
  return pred_df

In [None]:
def get_conditional_coverage_gender(df, alpha, caliberation_size = 500, n_simulation=100):
  """
  df:Dataframe with columns "age", "male", "qantiles_lower", "quantiles_upper" 
  alpha: error rate
  caliberation_size = 500
  n_simulations
  returns: width of predict set, conformal coverage and naive coverage for (male, female)
  """
  conformal_coverage = []
  quantile_width = []
  naive_coverage = []
  qhat_list = []
  for i in range(n_simulation):
    df = df.sample(frac = 1)
    
    gender_array = np.array(df.male)
    target = np.array(list(df["age"]))

    quantile_array = np.array((df.quantiles_lower, df.quantiles_upper))
    caliberation_quantiles = quantile_array[:, 0:caliberation_size]
    test_quantiles = quantile_array[:, caliberation_size:]
    caliberation_target = target[0:caliberation_size]
    test_target = target[caliberation_size:]
    gender_array = gender_array[caliberation_size:]

    naive_coverage_male = get_coverage(test_quantiles[:, (gender_array==True)],
                                       test_target[(gender_array==True)])
    naive_coverage_female = get_coverage(test_quantiles[:, (gender_array==False)],
                                       test_target[(gender_array==False)])
    naive_coverage.append([naive_coverage_male, naive_coverage_female])
    qhat = get_qhat(caliberation_quantiles, caliberation_target, alpha = alpha)
    qhat_list.append(qhat)
    conformal_quantiles = conformilize_quantiles(test_quantiles, qhat)

    conformal_coverage_male = get_coverage(conformal_quantiles[:, (gender_array==True)],
                                       test_target[(gender_array==True)])
    conformal_coverage_female = get_coverage(conformal_quantiles[:, (gender_array==False)],
                                       test_target[(gender_array==False)])
    conformal_coverage.append([conformal_coverage_male, conformal_coverage_female])

    quantile_width_male = get_quantile_width(conformal_quantiles[:, (gender_array==True)])
                                       
    quantile_width_female = get_quantile_width(conformal_quantiles[:, (gender_array==False)])
    quantile_width.append([quantile_width_male, quantile_width_female])
  mean_qhat = round(np.mean(np.abs(np.array(qhat_list))), 3)
  print("Mean qhat for alpha {} : {} ".format(alpha, mean_qhat))
  return quantile_width, conformal_coverage, naive_coverage
