In [1]:
# CONFIG FILE

RESOURCES_PATH = "/content/drive/MyDrive/Elcap_Resource"
OUTPUT_PATH = "/content/drive/MyDrive/Elcap_Preprocessed"





PADDING_FOR_LOCALIZATION = 10
BLOCK_SIZE = 128
COORDS_CUBE_SIZE = 32
TARGET_SHAPE = (COORDS_CUBE_SIZE, COORDS_CUBE_SIZE, COORDS_CUBE_SIZE, 3, 5)
# TARGET_SHAPE = (COORDS_CUBE_SIZE, COORDS_CUBE_SIZE, COORDS_CUBE_SIZE, 5)
COORDS_SHAPE = (3, COORDS_CUBE_SIZE, COORDS_CUBE_SIZE, COORDS_CUBE_SIZE)
ANCHOR_SIZES = [10, 30, 60]
VAL_PCT = 0.2
TOTAL_EPOCHS = 200
DEFAULT_LR = 0.01

MODEL = "TOP_LEFT"

if(MODEL == "TOP_LEFT"):
  SAVE_DIR = "top_left_models/"
elif(MODEL == "TOP_RIGHT"):
  SAVE_DIR = "top_right_models/"
elif(MODEL == "BOTTOM_LEFT"):
  SAVE_DIR = "bottom_left_models/"
elif(MODEL == "BOTTOM_RIGHT"):
  SAVE_DIR = "bottom_right_models/"


In [3]:
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.autograd import Variable
from torch.utils.data import DataLoader

from ast import literal_eval
import numpy as np
import math
import random
import time
import os
import itertools
import pandas as pd
from glob import glob

In [4]:

class LunaDataSet(Dataset):
    def __init__(self, indices: list, meta_dataframe: pd.DataFrame):
        self.indices = indices
        self.meta_dataframe = meta_dataframe

    def __getitem__(self, idx, split=None):
        meta = self.meta_dataframe.iloc[self.indices[idx]]
        centers = literal_eval(meta['centers'])
        radii = literal_eval(meta['radii'])
        lungs_bounding_box = literal_eval(meta['lungs_bounding_box'])
        clazz = int(meta['class'])
        sub_dir = 'positives' if clazz == 1 else 'negatives'
        file_path = f'''{OUTPUT_PATH}/augmented/{sub_dir}/{meta['seriesuid']}_{meta['sub_index']}.npy'''
        patch = np.load(file_path)
        target = np.zeros(TARGET_SHAPE)
        if clazz == 1:
            for c in range(len(centers)):
                place = []
                point = []
                windows = []
                for ax in range(len(patch.shape)):
                    window = int(BLOCK_SIZE / TARGET_SHAPE[ax])
                    windows.append(window)

                    val_centers = centers[c][ax] // window
                    if val_centers >= COORDS_CUBE_SIZE:
                      val_centers = COORDS_CUBE_SIZE-1

                    place.append(val_centers)
                    point.append(centers[c][ax] % window)

                # if radii[c] <= ANCHOR_SIZES[0] / 2:
                #     place.append(0)
                # elif radii[c] <= ANCHOR_SIZES[1] / 2:
                #     place.append(1)
                # else:
                #     place.append(2)

                place.append(0)
                vector = [1]
                for p in range(len(point)):
                    vector.append(point[p] / windows[p] - 1)
                vector.append(radii[c])
                target[tuple(place)] = vector
        else:
            for c in range(len(centers)):
                point = []
                for ax in range(len(patch.shape)):
                    window = int(BLOCK_SIZE / TARGET_SHAPE[ax])
                    point.append(centers[c][ax] % window)

        out_patch = patch[np.newaxis, ]

        coords = self._get_coords(lungs_bounding_box)
        return out_patch, target, coords

    def __len__(self):
        return len(self.indices)

    @staticmethod
    def _get_coords(bb):
        div_factor = BLOCK_SIZE / COORDS_CUBE_SIZE
        coords = np.ones((3, COORDS_CUBE_SIZE, COORDS_CUBE_SIZE, COORDS_CUBE_SIZE)) * PADDING_FOR_LOCALIZATION

        bb_new = [[], []]
        for i in (0, 1, 2):
            if bb[0][i] < bb[1][i]:
                bb_new[0].append(math.floor(bb[0][i] / div_factor))
                bb_new[1].append(math.ceil(bb[1][i] / div_factor))
            else:
                bb_new[0].append(math.ceil(bb[0][i] / div_factor))
                bb_new[1].append(math.floor(bb[1][i] / div_factor))

        np_bb0 = np.array(bb_new[0], dtype=int)
        np_bb1 = np.array(bb_new[1], dtype=int)
        distances = np.abs(np_bb0 - np_bb1)
        starts = np.minimum(np_bb0, np_bb1)
        ends = np.maximum(np_bb0, np_bb1)

        if (starts > np.array([32, 32, 32])).any() or (ends < np.array([0, 0, 0])).any():
            return coords
        else:
            for i in (0, 1, 2):
                shp = [1, 1, 1]
                shp[i] = -1
                vec = np.arange(-1 * math.ceil(distances[i] / 2), math.floor(distances[i] / 2)).reshape(
                    tuple(shp)) / math.ceil(
                    distances[i] / 2)
                if bb_new[0][i] > bb_new[1][i]:
                    vec = vec * -1
                matrix = np.broadcast_to(vec, tuple(distances))
                a1 = np.maximum(0, starts)
                b1 = np.minimum(ends, COORDS_CUBE_SIZE)
                a2 = np.maximum(-1 * starts, 0)
                b2 = np.minimum(ends, COORDS_CUBE_SIZE) - starts
                coords[i, a1[0]:b1[0], a1[1]:b1[1], a1[2]:b1[2]] = matrix[a2[0]:b2[0], a2[1]:b2[1], a2[2]:b2[2]]
            return coords

In [5]:
# MODEL


class PostRes(nn.Module):
    def __init__(self, n_in, n_out, stride=1):
        super(PostRes, self).__init__()
        self.conv1 = nn.Conv3d(n_in, n_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm3d(n_out)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv3d(n_out, n_out, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm3d(n_out)

        if stride != 1 or n_out != n_in:
            self.shortcut = nn.Sequential(
                nn.Conv3d(n_in, n_out, kernel_size=1, stride=stride),
                nn.BatchNorm3d(n_out))
        else:
            self.shortcut = None

    def forward(self, x):
        residual = x
        if self.shortcut is not None:
            residual = self.shortcut(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        out += residual
        out = self.relu(out)
        return out


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.preBlock = nn.Sequential(
            nn.Conv3d(1, 24, kernel_size=3, padding=1),
            nn.BatchNorm3d(24),
            nn.ReLU(inplace=True),
            nn.Conv3d(24, 24, kernel_size=3, padding=1),
            nn.BatchNorm3d(24),
            nn.ReLU(inplace=True))
        num_blocks_forw = [2, 2, 3, 3]
        num_blocks_back = [3, 3]
        self.featureNum_forw = [24, 32, 64, 64, 64]
        self.featureNum_back = [128, 64, 64]
        for i in range(len(num_blocks_forw)):
            blocks = []
            for j in range(num_blocks_forw[i]):
                if j == 0:
                    blocks.append(PostRes(self.featureNum_forw[i], self.featureNum_forw[i + 1]))
                else:
                    blocks.append(PostRes(self.featureNum_forw[i + 1], self.featureNum_forw[i + 1]))
            setattr(self, 'forw' + str(i + 1), nn.Sequential(*blocks))

        for i in range(len(num_blocks_back)):
            blocks = []
            for j in range(num_blocks_back[i]):
                if j == 0:
                    if i == 0:
                        addition = 3
                    else:
                        addition = 0
                    blocks.append(PostRes(self.featureNum_back[i + 1] + self.featureNum_forw[i + 2] + addition,
                                          self.featureNum_back[i]))
                else:
                    blocks.append(PostRes(self.featureNum_back[i], self.featureNum_back[i]))
            setattr(self, 'back' + str(i + 2), nn.Sequential(*blocks))

        self.maxpool1 = nn.MaxPool3d(kernel_size=2, stride=2, return_indices=True)
        self.maxpool2 = nn.MaxPool3d(kernel_size=2, stride=2, return_indices=True)
        self.maxpool3 = nn.MaxPool3d(kernel_size=2, stride=2, return_indices=True)
        self.maxpool4 = nn.MaxPool3d(kernel_size=2, stride=2, return_indices=True)
        self.unmaxpool1 = nn.MaxUnpool3d(kernel_size=2, stride=2)
        self.unmaxpool2 = nn.MaxUnpool3d(kernel_size=2, stride=2)

        self.path1 = nn.Sequential(
            nn.ConvTranspose3d(64, 64, kernel_size=2, stride=2),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True))
        self.path2 = nn.Sequential(
            nn.ConvTranspose3d(64, 64, kernel_size=2, stride=2),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True))
        self.drop = nn.Dropout3d(p=0.5, inplace=False)
        self.output = nn.Sequential(nn.Conv3d(self.featureNum_back[0], 64, kernel_size=1),
                                    nn.ReLU(),
                                    nn.Conv3d(64, 5 * len(ANCHOR_SIZES), kernel_size=1))

    def forward(self, x, coord):
        out = self.preBlock(x)  # 16
        out_pool, indices0 = self.maxpool1(out)
        out1 = self.forw1(out_pool)  # 32
        out1_pool, indices1 = self.maxpool2(out1)
        out2 = self.forw2(out1_pool)  # 64
        out2_pool, indices2 = self.maxpool3(out2)
        out3 = self.forw3(out2_pool)  # 96
        out3_pool, indices3 = self.maxpool4(out3)
        out4 = self.forw4(out3_pool)  # 96
        rev3 = self.path1(out4)
        comb3 = self.back3(torch.cat((rev3, out3), 1))  # 96+96
        rev2 = self.path2(comb3)
        comb2 = self.back2(torch.cat((rev2, out2, coord), 1))  # 64+64
        comb2 = self.drop(comb2)
        out = self.output(comb2)
        size = out.size()
        out = out.view(out.size(0), out.size(1), -1)
        out = out.transpose(1, 2).contiguous().view(size[0], size[2], size[3], size[4], len(ANCHOR_SIZES), 5)
        return out

In [6]:
# LOSS FUNCTION


def calculate_iou(pred_box, true_box):
    iou_list = []
    for pred, true in zip(pred_box, true_box):
        _, pz, py, px, pd = pred
        _, lz, ly, lx, ld = true

        # Calculate the difference between the z-coordinates of centers
        dz = abs(pz - lz)

        if dz < 0.5:  # Check if the difference is small
            # Calculate half side lengths of predicted and true boxes
            half_pd = pd / 2
            half_ld = ld / 2

            # Calculate distances between centers along y and x axes
            dy = abs(py - ly)
            dx = abs(px - lx)

            # Calculate the intersection of bounding boxes along y and x axes
            intersection_y = half_pd + half_ld - dy
            intersection_x = half_pd + half_ld - dx

            # Calculate the intersection area
            intersection_area = max(0, intersection_y) * max(0, intersection_x)

            # Calculate the area of the predicted and true boxes
            pred_area = pd * pd
            true_area = ld * ld

            # Calculate the union area
            union_area = pred_area + true_area - intersection_area

            # Calculate IoU
            iou = intersection_area / union_area
        else:
            iou = 0.0  # If the difference is large, IoU is 0

        iou_list.append(iou)

    mean_iou = np.mean([float(tensor.detach().cpu().numpy()) if isinstance(tensor, torch.Tensor) else float(tensor) for tensor in iou_list])
    return mean_iou


def hard_mining(neg_output, neg_labels, num_hard):
    num = num_hard  * len(neg_output)
    _, idcs = torch.topk(neg_output, min(num, len(neg_output)))
    neg_output = torch.index_select(neg_output, 0, idcs)
    neg_labels = torch.index_select(neg_labels, 0, idcs)
    return neg_output, neg_labels


class Loss(nn.Module):
    def __init__(self, num_hard=0.7):
        super(Loss, self).__init__()
        self.sigmoid = nn.Sigmoid()
        self.classify_loss = nn.BCELoss()
        self.regress_loss = nn.SmoothL1Loss()
        self.num_hard = num_hard

    def forward(self, output, labels, train=True):
        batch_size = labels.size(0)
        output = output.view(-1, 5)
        labels = labels.view(-1, 5)
        iou = -1


        pos_idcs = labels[:, 0] > 0.5


        pos_idcs = pos_idcs.unsqueeze(1).expand(pos_idcs.size(0), 5)
        pos_output = output[pos_idcs].view(-1, 5)
        pos_labels = labels[pos_idcs].view(-1, 5)

        neg_idcs = labels[:, 0] < 0.5
        neg_output = output[:, 0][neg_idcs]
        neg_labels = labels[:, 0][neg_idcs]

        if self.num_hard > 0 and train:
            neg_output, neg_labels = hard_mining(neg_output, neg_labels, self.num_hard * batch_size)


        neg_prob = self.sigmoid(neg_output)
        if len(pos_output) > 0:

            pos_prob = self.sigmoid(pos_output[:, 0])
            pz, ph, pw, pd = pos_output[:, 1], pos_output[:, 2], pos_output[:, 3], pos_output[:, 4]
            lz, lh, lw, ld = pos_labels[:, 1], pos_labels[:, 2], pos_labels[:, 3], pos_labels[:, 4]
            regress_losses = [
                self.regress_loss(pz, lz),
                self.regress_loss(ph, lh),
                self.regress_loss(pw, lw),
                self.regress_loss(pd, ld)]
            regress_losses_data = [loz.item() for loz in regress_losses]
            classify_loss = 0.5 * self.classify_loss(
                pos_prob, pos_labels[:, 0]) + 0.5 * self.classify_loss(
                neg_prob, neg_labels)
            pos_correct = (pos_prob.data >= 0.5).sum()
            pos_total = len(pos_prob)

            # print("Pos Loss: ",self.classify_loss(pos_prob, pos_labels[:, 0]))
            # print("Neg Loss: ",self.classify_loss(neg_prob, neg_labels))
            # print("Neg Loss + 1: ",self.classify_loss(neg_prob, neg_labels + 1))

            # Calculate IoU for each positive sample
            iou = calculate_iou(pos_output, pos_labels)

        else:
            regress_losses = [0, 0, 0, 0]
            classify_loss = self.classify_loss(neg_prob, neg_labels)
            pos_correct = 0
            pos_total = 0
            regress_losses_data = [0, 0, 0, 0]


        classify_loss_data = classify_loss.item()
        loss = classify_loss
        for regress_loss in regress_losses:
            loss += regress_loss
        neg_correct = (neg_prob.data < 0.5).sum()
        neg_total = len(neg_prob)

        return [loss, classify_loss_data] + regress_losses_data + [pos_correct, pos_total, neg_correct, neg_total,iou]



In [7]:
*/**# Train Org




def get_lr(epoch):
    if epoch <= TOTAL_EPOCHS * 0.5:
        lr = DEFAULT_LR
    elif epoch <= TOTAL_EPOCHS * 0.8:
        lr = 0.1 * DEFAULT_LR
    else:
        lr = 0.01 * DEFAULT_LR
    return lr


def train(data_loader, net, loss, epoch, optimizer, get_lr, save_dir="./"+SAVE_DIR):
    start_time = time.time()

    net.train()
    lr = get_lr(epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    metrics = []
    print("Training Data length: ", len(data_loader))
    for i, (data, target, coord) in enumerate(data_loader):
        # print("Training Data Left: ", len(data_loader) - i)

        if torch.cuda.is_available():
            data = Variable(data.cuda())
            target = Variable(target.cuda())
            coord = Variable(coord.cuda())
        data = data.float()
        target = target.float()
        coord = coord.float()

        output = net(data, coord)


        loss_output = loss(output, target)
        optimizer.zero_grad()
        loss_output[0].backward()
        # print("losss: ",loss_output[0])
        optimizer.step()

        loss_output[0] = loss_output[0].item()
        metrics.append(loss_output)

        # break

    # Move each tensor in the list to CPU
    metrics_cpu = []
    for metrics_single in metrics:
        metrics_single_cpu = [metric.cpu() if isinstance(metric, torch.Tensor) and metric.device.type == 'cuda' else metric for metric in metrics_single]
        metrics_cpu.append(metrics_single_cpu)

    # Convert the list to NumPy array
    metrics = torch.tensor(metrics_cpu).detach().cpu().numpy()



    if epoch % 10 == 0:
        net_state_dict = net.state_dict()
        for key in net_state_dict.keys():
            net_state_dict[key] = net_state_dict[key].cpu()
        torch.save({
            'epoch': epoch,
            'save_dir': save_dir,
            'model_state_dict': net_state_dict,
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': np.mean(metrics[:, 0])}, os.path.join(save_dir, f'''{epoch}.ckpt'''))

    positive_values = [x for x in metrics[:, 10] if x is not None and x > 0]

    end_time = time.time()
    print(f'''\n\nEpoch {epoch} (lr {lr})''')
    print(f'''Train: tpr {100.0 * np.sum(metrics[:, 6]) / np.sum(metrics[:, 7])},
tnr {100.0 * np.sum(metrics[:, 8]) / np.sum(metrics[:, 9])},
total pos {np.sum(metrics[:, 7])}, total neg {np.sum(metrics[:, 9])}, pos correct {np.sum(metrics[:, 6])} , neg correct {np.sum(metrics[:, 8])} ,
iou {np.mean(positive_values)}, time {end_time - start_time}''')
    print(f'''loss {np.mean(metrics[:, 0])}, classify loss {np.mean(metrics[:, 1])},
regress loss {np.mean(metrics[:, 2])}, {np.mean(metrics[:, 3])},
{np.mean(metrics[:, 4])}, {np.mean(metrics[:, 5])}''')




def validate(data_loader, net, loss):
    start_time = time.time()

    net.eval()

    metrics = []
    print("Validation Data length: ", len(data_loader))
    for i, (data, target, coord) in enumerate(data_loader):
        # print("Validation Data Left: ", len(data_loader) - i)
        if torch.cuda.is_available():
            data = Variable(data.cuda())
            target = Variable(target.cuda())
            coord = Variable(coord.cuda())
        data = data.float()
        target = target.float()
        coord = coord.float()

        output = net(data, coord)


        loss_output = loss(output, target, train=False)

        loss_output[0] = loss_output[0].item()
        metrics.append(loss_output)
    end_time = time.time()


    # Move each tensor in the list to CPU
    metrics_cpu = []
    for metrics_single in metrics:
        metrics_single_cpu = [metric.cpu() if isinstance(metric, torch.Tensor) and metric.device.type == 'cuda' else metric for metric in metrics_single]
        metrics_cpu.append(metrics_single_cpu)

    # Convert the list to NumPy array
    metrics = torch.tensor(metrics_cpu).detach().cpu().numpy()

    # metrics_np = []
    # for metric in metrics[0]:
    #     if isinstance(metric, torch.Tensor) and metric.device.type == 'cuda':
    #         metric = metric.cpu()  # Move tensor to CPU
    #     metrics_np.append(metric)

    # metrics_np = np.asarray(metrics_np, dtype=np.float32)
    # metrics = np.expand_dims(np.asarray(metrics_np, dtype=np.float32), axis=0)

    positive_values = [x for x in metrics[:, 10] if x is not None and x > 0]
    print(f'''Validate''')
    print(f'''time {end_time - start_time}''')
    print(f'''Validate: tpr {100.0 * np.sum(metrics[:, 6]) / np.sum(metrics[:, 7])},
tnr {100.0 * np.sum(metrics[:, 8]) / np.sum(metrics[:, 9])},
total pos {np.sum(metrics[:, 7])}, total neg {np.sum(metrics[:, 9])}, pos correct {np.sum(metrics[:, 6])} , neg correct {np.sum(metrics[:, 8])} ,
iou {np.mean(positive_values)}, time {end_time - start_time}''')
    print(f'''loss {np.mean(metrics[:, 0])}, classify loss {np.mean(metrics[:, 1])},
regress loss {np.mean(metrics[:, 2])}, {np.mean(metrics[:, 3])},
{np.mean(metrics[:, 4])}, {np.mean(metrics[:, 5])}''')

    # Detach From GPU
    data.detach()
    target.detach()
    coord.detach()
    output.detach()


def run(load_last_checkpoint=False):
    # save_dir = f'{OUTPUT_PATH}/models/'
    save_dir = f'{OUTPUT_PATH}/{SAVE_DIR}/'
    os.makedirs(save_dir, exist_ok=True)
    print("Creating: ",save_dir)
    neural_net = Net()
    loss_fn = Loss()
    optim = torch.optim.SGD(neural_net.parameters(), DEFAULT_LR, momentum=0.9, weight_decay=1e-4)
    # optim = torch.optim.Adam(neural_net.parameters(), lr=DEFAULT_LR, weight_decay=1e-4)
    starting_epoch = 0
    initial_loss = None
    if load_last_checkpoint:
        model_paths = glob(f'''{save_dir}*.ckpt''')
        model_names = [int(i.split('/')[-1][:-5]) for i in model_paths]
        latest_model_path = f'''{save_dir}{max(model_names)}.ckpt'''
        print('loading latest model from:', latest_model_path)
        checkpoint = torch.load(latest_model_path)
        neural_net.load_state_dict(checkpoint['model_state_dict'])
        optim.load_state_dict(checkpoint['optimizer_state_dict'])
        starting_epoch = checkpoint['epoch']
        initial_loss = checkpoint['loss']
    if torch.cuda.is_available():
        neural_net = neural_net.cuda()
        loss_fn = loss_fn.cuda()
    print(f'''Training from epoch: {starting_epoch} towards: {TOTAL_EPOCHS},
with learning rate starting from: {get_lr(starting_epoch)}, and loss: {initial_loss}''')
    # meta = pd.read_csv(f'{OUTPUT_PATH}/augmented_meta.csv', index_col=0).sample(frac=1).reset_index(drop=False)
    # meta_group_by_series = meta.groupby(['seriesuid']).indices
    # list_of_groups = [{i: list(meta_group_by_series[i])} for i in meta_group_by_series.keys()]

    # Read the augmented meta CSV file
    meta = pd.read_csv(f'{OUTPUT_PATH}/augmented_meta.csv', index_col=0)

    # Separate positive and negative samples
    positive_samples = meta[meta['class'] == 1]
    negative_samples = meta[meta['class'] == 0]


    # For Top Left
    # if(MODEL == "TOP_LEFT"):
    #   print("Top Left:")
    #   positive_samples = positive_samples[positive_samples['sub_index'].str.contains(r'0_0_1')]
    #   negative_samples = negative_samples[negative_samples['sub_index'].str.contains(r'0_0_1')]
    # elif(MODEL == "TOP_RIGHT"):
    #   print("Top Right:")
    #   positive_samples = positive_samples[positive_samples['sub_index'].str.contains(r'0_1_1')]
    #   negative_samples = negative_samples[negative_samples['sub_index'].str.contains(r'0_1_1')]
    # elif(MODEL == "BOTTOM_LEFT"):
    #   print("Bottom Left:")
    #   positive_samples = positive_samples[positive_samples['sub_index'].str.contains(r'1_0_1')]
    #   negative_samples = negative_samples[negative_samples['sub_index'].str.contains(r'1_0_1')]
    # elif(MODEL == "BOTTOM_RIGHT"):
    #   print("Bottom Right:")

    positive_samples = positive_samples[positive_samples['sub_index'].str.contains(r'.+_.+_1')]
    negative_samples = negative_samples[negative_samples['sub_index'].str.contains(r'.+_.+_1')]

    print(positive_samples["sub_index"])

    # Determine the number of samples you want from each class
    num_samples_positive = int(1 * len(positive_samples))
    num_samples_negative = int(1 * len(negative_samples))

    # Sample the required number of samples from each class
    positive_samples_sampled = positive_samples.sample(n=num_samples_positive, random_state=42)
    negative_samples_sampled = negative_samples.sample(n=num_samples_negative, random_state=42)

    print("pos_len: ",len(positive_samples_sampled))
    print("neg_len: ",len(negative_samples_sampled))

    # Combine the sampled data
    combined_samples = pd.concat([positive_samples_sampled, negative_samples_sampled])

    # Shuffle the combined data
    combined_samples_shuffled = combined_samples.sample(frac=1, random_state=42).reset_index(drop=True)

    # Group samples by 'seriesuid'
    meta_group_by_series = combined_samples_shuffled.groupby('seriesuid').indices

    # Create a list of dictionaries where each dictionary contains the indices of samples for a particular 'seriesuid'
    list_of_groups = [{seriesuid: list(indices)} for seriesuid, indices in meta_group_by_series.items()]


    random.Random(5).shuffle(list_of_groups)
    # random.Random(1).shuffle(list_of_groups)
    # list_of_groups = list_of_groups[:int(len(list_of_groups) * 0.6)]
    val_split = int(VAL_PCT * len(list_of_groups))
    val_indices = list(itertools.chain(*[list(i.values())[0] for i in list_of_groups[:val_split]]))
    train_indices = list(itertools.chain(*[list(i.values())[0] for i in list_of_groups[val_split:]]))
    ltd = LunaDataSet(train_indices, meta)
    lvd = LunaDataSet(val_indices, meta)
    train_loader = DataLoader(ltd, batch_size=5, shuffle=False)
    val_loader = DataLoader(lvd, batch_size=5, shuffle=False)

    print("Starting: \n\n")
    for ep in range(starting_epoch, TOTAL_EPOCHS):
        train(train_loader, neural_net, loss_fn, ep, optim, get_lr, save_dir=save_dir)
        validate(val_loader, neural_net, loss_fn)


if __name__ == '__main__':
    run(load_last_checkpoint=False)

Creating:  /content/drive/MyDrive/Elcap_Preprocessed/top_left_models//
Training from epoch: 0 towards: 200,
with learning rate starting from: 0.01, and loss: None
1      0_0_1
4      0_1_1
7      1_0_1
10     1_1_1
13     0_0_1
       ...  
514    1_1_1
517    0_0_1
520    0_1_1
523    1_0_1
526    1_1_1
Name: sub_index, Length: 176, dtype: object
pos_len:  176
neg_len:  24
Starting: 


Training Data length:  32


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass




Epoch 0 (lr 0.01)
Train: tpr 97.53086419753086,
tnr 60.56703642908106,
total pos 162.0, total neg 15728478.0, pos correct 158.0 , neg correct 9526273.0 ,
iou 0.21296759935233678, time 275.0031967163086
loss 4.101836811751127, classify loss 0.543983013369143,
regress loss 0.903761446941644, 0.509295078314608,
0.7229241896711756, 1.4218731748405844
Validation Data length:  8
Validate
time 60.73010492324829
Validate: tpr 52.77777777777778,
tnr 81.30262423056851,
total pos 36.0, total neg 3932124.0, pos correct 19.0 , neg correct 3196920.0 ,
iou 0.18064857232901785, time 60.73010492324829
loss 473.60383850336075, classify loss 5.011665068566799,
regress loss 113.75022377027199, 123.1022597886622,
125.71689976938069, 106.02278819680214
Training Data length:  32


Epoch 1 (lr 0.01)
Train: tpr 97.53086419753086,
tnr 68.6720927479442,
total pos 162.0, total neg 15728478.0, pos correct 158.0 , neg correct 10801075.0 ,
iou 0.45545408354720573, time 31.813861846923828
loss 3.2780148526653647, c