# Imports

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import os, sys
import math
import seaborn as sns
import pytorch_lightning as pl
from torchvision.models import resnet152, ResNet152_Weights
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import functional as Func
from torchvision import transforms as T
from PIL import Image
from torchvision import transforms, utils
import matplotlib.pyplot as plt
sys.path.append(os.path.dirname(os.path.realpath('.')))

In [18]:
from helpers.supernet import SuperNet
from helpers.gradcam import NetworkGradCAM
from helpers.xrai import XRai

# Data Read-Ins

In [3]:
# Directories holding the saved image tensors (full and segmented variants).
DATA_DIR_PATH = '/home/jmryan/teams/dsc-180a---a14-[88137]/bnpp_224_pandas/'
SEG_DATA_DIR_PATH = '/home/jmryan/teams/dsc-180a---a14-[88137]/seg_224_pandas/'

# Metadata CSVs: one per split, plus the segmented-datapaths table.
TRAIN_PATH = '/home/jmryan/private/DSC180/A/train/traindata.csv'
VAL_PATH = '/home/jmryan/private/DSC180/A/val/valdata.csv'
TEST_PATH = '/home/jmryan/private/DSC180/A/test/testdata.csv'
SEG_PATH = '/home/jmryan/teams/dsc-180a---a14-[88137]/segmented_datapaths_meta.csv'

In [4]:
# Load the metadata tables; the first CSV column is used as the row index.
train, val, seg = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (TRAIN_PATH, VAL_PATH, SEG_PATH)
)
test = pd.read_csv(TEST_PATH, index_col=0, header=0)

In [5]:
# Per split: sum of the `seg` column divided by the number of rows.
print(
    f'Prop Train W Segments: {train.seg.sum()/len(train)}',
    f'Prop Val W Segments: {val.seg.sum()/len(val)}',
    f'Prop Test W Segments: {test.seg.sum()/len(test)}',
    sep='\n',
)

Prop Train W Segments: 0.8810999736217356
Prop Val W Segments: 0.8687924725561944
Prop Test W Segments: 0.8754799780581459


- Once the MIMIC data is available, the preprocessed input needs to be just a filepath and a binary value so that it works with both datasets.
- This means we need another class/function upstream for the Albert data to adjust the BNPP threshold used for the binary ground truth.
- Make sure to keep the results from the different thresholds for the presentation.

In [9]:
class PreprocessedImageDataset(Dataset):
    """Dataset of pre-saved image tensors paired with two labels per row.

    Each row of ``df`` is read positionally as ``(val, heart, filepath)``.
    ``filepath`` is appended to a base directory and loaded with ``torch.load``.

    Args:
        df: Row-indexable table, e.g. a 2-D NumPy array. Objects exposing
            ``to_numpy()`` (such as a pandas DataFrame) are converted once so
            that integer indexing selects a row instead of a column label.
        transform: Optional callable applied to the 3-channel image tensor.
        target_transform: Optional callable applied to the first label
            (``val``); ``heart`` is returned unchanged.
        seg: When true, files are read from ``SEG_DATA_DIR_PATH``; otherwise
            from ``DATA_DIR_PATH``.
    """
    def __init__(self, df, transform=None, target_transform=None, seg = False):
        # Indexing a DataFrame with an int looks up a column, so normalise to rows here.
        self.df = df.to_numpy() if hasattr(df, 'to_numpy') else df
        self.transform = transform
        self.target_transform = target_transform
        if seg:
            self.path = SEG_DATA_DIR_PATH
        else:
            self.path = DATA_DIR_PATH

    def __len__(self):
        """Return the number of rows in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, val, heart)`` for row ``idx``.

        The image is the loaded tensor viewed as ``(1, 224, 224)`` and expanded
        to three channels; the optional transforms are applied afterwards.
        """
        row = self.df[idx]
        val, heart, filepath = row[0], row[1], row[2]
        # NOTE(review): torch.load unpickles the file; only point this at trusted data.
        im = torch.load(self.path + filepath)
        # expand() repeats the single channel as a view, without copying the data.
        im = im.view(1, 224, 224).expand(3, -1, -1)
        if self.transform is not None:
            im = self.transform(im)
        if self.target_transform is not None:
            val = self.target_transform(val)
        return im, val, heart

In [11]:
# Keep only rows flagged by `seg`. Column order matters: the dataset class
# reads each row positionally as (bnpp_value_log, heart, filepaths).
train_df, val_df, test_df = (
    split[split.seg][['bnpp_value_log', 'heart', 'filepaths']]
    for split in (train, val, test)
)

In [12]:
# Wrap each split as a dataset, passing the rows as a NumPy array.
train_dataset, val_dataset, test_dataset = (
    PreprocessedImageDataset(df=frame.to_numpy())
    for frame in (train_df, val_df, test_df)
)

In [13]:
BATCH_SIZE = 32

# Only the training loader shuffles; validation batches keep a fixed order.
train_dl = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=16,
)
val_dl = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=16,
)

# Model

In [15]:
# Sanity check: is a CUDA device visible to PyTorch in this kernel?
torch.cuda.is_available()

True

In [16]:
# ResNet-152 backbone initialised with torchvision's default pretrained weights.
model = resnet152(weights=ResNet152_Weights.DEFAULT)

In [21]:
# Widths of the linear layers handed to SuperNet
# (presumably [in_features, out_features] per layer; confirm in helpers.supernet).
final_lin = [
    [2048, 4096],
    [4096, 2048],
    [2048, 512],
    [512, 256],
    [256, 1],
]

net = SuperNet(
    model=model,
    is_transfer=True,
    layer_defs=None,
    linear_layers=final_lin,
    lr=1e-5,
    lr_scheduler=True,
    print_on=True,
)

In [22]:
# GPU trainer with the progress bar, logger and checkpointing all switched off.
trainer = pl.Trainer(
    max_epochs=30,
    accelerator='gpu',
    logger=False,
    enable_checkpointing=False,
    enable_progress_bar=False,
)

# Put the module in training mode explicitly, then fit with train and val loaders.
net.train()
trainer.fit(net, train_dl, val_dl)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type       | Params
-----------------------------------------
0 | model     | ResNet     | 60.2 M
1 | regresser | Sequential | 18.0 M
2 | layers    | Sequential | 58.1 M
-----------------------------------------
78.2 M    Trainable params
0         Non-trainable params
78.2 M    Total params
312.627   Total estimated model params size (MB)


	Val {'AUC': 0.5, 'PRC': 0.0, 'loss': 2.8990248558784204}; mean_mae: 2.8990248558784204; mean_heart_hat: 0.0; mean_heart_true: 0.578125
	Val {'AUC': 0.525127771652963, 'PRC': 0.6599552572706935, 'loss': 0.8304811637842755}; mean_mae: 0.8304729596027609; mean_heart_hat: 0.5379061371841155; mean_heart_true: 0.6383874849578821
Epoch 0
Epoch 0
	Train {'AUC': 0.5081083353107542, 'PRC': 0.6625783592259471, 'loss': 1.7029442534529424}; mean_mae: 1.7031777330245954; mean_heart_hat: 0.27460519422198937; mean_heart_true: 0.6491280592770002
	Val {'AUC': 0.5348084640584887, 'PRC': 0.6693735498839907, 'loss': 0.8124815077613355}; mean_mae: 0.8124580444906778; mean_heart_hat: 0.5186522262334536; mean_heart_true: 0.6383874849578821
Epoch 1
Epoch 1
	Train {'AUC': 0.5199557790664134, 'PRC': 0.6669106881405563, 'loss': 1.1033092700826002}; mean_mae: 1.103188193501877; mean_heart_hat: 0.5111892822393533; mean_heart_true: 0.6491280592770002
	Val {'AUC': 0.5526525849942211, 'PRC': 0.6800825593395253, 'loss

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd7d87d7af0>  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1510, in __del__

Traceback (most recent call last):
    self._shutdown_workers()
  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd7d87d7af0>
Traceback (most recent call last):
  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
 

    self._shutdown_workers()
  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd7d87d7af0>
Traceback (most recent call last):
  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception 

    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd7d87d7af0>
Traceback (most recent call last):
  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd7d87d7af0>
Traceback (most recent call last):
  File "/home/jmryan/.local/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()


	Val {'AUC': 0.708409327213049, 'PRC': 0.7815577439570277, 'loss': 0.573794813786862}; mean_mae: 0.5737154518373947; mean_heart_hat: 0.6720818291215404; mean_heart_true: 0.6383874849578821
Epoch 8
Epoch 8
	Train {'AUC': 0.6601942043665388, 'PRC': 0.7783669141039237, 'loss': 0.7898207489429028}; mean_mae: 0.7899294063112838; mean_heart_hat: 0.5646283960781379; mean_heart_true: 0.6491280592770002
	Val {'AUC': 0.711021216602552, 'PRC': 0.7897196261682243, 'loss': 0.5698941447525924}; mean_mae: 0.5697822303422432; mean_heart_hat: 0.6438026474127557; mean_heart_true: 0.6383874849578821
Epoch 9
Epoch 9
	Train {'AUC': 0.6686285681623838, 'PRC': 0.785514950166113, 'loss': 0.7582332860028964}; mean_mae: 0.7583169141316426; mean_heart_hat: 0.5632063468303271; mean_heart_true: 0.6491280592770002
	Val {'AUC': 0.715315034163921, 'PRC': 0.7880434782608695, 'loss': 0.5650054988572981}; mean_mae: 0.5648971904726423; mean_heart_hat: 0.6642599277978339; mean_heart_true: 0.6383874849578821
Epoch 10
Epoch

`Trainer.fit` stopped: `max_epochs=30` reached.


	Val {'AUC': 0.7342522123824414, 'PRC': 0.8001800180018002, 'loss': 0.5407439287746414}; mean_mae: 0.5406756869078821; mean_heart_hat: 0.6684717208182912; mean_heart_true: 0.6383874849578821
Epoch 29
Epoch 29
	Train {'AUC': 0.8318556785819551, 'PRC': 0.9062897886427298, 'loss': 0.49700191816504985}; mean_mae: 0.4968131001877722; mean_heart_hat: 0.5878302522266298; mean_heart_true: 0.6491280592770002
