In [2]:
import os
import random
import numpy as np
import pandas as pd
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import torch
import torch.nn as nn
import albumentations
import torch.optim as optim
from albumentations.pytorch import ToTensorV2, ToTensor

from C2C.models.resnet import *
from C2C import train
from C2C.loss import KLDLoss
from C2C.eval_model import *


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
torch.manual_seed(12)
torch.cuda.manual_seed(12)
np.random.seed(12)
random.seed(12)

torch.backends.cudnn.deterministic=True

### Read Data CSV

- CSV file must contain following columns:
    - path - location of each patch
    - wsi - Unique identifier for WSI
    - label - Label of WSI (Binary 0 or 1)
    - is_valid - If WSI part of validation cohort

In [4]:
CSV_PATH = '/workspace/icml_rebuttual/patch_data.csv'
df = pd.read_csv(CSV_PATH)

In [5]:
df.head()

Unnamed: 0,path,wsi,is_valid,label
0,./patch_dataset_4096_512/65bf2cc6f9ed1eed86b8e...,65bf2cc6f9ed1eed86b8e7a908efe834,False,2
1,./patch_dataset_4096_512/65bf2cc6f9ed1eed86b8e...,65bf2cc6f9ed1eed86b8e7a908efe834,False,2
2,./patch_dataset_4096_512/65bf2cc6f9ed1eed86b8e...,65bf2cc6f9ed1eed86b8e7a908efe834,False,2
3,./patch_dataset_4096_512/65bf2cc6f9ed1eed86b8e...,65bf2cc6f9ed1eed86b8e7a908efe834,False,2
4,./patch_dataset_4096_512/65bf2cc6f9ed1eed86b8e...,65bf2cc6f9ed1eed86b8e7a908efe834,False,2


### Model

In [6]:
# Initialize Model
model_ft = WSIClassifier(6, bn_track_running_stats=True)
model_ft = model_ft.to(device)



### Loss, Optimizer, and Data Transformation

In [7]:
# Data Transforms
data_transforms = albumentations.Compose([
    ToTensor()
    ])    

# Cross Entropy Loss 
criterion_ce = nn.CrossEntropyLoss()
criterion_kld = KLDLoss()
criterion_dic = {'CE': criterion_ce, 'KLD': criterion_kld}

# Observe that all parameters are being optimized
optimizer = optim.Adam(model_ft.parameters(), lr=1e-4)

### Train Model

In [8]:
model_ft = train.train_model(model_ft, 
                             criterion_dic, 
                             optimizer, 
                             df, 
                             data_transforms=data_transforms,
                             alpha=1, 
                             beta=0.01, 
                             gamma=0.01, 
                             num_epochs=1, 
                             fpath='trained/checkpoint.pt',
                             topk=True)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  valid_images = dict(df.loc[df['is_valid']==1].groupby('wsi')['path'].apply(list))
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  valid_images_label = dict(df.loc[df['is_valid']==1].groupby('wsi')['label'].apply(max))


Epoch 0/0
----------


0it [00:00, ?it/s]
  0%|          | 0/5 [00:00<?, ?it/s][ WARN:0@26.949] global loadsave.cpp:244 findDecoder imread_('./patch_dataset_4096_512/0c7d6e0e07621b5582117e9977327b4c_2.png'): can't open/read file: check file path/integrity
  0%|          | 0/5 [00:00<?, ?it/s]


error: OpenCV(4.7.0) /io/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


### Evaluate on Test Data

In [8]:
from C2C.utils import *

ckp_path = "trained/checkpoint.pt"
model_ft, optimizer = load_ckp(ckp_path, model_ft, optimizer)

In [9]:
TEST_PATH = 'data/11-3-2021 celiac_normal_test_split.csv'
df_test = pd.read_csv(TEST_PATH)

In [10]:
pred_df = eval_test(model_ft, df_test, data_transforms)

100%|██████████| 87/87 [03:37<00:00,  2.50s/it]

Test Accuracy:  0.7701149425287356



