In [1]:
import os
import json
import matplotlib.pyplot as plt 
import matplotlib.image as image 
import numpy as np
import pandas as pd
import albumentations as A
import albumentations.pytorch
import cv2
import math

import torch
from pytorch_lightning import LightningModule, Trainer
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import MNIST

import sys
sys.path.append('../')
from utils.dataset import *
# from train import PapsClsModel


In [2]:
# Load the annotation tables for the train and test splits.
train_df = pd.read_csv('../lbp_data/train.csv')
test_df = pd.read_csv('../lbp_data/test.csv')

In [3]:
# (rows, columns) of the training annotation table.
train_df.shape

(13829, 14)

In [4]:
# (rows, columns) of the test annotation table.
test_df.shape

(5506, 14)

In [5]:
# Preview the first rows of the training annotations as loaded from CSV.
train_df.head()

Unnamed: 0,file_name,task,label,xmin,ymin,w,h,occluded,des,cell_type,bbox,diag,ID,label_id
0,patch_images/2021.05.11/LBC573-20210113(1)/LBC...,[ASCUS] LBC573,ASC-US,349,1080,174,161,0,,Atypical squamous cells of undetermined signif...,"[349, 1080, 174, 161]",237,8,abnormal
1,patch_images/2021.05.11/LBC573-20210113(1)/LBC...,[ASCUS] LBC573,ASC-US,267,711,161,124,0,,Atypical squamous cells of undetermined signif...,"[267, 711, 161, 124]",203,9,abnormal
2,patch_images/2021.05.11/LBC573-20210113(1)/LBC...,[ASCUS] LBC573,ASC-US,202,970,131,176,0,,Atypical squamous cells of undetermined signif...,"[202, 970, 131, 176]",219,10,abnormal
3,patch_images/2021.01.06/LBC37-20210102(1)/LBC3...,[ASCUS] LBC37,ASC-US,1349,420,100,113,0,,Atypical squamous cells of undetermined signif...,"[1349, 420, 100, 113]",150,11,abnormal
4,patch_images/2021.01.06/LBC37-20210102(1)/LBC3...,[ASCUS] LBC37,ASC-US,1575,720,163,213,0,,Atypical squamous cells of undetermined signif...,"[1575, 720, 163, 213]",268,12,abnormal


In [6]:
# Class balance of the `label` column as loaded from CSV (string labels).
train_df.label.value_counts()

AS                           4455
LS                           1869
ASC-US                       1831
ASC-US with HPV infection    1429
LSIL                         1361
AH                            848
HS                            579
LSIL with HPV infection       404
HSIL                          374
N - Endocervical Cell         268
ASC-H                         190
HN                            102
HSIL with HPV infection        90
Carcinoma                      18
H                               9
SC                              1
ASC-H with HPV infection        1
Name: label, dtype: int64

In [7]:
# Number of distinct values in `label_id`; dropna=False keeps NaN counted,
# matching what len(Series.unique()) reports.
train_df['label_id'].nunique(dropna=False)

1

In [8]:
# Wrap the training table in the project dataset; constructing it prints the
# label summary shown below this cell.
# NOTE(review): later cells show `train_df.label` holding integer ids instead
# of the original strings, so PapsDataset presumably rewrites the frame passed
# in — confirm, and check that re-running this cell is safe.
train_dataset = PapsDataset(
    train_df,
    defaultpath='../lbp_data/',
    transform=train_transforms,
)

['ASC-US' 'LSIL' 'HSIL' 'ASC-H' 'N - Endocervical Cell' 'Carcinoma'
 'Benign']
[0 1 2 4 5]
(12790, 15)


In [9]:
# Pull the first item straight from the dataset (not from a DataLoader), so
# despite the plural names this is a single un-batched sample: the shape
# below has no batch dimension.
images, labels = next(iter(train_dataset))
images.shape

../lbp_data/patch_images/2021.05.11/LBC573-20210113(1)/LBC573-20210113(1)_1891.png


torch.Size([3, 224, 224])

In [10]:
# Re-check the class balance after building the dataset.
# NOTE(review): `label` now shows integer ids rather than the original
# strings — presumably changed in place by PapsDataset; confirm.
train_df.label.value_counts()

0    7715
1    3634
2    1145
4    1039
5     296
Name: label, dtype: int64

In [11]:
# Preview the training table again after the dataset was built, to see the
# current contents of the `label` column.
train_df.head()

Unnamed: 0,file_name,task,label,xmin,ymin,w,h,occluded,des,cell_type,bbox,diag,ID,label_id
0,patch_images/2021.05.11/LBC573-20210113(1)/LBC...,[ASCUS] LBC573,0,349,1080,174,161,0,,Atypical squamous cells of undetermined signif...,"[349, 1080, 174, 161]",237,8,abnormal
1,patch_images/2021.05.11/LBC573-20210113(1)/LBC...,[ASCUS] LBC573,0,267,711,161,124,0,,Atypical squamous cells of undetermined signif...,"[267, 711, 161, 124]",203,9,abnormal
2,patch_images/2021.05.11/LBC573-20210113(1)/LBC...,[ASCUS] LBC573,0,202,970,131,176,0,,Atypical squamous cells of undetermined signif...,"[202, 970, 131, 176]",219,10,abnormal
3,patch_images/2021.01.06/LBC37-20210102(1)/LBC3...,[ASCUS] LBC37,0,1349,420,100,113,0,,Atypical squamous cells of undetermined signif...,"[1349, 420, 100, 113]",150,11,abnormal
4,patch_images/2021.01.06/LBC37-20210102(1)/LBC3...,[ASCUS] LBC37,0,1575,720,163,213,0,,Atypical squamous cells of undetermined signif...,"[1575, 720, 163, 213]",268,12,abnormal


In [12]:
# Positional lookup: first row, column position 8.
# NOTE(review): presumably the `des` column, judging by the table preview and
# the NaN result — confirm, and prefer a label-based lookup if so.
train_df.iloc[0, 8]

nan

In [13]:
# Distinct label ids in the frame held by the dataset itself.
# NOTE(review): this set differs from the ids shown for `train_df.label`
# (no 4 here) — confirm what PapsDataset filters or remaps.
train_dataset.df.label.unique()

array([0, 1, 2, 5])

In [14]:
from pytorch_lightning import LightningDataModule
class PapsDataModule(LightningDataModule):
    """Lightning data module serving the Pap-smear patch datasets.

    ``train.csv`` and ``test.csv`` are read from ``data_dir`` and wrapped in
    ``PapsDataset``. There is no dedicated validation split: the test split is
    reused for validation.

    Relies on ``PapsDataset``, ``train_transforms``, ``test_transforms`` and
    ``BATCH_SIZE`` being available at module scope.

    Args:
        data_dir: Directory containing ``train.csv`` and ``test.csv``; it is
            also forwarded to ``PapsDataset`` as ``defaultpath``.
    """

    def __init__(self, data_dir: str = '../lbp_data/'):
        super().__init__()
        self.data_dir = data_dir
        # Stored under singular names on purpose: setup() reads exactly these
        # attributes, and the plural spellings clash with legacy
        # LightningDataModule properties in some Lightning releases.
        self.train_transform = train_transforms
        self.test_transform = test_transforms

        # Shape of one sample as (channels, height, width): a sample drawn
        # from the training dataset in this notebook is [3, 224, 224].
        self.dims = (3, 224, 224)
        # NOTE(review): the label ids printed when the dataset is built
        # include 5 — confirm that 5 classes with contiguous ids is what the
        # model actually receives.
        self.num_classes = 5

    def prepare_data(self):
        """Nothing to download; the data is expected to be on disk already."""
        pass

    def _load_split(self, csv_name, transform):
        """Read ``csv_name`` from ``data_dir`` and wrap it in a ``PapsDataset``."""
        # os.path.join works whether or not data_dir ends with a separator.
        frame = pd.read_csv(os.path.join(self.data_dir, csv_name))
        return PapsDataset(frame, defaultpath=self.data_dir, transform=transform)

    def setup(self, stage=None):
        """Create the datasets required for ``stage``.

        Args:
            stage: ``"fit"``, ``"validate"``, ``"test"`` or ``None``
                (``None`` prepares everything).
        """
        # Training data is only needed when fitting.
        if stage in ("fit", None):
            self.train_dataset = self._load_split('train.csv', self.train_transform)

        # The test split doubles as the validation set, so it must also exist
        # while fitting/validating — otherwise val_dataloader() has no dataset.
        if stage in ("fit", "validate", "test", None):
            self.test_dataset = self._load_split('test.csv', self.test_transform)

    def train_dataloader(self):
        """Return the training loader, reshuffled every epoch."""
        # Shuffle so batches do not follow the row order of train.csv.
        return DataLoader(self.train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    def val_dataloader(self):
        """Return the validation loader (served from the test split)."""
        return DataLoader(self.test_dataset, batch_size=BATCH_SIZE)

    def test_dataloader(self):
        """Return the test loader."""
        return DataLoader(self.test_dataset, batch_size=BATCH_SIZE)