In [19]:
import os
import json
import matplotlib.pyplot as plt 
import matplotlib.image as image 
import numpy as np
import pandas as pd
import albumentations as A
import albumentations.pytorch
import cv2
import math

import torch
from pytorch_lightning import LightningModule, Trainer
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import MNIST

import sys
sys.path.append('../')
from utils.dataset import *
# from train import PapsClsModel


In [20]:
# Read the training annotations CSV into a DataFrame.
train_df = pd.read_csv('../lbp_data/train.csv')

In [21]:
# Size of the training table as (rows, columns).
train_df.shape

(16569, 13)

In [22]:
# Preview the first five rows of the freshly loaded table.
train_df.head(n=5)

Unnamed: 0,ID,file_name,task,bbox,xmin,ymin,w,h,label,label_id,occluded,des,cell_type
0,0,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[1539, 199, 139, 211]",1539,199,139,211,C,Candida,0,,ASC-US
1,1,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[1337, 102, 256, 136]",1337,102,256,136,AS,ASC-US,0,,ASC-US
2,2,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[220, 619, 166, 169]",220,619,166,169,AS,ASC-US,0,,ASC-US
3,3,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[658, 1747, 191, 166]",658,1747,191,166,AS,ASC-US,0,,ASC-US
4,4,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[1571, 365, 136, 146]",1571,365,136,146,AS,ASC-US,0,,ASC-US


In [23]:
# Frequency of each distinct value in the `label` column, most common first.
train_df['label'].value_counts()

AS                           4322
ASC-US                       2448
LS                           2112
Negative                     1770
ASC-US with HPV infection    1390
LSIL                         1262
AH                            753
HSIL                          546
LSIL with HPV infection       447
HS                            395
Candida                       271
ASC-H                         235
C                             213
N - Endocervical Cell         209
HN                            102
HSIL with HPV infection        86
판독불가                            3
ASCUS-SIL                       2
SC                              1
N - Endometrial cell            1
ASC-H with HPV infection        1
Name: label, dtype: int64

In [24]:
# Number of distinct `label_id` values; dropna=False keeps NaN counted as a
# value, matching len(Series.unique()).
train_df['label_id'].nunique(dropna=False)

9

In [25]:
# Wrap the annotation table in the project's dataset class.
# NOTE(review): in the cells that follow, train_df's `label` column shows
# integer codes where it previously held strings — presumably PapsDataset
# re-encodes the frame it is given in place. Confirm; if the raw frame should
# stay untouched (and this cell re-runnable), pass train_df.copy() instead.
train_dataset = PapsDataset(
    train_df,
    defaultpath='../lbp_data',
    transform=train_transforms,
)

In [28]:
# Preview the first five rows of train_df again, after the dataset was built.
train_df.head(n=5)

Unnamed: 0,ID,file_name,task,bbox,xmin,ymin,w,h,label,label_id,occluded,des,cell_type
0,0,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[1539, 199, 139, 211]",1539,199,139,211,5,Candida,0,,ASC-US
1,1,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[1337, 102, 256, 136]",1337,102,256,136,0,ASC-US,0,,ASC-US
2,2,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[220, 619, 166, 169]",220,619,166,169,0,ASC-US,0,,ASC-US
3,3,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[658, 1747, 191, 166]",658,1747,191,166,0,ASC-US,0,,ASC-US
4,4,patch_images/2021.01.14/LBC424-20210111(1)/LBC...,[AS6] LBC424,"[1571, 365, 136, 146]",1571,365,136,146,0,ASC-US,0,,ASC-US


In [27]:
# Scalar at row position 0, column position 8.
# NOTE(review): position 8 appears to be the `label` column given the 13
# columns reported by train_df.shape — confirm, or index by name instead.
train_df.iat[0, 8]

5

In [32]:
# Distinct label values in the frame held by the dataset object.
train_dataset.df['label'].unique()

array([0, 1, 4, 2, 3])

In [17]:
from pytorch_lightning import LightningDataModule
class PapsDataModule(LightningDataModule):
    """LightningDataModule that serves the ``PapsDataset`` train/test splits.

    ``train.csv`` and ``test.csv`` are read from ``data_dir`` and each table is
    wrapped in a ``PapsDataset``. No separate validation split exists: the test
    dataset is also used as the validation dataset.

    Relies on notebook-level names ``train_transforms``, ``test_transforms``,
    ``PapsDataset`` and ``BATCH_SIZE`` being defined before use.

    Args:
        data_dir: Directory containing ``train.csv`` and ``test.csv``; it is
            also forwarded to ``PapsDataset`` as ``defaultpath``. A trailing
            slash is optional.
    """

    def __init__(self, data_dir: str = '../lbp_data/'):
        super().__init__()
        self.data_dir = data_dir
        # Stored under the singular names; setup() must read these exact
        # attributes (the plural spellings are never assigned here).
        self.train_transform = train_transforms
        self.test_transform = test_transforms

        # NOTE(review): (1, 28, 28) looks carried over from an MNIST template
        # and is unlikely to match the Pap-smear patches — confirm the real
        # input shape or drop the attribute.
        self.dims = (1, 28, 28)
        self.num_classes = 5

    def prepare_data(self):
        """Nothing to download; the CSVs and images are expected on disk."""
        pass

    def setup(self, stage=None):
        """Build the datasets required for ``stage``.

        Args:
            stage: ``"fit"``, ``"validate"``, ``"test"``, or ``None`` to build
                everything. Any other value builds nothing.
        """
        # Training data is only needed while fitting.
        if stage == "fit" or stage is None:
            # os.path.join tolerates data_dir with or without a trailing slash.
            train_df = pd.read_csv(os.path.join(self.data_dir, 'train.csv'))
            self.train_dataset = PapsDataset(
                train_df, defaultpath=self.data_dir, transform=self.train_transform
            )

        # val_dataloader() serves test_dataset, so it has to exist during
        # "fit" and "validate" as well as "test"; otherwise the validation
        # loop inside Trainer.fit() fails with an AttributeError.
        if stage in ("fit", "validate", "test") or stage is None:
            test_df = pd.read_csv(os.path.join(self.data_dir, 'test.csv'))
            self.test_dataset = PapsDataset(
                test_df, defaultpath=self.data_dir, transform=self.test_transform
            )

    def train_dataloader(self):
        """Return the training loader (shuffled every epoch)."""
        # Shuffle so batches are not drawn in CSV order, where consecutive
        # rows appear to come from the same slide/task.
        return DataLoader(self.train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    def val_dataloader(self):
        """Return the validation loader, which reuses the test dataset."""
        return DataLoader(self.test_dataset, batch_size=BATCH_SIZE)

    def test_dataloader(self):
        """Return the test loader."""
        return DataLoader(self.test_dataset, batch_size=BATCH_SIZE)