In [2]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import os
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
from torch.utils.data import Dataset
from PIL import Image
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import cv2


class IntracranialDataset(Dataset):
    """Dataset of PNG images indexed by a CSV file.

    Every row of the CSV identifies one image through its ``Image`` column;
    the file is read from ``<path>/<Image>.png`` with ``cv2.imread``.

    Args:
        csv_file: Path of the CSV file listing the samples. It must contain an
            ``Image`` column and, when ``labels`` is truthy, every column named
            in ``LABEL_COLUMNS``.
        path: Directory that holds the ``.png`` files.
        labels: When truthy, each sample also carries a ``labels`` tensor.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry (the calling convention
            used by albumentations pipelines).
    """

    # Column order of the values inside the returned ``labels`` tensor.
    LABEL_COLUMNS = ['epidural', 'intraparenchymal', 'intraventricular',
                     'subarachnoid', 'subdural', 'any']

    def __init__(self, csv_file, path, labels, transform=None):
        self.path = path
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.labels = labels

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample stored at row label ``idx``.

        Returns:
            ``{'image': img, 'labels': tensor}`` when ``self.labels`` is
            truthy, otherwise ``{'image': img}``.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be
                decoded.
        """
        img_name = os.path.join(self.path, self.data.loc[idx, 'Image'] + '.png')

        # NOTE(review): cv2.imread yields channels in BGR order; confirm that
        # the transform / model downstream does not expect RGB.
        img = cv2.imread(img_name)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising,
            # so fail here with the offending path rather than further down.
            raise FileNotFoundError(
                'Could not read image file: {}'.format(img_name))

        if self.transform:
            img = self.transform(image=img)['image']

        if not self.labels:
            return {'image': img}

        # Hand torch a plain list of Python scalars instead of the pandas
        # Series object itself; values and inferred dtype stay the same.
        row_labels = self.data.loc[idx, self.LABEL_COLUMNS].tolist()
        return {'image': img, 'labels': torch.tensor(row_labels)}

In [6]:
# Folder with the test PNGs (presumably pre-resized to 224 px -- TODO confirm).
RSNA_test = 'stage_1_test_png_224x'

# The CSV is in long format: one row per (image, diagnosis) pair, with both
# parts packed into 'ID' and separated by underscores.
train_long = pd.read_csv('stage_1_train.csv')
train_id_parts = train_long['ID'].str.split('_', expand=True)
# Assigning three names keeps the original strictness: this raises if an ID
# does not split into exactly three pieces.
train_id_parts.columns = ['ID', 'Image', 'Diagnosis']

# Reshape to one row per image and one column per diagnosis. Built as a single
# non-mutating chain (no inplace=True on a column subset), so the cell is
# idempotent and cannot emit SettingWithCopyWarning.
data = (
    train_long
    .assign(Image=train_id_parts['Image'],
            Diagnosis=train_id_parts['Diagnosis'])
    .loc[:, ['Image', 'Diagnosis', 'Label']]
    .drop_duplicates()
    .pivot(index='Image', columns='Diagnosis', values='Label')
    .reset_index()
    # Restore the 'ID_' prefix that the split removed from the image name.
    .assign(Image=lambda wide: 'ID_' + wide['Image'])
)
data.head()

Diagnosis,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
0,ID_000039fa0,0,0,0,0,0,0
1,ID_00005679d,0,0,0,0,0,0
2,ID_00008ce3c,0,0,0,0,0,0
3,ID_0000950d7,0,0,0,0,0,0
4,ID_0000aee4b,0,0,0,0,0,0


In [9]:
# NOTE(review): this reads the *stage 2* sample submission while RSNA_test
# points at a 'stage_1' image folder -- confirm the two are meant to match.
test_long = pd.read_csv('stage_2_sample_submission.csv')
test_id_parts = test_long['ID'].str.split('_', expand=True)
# Assigning three names keeps the original strictness: this raises if an ID
# does not split into exactly three pieces.
test_id_parts.columns = ['ID', 'Image', 'Diagnosis']

# Reshape to one row per image and one column per diagnosis. Built as a single
# non-mutating chain (no inplace=True on a column subset), so the cell is
# idempotent and cannot emit SettingWithCopyWarning.
test = (
    test_long
    .assign(Image=test_id_parts['Image'],
            Diagnosis=test_id_parts['Diagnosis'])
    .loc[:, ['Image', 'Diagnosis', 'Label']]
    .drop_duplicates()
    .pivot(index='Image', columns='Diagnosis', values='Label')
    .reset_index()
    # Restore the 'ID_' prefix that the split removed from the image name.
    .assign(Image=lambda wide: 'ID_' + wide['Image'])
)
test.head()

Diagnosis,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
0,ID_000000e27,0.5,0.5,0.5,0.5,0.5,0.5
1,ID_000009146,0.5,0.5,0.5,0.5,0.5,0.5
2,ID_00007b8cb,0.5,0.5,0.5,0.5,0.5,0.5
3,ID_000134952,0.5,0.5,0.5,0.5,0.5,0.5
4,ID_000176f2a,0.5,0.5,0.5,0.5,0.5,0.5


In [12]:
# Show the rows of the test table (if any) that belong to this image ID.
test.loc[test['Image'].eq('ID_0aad404f7')]

Diagnosis,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural


In [13]:
# Check whether the ID occurs anywhere in the 'Image' column of the test table.
id_to_check = 'ID_0bc6645ff'
is_present = bool((test['Image'].values == id_to_check).any())
is_present

False