<a href="https://www.kaggle.com/code/rimzakhama/rsna-pytorch-baseline-inference?scriptVersionId=143880360" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
from IPython.display import display_html
def restartkernel() :
    display_html("",raw=True)
restartkernel()


# Install timm package from local directory
!pip install '/kaggle/input/timm-pack/timm_package/timm-0.6.12-py3-none-any.whl'


In [None]:
# The medical images provided are in (two types of) Dicom format, and in order to read these 
# we will need to install and import the required dependent libraries:

'''
## If Online : install required packages for dcm processing 
!pip install -qU pylibjpeg pylibjpeg-openjpeg pylibjpeg-libjpeg pydicom python-gdcm dicomsdl

#import all the required dicom packages
import gdcm
import pydicom
import pylibjpeg
import dicomsdl
'''

!pip install pylibjpeg --no-index --find-links=file:///kaggle/input/read-dicom-set/dicom_read
!pip install pylibjpeg-openjpeg --no-index --find-links=file:///kaggle/input/read-dicom-set/dicom_read
!pip install pylibjpeg-libjpeg --no-index --find-links=file:///kaggle/input/read-dicom-set/dicom_read
!pip install pydicom --no-index --find-links=file:///kaggle/input/read-dicom-set/dicom_read
!pip install python-gdcm --no-index --find-links=file:///kaggle/input/read-dicom-set/dicom_read
!pip install dicomsdl --no-index --find-links=file:///kaggle/input/read-dicom-set/dicom_read

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import torch
import torch.nn as nn
from torchvision import transforms

import os

#import pydicom as dicom
import pydicom
import cv2

import timm
import torch.optim as optim
from sklearn import model_selection
from sklearn.metrics import f1_score

from tqdm.autonotebook import tqdm

import gc
# Run a garbage-collection pass and release PyTorch's cached CUDA memory
# before the model is created and loaded further down.
gc.collect()
torch.cuda.empty_cache()

In [None]:
class Config:
    """Settings read by the model definition and the inference cell."""

    # Width of the linear classification head (number of output logits).
    NUM_CLASSES = 1
    # Checkpoint whose state dict is loaded into the model.
    MODEL_PATH = '/kaggle/input/efficientb4-model/model.bin'
    # Number of images per DataLoader batch.
    BATCH_SIZE = 2

In [None]:

class Dataset:
    """Map-style dataset that loads pre-converted PNG images for inference.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'patient_id', 'image_id' and 'prediction_id'.
    transform : callable, optional
        Applied to the resized image array before the tensor conversion.
    image_dir : str, optional
        Root directory containing '<patient_id>/<image_id>.png' files.
    image_size : int, optional
        Side length (in pixels) the image is resized to.

    Each item is a tuple ``(image, prediction_id)`` where ``image`` is a
    float32 tensor.
    """

    def __init__(self, df, transform=None,
                 image_dir='/kaggle/working/output/test_images', image_size=512):
        # reset_index returns a new frame (the caller's df is untouched) and
        # guarantees that the positional indices handed out by a DataLoader
        # match the row labels, even when `df` was filtered or shuffled.
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.image_dir = image_dir
        self.image_size = image_size

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # str() works for NumPy scalars as well as plain Python values,
        # unlike `.astype(str)` which only exists on NumPy/pandas objects.
        patient_id = str(self.df.loc[idx, 'patient_id'])
        image_id = str(self.df.loc[idx, 'image_id'])
        prediction_id = self.df.loc[idx, 'prediction_id']

        image_png_path = os.path.join(self.image_dir, patient_id, image_id + '.png')

        image = mpimg.imread(image_png_path)
        image = cv2.resize(image, (self.image_size, self.image_size))

        # Apply transforms on the image
        if self.transform is not None:
            image = self.transform(image)

        # Keep the pixel values as floats: casting to an integer dtype would
        # truncate the fractional intensities and wipe out the image content.
        # NOTE(review): confirm the training pipeline fed the model float
        # inputs as well, so that inference matches training.
        image = torch.as_tensor(image, dtype=torch.float32)

        return image, prediction_id

In [None]:
class BreastCancerModel(nn.Module):
    """EfficientNet-B4 (created through timm) with a replaced linear head.

    The backbone is built for single-channel input without pretrained
    weights; its classifier is swapped for a linear layer producing
    ``Config.NUM_CLASSES`` outputs.
    """

    def __init__(self, Config):
        super().__init__()
        backbone = timm.create_model('efficientnet_b4', in_chans=1, pretrained=False)
        head_width = backbone.classifier.in_features
        backbone.classifier = nn.Linear(head_width, Config.NUM_CLASSES)
        # The attribute name is part of the checkpoint's state-dict keys.
        self.efficientnet = backbone

    def forward(self, image):
        """Return the raw (pre-sigmoid) outputs of the network for `image`."""
        return self.efficientnet(image)


In [None]:
def pfbeta(labels, predictions, beta):
    """Probabilistic F-beta score.

    Predictions are clipped to [0, 1] and treated as soft counts: the
    prediction of a positive sample adds to the true-positive mass, the
    prediction of a negative sample adds to the false-positive mass.

    Parameters
    ----------
    labels : sequence
        Ground-truth labels; truthy values are positives.
    predictions : sequence of float
        Predicted probabilities, indexed like `labels`.
    beta : float
        Weight of recall relative to precision.

    Returns
    -------
    float or int
        The score, or 0 when precision or recall is zero or undefined
        (no positive labels, or no predicted mass at all).
    """
    y_true_count = 0
    ctp = 0
    cfp = 0

    for idx in range(len(labels)):
        prediction = min(max(predictions[idx], 0), 1)
        if labels[idx]:
            y_true_count += 1
            ctp += prediction
        else:
            cfp += prediction

    # Without positive labels or without any predicted mass, recall or
    # precision would be 0/0; report 0 instead of raising ZeroDivisionError.
    if y_true_count == 0 or (ctp + cfp) == 0:
        return 0

    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if c_precision > 0 and c_recall > 0:
        return (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
    return 0

In [None]:
# Preprocess test images (convert to png format)

test_dcm_images_path = '/kaggle/input/rsna-breast-cancer-detection/test_images'
size = 512

# Converted PNGs are written to '<output_path>/<patient>/<image>.png'
output_path = '/kaggle/working/output/test_images'

# Only the first few converted images are plotted, so that a large test set
# does not flood the notebook with figures.
MAX_PREVIEWS = 4


def dicom_to_uint8(dicom_image):
    """Scale the pixel data of a pydicom dataset to an 8-bit grayscale array.

    Negative values are clipped to zero and the result is divided by the
    maximum and stretched to [0, 255]. MONOCHROME1 images are inverted inside
    that range so that both photometric interpretations share one polarity.
    """
    image_array = dicom_image.pixel_array
    clipped = np.maximum(image_array, 0).astype(np.float64)
    peak = clipped.max()
    # An image without any positive value would otherwise divide by zero.
    scaled_img = clipped / peak * 255.0 if peak > 0 else clipped
    if dicom_image.PhotometricInterpretation == "MONOCHROME1":
        # Invert within 0-255; `1 - x` would go negative and wrap around
        # when cast to uint8.
        scaled_img = 255.0 - scaled_img
    return scaled_img.astype(np.uint8)


previews_shown = 0

# Every patient directory is converted, because the inference cell reads the
# PNG of every row listed in test.csv.
for patient in sorted(os.listdir(test_dcm_images_path)):
    patient_input_path = os.path.join(test_dcm_images_path, patient)
    if not os.path.isdir(patient_input_path):
        continue

    # Create the output directory if it does not exist
    patient_output_path = os.path.join(output_path, patient)
    os.makedirs(patient_output_path, exist_ok=True)

    for image in os.listdir(patient_input_path):
        dicom_image = pydicom.dcmread(os.path.join(patient_input_path, image))
        resized_image = cv2.resize(dicom_to_uint8(dicom_image), (size, size))

        # Swap only the file extension; str.replace('dcm', 'png') would also
        # rewrite a 'dcm' occurring elsewhere in the file name.
        png_file = os.path.join(patient_output_path, os.path.splitext(image)[0] + '.png')
        cv2.imwrite(png_file, resized_image)

        # Plot the png image as read back from disk
        if previews_shown < MAX_PREVIEWS:
            img = mpimg.imread(png_file)
            imgplot = plt.imshow(img)
            plt.show()
            previews_shown += 1

    
# Open question: is inference applied to the DICOM files or to the PNG images?

# This code should first be run on the test images available here, and then
# submitted so that it is run against the Kaggle test dataset.

# TODO: confirm the format of the test images at submission time.

In [None]:
dfx = pd.read_csv('/kaggle/input/rsna-breast-cancer-detection/test.csv')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = BreastCancerModel(Config=Config)

model.to(device)

model.load_state_dict(torch.load(Config.MODEL_PATH))
model.eval()

transform = transforms.Compose([
    transforms.ToTensor()
    ])

dataset = Dataset(dfx, transform=transform)
data_loader = torch.utils.data.DataLoader(dataset, batch_size= Config.BATCH_SIZE,
                                             num_workers=2)

sample = pd.DataFrame(columns = ['prediction_id', 'cancer'])
with torch.no_grad():
    for batch_idx, (images, prediction_id) in enumerate(data_loader):
        images = images.to(device, dtype=torch.float) 
        outputs = model(images).squeeze()
        predictions = outputs.sigmoid().cpu().numpy()
        
        new_dataframe = pd.DataFrame({'prediction_id' : prediction_id , 'cancer' : predictions })
        sample = pd.concat([sample, new_dataframe])
        
# Put the results in the format of submission file
sample = sample.groupby('prediction_id')['cancer'].mean().reset_index()
THRES = 0.84
sample['cancer'] = (sample.cancer > THRES).astype(float)

# Save sample DataFrame as csv
sample.to_csv('submission.csv', index=False)
!head submission.csv

In [None]:
'''
How can we improve our results?
- Create folds and train different models.
- Improve the model architecture.
- Change the model.
- Train (fine-tune) the pretrained model?
- Change the transforms; apply augmentations to the images.
- Increase the number of epochs.
- Use features from the CSV file to improve the accuracy.
- Change the optimizer and its parameters.

'''
