In [None]:
# https://www.kaggle.com/code/deannahedges/mammography-submission

In [3]:
!pip install /kaggle/input/dicomsdl-offline-installer/dicomsdl-0.109.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl

Collecting dicomsdl
  Downloading dicomsdl-0.109.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: dicomsdl
Successfully installed dicomsdl-0.109.1
[0m

In [11]:
import pydicom
import cv2
import os
from joblib import Parallel, delayed
from tqdm.notebook import tqdm
from pathlib import Path
from pydicom.pixel_data_handlers.util import apply_voi_lut
import dicomsdl
import sys
import time
import numpy as np
import pandas as pd
import tensorflow as tf

RESIZE_TO = (256, 256)

In [5]:
!mkdir -p /kaggle/working/test_images_processed_cv2_dicomsdl_{RESIZE_TO[0]}/

# https://www.kaggle.com/code/tanlikesmath/brain-tumor-radiogenomic-classification-eda/notebook
def dicom_file_to_ary(path):
    dcm_file = dicomsdl.open(str(path))
    data = dcm_file.pixelData()

    data = (data - data.min()) / (data.max() - data.min())

    if dcm_file.getPixelDataInfo()['PhotometricInterpretation'] == "MONOCHROME1":
        data = 1 - data

    data = cv2.resize(data, RESIZE_TO)
    data = (data * 255).astype(np.uint8)
    return data


image_directories = []
for img in Path('/kaggle/input/rsna-breast-cancer-detection/test_images/10008/').iterdir():
    image_directories.append(img)
print(len(image_directories))

def process_directory(directory_path):
    
    processed_ary = dicom_file_to_ary(directory_path)
        
    cv2.imwrite(
        f'test_images_processed_cv2_dicomsdl_{RESIZE_TO[0]}/{directory_path.stem}.png',
        processed_ary
    )
test_dir = Path("/kaggle/working/test_images_processed_cv2_dicomsdl_256/")

import multiprocessing as mp

with mp.Pool(64) as p:
    p.map(process_directory, image_directories)


4


In [7]:
image_count = len(list(test_dir.glob('*.png')))
print(image_count)

4


In [106]:
df = pd.read_csv("/kaggle/input/rsna-breast-cancer-detection/test.csv")
df['cancer']=0
df.head()

Unnamed: 0,site_id,patient_id,image_id,laterality,view,age,implant,machine_id,prediction_id,cancer
0,2,10008,736471439,L,MLO,81,0,21,10008_L,0
1,2,10008,1591370361,L,CC,81,0,21,10008_L,0
2,2,10008,68070693,R,MLO,81,0,21,10008_R,0
3,2,10008,361203119,R,CC,81,0,21,10008_R,0


In [12]:
model = tf.keras.models.load_model('/kaggle/input/mammography-model-v1/model.h5')

In [107]:
def predict_cancer(img_path):
    img = tf.keras.utils.load_img(img_path,target_size=RESIZE_TO)
    img_array = tf.keras.utils.img_to_array(img)
    img_array = tf.expand_dims(img_array,0)
    predictions = model.predict(img_array)
    confidence = np.where(predictions > 0.5,1,0).squeeze().item()
    print(str(predictions).strip('[]'))
    return float(str(predictions).strip('[]'))

In [108]:
for img_dir in test_dir.iterdir():
    print(str(img_dir.stem))
    df.loc[(df.image_id==int(img_dir.stem)),'cancer'] = predict_cancer(img_dir)

68070693
0.00645487
361203119
0.00845106
1591370361
0.00331484
736471439
0.00353256


In [111]:
df = df[['prediction_id','cancer']]

final = df
df.head()

Unnamed: 0,prediction_id,cancer
0,10008_L,0.003533
1,10008_L,0.003315
2,10008_R,0.006455
3,10008_R,0.008451


In [112]:
final = final.groupby('prediction_id').max().reset_index()
final.head()

Unnamed: 0,prediction_id,cancer
0,10008_L,0.003533
1,10008_R,0.008451


In [113]:
final.to_csv('submission.csv',index=False)