In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import pydicom
from glob import glob
!pip install dicomsdl
import dicomsdl
import matplotlib.pyplot as plt
import cv2
import torch
from torch import nn
import torch.nn.functional as F
import zipfile
from tqdm import tqdm

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [ ]:
def visualize_image(orig_image, image):
    """Show the original and the processed image side by side in grayscale.

    Args:
        orig_image: Image array drawn in the left panel.
        image: Image array drawn in the right panel.
    """
    panels = (
        (orig_image, 'Original DICOM Image'),
        (image, 'Processed DICOM Image'),
    )
    _, axes = plt.subplots(1, 2, figsize=(10, 5))
    for axis, (pixels, title) in zip(axes, panels):
        axis.imshow(pixels, cmap='gray')
        axis.set_title(title)
    plt.show()



def __dataset__to_numpy_image(self, index=0):
    """Copy one frame of pixel data into a freshly allocated 2-D array.

    Args:
        self: Object providing ``getPixelDataInfo()`` and
            ``copyFrameData(index, out)``.
        index: Frame index forwarded to ``copyFrameData`` (default 0).

    Returns:
        Array of shape ``(Rows, Cols)`` with the dtype reported by
        ``getPixelDataInfo()``, filled by ``copyFrameData``.

    Raises:
        RuntimeError: If ``SamplesPerPixel`` is not 1.
    """
    pixel_info = self.getPixelDataInfo()
    frame_dtype = pixel_info['dtype']

    # Only single-sample frames are supported.
    if pixel_info['SamplesPerPixel'] != 1:
        raise RuntimeError('SamplesPerPixel != 1')

    frame = np.empty(
        [pixel_info['Rows'], pixel_info['Cols']],
        dtype=frame_dtype,
    )
    self.copyFrameData(index, frame)
    return frame
# Monkey-patch dicomsdl's DataSet class so that `to_numpy_image` resolves to
# the helper defined above.
dicomsdl._dicomsdl.DataSet.to_numpy_image = __dataset__to_numpy_image



def glob_sorted(path):
    """Return the paths matching ``path`` ordered by their numeric file stem.

    Args:
        path: Glob pattern, e.g. ``"<series_dir>/*"``.

    Returns:
        List of matching paths sorted by the integer that precedes the first
        ``.`` in each file name, so ``2.dcm`` comes before ``10.dcm``.

    Raises:
        ValueError: If a matched file name does not start with an integer
            stem.
    """
    def numeric_stem(file_path):
        # os.path.basename honours the platform's path separator instead of
        # assuming '/'.
        return int(os.path.basename(file_path).split('.')[0])

    return sorted(glob(path), key=numeric_stem)

def get_rescaled_image(dcm, img):
    """Apply the linear rescale ``RescaleSlope * img + RescaleIntercept``.

    Args:
        dcm: Object exposing ``RescaleSlope`` and ``RescaleIntercept``
            attributes (here: the opened DICOM data set).
        img: NumPy array of stored pixel values.

    Returns:
        The rescaled array. A slope or intercept that is missing or ``None``
        falls back to 1 and 0 respectively, i.e. the identity mapping.
    """
    slope = getattr(dcm, 'RescaleSlope', None)
    intercept = getattr(dcm, 'RescaleIntercept', None)

    # Without these defaults a file lacking the rescale tags would fail with
    # a TypeError on ``None * img``.
    if slope is None:
        slope = 1
    if intercept is None:
        intercept = 0

    return slope * img + intercept

def get_windowed_image(img, WL=50, WW=400):
    """Clip ``img`` to an intensity window and rescale the result to uint8.

    Args:
        img: NumPy array of intensities (left unmodified).
        WL: Window level, the centre of the window.
        WW: Window width; the window spans ``WL - WW//2`` to ``WL + WW//2``.

    Returns:
        ``uint8`` array of the same shape. The clipped image is min-max
        normalised, so its smallest value maps to 0 and its largest to 255.
        An image that is constant after clipping is returned as all zeros.
    """
    half_width = WW // 2
    lower, upper = WL - half_width, WL + half_width

    # np.clip already returns a new array, so no explicit copy is needed.
    clipped = np.clip(img, lower, upper)
    shifted = clipped - np.min(clipped)

    peak = np.max(shifted)
    if peak == 0:
        # Constant image: dividing by zero would produce NaNs and an
        # undefined uint8 cast, so return a black image explicitly.
        return np.zeros(shifted.shape, dtype='uint8')

    return (shifted / peak * 255.0).astype('uint8')


def load_volume(dcms):
    """Load a series of DICOM files into one stack of windowed uint8 slices.

    Each file is opened with dicomsdl, rescaled with ``get_rescaled_image``
    and windowed with ``get_windowed_image`` (default window).

    Args:
        dcms: Iterable of DICOM file paths, already in the desired slice
            order.

    Returns:
        Array of shape ``(number of files, rows, cols)`` built with
        ``np.stack``; all slices must therefore share one shape.

    Raises:
        ValueError: If ``dcms`` yields no files.
    """
    slices = []

    for dcm_path in dcms:
        # Each file is decoded once; the former extra pydicom read only fed
        # a list of z-positions that was never used.
        dcm = dicomsdl.open(dcm_path)
        pixels = get_rescaled_image(dcm, dcm.to_numpy_image())

        # get_windowed_image already returns uint8 data scaled to 0..255, so
        # re-normalising here was redundant and divided by zero on an
        # all-black slice.
        slices.append(get_windowed_image(pixels))

        # To inspect a slice while debugging:
        #     visualize_image(dcm.to_numpy_image(), slices[-1])

    if not slices:
        raise ValueError('load_volume() needs at least one DICOM file')

    return np.stack(slices)



def process_volume(volume, patient, size=(128, 128),
                   output_root="/kaggle/working/output"):
    """Resize every slice of ``volume`` and save it as a grayscale PNG.

    Slice ``i`` is written to ``<output_root>/<patient>/<i>.png``.

    Args:
        volume: Sequence of 2-D image arrays (e.g. the result of
            ``load_volume``).
        patient: Patient identifier, used as the output sub-folder name.
        size: Target size handed to ``cv2.resize`` (its ``dsize`` argument).
        output_root: Directory under which the per-patient folder is created.

    Returns:
        None. The function is used for its side effect of writing files.
    """
    output_folder = os.path.join(output_root, str(patient))
    # exist_ok avoids the check-then-create race and makes re-runs harmless.
    os.makedirs(output_folder, exist_ok=True)

    for i, image in enumerate(volume):
        resized = cv2.resize(image, size)
        filename = os.path.join(output_folder, f"{i}.png")
        # Grayscale colormap, matching the single-channel slices.
        plt.imsave(filename, resized, cmap='gray')



def get_volume_data(grd, step=96, stride=1, stride_cutoff=200):
    """Split a per-slice table into chunks of ``step`` rows.

    Args:
        grd: pandas DataFrame with one row per slice.
        step: Number of rows in every returned chunk.
        stride: Keep only every ``stride``-th row when ``grd`` has more than
            ``stride_cutoff`` rows.
        stride_cutoff: Length above which the ``stride`` sub-sampling applies.

    Returns:
        List of DataFrames with ``step`` rows each:

        * one per complete, consecutive run of ``step`` rows;
        * when there is no complete run, a single chunk that stretches the
          table to ``step`` rows by repeating rows at evenly spaced positions;
        * when the length is at least ``step`` but not a multiple of it, one
          extra chunk with the last ``step`` rows (it overlaps the previous
          chunk).

        An empty table yields an empty list.
    """
    if len(grd) > stride_cutoff:
        grd = grd[::stride]

    total = len(grd)
    if total == 0:
        return []

    full_chunks = total // step
    volumes = [grd[i * step:(i + 1) * step] for i in range(full_chunks)]

    if full_chunks == 0:
        # Integer arithmetic gives exactly ``step`` evenly spaced positions;
        # a float-step np.arange can be off by one element and is subject to
        # rounding.
        picks = [(i * total) // step for i in range(step)]
        volumes.append(pd.DataFrame([grd.iloc[p] for p in picks]))

    # A remainder test replaces the former str(...).endswith('.0') check.
    if total >= step and total % step != 0:
        volumes.append(grd[-step:])

    return volumes


# Export the selected patients' slices as PNGs, then zip the output folder.
IMAGE_FOLDER = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/'

patients = os.listdir(IMAGE_FOLDER)
sorted_patients = sorted(patients, key=lambda x: int(x))
# Positions (into sorted_patients) of the patients handled in this batch.
arange = range(3147, 3148)

for i in tqdm(arange):
    patient = sorted_patients[i]
    patient_dir = os.path.join(IMAGE_FOLDER, patient)
    # os.listdir returns entries in arbitrary order; sort so that the study
    # picked below is the same on every run.
    studies = sorted(os.listdir(patient_dir))
    if not studies:
        # Nothing to export for a patient folder without studies.
        continue
    # take only the first study
    study = studies[0]
    files = glob_sorted(os.path.join(patient_dir, study, "*"))

    # shape = (number of images, height, width)
    volume = load_volume(files)

    # process_volume writes the PNG files and returns nothing.
    process_volume(volume, patient)

output_folder = "/kaggle/working/output"
# Name the archive after the index range. Formatting the range object itself
# produced a file called "batch_range(3147, 3148).zip".
zip_path = f"/kaggle/working/batch_{arange.start}_{arange.stop}.zip"
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, _, filenames in os.walk(output_folder):
        for filename in filenames:
            full_path = os.path.join(root, filename)
            # Store entries relative to the output folder (<patient>/<i>.png).
            zipf.write(full_path, os.path.relpath(full_path, output_folder))


In [ ]:
import shutil
import os

def delete_folder(folder_path):
    """Recursively delete ``folder_path`` and print the outcome.

    Deletion is best-effort: filesystem errors (missing folder, permission
    problems, ...) are printed instead of raised.

    Args:
        folder_path: Path of the directory to remove.
    """
    try:
        # Remove the folder together with everything inside it.
        shutil.rmtree(folder_path)
    except OSError as e:
        # Only filesystem failures are expected here; any other exception
        # indicates a programming error and is allowed to propagate.
        print(f"Error occurred while deleting folder '{folder_path}': {e}")
    else:
        print(f"Folder '{folder_path}' successfully deleted.")

# Remove the intermediate PNG folder; the previous cell zips the contents of
# this same path.
folder_to_delete = "/kaggle/working/output"

delete_folder(folder_to_delete)