# Kaggle download

This is a copy of the Kaggle notebook we used to download the data.
It includes some image pre-processing (essentially contrast adjustment) tailored to the body region we are working with: the abdomen (abdominal trauma).

The code (especially the image processing for the abdominal window) is inspired by the winning solution of the Kaggle competition [RSNA 2023 Abdominal Trauma Detection](https://www.kaggle.com/competitions/rsna-2023-abdominal-trauma-detection/overview).

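The code is not runnable locally: it relies on the Kaggle notebook environment (the `/kaggle/input` and `/kaggle/working` directories and the `!pip` notebook magic).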

In [ ]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import os

!pip install dicomsdl
import dicomsdl
import zipfile
from tqdm import tqdm
from kaggle.kaggle_download import __dataset__to_numpy_image, glob_sorted, load_volume, save_volume

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [ ]:
# Attach the project's pixel-decoding helper to dicomsdl's DataSet class as
# `to_numpy_image` (monkey-patch).
# NOTE(review): presumably `load_volume` calls this method - confirm in kaggle_download.
dicomsdl._dicomsdl.DataSet.to_numpy_image = __dataset__to_numpy_image

# Input/output locations inside the Kaggle notebook environment.
IMAGE_FOLDER = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/'
OUTPUT_FOLDER = "/kaggle/working/output"

# Patient folder names are parsed as integers so that the batch order is
# numeric rather than lexicographic.
patients = os.listdir(f"{IMAGE_FOLDER}")
sorted_patients = sorted(patients, key=int)
print(f"amount of patients: {len(sorted_patients)}")

# We download the data in batches: `batch` is the range of patient indices
# (into `sorted_patients`) handled by this run. Change it and re-run the cell
# for the next batch.
batch = range(0, 1000)

# Slicing (instead of indexing with every value of `batch`) clamps the batch
# to the number of available patients, so a final, partial batch does not
# fail with an IndexError before the archive is written.
batch_patients = sorted_patients[batch.start:batch.stop]

for patient in tqdm(batch_patients):
    studies = os.listdir(f'{IMAGE_FOLDER}/{patient}')
    # Take only the first study. os.listdir returns entries in arbitrary
    # order, so sort first to make the choice reproducible between runs.
    study = sorted(studies)[0]
    files = glob_sorted(f"{IMAGE_FOLDER}/{patient}/{study}/*")

    # shape = (number of images, height, width)
    volume = load_volume(files)
    # NOTE(review): (128, 128) is presumably the target slice size - confirm in save_volume.
    save_volume(volume, patient, OUTPUT_FOLDER, (128, 128))

# Pack everything found under OUTPUT_FOLDER into one archive named after the
# batch bounds (e.g. batch_0_1000.zip). Entries are stored relative to
# OUTPUT_FOLDER. The whole folder is archived, so files left over from an
# earlier batch would be included as well.
archive_path = f"/kaggle/working/batch_{batch.start}_{batch.stop}.zip"
with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files in os.walk(OUTPUT_FOLDER):
        for file in files:
            file_path = os.path.join(root, file)
            zipf.write(file_path, os.path.relpath(file_path, OUTPUT_FOLDER))


In [ ]:
# Optional cleanup: the snippet below is deliberately disabled by wrapping it
# in a string literal. To delete the download folder (/kaggle/working/output),
# remove the surrounding triple quotes and run the cell.
"""
import shutil
import os

def delete_folder(folder_path):
    try:
        # Attempt to remove the folder and its contents
        shutil.rmtree(folder_path)
        print(f"Folder '{folder_path}' successfully deleted.")
    except Exception as e:
        print(f"Error occurred while deleting folder '{folder_path}': {e}")

# Example usage:
folder_to_delete = "/kaggle/working/output"

delete_folder(folder_to_delete)
"""