<a href="https://colab.research.google.com/github/mauro-nievoff/MultiCaRe_Dataset/blob/main/demos/create_your_own_case_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Create your Own Case Series Dataset 🚀 - No Code Needed 😉

## Step 1: Open this notebook in Colab

Note: You can use the 'Open in Colab' button for this.

## Step 2: Fill in the form below with the details of your Case Series Dataset:

In [None]:
# @title Demographic Information

# Form inputs for the demographic filters; the values are read by the
# filter-building cell further down.
# An age filter is only added when a slider is moved away from its default
# (0 for min_age, 100 for max_age).
# @markdown Age Range:
min_age = 0 # @param {type: "slider", min: 0, max: 100}
max_age = 100 # @param {type: "slider", min: 0, max: 100}

# Each ticked box contributes its value ('Female', 'Male', 'Transgender') to
# the gender filter; if no box is ticked, no gender filter is added.
# @markdown Gender:
female = True # @param {type:"boolean"}
male =  True # @param {type:"boolean"}
transgender = True # @param {type:"boolean"}

In [None]:
# @title Article Information

# Form inputs for the article-level filters; the values are read by the
# filter-building cell further down.
# A year filter is only added when a slider is moved away from its default
# (1990 for min_year, 2023 for max_year).
# @markdown Article Year Range:

min_year = 1990 # @param {type: "slider", min: 1990, max: 2023}
max_year = 2023 # @param {type: "slider", min: 1990, max: 2023}

# 'Commercial Use Only' restricts the dataset to the licenses CC0, CC BY,
# CC BY-SA and CC BY-ND; 'All License Types' adds no license filter.
# NOTE(review): the name `license` also exists as an interactive Python
# built-in; shadowing it looks harmless here, but confirm before relying on it.
# @markdown Article License:

license = 'All License Types' # @param ["All License Types", "Commercial Use Only"]

In [None]:
# @title Free Text Search

# Form inputs for the free-text filters. Each text field is split on commas,
# lower-cased and stripped by the filter-building cell further down; an empty
# field adds no filter. The matching operator is lower-cased and passed along
# with the terms as the filter's 'operator'.
# @markdown Words included in Clinical Case (separate with commas):

case_words = '' # @param {type:"string"}
case_operator = 'Any' # @param ["All", "Any", "None"]

# @markdown Words included in Image Captions (separate with commas):

caption_words = '' # @param {type:"string"}
caption_operator = 'Any' # @param ["All", "Any", "None"]


In [None]:
# @title Metadata Query

# Form inputs for the metadata filters. Each text field is split on commas,
# lower-cased and stripped by the filter-building cell further down; an empty
# field adds no filter. Both filters are submitted with
# 'match_type': 'partial_match' and the lower-cased operator chosen here.
# @markdown Keywords (separate with commas):
keywords = '' # @param {type:"string"}
kw_operator = 'Any' # @param ["All", "Any", "None"]

# @markdown MeSH Terms (separate with commas):
mesh_terms = '' # @param {type:"string"}
mesh_operator = 'Any' # @param ["All", "Any", "None"]

In [None]:
# @title Image Tags

# Form inputs for the image-label filters; the values are read by the
# filter-building cell further down.
# 'Any' adds no image-type filter; any other choice is sent verbatim as the
# single value of a 'normalized_extractions' filter.
# NOTE(review): 'broncoscopy' (and 'esophagous' below) look misspelled, but the
# strings are passed to the filter unchanged -- confirm against the dataset's
# label vocabulary before correcting them.
# @markdown Image Type:
image_type = 'Any' # @param ["Any", 'mri', 'ct', 'ultrasound', 'echocardiogram', 'x_ray', 'cta', 'pet', 'ekg', 'angiography', 'h&e', 'methenamine_silver', 'immunofluorescence', 'immunoreactivity', 'immunostaining', 'ihc', 'gastroscopy', 'mra', 'colonoscopy', 'dsa', 'endoscopy', 'eeg', 'mammography', 'scintigraphy', 'fundus_photograph', 'oct', 'cystoscopy', 'mrcp', 'broncoscopy', 'opg', 'venogram', 'egd', 'emg', 'myelogram', 'autofluorescence', 'laryngoscopy', 'arthroscopy', 'ercp', 'spect', 'tractography', 'congo_red', 'ziehl_neelsen', 'masson_trichrome', 'culture', 'giemsa', 'acid_fast', 'pas', 'ki67', 'fish', 'papanicolaou', 'nuclear_staining', 'gram', 'red_stain', 'van_gieson', 'cytoplasmatic_staining', 'alcian_blue', 'green_birefringence', 'blue_stain', 'methylene_blue', 'cotton_blue', 'ck_5/6']

# Every ticked box adds the variable's own name as a label to a second
# 'normalized_extractions' filter that is submitted with operator 'any';
# if no box is ticked, that filter is not added.
# @markdown Anatomical Terms:
abdomen = False # @param {type:"boolean"}
bone = False # @param {type:"boolean"}
bowel = False # @param {type:"boolean"}
brain = False # @param {type:"boolean"}
breast = False # @param {type:"boolean"}
chest = False # @param {type:"boolean"}
colorectal = False # @param {type:"boolean"}
esophagous = False # @param {type:"boolean"}
eye = False # @param {type:"boolean"}
gi = False # @param {type:"boolean"}
head = False # @param {type:"boolean"}
heart = False # @param {type:"boolean"}
kidney = False # @param {type:"boolean"}
liver = False # @param {type:"boolean"}
lung = False # @param {type:"boolean"}
lymph_node = False # @param {type:"boolean"}
mediastinum = False # @param {type:"boolean"}
mouth = False # @param {type:"boolean"}
ovary = False # @param {type:"boolean"}
pancreas = False # @param {type:"boolean"}
pelvis = False # @param {type:"boolean"}
peritoneum = False # @param {type:"boolean"}
retroperitoneum = False # @param {type:"boolean"}
skin = False # @param {type:"boolean"}
skull = False # @param {type:"boolean"}
spine = False # @param {type:"boolean"}
stomach = False # @param {type:"boolean"}
uterus = False # @param {type:"boolean"}

## Step 3: Click Runtime > Run all in the Colab toolbar. Do not modify any cell.

### The dataset will be created and downloaded in about 10 minutes. If you get an error, rerun all the cells.

In [None]:
from datetime import datetime
import shutil
import os
from google.colab import files

In [None]:
# Fetch the MultiCaRe helper code into a local `MultiCaRe_Dataset` folder.
# The import below is resolved from that folder, so the clone must run first.
!git clone https://github.com/mauro-nievoff/MultiCaRe_Dataset

from MultiCaRe_Dataset.multicare import MedicalDatasetCreator

Cloning into 'MultiCaRe_Dataset'...
remote: Enumerating objects: 129, done.[K
remote: Counting objects: 100% (42/42), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 129 (delta 25), reused 9 (delta 9), pack-reused 87[K
Receiving objects: 100% (129/129), 1.35 MiB | 4.20 MiB/s, done.
Resolving deltas: 100% (65/65), done.


In [None]:
# Build the dataset creator. Its printed log reports that this step downloads
# the MultiCaRe data from Zenodo and pre-processes the main files, which can
# take several minutes. 'medical_datasets' is the folder it works in.
mdc = MedicalDatasetCreator(
  directory='medical_datasets',
)

Downloading the MultiCaRe Dataset from Zenodo. This may take 5 to 10 minutes.
Importing and pre-processing the main files.
Done!


In [None]:
def _split_terms(raw_text):
  """Turn a comma-separated form value into a list of clean search terms.

  Each piece is stripped of surrounding whitespace and lower-cased. Pieces that
  end up empty (stray or trailing commas, whitespace-only input) are dropped so
  that no empty string is handed to the dataset filter as a search term.

  Args:
    raw_text: The text typed into a form field, e.g. 'Tumor, MRI,'.

  Returns:
    A list of non-empty, lower-case terms in their original order, e.g.
    ['tumor', 'mri']. The list is empty when nothing usable was typed.
  """
  cleaned = (piece.strip().lower() for piece in raw_text.split(','))
  return [term for term in cleaned if term]


# List of filter dicts that is later passed to `create_dataset`. A filter is
# only added when the matching form field differs from its "no restriction"
# default.
filters = []

# Age sliders: 0 and 100 are the slider limits and mean "no age restriction".
if min_age != 0:
  filters.append({'field': 'min_age', 'string_list': [str(min_age)]})

if max_age != 100:
  filters.append({'field': 'max_age', 'string_list': [str(max_age)]})

# Gender check boxes: collect the ticked values; no ticked box means no filter.
# NOTE(review): with all three boxes ticked (the default) a gender filter is
# still added -- confirm that this does not drop cases without a recorded gender.
gender_list = []
if female:
  gender_list.append('Female')
if male:
  gender_list.append('Male')
if transgender:
  gender_list.append('Transgender')

if gender_list:
  filters.append({'field': 'gender', 'string_list': gender_list})

# Year sliders: 1990 and 2023 are the slider limits and mean "no restriction".
if min_year != 1990:
  filters.append({'field': 'min_year', 'string_list': [str(min_year)]})

if max_year != 2023:
  filters.append({'field': 'max_year', 'string_list': [str(max_year)]})

# 'Commercial Use Only' keeps the Creative Commons licenses that carry no
# NonCommercial clause.
if license == 'Commercial Use Only':
  filters.append({'field': 'license', 'string_list':  ['CC0', 'CC BY', 'CC BY-SA', 'CC BY-ND']})

# Free-text and metadata searches: a filter is added only when at least one
# non-empty term remains after cleaning the form input.
case_word_list = _split_terms(case_words)
if case_word_list:
  filters.append({'field': 'case_strings', 'string_list': case_word_list, 'operator': case_operator.lower()})

caption_word_list = _split_terms(caption_words)
if caption_word_list:
  filters.append({'field': 'caption', 'string_list': caption_word_list, 'operator': caption_operator.lower()})

keyword_list = _split_terms(keywords)
if keyword_list:
  filters.append({'field': 'keywords', 'string_list': keyword_list, 'operator': kw_operator.lower(), 'match_type': 'partial_match'})

mesh_term_list = _split_terms(mesh_terms)
if mesh_term_list:
  filters.append({'field': 'mesh_terms', 'string_list': mesh_term_list, 'operator': mesh_operator.lower(), 'match_type': 'partial_match'})

# Image type drop-down: 'Any' means no restriction on the image type.
if image_type != 'Any':
  filters.append({'field': 'normalized_extractions', 'string_list': [image_type]})

# Anatomical check boxes, keyed by the label that is sent to the filter.
# The dict keeps insertion order, so the selected labels come out in the same
# order as the boxes appear in the form.
anatomical_selection = {
  'abdomen': abdomen,
  'bone': bone,
  'bowel': bowel,
  'brain': brain,
  'breast': breast,
  'chest': chest,
  'colorectal': colorectal,
  'esophagous': esophagous,
  'eye': eye,
  'gi': gi,
  'head': head,
  'heart': heart,
  'kidney': kidney,
  'liver': liver,
  'lung': lung,
  'lymph_node': lymph_node,
  'mediastinum': mediastinum,
  'mouth': mouth,
  'ovary': ovary,
  'pancreas': pancreas,
  'pelvis': pelvis,
  'peritoneum': peritoneum,
  'retroperitoneum': retroperitoneum,
  'skin': skin,
  'skull': skull,
  'spine': spine,
  'stomach': stomach,
  'uterus': uterus,
}

# Labels of every ticked anatomical box.
normalized_extractions = [label for label, ticked in anatomical_selection.items() if ticked]

# Only restrict by anatomy when at least one box is ticked; operator 'any' is
# sent along so the selected labels are alternatives rather than all required.
if normalized_extractions:
  filters.append({'field': 'normalized_extractions', 'string_list': normalized_extractions, 'operator': 'any'})


In [None]:
# Timestamp of this run (YYYYMMDD_HHMMSS), used to give the dataset a unique name.
date_string = f"{datetime.now():%Y%m%d_%H%M%S}"

In [None]:
# Name of the dataset folder and of the zip file: fixed prefix plus run timestamp.
dataset_name = "case_series_dataset_{}".format(date_string)

In [None]:
# Create the case-series dataset from the filters collected from the form.
mdc.create_dataset(
  dataset_name=dataset_name,
  filter_list=filters,
  dataset_type='case_series',
)

The case_series_dataset_20231216_212513 was successfully created!


In [None]:
# Zip the newly created dataset folder. The folder is addressed relative to the
# working directory, like the relative 'medical_datasets' directory given to
# MedicalDatasetCreator, instead of assuming the notebook runs from /content.
# The archive is written as '<dataset_name>.zip' in the working directory.
shutil.make_archive(dataset_name, 'zip', os.path.join('medical_datasets', dataset_name))

'/content/case_series_dataset_20231216_212513.zip'

In [None]:
# Send the zip file to the browser. The archive was written relative to the
# working directory, so it is read back from there instead of from a
# hard-coded /content path.
files.download(f'{dataset_name}.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## And that's it! Enjoy your new dataset!