In [6]:
import matplotlib
matplotlib.use('TkAgg')  # or 'Qt5Agg' 

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from matplotlib import colors
from matplotlib.widgets import Slider
import tensorflow as tf

matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['font.sans-serif'] = ['Arial', 'Tahoma', 'DejaVu Sans', 'Verdana']
import sys


In [2]:
from medmnist import VesselMNIST3D


In [3]:



train_dataset = VesselMNIST3D(split='train', size=28, download=True)
trainx = []
trainy = []

test_dataset = VesselMNIST3D(split='test', size=28, download=True)
testx = []
testy = []

val_dataset = VesselMNIST3D(split='train', size=28, download=True)
valx = []
valy = []

for i in range(len(train_dataset)):
    trainx.append(train_dataset[i][0])
    trainy.append(train_dataset[i][1])

for i in range(len(test_dataset)):
    testx.append(test_dataset[i][0])
    testy.append(test_dataset[i][1])

for i in range(len(val_dataset)):
    valx.append(val_dataset[i][0])
    valy.append(val_dataset[i][1])

trainx_tensor = tf.convert_to_tensor(trainx, dtype=tf.float16)
trainy_tensor = tf.convert_to_tensor(trainy, dtype=tf.float16)
testx_tensor = tf.convert_to_tensor(testx, dtype=tf.float16)
testy_tensor = tf.convert_to_tensor(testy, dtype=tf.float16)
valx_tensor = tf.convert_to_tensor(valx, dtype=tf.float16)
valy_tensor = tf.convert_to_tensor(valy, dtype=tf.float16)
# float16 doesn't run any faster on the 4090s, but it cuts memory usage in half!





Dataset Stats

In [4]:
print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"\nInput shape: {trainx_tensor.shape}")
print(f"Label shape: {trainy_tensor.shape}")
print(f"\nSingle image shape: {trainx[0].shape}")
print(f"Data type: {trainx_tensor.dtype}")

Training samples: 1335
Test samples: 382
Validation samples: 1335

Input shape: (1335, 1, 28, 28, 28)
Label shape: (1335, 1)

Single image shape: (1, 28, 28, 28)
Data type: <dtype: 'float16'>


Distribution

In [5]:
# Training set
unique_train, counts_train = np.unique(trainy, return_counts=True)
print("\nTraining set class distribution:")
for label, count in zip(unique_train, counts_train):
    percentage = (count / len(trainy)) * 100
    print(f"  Class {label}: {count} samples ({percentage:.2f}%)")  # Remove [0]

# Test set
unique_test, counts_test = np.unique(testy, return_counts=True)
print("\nTest set class distribution:")
for label, count in zip(unique_test, counts_test):
    percentage = (count / len(testy)) * 100
    print(f"  Class {label}: {count} samples ({percentage:.2f}%)")  # Remove [0]

# Validation set
unique_val, counts_val = np.unique(valy, return_counts=True)
print("\nValidation set class distribution:")
for label, count in zip(unique_val, counts_val):
    percentage = (count / len(valy)) * 100
    print(f"  Class {label}: {count} samples ({percentage:.2f}%)")  # Remove [0]


Training set class distribution:
  Class 0: 1185 samples (88.76%)
  Class 1: 150 samples (11.24%)

Test set class distribution:
  Class 0: 339 samples (88.74%)
  Class 1: 43 samples (11.26%)

Validation set class distribution:
  Class 0: 1185 samples (88.76%)
  Class 1: 150 samples (11.24%)


Range analysis

In [8]:
sample_vol = np.array(trainx[0])
print(f"Min value: {sample_vol.min()}")
print(f"Max value: {sample_vol.max()}")
print(f"Mean value: {sample_vol.mean():.4f}")
print(f"Std deviation: {sample_vol.std():.4f}")

print(f"\nData range: [{sample_vol.min()}, {sample_vol.max()}]")



Min value: 0.0
Max value: 1.0
Mean value: 0.0374
Std deviation: 0.1896

Data range: [0.0, 1.0]


Memory Usage

In [9]:


trainx_size_mb = sys.getsizeof(trainx_tensor.numpy()) / (1024**2)
testx_size_mb = sys.getsizeof(testx_tensor.numpy()) / (1024**2)
valx_size_mb = sys.getsizeof(valx_tensor.numpy()) / (1024**2)


print(f"Training data: {trainx_size_mb:.2f} MB")
print(f"Test data: {testx_size_mb:.2f} MB")
print(f"Validation data: {valx_size_mb:.2f} MB")
print(f"Total: {trainx_size_mb + testx_size_mb + valx_size_mb:.2f} MB")


Training data: 55.90 MB
Test data: 15.99 MB
Validation data: 55.90 MB
Total: 127.79 MB
