# DICOM Histograms

## Global variables & Imports

In [None]:
# Path to the DICOM directory; it is walked recursively for *.dcm / *.dicom files
DICOM_PATH = "/home/pyuser/data/Paradise_DICOMs"

# Path to the histogram directory; generated PNG histograms are written here
# (the directory is created on demand if it does not exist)
HISTOGRAM_PATH = "/home/pyuser/data/Paradise_Histograms"

import os
import pydicom
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import warnings

# Suppress the pydicom character encoding warning: ignore UserWarning
# instances whose originating module name matches "pydicom.charset"
warnings.filterwarnings("ignore", category=UserWarning, module="pydicom.charset")

## Graph creation

### Per DICOM file

In [None]:
from tqdm import tqdm

def create_dicom_histograms():
    """
    Create histograms for all DICOM files in the DICOM_PATH directory
    and save them to the HISTOGRAM_PATH directory.

    DICOM_PATH is walked recursively and every file ending in ``.dcm`` or
    ``.dicom`` (case-insensitive) is processed. For each file one PNG named
    ``<base name>_histogram.png`` is written to HISTOGRAM_PATH, showing the
    pixel value distribution together with a text box listing the bits
    allocated, the bits actually used, the photometric interpretation and
    the minimum / maximum pixel value.

    The bar colour encodes the bits allocated: blue (<= 8), green (<= 12),
    red (<= 14), purple (anything larger).

    A file that cannot be read or plotted is reported on stdout and skipped,
    so one bad file does not abort the whole batch.

    Returns:
        None. The results are the PNG files written to HISTOGRAM_PATH.
    """
    # Create histogram directory if it doesn't exist
    Path(HISTOGRAM_PATH).mkdir(parents=True, exist_ok=True)

    # Get all DICOM files
    dicom_files = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(DICOM_PATH)
        for file in files
        if file.lower().endswith(('.dcm', '.dicom'))
    ]

    print(f"Found {len(dicom_files)} DICOM files")

    for dicom_file in tqdm(dicom_files, desc="Creating histograms"):
        try:
            # Read DICOM file and decode its pixel data
            ds = pydicom.dcmread(dicom_file)
            pixel_array = ds.pixel_array

            # Get pixel value statistics as native Python scalars.
            # NumPy fixed-width scalars wrap around in the arithmetic below
            # (e.g. uint16 65535 + 1 -> 0, int16 32767 - (-32768) -> -1),
            # which breaks both the bit count and the bin count.
            min_pixel = np.min(pixel_array).item()
            max_pixel = np.max(pixel_array).item()

            # Get bits allocated from DICOM header
            bits_allocated = getattr(ds, 'BitsAllocated', None)
            if bits_allocated is None:
                # Fallback: derive it from the storage size of the pixel dtype
                bits_allocated = pixel_array.dtype.itemsize * 8

            # Get photometric interpretation (MONOCHROME1 or MONOCHROME2)
            photometric = getattr(ds, 'PhotometricInterpretation', 'Unknown')
            monochrome_type = photometric
            if 'MONOCHROME' in photometric:
                if '1' in photometric:
                    monochrome_type = 'MONOCHROME1'
                elif '2' in photometric:
                    monochrome_type = 'MONOCHROME2'

            # Calculate actual bits used (based on maximum pixel value)
            if max_pixel > 0:
                bits_used = int(np.ceil(np.log2(max_pixel + 1)))
            else:
                bits_used = 1

            # Determine color and the upper bin limit based on bits allocated
            if bits_allocated <= 8:
                color, max_bins = 'blue', 256
            elif bits_allocated <= 12:
                color, max_bins = 'green', 4096
            elif bits_allocated <= 14:
                color, max_bins = 'red', 16384
            else:
                color, max_bins = 'purple', 65536

            # One bin per distinct value in the observed range, capped by the
            # bit-depth limit; int() keeps the bin count integral even if the
            # pixel data is not an integer type.
            value_span = int(max_pixel - min_pixel) + 1
            bins = min(max_bins, value_span)

            # Ensure we have at least 50 bins for good visualization
            bins = max(bins, 50)

            # Create histogram
            fig = plt.figure(figsize=(10, 6))
            try:
                # Passing color= directly avoids recolouring up to 65536
                # patches one by one in a Python loop.
                plt.hist(pixel_array.ravel(), bins=bins, alpha=0.7,
                         edgecolor='black', color=color)

                plt.title(f'Pixel Value Distribution - {os.path.basename(dicom_file)}')
                plt.xlabel('Pixel Value')
                plt.ylabel('Frequency')
                plt.grid(True, alpha=0.3)

                # Add comprehensive information text box with proper newlines
                info_text = '\n'.join([
                    f'Bits Allocated: {bits_allocated}',
                    f'Bits Used: {bits_used}',
                    f'{monochrome_type}',
                    f'Min Pixel Value: {min_pixel}',
                    f'Max Pixel Value: {max_pixel}',
                ])
                plt.text(0.02, 0.98, info_text,
                         transform=plt.gca().transAxes, verticalalignment='top',
                         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8),
                         fontsize=10)

                # Save histogram
                base_name = os.path.splitext(os.path.basename(dicom_file))[0]
                output_path = os.path.join(HISTOGRAM_PATH, f"{base_name}_histogram.png")
                plt.savefig(output_path, dpi=300, bbox_inches='tight')
            finally:
                # Always release the figure, even when plotting or saving
                # fails; otherwise open figures pile up over a long batch.
                plt.close(fig)

        except Exception as e:
            # Deliberate best-effort: report the failing file and move on
            print(f"Error processing {dicom_file}: {e}")

# Execute the function: writes one histogram PNG per DICOM file found
create_dicom_histograms()


### Global histogram