<h1 align="center">DICOM Metadata Exploration</h1>

## Global Variables

### Project Specific Variables

In [None]:
# --- CSV inputs ---------------------------------------------------------
# Folder containing the CSV exports, followed by the individual file names.
CSV_FOLDER = "../../data/Paradise_CSV/"
CSV_METADATA_FILE = "DICOM_Metadata.csv"
CSV_ARCHIMED_FILE = "ArchiMed_Data.csv"
CSV_LABELED_DATA_FILE = "Labeled_Data_RAW.csv"

# --- CSV parsing options ------------------------------------------------
CSV_SEPARATOR = ";"   # Field delimiter of the CSV files, e.g. "," or "\t"
IMPORT_COLUMNS = []   # Columns to import; an empty list imports all columns
CHUNK_SIZE = 50_000   # Number of rows per chunk

# --- Sampling -----------------------------------------------------------
# NOTE(review): not used in this section; presumably the number of rows to
# sample further down the notebook — confirm.
SAMPLE_SIZE = 25

### Colors

In [None]:
# ANSI escape sequences for colorizing printed output.
# Usage: f"{ANSI['R']}message{ANSI['W']}" — always close with 'W' so the
# color does not carry over into later output.
ANSI = {
    "R": "\033[91m",  # Red
    "G": "\033[92m",  # Green
    "B": "\033[94m",  # Blue
    "Y": "\033[93m",  # Yellow
    "W": "\033[0m",   # Reset to the terminal default (not literally white)
}

## Imports

In [None]:
import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype

## CSV Import

In [None]:
def _read_csv_in_chunks(file_name: str) -> pd.DataFrame:
    """
    Load one CSV file from CSV_FOLDER into a single DataFrame.

    The file is parsed CHUNK_SIZE rows at a time and the chunks are
    concatenated with a fresh index (ignore_index=True). Only the columns
    listed in IMPORT_COLUMNS are read; if that list is empty, every column
    is read. A success message is printed once the file is fully loaded.

    Parameters
    ----------
    file_name : str
        Name of the CSV file inside CSV_FOLDER.

    Returns
    -------
    pd.DataFrame
        The concatenated content of the file.

    Raises
    ------
    Exception
        Any error raised by pd.read_csv / pd.concat is propagated unchanged.
    """
    # Use the chunked reader as a context manager so the underlying file
    # handle is closed even when parsing fails part-way through.
    with pd.read_csv(
        CSV_FOLDER + file_name,
        sep=CSV_SEPARATOR,
        usecols=IMPORT_COLUMNS if IMPORT_COLUMNS else None,
        chunksize=CHUNK_SIZE,
    ) as reader:
        frame = pd.concat(reader, ignore_index=True)

    print(f"{ANSI['G']}Successfully imported{ANSI['W']} {file_name}")
    return frame


try:
    df_metadata = _read_csv_in_chunks(CSV_METADATA_FILE)      # DICOM metadata
    df_archimed = _read_csv_in_chunks(CSV_ARCHIMED_FILE)      # ArchiMed data
    df_labels = _read_csv_in_chunks(CSV_LABELED_DATA_FILE)    # Labeled data
except Exception as e:
    print(f"{ANSI['R']}Error importing CSV files: {str(e)}{ANSI['W']}")
    # Re-raise: the following cells need all three frames. Swallowing the
    # error would only postpone the failure to a NameError later on, or let
    # the notebook silently continue with stale frames from a previous run.
    raise

## Fix Column Types

### Function

In [None]:
def smart_numeric_cast(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of *df* in which every column that holds only numeric
    values is converted to a numeric dtype.

    Order tried per column: Int64 (nullable integers) → float64 → original.

    Columns are left untouched when they are
    * already numeric (including bool),
    * datetime or timedelta columns (pd.to_numeric would otherwise turn
      them into integer nanoseconds), or
    * not fully parseable as numbers (mixed strings, free text, ...).

    Parameters
    ----------
    df : pd.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    pd.DataFrame
        A copy of *df* with the convertible columns cast.
    """
    out = df.copy()

    for col in out.columns:
        s = out[col]

        # Already numeric → nothing to do
        if is_numeric_dtype(s):
            continue

        # Dates/durations are not "numeric" for is_numeric_dtype, but
        # pd.to_numeric accepts them and returns their integer
        # representation. Skip them so they are really left as is.
        if (pd.api.types.is_datetime64_any_dtype(s)
                or pd.api.types.is_timedelta64_dtype(s)):
            continue

        # Parse once; both target dtypes below reuse the result
        try:
            parsed = pd.to_numeric(s, errors="raise")
        except (ValueError, TypeError):
            continue             # not numeric → leave column as is

        # 1) Try nullable integers (fails if any value has a fractional part)
        try:
            out[col] = parsed.astype("Int64")
            continue             # success → next column
        except (ValueError, TypeError, OverflowError):
            pass

        # 2) Try floats
        try:
            out[col] = parsed.astype("float64")
        except (ValueError, TypeError, OverflowError):
            pass                 # 3) Leave as is

    return out

### Execute

In [None]:
# Cast the metadata columns that hold only numbers to Int64 / float64.
df_metadata = df_metadata.pipe(smart_numeric_cast)