<a href="https://colab.research.google.com/github/arogya-gyawali/brainscan_AI/blob/main/notebooks/00_convert_mat_to_jpg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🧠 Convert MATLAB .mat Tumor MRI Files to .JPG

This notebook:

- Loads `.mat` brain MRI files from the Figshare tumor dataset
- Converts each MRI to a normalized `.jpg` file
- Extracts the tumor label and the tumor border (`tumorBorder`)
- Saves this information to `metadata.csv`
- Writes all output to `/content/drive/MyDrive/BrainScanAI/tumor_converted/`


In [34]:
# Mount Google Drive at /content/drive; the input and output folders used by
# this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [35]:
#Importing necessary libraries
import h5py
import os
import numpy as np
import pandas as pd
from PIL import Image

In [36]:
# Input/output locations on the mounted Drive, defined once so the later cells
# can refer to them by name.
input_folder = '/content/drive/MyDrive/BrainScanAI/figshare_raw_matlab'
output_folder = '/content/drive/MyDrive/BrainScanAI/tumor_converted'

# exist_ok=True creates the output folder when it is missing and is a no-op when
# it already exists, so the message below is printed on every run (not only on
# first creation).
os.makedirs(output_folder, exist_ok=True)
print(f"✅ Output folder created at: {output_folder}")

✅ Output folder created at: /content/drive/MyDrive/BrainScanAI/tumor_converted


In [37]:
# Keep only the directory entries whose name ends in ".mat"; every other entry
# in the input folder is ignored.
file_list = list(filter(lambda entry: entry.endswith('.mat'), os.listdir(input_folder)))
print(f"Found {len(file_list)} v7.3 .mat files to convert")


Found 3064 v7.3 .mat files to convert


In [38]:
# Convert every .mat scan in input_folder to a normalized JPG in output_folder
# and collect one metadata row (file name, label, tumor border) per saved image.


def _normalize_to_uint8(image_data):
    """Min-max scale an array to the 0-255 range and return it as uint8.

    Args:
        image_data: array-like of numeric pixel intensities.

    Returns:
        np.ndarray of dtype uint8 with the same shape as the input. A
        constant-intensity input (max == min) yields an all-zero image instead
        of dividing by zero and casting NaN to uint8.

    Raises:
        ValueError: if the input is empty (min/max of an empty array).
    """
    pixels = np.asarray(image_data, dtype=np.float32)
    lowest = pixels.min()
    highest = pixels.max()
    if highest == lowest:
        return np.zeros(pixels.shape, dtype=np.uint8)
    return (255 * (pixels - lowest) / (highest - lowest)).astype(np.uint8)


metadata = []
failed_files = []  # inputs that raised during conversion, summarized after the loop

# Sort for a reproducible order, and keep only .mat entries so that stray files
# or sub-folders in the input directory are not handed to h5py.
file_list = sorted(name for name in os.listdir(input_folder) if name.endswith('.mat'))

for i, file_name in enumerate(file_list):
    file_path = os.path.join(input_folder, file_name)

    try:
        # Read every field first: if one is missing, nothing is written, so no
        # JPG is left on disk without a matching metadata row.
        with h5py.File(file_path, 'r') as f:
            cjdata = f['cjdata']

            # np.array(...) copies the dataset into memory, so the values stay
            # usable after the file is closed.
            # NOTE(review): MATLAB v7.3 arrays read through h5py usually come
            # back with their axes reversed -- confirm whether the image (and
            # the tumorBorder coordinates) need transposing for downstream use.
            image_data = np.array(cjdata['image'])

            label = int(np.array(cjdata['label'])[0][0])  # 1: meningioma, 2: glioma, 3: pituitary

            # Tumor border, kept as a (nested) Python list so it can be
            # written to the CSV.
            tumor_border = np.array(cjdata['tumorBorder']).tolist()

        # --- Save Normalized Image ---
        norm_img = _normalize_to_uint8(image_data)
        base_name = os.path.splitext(file_name)[0]
        output_path = os.path.join(output_folder, base_name + ".jpg")
        Image.fromarray(norm_img).save(output_path)

        # --- Record Metadata ---
        metadata.append({
            "file": base_name + ".jpg",
            "label": label,
            "tumorBorder": tumor_border
        })

        if i % 100 == 0:
            print(f"✅ Converted {i}/{len(file_list)}")

    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print(f"❌ Error in {file_name}: {e}")
        failed_files.append(file_name)

if failed_files:
    print(f"⚠️ {len(failed_files)} file(s) could not be converted; see the errors above.")

# --- Write the collected metadata rows next to the converted images ---
csv_path = os.path.join(output_folder, "metadata.csv")
metadata_df = pd.DataFrame(metadata)
metadata_df.to_csv(path_or_buf=csv_path, index=False)  # index=False: no row-number column

print("🏁 All conversions complete. Metadata saved.")


✅ Converted 0/3064
✅ Converted 100/3064
✅ Converted 200/3064
✅ Converted 300/3064
✅ Converted 400/3064
✅ Converted 500/3064
✅ Converted 600/3064
✅ Converted 700/3064
✅ Converted 800/3064
✅ Converted 900/3064
✅ Converted 1000/3064
✅ Converted 1100/3064
✅ Converted 1200/3064
✅ Converted 1300/3064
✅ Converted 1400/3064
✅ Converted 1500/3064
✅ Converted 1600/3064
✅ Converted 1700/3064
✅ Converted 1800/3064
✅ Converted 1900/3064
✅ Converted 2000/3064
✅ Converted 2100/3064
✅ Converted 2200/3064
✅ Converted 2300/3064
✅ Converted 2400/3064
✅ Converted 2500/3064
✅ Converted 2600/3064
✅ Converted 2700/3064
✅ Converted 2800/3064
✅ Converted 2900/3064
✅ Converted 3000/3064
🏁 All conversions complete. Metadata saved.
