In [51]:
import os
import sys
from pathlib import Path

import h5py
import numpy as np
import scipy.io

# Directory the notebook kernel was started from
cwd = Path.cwd()
# parents[1] is the grandparent of cwd, i.e. two levels up (parents[0] would
# be the direct parent). Adjust the index if the notebook is moved.
PROJECT_ROOT = cwd.parents[1]

print("PROJECT_ROOT:", PROJECT_ROOT)


PROJECT_ROOT: /home/fesih/Desktop/ubuntu_projects/GAST2


In [None]:
# Inspect the Houston13 ground-truth file by opening it directly with h5py.
# The path is defined here so the cell runs top-to-bottom on a fresh kernel;
# before, `gt_path` was only assigned in a later cell.
gt_path = PROJECT_ROOT / "src/Dataset/Houston13/Houston13_7gt.mat"

with h5py.File(gt_path, "r") as f:
    keys = list(f.keys())
    print("\nKeys in Houston13_7gt.mat:", keys)
    # Let's assume the main data is under the first key
    main_key = keys[0]
    # [()] reads the whole dataset into memory so it remains usable after
    # the file is closed.
    cube = f[main_key][()]
    print(f"Cube shape: {cube.shape}")
    print(f"Cube dtype: {cube.dtype}")
    print("Min value:", np.min(cube))
    print("Max value:", np.max(cube))
    print("Mean value:", np.mean(cube))
    print("Std value:", np.std(cube))


In [53]:
import h5py

# Paths to the Houston13 data file and its ground-truth file
mat_path = PROJECT_ROOT / "src/Dataset/Houston13/Houston13.mat"
gt_path = PROJECT_ROOT / "src/Dataset/Houston13/Houston13_7gt.mat"


def summarize_h5_mat(path, prefix=""):
    """Print the keys and basic statistics of an HDF5-based ``.mat`` file.

    Args:
        path: Location of the file; it is opened read-only with ``h5py``.
        prefix: Text printed in front of the "Keys in ..." line, e.g. a
            newline to separate the reports of consecutive files.

    Returns:
        The array stored under the first key of the file.
    """
    with h5py.File(path, "r") as f:
        keys = list(f.keys())
        # Label the report with the file that is actually opened; the old
        # hard-coded label named a different file than the one being read.
        print(f"{prefix}Keys in {Path(path).name}:", keys)
        # Let's assume the main data is under the first key
        values = f[keys[0]][()]
    print(f"Cube shape: {values.shape}")
    print(f"Cube dtype: {values.dtype}")
    print("Min value:", np.min(values))
    print("Max value:", np.max(values))
    print("Mean value:", np.mean(values))
    print("Std value:", np.std(values))
    return values


data_cube = summarize_h5_mat(mat_path)
gt_map = summarize_h5_mat(gt_path, prefix="\n")
# The earlier version of this cell left `cube` bound to the ground-truth
# array; keep that name available for any code that still reads it.
cube = gt_map



Keys in Houston13.mat: ['ori_data']
Cube shape: (48, 954, 210)
Cube dtype: float64
Min value: 0.0
Max value: 1.0
Mean value: 0.08720015110251707
Std value: 0.057268523853831174

Keys in Houston13_7gt.mat: ['map']
Cube shape: (954, 210)
Cube dtype: float64
Min value: 0.0
Max value: 7.0
Mean value: 0.05218129180393331
Std value: 0.5166417734271238


## Explanation: Loading Any Version of MATLAB `.mat` Files in Python

This code provides a robust way to load MATLAB `.mat` files of **any version** (including v7.3 HDF5-based files) into Python. `scipy.io.loadmat` cannot read v7.3 files itself, so such files are first read with `h5py`, re-saved in the older v7.2-compatible format, and then loaded with `scipy.io.loadmat`.

### Key Components

- **convert_mat_v73_to_v72(v73_path, v72_path):**
  - Loads a v7.3 (HDF5) `.mat` file using `h5py`.
  - Recursively extracts all datasets and groups into a Python dictionary.
  - Saves the data as a v7.2 `.mat` file using `scipy.io.savemat`, which is compatible with `scipy.io.loadmat`.

- **load_mat_any_version(mat_path):**
  - Checks if a v7.2-converted file already exists and loads it if available.
  - Otherwise, tries to load the original file with `scipy.io.loadmat`.
  - If `loadmat` raises `NotImplementedError` because the file is v7.3 (HDF5), it converts the file to v7.2 and then loads the converted copy.

- **Usage:**
  - Specify the path to your `.mat` file (`mat_path`).
  - Call `load_mat_any_version(mat_path)` to load the data, regardless of the MATLAB version.
  - The loaded data is a Python dictionary with keys corresponding to the variables in the `.mat` file.

### Benefits

- **Automatic compatibility:** Handles both old and new MATLAB formats seamlessly.
- **Caching:** Avoids repeated conversion by saving and reusing the v7.2 file.
- **No manual intervention:** Just use `load_mat_any_version` for any `.mat` file.

---

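**Example output:**
```
Loaded keys: dict_keys(['__header__', '__version__', '__globals__', 'ori_data'])
```
The `__header__`, `__version__`, and `__globals__` entries are metadata added by `scipy.io.loadmat`; the remaining keys (here `ori_data`) are the variable names loaded from the `.mat` file.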

In [54]:
import os
from scipy.io import loadmat, savemat
import h5py
import numpy as np


def convert_mat_v73_to_v72(v73_path, v72_path):
    """Re-save an HDF5-based (v7.3) ``.mat`` file so ``scipy.io.loadmat`` can read it.

    The file is opened with ``h5py``; every dataset is read fully into memory
    and every group becomes a nested dict. The resulting dict is written with
    ``scipy.io.savemat``. Entries that are neither datasets nor groups are
    skipped.

    Args:
        v73_path: Path of the existing v7.3 file.
        v72_path: Destination path (or writable file object) for the converted
            file.

    Notes:
        The output is written to a temporary file first and then moved into
        place, so an interrupted conversion never leaves a truncated file at
        ``v72_path`` that a later cache check could mistake for a valid one.

        NOTE(review): arrays are stored exactly as h5py returns them. h5py
        presumably reports MATLAB arrays with their axes in reverse order;
        confirm whether downstream code expects a transpose.
    """
    def extract_group(group):
        contents = {}
        for key, item in group.items():
            if isinstance(item, h5py.Dataset):
                contents[key] = item[()]  # read the whole dataset
            elif isinstance(item, h5py.Group):
                contents[key] = extract_group(item)
        return contents

    with h5py.File(v73_path, "r") as f:
        data_dict = extract_group(f)

    # File-like destinations cannot be renamed; write to them directly.
    if hasattr(v72_path, "write"):
        savemat(v72_path, data_dict)
        return

    target = os.fspath(v72_path)
    if not target.endswith(".mat"):
        # Mirror savemat's default appendmat=True behaviour.
        target += ".mat"
    tmp_path = target + ".tmp"
    try:
        # appendmat=False keeps savemat from turning "x.tmp" into "x.tmp.mat".
        savemat(tmp_path, data_dict, appendmat=False)
        os.replace(tmp_path, target)
    finally:
        # Only still present when savemat or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def load_mat_any_version(mat_path):
    """Load a MATLAB ``.mat`` file with ``scipy.io.loadmat``, converting v7.3 files first.

    A converted copy is cached next to the original as ``<name>_v72.mat``.

    Steps:
        1. If the cached copy exists and is at least as new as ``mat_path``
           (or ``mat_path`` itself is missing), load the cached copy.
        2. Otherwise try ``loadmat`` on ``mat_path``.
        3. If ``loadmat`` raises ``NotImplementedError`` with the v7.3 hint,
           convert the file with ``convert_mat_v73_to_v72`` and load the result.

    Args:
        mat_path: Path (``str`` or ``os.PathLike``) of the ``.mat`` file.

    Returns:
        The dict returned by ``scipy.io.loadmat``.

    Raises:
        NotImplementedError: Re-raised when ``loadmat`` fails for a reason
            other than the v7.3 format.
    """
    v72_path = os.path.splitext(mat_path)[0] + "_v72.mat"

    # 1. Use the cached conversion, unless the source was modified afterwards.
    #    A cache older than its source would silently return outdated data.
    if os.path.exists(v72_path):
        source_missing = not os.path.exists(mat_path)
        if source_missing or os.path.getmtime(v72_path) >= os.path.getmtime(mat_path):
            print(f"v7.2 .mat file already exists Found cached : {v72_path}")
            return loadmat(v72_path)
        print(f"Cached file {v72_path} is older than {mat_path}; ignoring it.")

    # 2. Otherwise, try to load original
    try:
        print(f"Trying to load {mat_path} using scipy.io.loadmat ...")
        return loadmat(mat_path)
    except NotImplementedError as e:
        # 3. scipy signals a v7.3 file through this specific message.
        if "Please use HDF reader for matlab v7.3 files" in str(e):
            print("File is v7.3 (HDF5). Converting to v7.2 format...")
            convert_mat_v73_to_v72(mat_path, v72_path)
            print(f"Converted to: {v72_path}. Reloading...")
            return loadmat(v72_path)
        raise

In [61]:

# Load Houston18.mat and report the value range of its 'ori_data' variable.
mat_path = PROJECT_ROOT / "src/Dataset/Houston18/Houston18.mat"

data = load_mat_any_version(mat_path)
print("Loaded keys:", data.keys())

max_value = np.max(data['ori_data'])
print("Max value in data['ori_data']:", max_value)

# The minimum gets its own variable and label; it used to overwrite
# `max_value` and was printed as "Max value".
min_value = np.min(data['ori_data'])
print("Min value in data['ori_data']:", min_value)

v7.2 .mat file already exists Found cached : /home/fesih/Desktop/ubuntu_projects/GAST2/src/Dataset/Houston18/Houston18_v72.mat
Loaded keys: dict_keys(['__header__', '__version__', '__globals__', 'ori_data'])
Max value in data['ori_data']: 1.0
Max value in data['ori_data']: 0.0


In [62]:
# Load Houston18_7gt.mat and report the value range of its 'map' variable.
gt_path = PROJECT_ROOT / "src/Dataset/Houston18/Houston18_7gt.mat"

data = load_mat_any_version(gt_path)
print("Loaded keys:", data.keys())

max_value = np.max(data['map'])
print("Max value in data['map']:", max_value)

# The minimum gets its own variable and label; it used to overwrite
# `max_value` and was printed as "Max value".
min_value = np.min(data['map'])
print("Min value in data['map']:", min_value)

v7.2 .mat file already exists Found cached : /home/fesih/Desktop/ubuntu_projects/GAST2/src/Dataset/Houston18/Houston18_7gt_v72.mat
Loaded keys: dict_keys(['__header__', '__version__', '__globals__', 'map'])
Max value in data['map']: 7.0
Max value in data['map']: 0.0


In [65]:

# Load Houston13.mat and report the value range of its 'ori_data' variable.
mat_path = PROJECT_ROOT / "src/Dataset/Houston13/Houston13.mat"

data = load_mat_any_version(mat_path)
print("Loaded keys:", data.keys())

max_value = np.max(data['ori_data'])
print("Max value in data['ori_data']:", max_value)

# The minimum gets its own variable and label; it used to overwrite
# `max_value` and was printed as "Max value".
min_value = np.min(data['ori_data'])
print("Min value in data['ori_data']:", min_value)

v7.2 .mat file already exists Found cached : /home/fesih/Desktop/ubuntu_projects/GAST2/src/Dataset/Houston13/Houston13_v72.mat
Loaded keys: dict_keys(['__header__', '__version__', '__globals__', 'ori_data'])
Max value in data['ori_data']: 1.0
Max value in data['ori_data']: 0.0


In [79]:
# NOTE(review): despite the variable name, this points at Houston13.mat and
# not at a *_gt.mat file; confirm that this is intended.
gt_path = PROJECT_ROOT / "src/Dataset/Houston13/Houston13.mat"

data = load_mat_any_version(gt_path)
print("Loaded keys:", data.keys())

# loadmat adds '__header__', '__version__' and '__globals__' metadata entries.
# Skip them and inspect the last real variable, then label the output with the
# key that was actually read instead of a hard-coded 'map'.
var_key = [k for k in data.keys() if not k.startswith("__")][-1]

max_value = np.max(data[var_key])
print(f"Max value in data[{var_key!r}]:", max_value)

# The minimum gets its own variable and label; it used to overwrite
# `max_value` and was printed as "Max value".
min_value = np.min(data[var_key])
print(f"Min value in data[{var_key!r}]:", min_value)

Trying to load /home/fesih/Desktop/ubuntu_projects/GAST2/src/Dataset/Houston13/Houston13.mat using scipy.io.loadmat ...
Loaded keys: dict_keys(['__header__', '__version__', '__globals__', 'ans'])
Max value in data['map']: 65517
Max value in data['map']: 0


In [81]:
# Load Houston13_gt.mat and report the value range of its last variable.
gt_path = PROJECT_ROOT / "src/Dataset/Houston13/Houston13_gt.mat"

data = load_mat_any_version(gt_path)
print("Loaded keys:", data.keys())

# loadmat adds '__header__', '__version__' and '__globals__' metadata entries.
# Skip them and inspect the last real variable, then label the output with the
# key that was actually read instead of a hard-coded 'map'.
var_key = [k for k in data.keys() if not k.startswith("__")][-1]

max_value = np.max(data[var_key])
print(f"Max value in data[{var_key!r}]:", max_value)

# The minimum gets its own variable and label; it used to overwrite
# `max_value` and was printed as "Max value".
min_value = np.min(data[var_key])
print(f"Min value in data[{var_key!r}]:", min_value)

Trying to load /home/fesih/Desktop/ubuntu_projects/GAST2/src/Dataset/Houston13/Houston13_gt.mat using scipy.io.loadmat ...
Loaded keys: dict_keys(['__header__', '__version__', '__globals__', 'name'])
Max value in data['map']: 15
Max value in data['map']: 0
