In [1]:
import os
import hdf5storage
import scipy.io as sio

# Directory passed to extract_unlabeled_data as the tree to scan for .mat files.
source_path = '/Users/elizabethnemeti/Desktop/data'
# Directory that receives the image-only .mat files and is later verified.
unlabeled_path = '/Users/elizabethnemeti/Desktop/unlabeled_data'

# Ensure the output directory exists (no error if it is already there)
os.makedirs(unlabeled_path, exist_ok=True)

def extract_unlabeled_data(source_path, unlabeled_path):
    """Write an image-only copy of every ``.mat`` file found under a tree.

    ``source_path`` is walked recursively. Each file whose name ends in
    ``.mat`` is loaded with ``hdf5storage.loadmat``, the value at
    ``data['cjdata']['image']`` is taken, and a new ``.mat`` file holding
    only that value under the key ``'image'`` is written to
    ``unlabeled_path`` with ``scipy.io.savemat``.

    The output directory is flat: the new file keeps only the original base
    name, so two source files with the same name in different
    subdirectories end up at the same destination and the later one
    overwrites the earlier one.

    Processing is best-effort. A file that cannot be loaded, lacks the
    expected keys, or cannot be saved is reported on stdout and skipped;
    the remaining files are still processed.

    Args:
        source_path: Root directory to scan for ``.mat`` files.
        unlabeled_path: Directory that receives the image-only files. It is
            created (including parents) if it does not exist.

    Raises:
        OSError: If ``unlabeled_path`` cannot be created, e.g. because the
            path exists and is not a directory.
    """
    # Create the destination here so the function does not depend on the
    # caller having prepared it; otherwise every save would fail per file.
    os.makedirs(unlabeled_path, exist_ok=True)

    for root, _dirs, files in os.walk(source_path):
        for file in files:
            # The suffix test alone already rules out names like '.DS_Store'.
            if not file.endswith('.mat'):
                continue

            source_file = os.path.join(root, file)
            try:
                # Load the .mat file
                data = hdf5storage.loadmat(source_file)
                # Keep only the image; every other entry of 'cjdata' is dropped
                image_data = data['cjdata']['image']

                # Create a new .mat file with only the image data (unlabeled)
                new_filename = os.path.join(unlabeled_path, file)
                sio.savemat(new_filename, {'image': image_data})

            except KeyError as e:
                print(f"Missing expected data in {file}: {e}")
            except Exception as e:
                # Deliberately broad: one bad file must not abort the batch.
                print(f"Could not process {file}: {e}")

def verify_unlabeled_data(unlabeled_path):
    """Report, per ``.mat`` file in a directory, whether it holds only an image.

    Every entry of ``unlabeled_path`` (non-recursive) whose name ends in
    ``.mat`` is loaded with ``scipy.io.loadmat``. A file passes when its
    top-level keys are exactly the three loader metadata keys plus
    ``'image'``; anything else is flagged. One status line per file is
    printed to stdout, and a file that fails to load is reported instead of
    stopping the scan.

    Args:
        unlabeled_path: Directory containing the files to check.
    """
    # Keys a clean file is allowed to have: loadmat's metadata plus the image.
    allowed_keys = {'__header__', '__version__', '__globals__', 'image'}

    for name in os.listdir(unlabeled_path):
        if not name.endswith('.mat'):
            continue
        try:
            contents = sio.loadmat(os.path.join(unlabeled_path, name))
            # Any missing or additional key means the file is not as expected.
            if contents.keys() != allowed_keys:
                print(f"File {name} may still contain labels or unexpected data.")
                continue
            print(f"File {name} is successfully unlabeled.")
        except Exception as e:
            print(f"Failed to verify {name}: {e}")

# Run the extraction function: scans source_path and writes image-only
# .mat files into unlabeled_path.
extract_unlabeled_data(source_path, unlabeled_path)

# Verify the unlabeled data: prints one status line per .mat file found
# in unlabeled_path.
verify_unlabeled_data(unlabeled_path)

# Printed unconditionally once both calls have returned.
print("Extraction and verification complete.")

File 259.mat is successfully unlabeled.
File 1915.mat is successfully unlabeled.
File 265.mat is successfully unlabeled.
File 1097.mat is successfully unlabeled.
File 503.mat is successfully unlabeled.
File 517.mat is successfully unlabeled.
File 1083.mat is successfully unlabeled.
File 271.mat is successfully unlabeled.
File 2561.mat is successfully unlabeled.
File 1068.mat is successfully unlabeled.
File 2213.mat is successfully unlabeled.
File 1732.mat is successfully unlabeled.
File 1054.mat is successfully unlabeled.
File 701.mat is successfully unlabeled.
File 1295.mat is successfully unlabeled.
File 2944.mat is successfully unlabeled.
File 2788.mat is successfully unlabeled.
File 1281.mat is successfully unlabeled.
File 926.mat is successfully unlabeled.
File 2005.mat is successfully unlabeled.
File 2011.mat is successfully unlabeled.
File 2777.mat is successfully unlabeled.
File 932.mat is successfully unlabeled.
File 1256.mat is successfully unlabeled.
File 1530.mat is success