In [1]:
# HDF5 Dataset Overview
'''
This notebook provides an interface to the HDF5 file containing numor data extracted from GRASP. 
Each 'Numor' group in the HDF5 file consists of datasets including `q_x`, `q_y`, `intensity`, and `intensity_err`, 
along with metadata such as `count_time`, magnetic field, heating power, temperature sensors readings, etc. 
''' 

## Function Overview

'''
### plot_numor_data
**Purpose:** Visualize scattering data for individual numors.  
**Usage:** Ideal for initial data exploration and quality checks on data collection.  
**Example:** `plot_numor_data(hdf5_file, numor_ids[0])`

### get_data_for_ml
**Purpose:** Prepare and extract data for machine learning models.  
**Usage:** Fetches data in bulk or individually to feed into predictive models or statistical analyses.  
**Example:** `multiple_numor_data = get_data_for_ml(hdf5_file, numor_ids)`

### print_data_and_metadata
**Purpose:** Print detailed data and metadata for selected numors.  
**Usage:** Useful for detailed inspections of data and associated experimental parameters.  
**Example:** `print_data_and_metadata(hdf5_file, numor_ids)`
'''


'\n### plot_numor_data\n**Purpose:** Visualize scattering data for individual numors.  \n**Usage:** Ideal for initial data exploration and quality checks on data collection.  \n**Example:** `plot_numor_data(hdf5_file, numor_ids[0])`\n\n### get_data_for_ml\n**Purpose:** Prepare and extract data for machine learning models.  \n**Usage:** Fetches data in bulk or individually to feed into predictive models or statistical analyses.  \n**Example:** `multiple_numor_data = get_data_for_ml(hdf5_file, numor_ids)`\n\n### print_data_and_metadata\n**Purpose:** Print detailed data and metadata for selected numors.  \n**Usage:** Useful for detailed inspections of data and associated experimental parameters.  \n**Example:** `print_data_and_metadata(hdf5_file, numor_ids)`\n'

In [2]:
import h5py
import numpy as np
import matplotlib.pyplot as plt

def open_hdf5(file_path):
    """ Open an HDF5 file and return the file object. """
    return h5py.File(file_path, 'r')

In [3]:
import matplotlib.pyplot as plt

def plot_numor_data(hdf5_file, numor_id):
    """ Plot the data for a specific numor. Ensures only one numor is inputted. """
    # Check if numor_ids is a list and has exactly one item
    if isinstance(numor_id, int):
        numor_id = numor_id
    else:
        return "Error: You can only input a single numor here (e.g [107897])."

    group = hdf5_file[f'Numor{numor_id}']
    q_x = group['q_x (inverse angstrom)'][:]
    q_y = group['q_y (inverse angstrom)'][:]
    intensity = group['intensity (counts per standard monitor)'][:]

    plt.figure(figsize=(10, 8))
    scatter = plt.scatter(q_x, q_y, c=intensity, cmap='jet', marker='s', s=40, edgecolors='none')
    cbar = plt.colorbar(scatter)
    #cbar.set_label(zlabel, rotation=270, labelpad=15)  # Adjust label orientation and position
    plt.xlabel('q_x (Å^{-1})')
    plt.ylabel('q_y (Å^{-1})')
    plt.title(f'Intensity Distribution for Numor {numor_id}')
    plt.show()


In [9]:
hdf5_path = r"/Users/cadenmyers/billingelab/Instrument_Appended_September_extracted_data.h5"
hdf5_file = open_hdf5(hdf5_path)

def get_data_for_ml(hdf5_file, numor_ids):
    """ Extract data and metadata for ML/AI applications for one or multiple numors. """
    all_data = {}
    
    # Ensure numor_ids is a list for single and multiple numor handling
    if not isinstance(numor_ids, list):
        numor_ids = [numor_ids]
    
    for numor_id in numor_ids:
        group = hdf5_file[f'Numor{numor_id}']
        data = {
            'q_x': group['q_x (inverse angstrom)'][:],
            'q_y': group['q_y (inverse angstrom)'][:],
            'intensity': group['intensity (counts per standard monitor)'][:],
            'intensity_err': group['intensity_err (counts per standard monitor)'][:]
        }
        
        # Retrieve and include metadata in the dictionary
        metadata = {attr: group.attrs[attr] for attr in group.attrs}
        data.update(metadata)
        
        # Store data for this numor keyed by its ID
        all_data[numor_id] = data
    
    return all_data

monitor_5_count = get_data_for_ml(hdf5_file, 73430)[73430]['Monitor_5_counts [counts]']
print(monitor_5_count)

2179142


In [13]:
# GETTING MONITOR_5_COUNT AND MONITOR_1_COUNT
start_numor = 74028
end_numor =  start_numor + 298 #make same as start_numor if you only want to look at one numor
numor_ids = list(range(start_numor,end_numor+1))

monitor_5_list = []
monitor_1_list = []
for numor in numor_ids:
    mon_5 = get_data_for_ml(hdf5_file, numor)[numor]['Monitor_5_counts [counts]']
    mon_1 = get_data_for_ml(hdf5_file, numor)[numor]['Monitor_1_counts [counts]']
    monitor_5_list.append(mon_5)
    monitor_1_list.append(mon_1)

In [14]:
# SAVE AS NPZ

mon_5 = np.array(monitor_5_list)
# np.savez('/Users/cadenmyers/billingelab/dev/skyrmion_lattices/experimental_data/npz_sept_data/npz_field_sweep/mon_5/pos29mT_553_50mW.npz', 
#          monitor_5_count=mon_5)

mon_1 = np.array(monitor_1_list)
# np.savez('/Users/cadenmyers/billingelab/dev/skyrmion_lattices/experimental_data/npz_sept_data/npz_field_sweep/mon_1/pos29mT_553_50mW.npz', 
#          monitor_1_count=mon_1)

In [None]:
def print_data_and_metadata(hdf5_file, numor_ids):
    """ Print data and metadata for specified numors in a formatted manner.
    
    Args:
        hdf5_file (h5py.File): Open HDF5 file object.
        numor_ids (int or list): Single numor ID or list of numor IDs whose data and metadata are to be printed.
    """
    if isinstance(numor_ids, int):
        numor_ids = [numor_ids]  # Convert single numor ID to list for uniform processing

    for numor_id in numor_ids:
        group_name = f'Numor{numor_id}'
        if group_name in hdf5_file:
            group = hdf5_file[group_name]
            print(f"Data and Metadata for Numor {numor_id}:")
            print("-" * 60)  # Print a divider for better visual separation

            # Print datasets
            for dataset_name in ['q_x (inverse angstrom)', 'q_y (inverse angstrom)', 'intensity (counts per standard monitor)', 'intensity_err (counts per standard monitor)']:
                if dataset_name in group:
                    data_array = group[dataset_name][:]
                    print(f"{dataset_name} (sample points): {data_array.shape[0]}")
                    print(f"{dataset_name} (values):", data_array)

            # Print metadata
            print("Metadata:")
            for key, value in group.attrs.items():
                print(f"{key:25}: {value}")
            print("-" * 60)  # End divider
        else:
            print(f"Error: Numor {numor_id} not found in the HDF5 file.")
            print("-" * 60)

# print_data_and_metadata(hdf5_file, 73430)

Data and Metadata for Numor 73430:
------------------------------------------------------------
q_x (inverse angstrom) (sample points): 128
q_x (inverse angstrom) (values): [[-0.02697548 -0.02697556 -0.02697565 ... -0.02697566 -0.02697558
  -0.02697549]
 [-0.02653964 -0.02653973 -0.02653981 ... -0.02653982 -0.02653974
  -0.02653966]
 [-0.0261038  -0.02610388 -0.02610396 ... -0.02610397 -0.02610389
  -0.02610381]
 ...
 [ 0.02754021  0.0275403   0.02754038 ...  0.0275404   0.02754031
   0.02754022]
 [ 0.02797601  0.0279761   0.02797619 ...  0.0279762   0.02797611
   0.02797602]
 [ 0.0284118   0.02841189  0.02841198 ...  0.02841199  0.0284119
   0.02841181]]
q_y (inverse angstrom) (sample points): 128
q_y (inverse angstrom) (values): [[-0.02772872 -0.02729291 -0.02685708 ...  0.02678726  0.02722309
   0.0276589 ]
 [-0.0277288  -0.02729299 -0.02685716 ...  0.02678734  0.02722317
   0.02765899]
 [-0.02772889 -0.02729307 -0.02685725 ...  0.02678742  0.02722326
   0.02765907]
 ...
 [-0.027728

In [None]:
# Open HDF5 File
hdf5_path = r"/Users/cadenmyers/billingelab/Instrument_Appended_September_extracted_data.h5"
hdf5_file = open_hdf5(hdf5_path)
start_numor = 121870 #111001
end_numor =  121871 #108370 #make same as start_numor if you only want to look at one numor
numor_ids = list(range(start_numor,end_numor+1))

# Example of Plotting Numor Data (can only use if specifying a single item in the list (e.q. numors_ids[0] or 115849))
plot_numor_data(hdf5_file, numor_ids[0])
#
# Example of Extracting Data for ML (builds a dictionary including the data and metadata)
single_numor_data = get_data_for_ml(hdf5_file, start_numor) # For a single numor

multiple_numor_data = get_data_for_ml(hdf5_file, numor_ids) # For multiple numors

# Example of Printing Data and Metadata
#print_data_and_metadata(hdf5_file, numor_ids[0])

KeyError: "Unable to synchronously open object (object 'Numor121870' doesn't exist)"