# For a custom package that wraps this h5py usage, see scH5loader

### Partial reading using h5py and anndata

#### Read in individual elements when constructing the anndata

In [None]:
import anndata
import h5py
from anndata._io.specs import read_elem

# Path to the AnnData file
adata_file_path = '/path/to/data.h5ad'


# See what is stored within the h5ad file (top-level groups/datasets only;
# nothing is loaded into memory here).
with h5py.File(adata_file_path, "r") as f:
    print(f)
    print(list(f.keys()))


# Open the H5 file using h5py and decode only the elements we want.
# Raw h5py groups/datasets cannot be handed to AnnData directly: each one is
# decoded with read_elem, which returns the matching in-memory object (e.g. a
# DataFrame for obs/var, and an array or sparse matrix for X).
elements_to_load = ("X", "obs", "var", "uns", "obsm", "obsp")

with h5py.File(adata_file_path, "r") as f:
    # Skip elements that are absent from this file instead of raising KeyError.
    loaded_elements = {
        key: read_elem(f[key]) for key in elements_to_load if key in f
    }

# Create an AnnData object and populate it with the loaded elements. The file
# is already closed here; everything in loaded_elements lives in memory.
adata_expanded = anndata.AnnData(**loaded_elements)

#### Read individual elements into their own variables

In [None]:
import h5py
import anndata
from anndata._io.specs import read_elem

# Path to the AnnData file
adata_file_path = '/path/to/data.h5ad'

# Optional elements start as None. Checking `"layers" in locals()` instead
# would pick up stale variables left over from an earlier run of the notebook
# (possibly from a different file) and silently attach them.
layers = None
uns_annotations = None

# Open the H5 file using h5py
with h5py.File(adata_file_path, "r") as f:
    # Load observation metadata (obs)
    obs_metadata = read_elem(f["obs"])

    # Load variable (gene) metadata (var)
    var_metadata = read_elem(f["var"])

    # Load the data matrix (X). read_elem is used rather than f["X"][:]
    # because X is stored as a group (not a dataset) when it is sparse, and
    # slicing a group fails.
    data_matrix = read_elem(f["X"])

    # Load layers if available
    if "layers" in f:
        layers = read_elem(f["layers"])

    # Load unstructured annotations if available
    if "uns" in f:
        uns_annotations = read_elem(f["uns"])

# Create an AnnData object and populate it with the loaded elements
adata_expanded = anndata.AnnData(
    X=data_matrix,
    obs=obs_metadata,
    var=var_metadata,
)

# Assign loaded layers and unstructured annotations if the file had them
if layers is not None:
    adata_expanded.layers = layers

if uns_annotations is not None:
    adata_expanded.uns = uns_annotations

In [None]:
# Read only the length of the obs index dataset (named by the "_index"
# attribute of the "obs" group); presumably this is the number of
# observations. Only the dataset's shape is queried, not its contents.
with h5py.File(adata_file_path, mode="r") as h5_file:
    obs_group = h5_file["obs"]
    m = obs_group[obs_group.attrs["_index"]].shape[0]

In [None]:
# Identify a column holding the cell types of interest, then load only the
# information related to those cells instead of loading the whole data object.

dataframe = 'obs'        # element holding the per-observation annotations
column = 'anno'          # column in that element to filter on
variable = 'mac'         # value in `column` that marks the cells to keep
columns_keep = ['anno']  # obs columns to carry over into the new object

with h5py.File(adata_file_path, 'r') as f:
    # Decode the annotation table into a DataFrame; a raw h5py group cannot
    # be indexed like one.
    obs_data = read_elem(f[dataframe])

    # Boolean mask / positional indices of the rows with the requested value.
    is_of_interest = (obs_data[column] == variable).to_numpy()
    indices_of_interest = is_of_interest.nonzero()[0]
    if indices_of_interest.size == 0:
        raise ValueError(
            f"No entries in {dataframe}[{column!r}] equal {variable!r}"
        )

    # Observations are the ROWS of X, so select along the first axis.
    x_elem = f['X']
    if isinstance(x_elem, h5py.Dataset):
        # Dense X: h5py reads just the selected rows from disk (the indices
        # are already in increasing order, as h5py requires).
        x_subset = x_elem[indices_of_interest, :]
    else:
        # Sparse X is stored as a group; decode it, then subset in memory.
        # NOTE(review): this reads the full sparse matrix - use anndata's
        # backed sparse dataset reader if that is too large.
        x_subset = read_elem(x_elem)[indices_of_interest]

    # Load all relevant information for the specified value
    relevant_data = {
        'X': x_subset,
        'obs': obs_data.loc[is_of_interest, columns_keep],
        # Add other elements if needed, e.g. 'var', 'uns', etc.
    }

# Create an AnnData object with the relevant information
adata_interest = anndata.AnnData(**relevant_data)