In [1]:
## The following magics ensure that all imported modules (including package __init__ files) are reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [3]:
from pathlib import Path
from insitupy import read_xenium
import scanpy as sc
from insitupy import InSituExperiment

In [4]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
from typing import Union, Optional

In [6]:
import os

In [11]:
data_path = r"C:\Users\ge37voy\.cache\InSituPy\demo_datasets\hbreastcancer\output-XETG00000__slide_id__hbreastcancer"

In [12]:
exp = InSituExperiment()

In [13]:
exp.add(data=data_path)

In [14]:
exp

[1mInSituExperiment[0m with 1 samples:
           uid slide_id    sample_id
0     77a722e3  0001879  Replicate 1

In [15]:
config_path = r"C:\Users\ge37voy\Github\InSituPy\notebooks\demo_dataset\insituexperiment_config.xlsx"

In [18]:
exp2 = InSituExperiment.from_config(config_path=config_path)

In [23]:
exp2.load_cells()
exp2.load_images()

Replicate 1
Loading cells...
Replicate 1
Loading cells...
Replicate 1
Loading images...
Replicate 1
Loading images...


In [24]:
exp2.show(1)

Viewer(camera=Camera(center=(0.0, 2738.80625, 3762.84375), zoom=0.06412522090381216, angles=(0.0, 0.0, 90.0), perspective=0.0, mouse_pan=True, mouse_zoom=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 5477.825, 0.2125), (0.0, 7525.9, 0.2125)), current_step=(12888, 17707), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'nuclei' at 0x2066b207790>], help='use <2> for transform', status='Ready', tooltip=Tooltip(visible=False, text=''), theme='dark', title='0001879: Replicate 1', mouse_over_canvas=False, mouse_move_callbacks=[], mouse_drag_callbacks=[], mouse_double_click_callbacks=[], mouse_wheel_callbacks=[<function dims_scroll at 0x000002066871A160>], _persisted_mouse_event={}, _mouse_drag_gen={}, _mouse_wheel_gen={}, keymap={})

In [20]:
exp2.data

[[1m[31mInSituData[0m
 [1mMethod:[0m		Xenium
 [1mSlide ID:[0m	0001879
 [1mSample ID:[0m	Replicate 1
 [1mPath:[0m		C:\Users\ge37voy\.cache\InSituPy\demo_datasets\hbreastcancer\output-XETG00000__slide_id__hbreastcancer
 [1mMetadata file:[0m	experiment.xenium,
 [1m[31mInSituData[0m
 [1mMethod:[0m		Xenium
 [1mSlide ID:[0m	0001879
 [1mSample ID:[0m	Replicate 1
 [1mPath:[0m		C:\Users\ge37voy\.cache\InSituPy\demo_datasets\hbreastcancer\output-XETG00000__slide_id__hbreastcancer
 [1mMetadata file:[0m	experiment.xenium]

In [6]:
p = "./demo_dataset/qupath_project/"

In [7]:
isinstance(p, Path)

False

In [96]:
import pandas as pd

class InSituExperiment:
    """Collection of datasets plus one metadata row per added dataset.

    Datasets are stored in a dictionary keyed by ``"{slide_id}__{sample_id}"``;
    the metadata DataFrame holds one row per call to :meth:`add`, in insertion
    order, with at least the columns ``sample_id`` and ``slide_id``.
    """

    def __init__(self):
        """Initialize an empty InSituExperiment object.

        The constructor takes no arguments. It creates an empty metadata
        DataFrame with the columns ``sample_id`` and ``slide_id``, an empty
        dataset dictionary, and the pattern used to build dictionary keys.
        """
        self._metadata = pd.DataFrame(columns=['sample_id', 'slide_id'])
        self._data = {}
        self._key_pattern = "{slide_id}__{sample_id}"

    @property
    def data(self):
        """Get the dataset dictionary.

        Returns:
            dict: A dictionary of datasets, where keys are built as
                ``"{slide_id}__{sample_id}"`` and values are Dataset objects.
        """
        return self._data

    @property
    def metadata(self):
        """Get the metadata DataFrame.

        Returns:
            pd.DataFrame: A DataFrame containing one metadata row per added dataset.
        """
        return self._metadata

    def add(self,
            dataset,
            metadata: Optional[dict] = None
            ):
        """Add a dataset to the experiment and update metadata.

        Args:
            dataset (Dataset): A dataset object to be added.
            metadata (Optional[dict]): Additional key/value pairs stored as extra
                columns in the metadata row of this dataset. Entries named
                ``sample_id`` or ``slide_id`` are overridden by the values taken
                from ``dataset``.

        Raises:
            TypeError: If the dataset is not an instance of the Dataset class.
        """
        # Check if the dataset is of the correct type
        if not isinstance(dataset, Dataset):
            raise TypeError("The dataset must be an instance of the Dataset class.")

        # Use the combination of slide_id and sample_id as the key
        key = self._key_pattern.format(slide_id=dataset.slide_id, sample_id=dataset.sample_id)

        # Add the dataset to the data dictionary
        # NOTE(review): if `key` already exists, the stored object is replaced but a
        # new metadata row is still appended below -- confirm this is intended.
        self._data[key] = dataset

        # Identifiers taken from the dataset itself
        new_metadata = {
            'sample_id': dataset.sample_id,
            'slide_id': dataset.slide_id
        }

        if metadata is not None:
            # Merge user-supplied metadata; the dataset's own identifiers come
            # last so they win on conflicting keys.
            new_metadata = {**metadata, **new_metadata}

        # convert to a one-row dataframe
        new_metadata = pd.DataFrame([new_metadata])

        # Concatenate the new metadata with the existing metadata
        self._metadata = pd.concat([self._metadata, new_metadata], axis=0, ignore_index=True)

    def get(self, slide_id, sample_id):
        """Retrieve a dataset by its slide_id and sample_id.

        Args:
            slide_id (str): Slide identifier of the dataset to retrieve.
            sample_id (str): Sample identifier of the dataset to retrieve.

        Returns:
            Dataset: The dataset stored under the key built from both identifiers.

        Raises:
            KeyError: If the combined key does not exist in the data dictionary.
        """
        key = self._key_pattern.format(slide_id=slide_id, sample_id=sample_id)

        if key not in self._data:
            raise KeyError(f"Dataset with key '{key}' not found.")
        return self._data[key]

    def iget(self, index):
        """Retrieve a dataset by its row position in the metadata DataFrame.

        Args:
            index (int): The row position of the dataset to retrieve.

        Returns:
            Dataset: The dataset associated with the given row position.

        Raises:
            IndexError: If the index is negative or not smaller than the number
                of metadata rows.
        """
        if index < 0 or index >= len(self._metadata):
            raise IndexError("Index out of bounds.")

        # Look up the identifiers of the *requested* row (previously row 0 was
        # always used, so every valid index returned the first dataset).
        row = self._metadata.iloc[index]
        return self.get(slide_id=row["slide_id"], sample_id=row["sample_id"])

# Example usage
class Dataset:
    """Toy container that pairs a data payload with its slide and sample identifiers."""

    def __init__(self, slide_id, sample_id, data):
        """Store the identifiers and the payload on the instance.

        Args:
            slide_id (str): Identifier of the slide.
            sample_id (str): Identifier of the sample/dataset.
            data (dict): Payload associated with the dataset.
        """
        # Both identifiers are kept verbatim as public attributes.
        self.slide_id, self.sample_id = slide_id, sample_id
        # The payload is stored by reference, not copied.
        self.data = data

# Two toy datasets, each identified by a slide id and a sample id
dataset1 = Dataset(
    slide_id="SL001",
    sample_id="S001",
    data={"measurement1": 1.2, "measurement2": 3.4},
)
dataset2 = Dataset(
    slide_id="SL002",
    sample_id="S002",
    data={"measurement1": 2.3, "measurement2": 4.5},
)

# Start with an empty experiment and register both datasets in order
experiment = InSituExperiment()
for ds in (dataset1, dataset2):
    experiment.add(ds)

# Show the metadata table that was built up by the add() calls
print("Metadata:\n", experiment.metadata)

# Look up a dataset by its slide_id / sample_id pair
try:
    dataset = experiment.get(slide_id="SL001", sample_id="S001")
except KeyError as err:
    print(err)
else:
    print("Dataset SL001__S001:", dataset.data)

# Look up a dataset by its row position in the metadata table
try:
    dataset = experiment.iget(0)
except IndexError as err:
    print(err)
else:
    print("Dataset at index 0:", dataset.data)

# Unknown identifiers: get() is expected to raise a KeyError
try:
    dataset = experiment.get("SL003", "S003")
except KeyError as err:
    print(err)

# Position past the last row: iget() is expected to raise an IndexError
try:
    dataset = experiment.iget(2)
except IndexError as err:
    print(err)


Metadata:
   sample_id slide_id
0      S001    SL001
1      S002    SL002
Dataset SL001__S001: {'measurement1': 1.2, 'measurement2': 3.4}
Dataset at index 0: {'measurement1': 1.2, 'measurement2': 3.4}
"Dataset with key 'SL003__S003' not found."
Index out of bounds.


In [92]:
experiment.metadata.iloc[0][["slide_id", "sample_id"]]

slide_id     SL001
sample_id     S001
Name: 0, dtype: object

In [93]:
experiment.metadata.iloc[0][["sample_id", "slide_id"]]

sample_id     S001
slide_id     SL001
Name: 0, dtype: object

In [87]:
a

'S001'

In [88]:
b

'SL001'

In [81]:
"{slide_id}__{sample_id}".format(slide_id="test", sample_id="blubb")

'test__blubb'

In [79]:
experiment.metadata

Unnamed: 0,unique_id,sample_id,slide_id
0,SL001__S001,S001,SL001
1,SL002__S002,S002,SL002


In [40]:
x = {'a': 1, 'b': 2}
y = {'b': 3, 'c': 4}

In [41]:
x | y

{'a': 1, 'b': 3, 'c': 4}

In [42]:
y | x

{'b': 2, 'c': 4, 'a': 1}

In [68]:
new_df.to_dict(orient="list")

{'unique_id': ['a'],
 'sample_id': [None],
 'slide_id': ['blubb'],
 'patient_id': [None],
 'blubb': ['check']}

In [43]:
df = experiment.metadata

In [44]:
df

Unnamed: 0,unique_id,sample_id,slide_id,patient_id,disease,age,sex
0,SL001__S001,S001,SL001,,,,
0,SL002__S002,S002,SL002,,,,


In [45]:
pd.DataFrame(df)

Unnamed: 0,unique_id,sample_id,slide_id,patient_id,disease,age,sex
0,SL001__S001,S001,SL001,,,,
0,SL002__S002,S002,SL002,,,,


In [46]:
new_row = {"unique_id": "a", "sample_id": None, "slide_id": "blubb", "patient_id": None, "blubb": "check"}

In [47]:
new_df = pd.DataFrame([new_row])

In [50]:
new_df

Unnamed: 0,unique_id,sample_id,slide_id,patient_id,blubb
0,a,,blubb,,check


In [60]:
new_df = pd.DataFrame([row])

In [61]:
pd.concat([df, new_df], ignore_index=True)

Unnamed: 0,unique_id,sample_id,slide_id,patient_id,disease,age,sex,blubb
0,SL001__S001,S001,SL001,,,,,
1,SL002__S002,S002,SL002,,,,,
2,a,,blubb,,,,,check


In [20]:
pd.concat([df, new_row], axis=1)

InvalidIndexError: Reindexing only valid with uniquely valued Index objects

In [5]:
experiment.metadata

Unnamed: 0,unique_id,sample_id,slide_id,patient_id,disease,age,sex
0,SL001__S001,S001,SL001,,,,
0,SL002__S002,S002,SL002,,,,


In [3]:
experiment.metadata

Unnamed: 0,sample_id,patient_id,disease,age,sex
0,S001,,,,
0,S002,,,,


In [62]:
md = pd.DataFrame(columns=['patient_id', 'disease', 'age', 'sex'])
md.index.name = "id"

In [63]:
md

Unnamed: 0_level_0,patient_id,disease,age,sex
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [57]:
experiment.metadata

Unnamed: 0,sample_id,patient_id,disease,age,sex
S001,,,,,
S002,,,,,


In [56]:
experiment.get("S001")

<__main__.Dataset at 0x1b05c9c84f0>

In [54]:
experiment.metadata

Unnamed: 0,sample_id,patient_id,disease,age,sex
0,S001,,,,
1,S002,,,,


In [53]:
experiment.data

{'S001': <__main__.Dataset at 0x1b05c6de790>,
 'S002': <__main__.Dataset at 0x1b05c6dea00>}

In [51]:
experiment._data["S001"]

<__main__.Dataset at 0x1b05c7e26a0>

In [47]:
experiment.data["S001"]

<__main__.Dataset at 0x1b05c7e26a0>

In [50]:
experiment.data["S001"]

<__main__.Dataset at 0x1b05c7e26a0>