# InSituPy demonstration - InSituExperiment

In [1]:
## The following code ensures that all functions and init files are reloaded before executions.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from insitupy import read_xenium
import scanpy as sc
from insitupy import InSituExperiment

In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

### Load Xenium data into `InSituData` object

Now the Xenium data can be parsed by providing the data path to the `InSituPy` project folder.

In [5]:
insitupy_project = Path("demo_dataset/demo_insitupy_project")
xd = read_xenium(insitupy_project)
xd.load_all(skip="transcripts")

Loading annotations...
Loading cells...
	No alternative cells found...
Loading images...
['nuclei', 'CD20', 'HER2', 'DAPI', 'HE']
Loading regions...


### Create `InSituExperiment`

#### Method 1: Manually add `InSituData` objects

In [6]:
exp = InSituExperiment()
exp.add(
    data=xd,
    metadata={
        "slide_id": xd.slide_id,
        "sample_id": xd.sample_id,
        "patient": "A"
    }
    )

In [7]:
exp

[1mInSituExperiment[0m with 1 samples:
           uid slide_id    sample_id patient
0     774da943  0001879  Replicate 1       A

In the same way also other datasets can be added. For demonstration purposes, we just add the same dataset again and change the metadata.

In [8]:
exp.add(
    data=xd,
    metadata={
        "slide_id": xd.slide_id,
        "sample_id": xd.sample_id,
        "patient": "A",
        "therapy": "drugB"
    }
    )

In [9]:
exp

[1mInSituExperiment[0m with 2 samples:
           uid slide_id    sample_id patient therapy
0     774da943  0001879  Replicate 1       A     NaN
1     fd8125a1  0001879  Replicate 1       A   drugB

#### Method 2: From config file

As config file either a csv file or an excel file can be used.

Example of a valid configuration file:

| directory         | experiment_name | date       | patient    |
|-------------------|-----------------|------------|------------|
| /path/to/dataset1 | Experiment 1    | 2023-09-01 | Patient A  |
| /path/to/dataset2 | Experiment 2    | 2023-09-02 | Patient B  |

In [10]:
exp = InSituExperiment.from_config(config_path="./demo_dataset/insituexperiment_config.csv")

In [11]:
exp

[1mInSituExperiment[0m with 2 samples:
           uid slide_id    sample_id patient therapy
0     02d08849  0001879  Replicate 1       A   drugA
1     08e5a39f  0001879  Replicate 1       B   drugB

#### Method 3: From regions

We can also use regions from an `InSituData` object to split the data into separate datasets and create an `InSituExperiment` from them. This can be used to select the most interesting regions and focus on them for the analysis or to split a TMA dataset into separate datasets for each core.

In [12]:
exp = InSituExperiment.from_region(
    data=xd, region_key="TMA"
)

In [13]:
exp

[1mInSituExperiment[0m with 6 samples:
           uid slide_id    sample_id region_key region_name
0     00821d96  0001879  Replicate 1        TMA         A-1
1     ccc02634  0001879  Replicate 1        TMA         A-2
2     bf5deb83  0001879  Replicate 1        TMA         A-3
3     e86c5a28  0001879  Replicate 1        TMA         B-1
4     eb9075a1  0001879  Replicate 1        TMA         B-2
5     f523d9d2  0001879  Replicate 1        TMA         B-3

In [128]:
exp.show(1)

Viewer(camera=Camera(center=(0.0, 499.90625, 499.8), zoom=0.2166514499109353, angles=(0.0, 0.0, 90.0), perspective=0.0, mouse_pan=True, mouse_zoom=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 1000.025, 0.2125), (0.0, 999.8125, 0.2125)), current_step=(2352, 2352), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'nuclei' at 0x150d5febfa0>, <Image layer 'CD20' at 0x150d6573610>, <Image layer 'HER2' at 0x150d6d825b0>, <Image layer 'DAPI' at 0x1511e6ec8b0>, <Image layer 'HE' at 0x150cb3a3f10>], help='use <2> for transform', status='Ready', tooltip=Tooltip(visible=False, text=''), theme='dark', title='0001879: Replicate 1', mouse_over_canvas=False, mouse_move_callbacks=[], mouse_drag_callbacks=[], mouse_double_click_callbacks=[], mouse_wheel_callbacks=[<function dims_scroll at 0x0000014FB08681F0>], _persiste

## Add new metadata

In [115]:
exp = InSituExperiment.from_config(config_path="./demo_dataset/insituexperiment_config.csv")
exp

[1mInSituExperiment[0m with 2 samples:
           uid slide_id    sample_id patient therapy
0     b9b4d307  0001879  Replicate 1       A   drugA
1     73b67a95  0001879  Replicate 1       B   drugB

In [116]:

exp.append_metadata(
    new_metadata="./demo_dataset/insituexperiment_new_metadata.csv",
    by="patient", overwrite=True
)

In [117]:
exp

[1mInSituExperiment[0m with 2 samples:
           uid slide_id    sample_id patient therapy   organ      test
0     b9b4d307  0001879  Replicate 1       A   drugC  spleen  positive
1     73b67a95  0001879  Replicate 1       B   drugD   liver  negative

In [118]:

exp = InSituExperiment.from_config(config_path="./demo_dataset/insituexperiment_config.csv")

exp.append_metadata(
    new_metadata="./demo_dataset/insituexperiment_new_metadata.csv",
    by="patient", overwrite=False
)

In [119]:
exp

[1mInSituExperiment[0m with 2 samples:
           uid slide_id    sample_id patient therapy   organ      test
0     19f22696  0001879  Replicate 1       A   drugA  spleen  positive
1     a551a3ed  0001879  Replicate 1       B   drugB   liver  negative

In [39]:
import pandas as pd

In [71]:
df1 = exp.metadata
df2 = pd.read_csv("./demo_dataset/insituexperiment_new_metadata.csv")

In [72]:
df1

Unnamed: 0,uid,slide_id,sample_id,patient,therapy
0,7ace2a8d,1879,Replicate 1,A,drugA
1,339cd55f,1879,Replicate 1,B,drugB


In [73]:
df2

Unnamed: 0,patient,therapy,organ,test
0,A,drugA,spleen,positive
1,B,drugB,liver,negative


In [95]:
df1.columns.difference(df2.columns)

Index(['sample_id', 'slide_id', 'uid'], dtype='object')

In [97]:
df1.columns.intersection(df2.columns)

Index(['patient', 'therapy'], dtype='object')

In [96]:
df2.columns.difference(df1.columns)

Index(['organ', 'test'], dtype='object')

In [98]:
df2.columns.intersection(df1.columns)

Index(['patient', 'therapy'], dtype='object')

In [82]:
l = list(df1.columns.intersection(df2.columns))

In [83]:
["test"] + l

['test', 'patient', 'therapy']

In [58]:
df1["patient"].is_unique

True

In [63]:
pd.merge(
    left=df1,
    right=df2,
    left_index=True,
    right_index=True,
    how="outer"
)

Unnamed: 0,uid,slide_id,sample_id,patient_x,therapy_x,patient_y,therapy_y,organ,test
0,57f683b4,1879,Replicate 1,A,drugA,A,drugA,spleen,positive
1,903732a0,1879,Replicate 1,B,drugB,B,drugB,liver,negative


In [None]:
pd.merge(
    left=df1,
    right=df2,
    on="patient",
    how="left"
)

Unnamed: 0,uid,slide_id,sample_id,patient,therapy_x,therapy_y,organ,test
0,57f683b4,1879,Replicate 1,A,drugA,drugA,spleen,positive
1,903732a0,1879,Replicate 1,B,drugB,drugB,liver,negative


In [38]:
exp

[1mInSituExperiment[0m with 2 samples:
           uid slide_id    sample_id patient therapy
0     57f683b4  0001879  Replicate 1       A   drugA
1     903732a0  0001879  Replicate 1       B   drugB

In [14]:
exp

[1mInSituExperiment[0m with 6 samples:
           uid slide_id    sample_id region_key region_name
0     00821d96  0001879  Replicate 1        TMA         A-1
1     ccc02634  0001879  Replicate 1        TMA         A-2
2     bf5deb83  0001879  Replicate 1        TMA         A-3
3     e86c5a28  0001879  Replicate 1        TMA         B-1
4     eb9075a1  0001879  Replicate 1        TMA         B-2
5     f523d9d2  0001879  Replicate 1        TMA         B-3