In [33]:
import os
import json
from urllib.parse import quote_plus
from os.path import join
from anndata import AnnData
import scanpy as sc
import pandas as pd
import numpy as np
import boto3

from vitessce import (
    VitessceWidget,
    VitessceConfig,
    Component as cm,
    CoordinationType as ct,
    AnnDataWrapper,
)

## Load the data

In [81]:
df = pd.read_csv(join("data", "CellByFeature_Matrix.csv"), index_col=0)

In [82]:
df.head()

Unnamed: 0_level_0,Nucleus location (X),Nucleus location (Y),Nucleus perimeter,EOMES,POLR2A,SOX2,DLX6,SATB2,BCL11B,CPLX3,...,TUBA4A,NR2F1,NEFL,NR4A2,CBLN2,MEF2E,PPP1R1B,RUNX1T1,LPL,GABRA5
Cell No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5115,2287,142.1,0,2,3,0,2,2,0,...,0,0,2,0,0,2,1,0,0,0
2,5330,1764,376.3,1,1,3,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4168,3771,111.3,0,1,1,3,5,0,0,...,0,0,0,0,0,0,0,0,0,1
4,3738,3051,387.0,1,6,7,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
5,5447,2056,191.3,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1


## Put the data in an AnnData object

In [93]:
X = df[df.columns.values.tolist()[3:]].values

In [84]:
X_spatial = df[["Nucleus location (X)", "Nucleus location (Y)"]].values.astype('uint16')

In [85]:
obs = pd.DataFrame(index=df.index.values.tolist())
var = pd.DataFrame(index=df.columns.values.tolist()[3:])
adata = AnnData(X=X, obs=obs, var=var, obsm={ "X_spatial": X_spatial })



## Optionally, process the data

Here you may want to run dimensionality reduction or clustering methods with ScanPy.

In [86]:
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)



In [87]:
sc.tl.tsne(adata)



In [88]:
adata.obs.index = adata.obs.index.astype(str)

## Create a Vitessce configuration

For more details about how to use the `vitessce` Python package please visit the documentation at https://vitessce.github.io/vitessce-python/.

In [92]:
vc = VitessceConfig("Developing human neocortex")
dataset = vc.add_dataset("Gestational week 20").add_object(
    AnnDataWrapper(
        adata,
        mappings_obsm=["X_tsne"],
        mappings_obsm_names=["t-SNE"],
        expression_matrix="X",
        spatial_centroid_obsm="X_spatial"
    )
)
spatial_plot = vc.add_view(dataset, cm.SPATIAL)
tsne_plot = vc.add_view(dataset, cm.SCATTERPLOT, mapping="t-SNE")
heatmap = vc.add_view(dataset, cm.HEATMAP)
genes_list = vc.add_view(dataset, cm.GENES)
vc.layout((spatial_plot | tsne_plot) / (heatmap | genes_list));

## Export the files

The above `add_object()` function converts the AnnData data structure into a [Zarr](https://zarr.readthedocs.io/en/stable/) store that is compatible with Vitessce.
We want to save these converted files to the `data` directory.

In [97]:
LOCAL_BASE_URL = 'http://localhost:3000'

In [98]:
config_dict = vc.export(to='files', base_url=LOCAL_BASE_URL, out_dir='data')

We also want to save the Vitessce configuration to a file.

In [95]:
with open(join("data", "vitessce_config.json"), "w") as f:
    json.dump(config_dict, f)

## Start the local web server

We can use Vitessce with local files by running a local web server.


In a terminal, `cd` into the `data` directory.

Then, run `http-server` on port 3000 with this terminal command:
```sh
http-server ./ --cors -p 3000
```

In [100]:
vitessce_url = f"http://vitessce.io/?url={LOCAL_BASE_URL}/vitessce_config.json"
print(vitessce_url)

http://vitessce.io/?url=http://localhost:3000/vitessce_config.json


After running the notebook cell above, a link to `vitessce.io` will appear. While the local HTTP server is running and serving the exported files in the `data` directory, you can open this link in a web browser to view the Vitessce visualization.

If you would like to make this visualization public on the web, you can copy the files in `data` to a cloud storage provider such as AWS S3, Google Cloud Storage, or GitHub Pages: http://beta.vitessce.io/docs/data-hosting/.

## Upload data to an existing AWS S3 bucket

To store your data in an AWS S3 bucket, make sure the bucket permissions and CORS settings match those here: https://github.com/vitessce/vitessce/blob/keller-mark/docs/docs/docs/data-hosting.md#bucket-policy

In [123]:
BUCKET_NAME = "my_bucket" # Replace with your bucket name
BUCKET_PREFIX = "gestational_week_20" # Replace with a file path prefix you would like to use for each dataset

In a terminal, set the environment variables for the AWS S3 bucket:

```sh
export AWS_ACCESS_KEY_ID=my_access_key_id
export AWS_SECRET_ACCESS_KEY=my_secret_access_key
export AWS_DEFAULT_REGION=us-east-1
```

Note: these environment variables need to be set in the terminal before starting JupyterLab with the `jupyter lab` terminal command. (So you may need to exit the notebook, run these 3 lines, and then start the notebook again by running `jupyter lab`)

In [124]:
s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
    aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
)

KeyError: 'AWS_ACCESS_KEY_ID'

The following two lines upload both the converted data files and the Vitessce configuration JSON file to the S3 bucket.

In [118]:
config_dict = vc.export(to='S3', s3=s3, bucket_name=BUCKET_NAME, prefix=BUCKET_PREFIX)

Uploading vitessce-export-examples:gestational_week_20/A/0


In [119]:
s3.Bucket(BUCKET_NAME).put_object(Key=f"{BUCKET_PREFIX}/vitessce_config.json", Body=json.dumps(config_dict).encode())

s3.Object(bucket_name='vitessce-export-examples', key='gestational_week_20/vitessce_config.json')

In [120]:
S3_BASE_URL = base_url = f"https://{BUCKET_NAME}.s3.amazonaws.com/{BUCKET_PREFIX}"

In [121]:
vitessce_url = f"http://vitessce.io/?url={S3_BASE_URL}/vitessce_config.json"
print(vitessce_url)

http://vitessce.io/?url=https://vitessce-export-examples.s3.amazonaws.com/gestational_week_20/vitessce_config.json


The Vitessce URL above can be shared with others since the converted Vitessce-compatible data files are stored in the AWS S3 bucket which is public on the web. The visualizations should look the same as when you tested them with the local server above.