# Demo notebook

In [None]:
import scanpy as sc
import pathlib
import configparser

import data_IO
import data_analysis
import data_plotting

## Setup  
- Read the configuration file (*configuration.txt*)
- Extract parameters
- Prepare working directories

In [None]:
# Setup cell: load 'configuration.txt', derive the working paths and the
# per-step parameter dictionaries, configure scanpy, and create the output
# directories.
import ast


def _parse_literal(text):
    """Return *text* parsed as a Python literal, or unchanged if it is not one.

    configparser hands every value back as a string; scanpy's figure options
    expect real ints/bools/tuples, so '80' -> 80, 'False' -> False, while
    plain words such as 'png' stay strings.
    """
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError):
        return text


# Reading configuration file
# ConfigParser.read() silently skips files it cannot open; fail here with a
# clear message rather than with a NoSectionError on the first lookup below.
config = configparser.ConfigParser()
if not config.read('configuration.txt'):
    raise FileNotFoundError("Configuration file 'configuration.txt' not found or unreadable")

# Configure directories
data_source = pathlib.Path(config.get('paths', 'data_source'))
coordinates_source = pathlib.Path(config.get('paths', 'coordinates_source'))
figures_dir = pathlib.Path(config.get('paths', 'figures_dir'))
temp_dir = pathlib.Path(config.get('paths', 'temp_dir'))

# Configure parameters
# NOTE: values are the raw strings returned by configparser; they are passed
# as-is to the data_analysis / data_plotting helpers.
PCA_params = dict(config.items('scanpy_PCA'))
NEIGHBORS_params = dict(config.items('scanpy_NEIGHBORS'))
UMAP_params = dict(config.items('scanpy_UMAP'))
PHENOGRAPH_params = dict(config.items('scanpy_PHENOGRAPH'))

PCA_plot_params = dict(config.items('scanpy_PCA_plot'))
UMAP_plot_params = dict(config.items('scanpy_UMAP_plot'))
CLUSTERS_plot_params = dict(config.items('scanpy_CLUSTERS_plot'))

# Scanpy parameters
# The setting is named `verbosity`; assigning `verbose` has no effect.
sc.settings.verbosity = 3     # 0 = errors only ... 3 = hints
# Apply the [scanpy_figures] options as keyword arguments (passing the item
# list positionally would only be interpreted as the `scanpy` flag).
figure_params = {
    key: _parse_literal(value)
    for key, value in config.items('scanpy_figures')
}
sc.set_figure_params(**figure_params)
sc.settings.figdir = figures_dir

# Prepare workspace
figures_dir.mkdir(parents=True, exist_ok=True)
temp_dir.mkdir(parents=True, exist_ok=True)


# Data reading  
Read data from *data_source* and *coordinates_source* and store them as two separate files in *temp_dir*. *data_source* is stored as an *.h5ad* file, while *coordinates_source* is stored as a *.csv* file. Both files are named with a unique identifier, stored in *filename*, which is used in the following I/O steps.

In [None]:
# Import the raw inputs into temp_dir. The returned identifier is kept in
# `filename` and passed to the later READ_ADATA / READ_COORDINATES /
# WRITE_ADATA calls in this notebook.
filename = data_IO.READ_SOURCE(data_source, coordinates_source, temp_dir)

# Data analysis  
## PCA  
Perform PCA on the data stored in *adata*, using the parameters contained in the dictionary *PCA_params*.  
After this block, you may want to store the results by overwriting the work data in *temp_dir*. To do so, run a block containing the `data_IO.WRITE_ADATA()` function.  
To visualize the results, run the corresponding plotting block.

In [None]:
# Read work data
# Loads the work data identified by `filename` from temp_dir into `adata`,
# the object the analysis cells below operate on.
adata = data_IO.READ_ADATA(filename, temp_dir)

In [None]:
# Run PCA with the options read from the [scanpy_PCA] configuration section.
# The result replaces `adata` in memory only; nothing is written to temp_dir
# by this cell.
adata = data_analysis.PCA(adata, PCA_params)

## Neighbors graph and UMAP  
Construct the neighbors graph and then perform UMAP on the data stored in *adata*, using the parameters contained in the dictionaries *NEIGHBORS_params* and *UMAP_params*.  
After this block, you may want to store the results by overwriting the work data in *temp_dir*. To do so, run a block containing the `data_IO.WRITE_ADATA()` function.  
To visualize the results, run the corresponding plotting block.

In [None]:
# Build the neighbors graph first, then run UMAP on the updated `adata`.
# Options come from the [scanpy_NEIGHBORS] and [scanpy_UMAP] configuration
# sections respectively.
adata = data_analysis.NEIGHBORS(adata, NEIGHBORS_params)
adata = data_analysis.UMAP(adata, UMAP_params)

## PhenoGraph  
Identify the clusters by performing PhenoGraph on the data stored in *adata*, using the parameters contained in the dictionary *PHENOGRAPH_params*.  
After this block, you will need to store the results by overwriting the work data in *temp_dir*. To do so, run the block containing the `data_IO.WRITE_ADATA()` function.  
To visualize the results, run the corresponding plotting block.

In [None]:
# Cluster with PhenoGraph using the [scanpy_PHENOGRAPH] options.
# Besides the updated `adata`, three extra values are returned —
# presumably the cluster labels, the graph and the modularity score;
# TODO confirm against data_analysis.PHENOGRAPH.
adata, communities, graph, Q = data_analysis.PHENOGRAPH(adata, PHENOGRAPH_params)

In [None]:
# Overwrite work data
# Persists the in-memory `adata` to temp_dir so the plotting cells, which
# re-read it from disk, see the analysis results. `filename` is rebound to
# the value WRITE_ADATA returns.
filename = data_IO.WRITE_ADATA(adata, filename, temp_dir)

# Plotting  
## PCA representation  
Represent previous PCA results.  
The plot is generated considering the parameters contained in the dictionary *PCA_plot_params*. A *.png* version of the plot is also stored in the *figures_dir*.

In [None]:
# NOTE: this cell is currently disabled; uncomment both statements below to
# render the PCA plot.
# Read work data
#adata = data_IO.READ_ADATA(filename, temp_dir)

#data_plotting.PCA(adata, filename, figures_dir, PCA_plot_params)

## UMAP representation  
Represent the previous UMAP reduction, colored according to the PhenoGraph clustering results.  
The plot is generated considering the parameters contained in the dictionary *UMAP_plot_params*. A *.png* version of the plot is also stored in the *figures_dir*.

In [None]:
# Read work data
# Re-reads `adata` from temp_dir, so results must have been saved with
# data_IO.WRITE_ADATA beforehand.
adata = data_IO.READ_ADATA(filename, temp_dir)

# Draw the UMAP plot using the [scanpy_UMAP_plot] options.
data_plotting.UMAP(adata, filename, figures_dir, UMAP_plot_params)

## Clusters representation  
Represent the spatial distribution of cells, colored according to the PhenoGraph clustering results.  
The plot is generated considering the parameters contained in the dictionary *CLUSTERS_plot_params*. An interactive *.html* version of the plot is also stored in the *figures_dir*.

In [None]:
# Read work data
# Both the work data and the coordinates stored under the same `filename`
# identifier are loaded from temp_dir.
adata = data_IO.READ_ADATA(filename, temp_dir)
coordinates = data_IO.READ_COORDINATES(filename, temp_dir)

# Draw the cluster plot using the [scanpy_CLUSTERS_plot] options.
data_plotting.CLUSTERS(adata, coordinates, filename, figures_dir, CLUSTERS_plot_params)