In [4]:
# Execute the SAMI helper scripts with %run so that the names they define are
# loaded into this notebook's interactive namespace.
# NOTE(review): os, re, pd and sc are used further down but are never imported
# in the notebook itself; presumably they (and Normalization, csv2h5ad,
# pooldata, list_files, Clusters, Cluster_Integration, adata_filter, Markers,
# Pathway) come from these scripts. Confirm before reordering or removing any.
%run ../SAMI/utils.py
%run ../SAMI/preprocessing.py
%run ../SAMI/correlation.py
%run ../SAMI/clustering.py
%run ../SAMI/markers.py
%run ../SAMI/pathway.py
%run ../SAMI/norm.py
%run ../SAMI/clustermapping.py
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# Data Preprocessing

## Normalization

In [2]:
# Normalize every "<prefix>raw.csv" table found in data_path and write the
# result next to it as "<prefix>norm.csv".
data_path = '../datasets/'

# fullmatch (instead of an un-anchored match) rejects names such as
# "x_raw.csv.bak"; the capture group keeps the prefix so that only the
# trailing "raw" is swapped for "norm" (str.replace would rewrite every
# occurrence of "raw" inside the stem).
raw_csv_name = re.compile(r'(\w+)raw\.csv')

# sorted(): os.listdir returns entries in arbitrary order; sort for a
# reproducible processing/log order.
for file in sorted(os.listdir(data_path)):
    matched = raw_csv_name.fullmatch(file)
    if matched is None:
        continue
    name, ext = os.path.splitext(file)
    df = pd.read_csv(os.path.join(data_path, file))
    df_norm = Normalization(df, first_compound_idx=3, rowNorm='SumNorm', transNorm='LogTrans2', c=1, log_base=2)
    name = f"{matched.group(1)}norm"
    # index=False: do not write the DataFrame index as an extra column.
    df_norm.to_csv(os.path.join(data_path, f"{name}{ext}"), index=False)

Normalization to constant sum 1.
Log2 Transformation.
Scaling: N/A.
Normalization to constant sum 1.
Log2 Transformation.
Scaling: N/A.
Normalization to constant sum 1.
Log2 Transformation.
Scaling: N/A.


## Convert .csv to .h5ad and split datasets by regions

In [3]:
# Convert the normalized CSV tables (names matching the pattern) to .h5ad.
# NOTE(review): per the section title, split=True presumably requests the
# per-region split — confirm against csv2h5ad.
csv2h5ad(
    data_path=data_path,
    pattern=r'^\w+norm\.csv',
    split=True,
)

## Pool three omics data

In [4]:
# Pool the normalized omics tables selected by the same file-name pattern
# used for the CSV -> h5ad conversion.
pooldata(
    data_path=data_path,
    pattern=r'^\w+norm\.csv',
    split=True,
)

In [5]:
# Show which pooled .h5ad files are now available in data_path
# (left as the cell's last expression so the list is displayed).
list_files(
    data_path,
    'pool',
    '.h5ad',
)

['brain1ad_pool.h5ad',
 'brain1wt_pool.h5ad',
 'brain2ad_pool.h5ad',
 'brain2wt_pool.h5ad',
 'brain3ad_pool.h5ad',
 'brain3wt_pool.h5ad',
 'brain_pool.h5ad']

# Clustering

## brain1

In [7]:
# Cluster the pooled brain1 data, wt sample first and ad sample second
# (so `clusters` ends up bound to the brain1ad run, as before).
# NOTE(review): the trailing 1 presumably shows up in the
# '<sample>_pool_1.h5ad' names consumed by the integration cell — confirm.
for sample_name in ('brain1wt', 'brain1ad'):
    clusters = Clusters(sample_name, 'pool', 1)
    clusters.clustering()

In [45]:
# Integrate the brain1 ad/wt clusterings, then plot each sample's UMAP
# followed by the overlap UMAP.
integration = Cluster_Integration(
    'brain1ad_pool_1.h5ad',
    'brain1wt_pool_1.h5ad',
)
integration.integrate()

for sample_name in ('brain1ad', 'brain1wt'):
    integration.plot_umap_cluster(sample_name, size=60)

integration.plot_overlap_umap(show=False)

## brain2

In [46]:
# Cluster the pooled brain2 data, wt sample first and ad sample second
# (so `clusters` ends up bound to the brain2ad run, as before).
for sample_name in ('brain2wt', 'brain2ad'):
    clusters = Clusters(sample_name, 'pool', 1)
    clusters.clustering()

In [47]:
# Integrate the brain2 ad/wt clusterings, then plot each sample's UMAP
# followed by the overlap UMAP.
integration = Cluster_Integration(
    'brain2ad_pool_1.h5ad',
    'brain2wt_pool_1.h5ad',
)
integration.integrate()

for sample_name in ('brain2ad', 'brain2wt'):
    integration.plot_umap_cluster(sample_name, size=60)

integration.plot_overlap_umap(show=False)

## brain3

In [48]:
# Cluster the pooled brain3 data, wt sample first and ad sample second
# (so `clusters` ends up bound to the brain3ad run, as before).
for sample_name in ('brain3wt', 'brain3ad'):
    clusters = Clusters(sample_name, 'pool', 1)
    clusters.clustering()

In [16]:
# Integrate the brain3 ad/wt clusterings and plot them, exactly as done for
# brain1 and brain2. These calls must actually run: with them commented out,
# a Restart & Run All depended on results left behind by an earlier session.
# NOTE(review): presumably integrate() is what produces the
# ../results/clustering/brain3*_integrated.h5ad files read in the Markers
# section — confirm.
integration = Cluster_Integration('brain3ad_pool_1.h5ad','brain3wt_pool_1.h5ad')

integration.integrate()
integration.plot_umap_cluster('brain3ad',size=60)
integration.plot_umap_cluster('brain3wt',size=60)
integration.plot_overlap_umap(show=False)

In [91]:
# One plot per cluster id 0..16 for the brain3wt sample.
# NOTE(review): 17 is hard-coded; presumably the number of integrated brain3
# clusters — confirm if the clustering is re-run.
for cluster_id in range(17):
    integration.plot_select_cluster(
        'brain3wt',
        cluster=cluster_id,
        size=60,
    )

In [17]:
# One plot per cluster id 0..16 for the brain3ad sample.
# NOTE(review): 17 is hard-coded; presumably the number of integrated brain3
# clusters — confirm if the clustering is re-run.
for cluster_id in range(17):
    integration.plot_select_cluster(
        'brain3ad',
        cluster=cluster_id,
        size=60,
    )

# Markers

In [2]:
# Load the integrated brain1 results (wt -> adata1, ad -> adata2) ...
adata1 = sc.read('../results/clustering/brain1wt_integrated.h5ad')
adata2 = sc.read('../results/clustering/brain1ad_integrated.h5ad')

# ... and apply the same adata_filter thresholds (0.1, 0.1) to both.
adata_filtered1, adata_filtered2 = (
    adata_filter(loaded, 0.1, 0.1) for loaded in (adata1, adata2)
)

In [3]:
# Marker analysis for brain1 using the two filtered datasets loaded above.
markers = Markers('brain1')
markers.findmarkers(
    adata=adata_filtered1,
    adata2=adata_filtered2,
    adj_pval_cutoff=0.05,
    top=50,
)
markers.circular_tree(top=5, show=False)

# Volcano plot for each cluster id 0..20.
# NOTE(review): 21 is hard-coded; presumably the brain1 cluster count — confirm.
for cluster_id in range(21):
    markers.volcano_plot(cluster_id, show=False)

In [4]:
# Load the integrated brain2 results (wt -> adata1, ad -> adata2) ...
adata1 = sc.read('../results/clustering/brain2wt_integrated.h5ad')
adata2 = sc.read('../results/clustering/brain2ad_integrated.h5ad')

# ... and apply the same adata_filter thresholds (0.1, 0.1) to both.
adata_filtered1, adata_filtered2 = (
    adata_filter(loaded, 0.1, 0.1) for loaded in (adata1, adata2)
)

In [5]:
# Marker analysis for brain2 using the two filtered datasets loaded above.
markers = Markers('brain2')
markers.findmarkers(
    adata=adata_filtered1,
    adata2=adata_filtered2,
    adj_pval_cutoff=0.05,
    top=50,
)
markers.circular_tree(top=5, show=False)

# Volcano plot for each cluster id 0..15.
# NOTE(review): 16 is hard-coded; presumably the brain2 cluster count — confirm.
for cluster_id in range(16):
    markers.volcano_plot(cluster_id, show=False)

In [75]:
# Load the integrated brain3 results (wt -> adata1, ad -> adata2) ...
adata1 = sc.read('../results/clustering/brain3wt_integrated.h5ad')
adata2 = sc.read('../results/clustering/brain3ad_integrated.h5ad')

# ... and apply the same adata_filter thresholds (0.1, 0.1) to both.
adata_filtered1, adata_filtered2 = (
    adata_filter(loaded, 0.1, 0.1) for loaded in (adata1, adata2)
)

In [77]:
# Marker analysis for brain3 using the two filtered datasets loaded above.
markers = Markers('brain3')
markers.findmarkers(
    adata=adata_filtered1,
    adata2=adata_filtered2,
    adj_pval_cutoff=0.05,
    top=50,
)
markers.circular_tree(top=5, show=False)

# Volcano plot for each cluster id 0..16.
# NOTE(review): 17 is hard-coded; presumably the brain3 cluster count — confirm.
for cluster_id in range(17):
    markers.volcano_plot(cluster_id, show=False)

# Pathway

In [6]:
# Pathway analysis object for brain3 / metabolomics. The constructor reports
# that metabolomics and glycomics are combined for the enrichment analysis.
pathway = Pathway(
    'brain3',
    'metabolomics',
)

Metabolomics and Glycomics are combined for pathway enrichment analysis.


In [7]:
# Run the enrichment once, then draw a dot plot and a pathway network for
# each cluster id 0..16 (top 20 entries each).
pathway.findpathway()

for cluster_id in range(17):
    pathway.plot_dot(
        cluster=cluster_id,
        scale=30,
        height=10,
        top=20,
        show=False,
    )
    pathway.pathway_network(
        cluster=cluster_id,
        top=20,
        show=False,
    )

In [11]:
# Pathway analysis object for brain3 / lipidomics (rebinds `pathway`).
pathway = Pathway(
    'brain3',
    'lipidomics',
)

In [12]:
# Run the enrichment once, then draw a dot plot for each cluster id 0..16.
# Note the much smaller dot scale (0.4) than in the metabolomics cell (30).
pathway.findpathway()

for cluster_id in range(17):
    pathway.plot_dot(
        cluster=cluster_id,
        scale=0.4,
        height=10,
        top=20,
        show=False,
    )

[1] "MetaboAnalyst R objects initialized ..."
[1] "1"                                                                              
[2] "Name matching OK, please inspect (and manual correct) the results then proceed."
[1] "MetaboAnalyst R objects initialized ..."
[1] "1"                                                                              
[2] "Name matching OK, please inspect (and manual correct) the results then proceed."
[1] "MetaboAnalyst R objects initialized ..."
[1] "1"                                                                              
[2] "Name matching OK, please inspect (and manual correct) the results then proceed."
[1] "MetaboAnalyst R objects initialized ..."
[1] "1"                                                                              
[2] "Name matching OK, please inspect (and manual correct) the results then proceed."
[1] "MetaboAnalyst R objects initialized ..."
[1] "1"                                                                           