In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from scipy import stats

from q2_gglasso._func import robust_PCA, remove_biom_header

In [None]:
# Install the package described by the local setup.py (presumably q2_gglasso,
# which provides the `qiime gglasso` commands used further down).
# NOTE(review): the import cell above already imports q2_gglasso, so on a fresh
# environment that import runs before this install — confirm the intended cell order.
!python setup.py install

In [None]:
# Refresh the QIIME 2 CLI cache (presumably so the plugin installed in the
# previous cell is visible to the `qiime` commands below).
!qiime dev refresh-cache

### Import data

In [None]:
# Import the BIOM table 238_otu_table.biom as a FeatureTable[Frequency] artifact (88soils.biom.qza).
!qiime tools import \
    --input-path example/data/238_otu_table.biom \
    --output-path example/data/88soils.biom.qza \
    --type FeatureTable[Frequency]

### Keep OTUs with a total frequency of at least 100

In [None]:
# Filter features of the imported table with --p-min-frequency 100 and
# write the filtered table to 88soils_filt100.biom.qza.
!qiime feature-table filter-features \
    --i-table example/data/88soils.biom.qza \
    --o-filtered-table example/data/88soils_filt100.biom.qza \
    --p-min-frequency 100

### Add a pseudocount of 1 to all counts (removes zeros)

In [None]:
# Apply a pseudocount of 1 to the filtered table and store the result as
# a composition table (88soils_composition.biom.qza).
!qiime composition add-pseudocount \
                    --i-table example/data/88soils_filt100.biom.qza \
                    --p-pseudocount 1 \
                    --o-composition-table example/data/88soils_composition.biom.qza

### Make data compositional and transform it with CLR

In [None]:
# Run the gglasso plugin's transform-features action with the "clr"
# transformation on the composition table; output goes to 88soils_clr.biom.qza.
!qiime gglasso transform-features \
     --p-transformation clr \
     --i-table example/data/88soils_composition.biom.qza \
     --o-transformed-table example/data/88soils_clr.biom.qza

### Export clr-transformed data

In [None]:
# Export the CLR artifact into the directory example/data/test_soil/88soils_clr
# (the next cell reads feature-table.biom from there).
!qiime tools export \
  --input-path example/data/88soils_clr.biom.qza \
  --output-path example/data/test_soil/88soils_clr

In [None]:
# Convert the exported BIOM file to a tab-separated text file.
!biom convert -i example/data/test_soil/88soils_clr/feature-table.biom -o example/data/test_soil/88soils_clr/clr_feature-table.tsv --to-tsv
# Post-process the TSV with the project helper; presumably it strips the extra
# header line that `biom convert` writes so pandas can parse the file directly
# — TODO confirm against q2_gglasso._func.remove_biom_header.
remove_biom_header(file_path="example/data/test_soil/88soils_clr/clr_feature-table.tsv")

### Calculate covariance matrix (scaled)

In [None]:
# Compute the covariance of the CLR table with --p-method scaled; per the
# original author's note this yields a correlation matrix (saved as 88soils_corr.qza).
!qiime gglasso calculate-covariance \
     --p-method scaled \
     --i-table example/data/88soils_clr.biom.qza \
     --o-covariance-matrix example/data/88soils_corr.qza

### Estimate low-rank solution with optimal lambda

In [None]:
# Run gglasso solve-problem on the correlation artifact with lambda1 fixed to
# 0.22758 (the "optimal lambda" of the section heading; how it was selected is
# not shown in this notebook). The estimate is written to 88soils_low.qza.
!qiime gglasso solve-problem \
     --p-lambda1 0.22758 \
     --i-covariance-matrix example/data/88soils_corr.qza \
     --o-inverse-covariance-matrix example/data/88soils_low.qza

### Export low-rank solution

In [None]:
# Export the solver output into example/data/test_soil/88soils_low
# (the robust PCA cell reads pairwise_comparisons.tsv from that directory).
!qiime tools export \
  --input-path example/data/88soils_low.qza \
  --output-path example/data/test_soil/88soils_low

### Metadata

In [None]:
# Sample metadata: tab-separated file, first column used as the row index.
mapping = pd.read_csv('example/data/88soils_metadata.txt', sep='\t', index_col=0)
# int() truncates toward zero, so this column holds the integer part of pH
# rather than a nearest-integer rounding.
mapping['ph_rounded'] = mapping['ph'].map(int)

# CLR-transformed feature table produced by the export/convert cells.
df = pd.read_csv(
    "example/data/test_soil/88soils_clr/clr_feature-table.tsv",
    sep='\t',
    index_col=0,
)

# Align the covariates with the row labels of df; labels that are missing
# from the metadata come back as NaN.
ph = mapping['ph'].reindex(df.index)
temperature = mapping['annual_season_temp'].reindex(df.index)

# Row-wise total of df, used as the "Sampling depth" colour in the plots.
# NOTE(review): df holds CLR-transformed values here, not raw counts —
# confirm that this sum is the intended depth.
depth = df.sum(axis=1)

### Robust PCA

In [None]:
# Low-rank solution exported from the 88soils_low artifact.
L = pd.read_csv(
    "example/data/test_soil/88soils_low/pairwise_comparisons.tsv",
    sep='\t',
    index_col=0,
)

# Robust PCA of the CLR table driven by L. Judging by how the results are used
# in the plots below, the helper returns the projected data, the loadings and
# the eigenvalues — see q2_gglasso._func.robust_PCA for the exact contract.
proj, loadings, eigv = robust_PCA(df, L, inverse=True)

# Numerical rank of the low-rank matrix.
r = np.linalg.matrix_rank(L)

### Plot pH

In [None]:
# Scatter plot: first robust-PCA component (x) against pH (y), coloured by `depth`.
fig, ax = plt.subplots()
im = ax.scatter(
    proj[:, 0],
    ph,
    c=depth,
    cmap=plt.cm.Blues,
    vmin=0,
)

# Colour bar for the depth values.
cbar = fig.colorbar(im)
cbar.set_label("Sampling depth")

ax.set(
    xlabel=f"PCA component 1 with eigenvalue {eigv[0]}",
    ylabel="pH",
)

# Write the current figure to disk.
plt.savefig('example/data/ph.png')

In [None]:
# Spearman rank correlation between pH and the first robust-PCA component.
# The statistic is computed once and unpacked, instead of calling
# stats.spearmanr twice on identical inputs (once per printed value).
ph_rho, ph_pvalue = stats.spearmanr(ph, proj[:,0])
print("Spearman correlation between pH and 1st component: {0}, p-value: {1}".format(ph_rho, ph_pvalue))

### Plot temperature

In [None]:
# Scatter plot: second robust-PCA component (x) against temperature (y), coloured by `depth`.
fig, ax = plt.subplots()
im = ax.scatter(
    proj[:, 1],
    temperature,
    c=depth,
    cmap=plt.cm.Blues,
    vmin=0,
)

# Colour bar for the depth values.
cbar = fig.colorbar(im)
cbar.set_label("Sampling depth")

ax.set(
    xlabel=f"PCA component 2 with eigenvalue {eigv[1]}",
    ylabel="Temperature",
)

# Write the current figure to disk.
plt.savefig('example/data/temp.png')

In [None]:
# Spearman rank correlation between temperature and the second robust-PCA component.
# The statistic is computed once and unpacked, instead of calling
# stats.spearmanr twice on identical inputs (once per printed value).
temp_rho, temp_pvalue = stats.spearmanr(temperature, proj[:,1])
print("Spearman correlation between temperature and 2nd component: {0}, p-value: {1}".format(temp_rho, temp_pvalue))