### 1. Import required packages

In [1]:
from pathlib import Path

import anndata as ad
import scanpy as sc

### 2. Load in the data

In [2]:
# Read the raw AnnData object from its .h5ad file.
# NOTE: this is an absolute, machine-specific path; adjust it when running elsewhere.
raw_h5ad_path = "/home/fotakis/myScratch/sc_results/10X_BD/Prostate_RAW.h5ad"
adata = ad.read_h5ad(raw_h5ad_path)

### 3. Subset the dataset for all cells sequenced with 10X Chromium platform
#### 3.1. Define the cell IDs we need to keep (10X)

In [3]:
# Boolean mask over the rows of `adata.obs` whose `platform` value equals "10X".
is_10x = adata.obs["platform"] == "10X"

# Row labels (cell IDs) of the matching observations.
cell_ids = adata.obs.loc[is_10x].index

# Display the resulting subset as the cell output.
adata[cell_ids, :]

View of AnnData object with n_obs × n_vars = 29197 × 19756
    obs: 'report', 'platform', 'patient', 'sample', 'Batch', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt'
    var: 'gene_ids-0', 'feature_types-0', 'gene_ids-1', 'feature_types-1', 'gene_ids-2', 'feature_types-2', 'gene_ids-3', 'feature_types-3', 'gene_ids-4', 'feature_types-4', 'gene_ids-5', 'feature_types-5', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
    uns: 'platform_colors', 'sample_colors'

#### 3.2. Copy the subsetted 10X AnnData object to a new one

In [4]:
# Indexing `adata` by cell IDs yields a view; `.copy()` turns it into a
# standalone AnnData object holding only the 10X cells.
chromium_adata = adata[cell_ids].copy()

#### 3.3. Save the (transposed) 10X counts matrix to a CSV file for downstream analysis

In [None]:
# Make sure the output directory exists; `to_csv` does not create missing
# parent directories and would fail on a fresh checkout.
Path('./tables').mkdir(parents=True, exist_ok=True)

# Transpose so that rows are variables (genes) and columns are observations
# (cells), convert to a DataFrame, and write it out as CSV.
chromium_adata.T.to_df().to_csv('./tables/10X_counts_matrix.csv')

### 4. Subsample the 10X dataset to 18000 cells

In [None]:
# Number of cells to keep in the subsample.
target_cells = 18000

# Draw a random subset of `target_cells` cells from the 10X data.
# `copy=True` returns the subsample as a new object, so `chromium_adata` is left
# untouched without first duplicating the full-size object.
# `random_state=0` is scanpy's default seed, written out so the draw is
# visibly reproducible.
chromium_adata_subsam = sc.pp.subsample(
    chromium_adata,
    n_obs=target_cells,
    random_state=0,
    copy=True,
)

#### 4.1. Save the subsampled (transposed) 10X counts matrix to a CSV file for downstream analysis

In [None]:
# Make sure the output directory exists; `to_csv` does not create missing
# parent directories and would fail on a fresh checkout.
Path('./tables').mkdir(parents=True, exist_ok=True)

# Transpose the subsampled 10X object (rows become variables, columns become
# observations), convert to a DataFrame, and write it out as CSV.
chromium_adata_subsam.T.to_df().to_csv('./tables/10X_counts_matrix_subsampled.csv')

### 5. Subset the dataset for all cells sequenced with the BD Rhapsody platform
#### 5.1. Define the cell IDs we need to keep (BD)

In [None]:
# Boolean mask over the rows of `adata.obs` whose `platform` value equals "BD".
is_bd = adata.obs["platform"] == "BD"

# Row labels (cell IDs) of the matching observations.
cell_ids2 = adata.obs.loc[is_bd].index

# Display the resulting subset as the cell output.
adata[cell_ids2, :]

#### 5.2. Copy the subsetted BD AnnData object to a new one

In [None]:
# Indexing `adata` by cell IDs yields a view; `.copy()` turns it into a
# standalone AnnData object holding only the BD cells.
bd_adata = adata[cell_ids2].copy()

#### 5.3. Save the (transposed) BD counts matrix to a CSV file for downstream analysis

In [None]:
# Make sure the output directory exists; `to_csv` does not create missing
# parent directories and would fail on a fresh checkout.
Path('./tables').mkdir(parents=True, exist_ok=True)

# Transpose so that rows are variables (genes) and columns are observations
# (cells), convert to a DataFrame, and write it out as CSV.
bd_adata.T.to_df().to_csv('./tables/BD_counts_matrix.csv')

### 6. Subsample the BD dataset to 18000 cells

In [None]:
# Number of cells to keep in the subsample.
target_cells = 18000

# Draw a random subset of `target_cells` cells from the BD data.
# `copy=True` returns the subsample as a new object, so `bd_adata` is left
# untouched without first duplicating the full-size object.
# `random_state=0` is scanpy's default seed, written out so the draw is
# visibly reproducible.
# NOTE(review): this raises if `bd_adata` has fewer than `target_cells` cells —
# confirm the BD cell count.
bd_adata_subsam = sc.pp.subsample(
    bd_adata,
    n_obs=target_cells,
    random_state=0,
    copy=True,
)

#### 6.1. Save the subsampled (transposed) BD counts matrix to a CSV file for downstream analysis

In [None]:
# Make sure the output directory exists; `to_csv` does not create missing
# parent directories and would fail on a fresh checkout.
Path('./tables').mkdir(parents=True, exist_ok=True)

# Transpose the subsampled BD object (rows become variables, columns become
# observations), convert to a DataFrame, and write it out as CSV.
bd_adata_subsam.T.to_df().to_csv('./tables/BD_counts_matrix_subsampled.csv')