In [1]:
#| hide



![LongReadTools](https://raw.githubusercontent.com/cobioda/longreadtools/master/longreadtools/white_bg_log_hd.png)


## Install

```sh
pip install git+https://github.com/cobioda/longreadtools.git

```

## How to use

Here we will use the `isomatrix_tools` module to convert our isomatrix
txt files in bulk into `anndata` objects using the
[`multiple_isomatrix_conversion`](https://cobioda.github.io/longreadtools/isomatrix_tools.html#multiple_isomatrix_conversion)
function . Then, we will utilize our specialized
[`concatenate_anndata`](https://cobioda.github.io/longreadtools/isomatrix_tools.html#concatenate_anndata)
function to generate a concatenated `anndata` for downstream analysis.

First lets get the list of isomatrix files we want to convert.

In [2]:
# Importing required libraries
import os
import re

# Defining the directory path
directory = '/data/analysis/data_mcandrew/000-sclr-discovair/'

# Defining the regular expression pattern to match the required files
pattern = re.compile('.*(_BIOP_INT|BIOP_NAS)$')

# Getting a list of all files in the directory
all_files = os.listdir(directory)

# Filtering the list to include only files that match the pattern
matching_files = [os.path.join(directory, f) for f in all_files if pattern.match(f)]

# Printing the list of matching files
print(matching_files)

# Assigning the list of matching files to the variable 'individual_runs'
individual_runs = matching_files

# Adding '_isomatrix.txt' to each file name in the 'individual_runs' list
individual_runs = [f'{run}_isomatrix.txt' for run in individual_runs]

# Creating a list of paths for each isomatrix file in the 'matching_files' list
isomatrix_paths = [os.path.join(run, f'{os.path.basename(run)}_isomatrix.txt') for run in matching_files]

['/data/analysis/data_mcandrew/000-sclr-discovair/D498_BIOP_INT', '/data/analysis/data_mcandrew/000-sclr-discovair/D492_BIOP_NAS', '/data/analysis/data_mcandrew/000-sclr-discovair/D494_BIOP_INT', '/data/analysis/data_mcandrew/000-sclr-discovair/D500_BIOP_INT', '/data/analysis/data_mcandrew/000-sclr-discovair/D494_BIOP_NAS', '/data/analysis/data_mcandrew/000-sclr-discovair/D496_BIOP_INT', '/data/analysis/data_mcandrew/000-sclr-discovair/D499_BIOP_INT', '/data/analysis/data_mcandrew/000-sclr-discovair/D493_BIOP_INT', '/data/analysis/data_mcandrew/000-sclr-discovair/D493_BIOP_NAS', '/data/analysis/data_mcandrew/000-sclr-discovair/D534_BIOP_INT', '/data/analysis/data_mcandrew/000-sclr-discovair/D490_BIOP_INT', '/data/analysis/data_mcandrew/000-sclr-discovair/D500_BIOP_NAS', '/data/analysis/data_mcandrew/000-sclr-discovair/D495_BIOP_INT', '/data/analysis/data_mcandrew/000-sclr-discovair/D492_BIOP_INT']


Lets import isomatool and convert them to Anndata objects 

In [3]:
from longreadtools.isomatool import *
import scanpy as sc

In [4]:
converted_isomatrix_paths = multiple_isomatrix_conversion(isomatrix_paths, verbose=True, return_paths = True)

File /data/analysis/data_mcandrew/000-sclr-discovair/D498_BIOP_INT/D498_BIOP_INT_isomatrix.h5ad was successfully written to disk.
File /data/analysis/data_mcandrew/000-sclr-discovair/D500_BIOP_NAS/D500_BIOP_NAS_isomatrix.h5ad was successfully written to disk.
File /data/analysis/data_mcandrew/000-sclr-discovair/D500_BIOP_INT/D500_BIOP_INT_isomatrix.h5ad was successfully written to disk.
File /data/analysis/data_mcandrew/000-sclr-discovair/D493_BIOP_NAS/D493_BIOP_NAS_isomatrix.h5ad was successfully written to disk.
File /data/analysis/data_mcandrew/000-sclr-discovair/D494_BIOP_NAS/D494_BIOP_NAS_isomatrix.h5ad was successfully written to disk.
File /data/analysis/data_mcandrew/000-sclr-discovair/D493_BIOP_INT/D493_BIOP_INT_isomatrix.h5ad was successfully written to disk.


In [None]:
andata_concat = concatenate_anndata(converted_isomatrix_paths, verbose = True)

Reading .h5ad files...
Applying feature set standardization...


Standardizing anndata features via union: 100%|██████████| 14/14 [01:04<00:00,  4.61s/it]


Concatenating AnnData objects and adding batch keys with scanpy...


  utils.warn_names_duplicates("obs")
  np.array(self.categories._na_value).astype(dtype)


Setting .var attribute...
Final Check...


  if pd.api.types.is_categorical_dtype(df[col]):
  if pd.api.types.is_categorical_dtype(df[col]):
  if pd.api.types.is_categorical_dtype(df[col]):
  if pd.api.types.is_categorical_dtype(df[col]):


NameError: name 'issparse' is not defined

In [None]:
andata_concat.X

In [None]:
andata_concat.var

In [None]:
andata_concat.obs

In [None]:
andata_concat.write_h5ad('discovair_long_read_transcript_matrix.h5ad')

In [None]:
isoform_anndata_from_long_reads = sc.read_h5ad("discovair_long_read_transcript_matrix.h5ad")
gene_anndata_from_short_reads = sc.read_h5ad("/data/analysis/data_mcandrew/000-sclr-discovair/integrated_V10.h5ad")

In [None]:
isoform_anndata_from_long_reads

In [None]:
gene_anndata_from_short_reads

In [None]:
gene_anndata_from_short_reads.obs

In [None]:
from longreadtools.Standardization import *
isoform_matrix = subset_common_cells(isoform_anndata_from_long_reads, gene_anndata_from_short_reads)

In [None]:
gene_matrtrix  = subset_common_cells(gene_anndata_from_short_reads, isoform_matrix)

In [None]:
annotated_isoform_matrix = transfer_obs(gene_matrtrix, isoform_matrix)