# Importing Transcripts and Queries with `TranscriptMapper`

In [1]:
from nvta.transcript_utils import TranscriptMapper

### Example Inputs

In [2]:
# Paths used throughout this notebook: the two example inputs (.tsv files under
# ./tests/resources) and the file the query results are exported to.
# NOTE(review): the paths are relative, so the notebook presumably has to be run
# from the repository root -- confirm.
fileTranscriptInput = "./tests/resources/example_transcript_input.tsv"
fileQueryInput = "./tests/resources/example_query.tsv"
outputFile = "./test_result.tsv"

### Importing Transcripts and Queries

In [3]:
# Create the mapper, then load the transcripts and the queries from the two
# example input files defined above.
transcriptMapper = TranscriptMapper()

transcriptMapper.import_transcripts(fileTranscriptInput)

transcriptMapper.import_queries(fileQueryInput)

### Inspecting Imported Information

In [4]:
# Retrieve all imported queries. Called without arguments, get_queries()
# returns the full list: one dict per query with 'name' and 'queryPos' keys.
transcriptMapper.get_queries()

[{'name': 'TR1', 'queryPos': 4},
 {'name': 'TR2', 'queryPos': 0},
 {'name': 'TR3', 'queryPos': 0},
 {'name': 'TR1', 'queryPos': 13},
 {'name': 'TR2', 'queryPos': 10},
 {'name': 'TR3', 'queryPos': 9}]

In [5]:
# Retrieve a single query by its position in the query list
# (index=1 is the second imported query).
transcriptMapper.get_queries(index=1)

{'name': 'TR2', 'queryPos': 0}

In [6]:
# Retrieve all imported transcripts. Called without arguments,
# get_transcripts() returns a dict mapping each transcript name to its
# Transcript object.
transcriptMapper.get_transcripts()

{'TR1': <nvta.transcript_utils.Transcript at 0x103c48ac8>,
 'TR2': <nvta.transcript_utils.Transcript at 0x103c48d30>,
 'TR3': <nvta.transcript_utils.Transcript at 0x103c48f60>}

In [7]:
# Look up one transcript by name and show its fields
# (name, chrom, startPos, cigar, direction) as a dict.
transcriptMapper.get_transcripts(name="TR3").get_info()

{'name': 'TR3',
 'chrom': 'CHR1',
 'startPos': 43,
 'cigar': '8M7D6M2I2M11D7M',
 'direction': '-'}

### Running Queries and Inspecting Results

#### Running a custom query (results are not saved to object)

In [8]:
# Translate one ad-hoc transcript position (position 8 on TR3). The result dict
# is returned for display only; it is not added to the mapper's stored results.
transcriptMapper.run_single_query(name="TR3", queryPos=8)

{'name': 'TR3', 'inputPos': 8, 'chrom': 'CHR1', 'refPos': 24, 'direction': '-'}

#### Running all imported queries (results are saved to object)

In [9]:
# Run every imported query. Nothing is displayed here; the results are kept on
# the mapper and read back with get_query_results() below.
transcriptMapper.run_all_queries()

#### Inspect results

In [10]:
# Inspect one stored result by its position in the result list
# (index=1 is the result for the second imported query).
transcriptMapper.get_query_results(index=1)

{'name': 'TR2', 'inputPos': 0, 'chrom': 'CHR2', 'refPos': 10, 'direction': '+'}

In [11]:
# Get all query results: a list with one result dict per imported query.
# NOTE(review): a fractional refPos (e.g. 24.1) shows up when the query position
# falls inside an insertion -- presumably N.k means the k-th inserted base
# after reference position N; confirm against the Transcript implementation.
transcriptMapper.get_query_results()

[{'name': 'TR1',
  'inputPos': 4,
  'chrom': 'CHR1',
  'refPos': 7,
  'direction': '+'},
 {'name': 'TR2',
  'inputPos': 0,
  'chrom': 'CHR2',
  'refPos': 10,
  'direction': '+'},
 {'name': 'TR3',
  'inputPos': 0,
  'chrom': 'CHR1',
  'refPos': 43,
  'direction': '-'},
 {'name': 'TR1',
  'inputPos': 13,
  'chrom': 'CHR1',
  'refPos': 23,
  'direction': '+'},
 {'name': 'TR2',
  'inputPos': 10,
  'chrom': 'CHR2',
  'refPos': 20,
  'direction': '+'},
 {'name': 'TR3',
  'inputPos': 9,
  'chrom': 'CHR1',
  'refPos': 24.1,
  'direction': '-'}]

#### Exporting all query results to an output file

In [12]:
# Write the stored query results to the output path defined at the top of the
# notebook.
transcriptMapper.export_query_results(outputFile=outputFile)

# Using `Transcript` for Flexibility

### Instantiating a single `Transcript` object with user-supplied information

In [13]:
from nvta.transcript_utils import Transcript

In [14]:
# Build a stand-alone Transcript directly from its fields instead of reading
# it from an input file.
singleTranscript = Transcript(
    name="TR1",
    chrom="CHR1",
    startPos=3,
    cigar="8M7D6M2I2M11D7M",
    direction="+",
)

In [15]:
# Show the transcript's fields as a dict to confirm what was passed in.
singleTranscript.get_info()

{'name': 'TR1',
 'chrom': 'CHR1',
 'startPos': 3,
 'cigar': '8M7D6M2I2M11D7M',
 'direction': '+'}

### Translating transcript coordinates with a single transcript

In [16]:
# Position 0 is the first base of the leading 8M segment, so it maps to the
# transcript's startPos (3).
singleTranscript.translate_coordinates(0)

{'name': 'TR1', 'inputPos': 0, 'chrom': 'CHR1', 'refPos': 3, 'direction': '+'}

In [17]:
# Position 9 is the second base of the 6M segment that follows the 7D
# deletion: the 8 matched and 7 deleted reference bases are skipped first,
# giving 3 + 8 + 7 + 1 = 19.
singleTranscript.translate_coordinates(9)

{'name': 'TR1', 'inputPos': 9, 'chrom': 'CHR1', 'refPos': 19, 'direction': '+'}

In [18]:
# Position 14 is the first base of the 2I insertion, which has no reference
# base of its own; the result is reported as 23.1.
# NOTE(review): presumably "first inserted base after reference position 23"
# -- confirm against the Transcript implementation.
singleTranscript.translate_coordinates(14)

{'name': 'TR1',
 'inputPos': 14,
 'chrom': 'CHR1',
 'refPos': 23.1,
 'direction': '+'}

In [19]:
# Position 15 is the second (last) base of the 2I insertion; the result is
# reported as 23.2, one step after the 23.1 returned for position 14.
singleTranscript.translate_coordinates(15)

{'name': 'TR1',
 'inputPos': 15,
 'chrom': 'CHR1',
 'refPos': 23.2,
 'direction': '+'}

In [20]:
# Position 24 is the last base of the transcript (8 + 6 + 2 + 2 + 7 = 25 bases
# in total), i.e. the end of the final 7M segment, which maps to 43.
singleTranscript.translate_coordinates(24)

{'name': 'TR1',
 'inputPos': 24,
 'chrom': 'CHR1',
 'refPos': 43,
 'direction': '+'}

### Utility function to verify a CIGAR string

#### Valid CIGAR String

In [21]:
# verify_cigar() is called on the class, so no Transcript instance is needed.
# A well-formed CIGAR string is returned unchanged.
Transcript.verify_cigar("8M10D20I")

'8M10D20I'

#### Invalid CIGAR Strings

In [22]:
# "8MM2D" has two operation letters in a row, so the parsed CIGAR no longer
# matches the input and verify_cigar() raises ValueError.
# The exception is the point of this cell, so it is caught and displayed:
# left uncaught it would stop "Run All" here and the remaining cells would
# never execute.
try:
    Transcript.verify_cigar("8MM2D")
except ValueError as err:
    print("{}: {}".format(type(err).__name__, err))

Parsed information does not match original CIGAR, potentially malformatted CIGAR string.
Input CIGAR = 8MM2D
Parsed CIGAR = 8M2D


ValueError: Malformatted CIGAR string

In [23]:
# "U" is not a CIGAR operation character, so verify_cigar() raises ValueError.
# The exception is the point of this cell, so it is caught and displayed:
# left uncaught it would stop "Run All" here and the remaining cells would
# never execute.
try:
    Transcript.verify_cigar("8M2D10U")
except ValueError as err:
    print("{}: {}".format(type(err).__name__, err))

Invalid CIGAR string character detected U


ValueError: Invalid CIGAR string character

#### Default behavior is to raise an error when H, S, or P operations are encountered

In [24]:
# H is one of the operations (H, S, P) the parser has no translation logic
# for, so by default verify_cigar() raises ValueError when it meets one.
# The exception is the point of this cell, so it is caught and displayed:
# left uncaught it would stop "Run All" here and the remaining cells would
# never execute.
try:
    Transcript.verify_cigar("8M8H")
except ValueError as err:
    print("{}: {}".format(type(err).__name__, err))

CIGAR parser does not support logic for H


ValueError: Unsupported CIGAR character

#### For verifying CIGARs that are not going to be used for coordinate translation, H, S, and P operations can be ignored

In [25]:
# With ignoreHSP=True the H, S and P operations are accepted, so the same
# string that raised above is returned unchanged.
Transcript.verify_cigar("8M8H", ignoreHSP=True)

'8M8H'