# Introduction to Dochap-Tool
## Abstract:
* Simple tool created for comparing exons and domains between transcripts

## Requirements:
* Python 3.6

## Installation
```
git clone https://github.com/nitzanel/dochap_tool.git
cd dochap_tool
python setup.py install --user
```

In [None]:
# Colorize JSON output so it is easier on the eyes
from pygments import highlight, lexers, formatters
import json
def format_and_color(param):
    """Return *param* as indented JSON text, syntax-highlighted for a terminal.

    *param* is serialized with ``json.dumps`` (4-space indent) and the
    resulting text is passed through pygments' JSON lexer and terminal
    formatter.
    """
    pretty_text = json.dumps(param, indent=4)
    json_lexer = lexers.JsonLexer()
    terminal_formatter = formatters.TerminalFormatter()
    return highlight(pretty_text, json_lexer, terminal_formatter)

# Querying the database

## Getting transcript ids of gene symbols

In [None]:
from dochap_tool.common_utils import utils
from dochap_tool.compare_utils import compare_exons
# Connection object for the Mus_musculus species; 'data' is presumably the
# folder holding the database files -- confirm against create_db usage.
conn = utils.get_connection_object('data', 'Mus_musculus')
with conn:
    lookup_ids = compare_exons.get_transcript_ids_of_gene_symbol
    t_ids1 = lookup_ids(conn, 'Xkr5')
    t_ids2 = lookup_ids(conn, 'Xkr4')
    # 'noactualgene' is presumably not a real symbol; it shows the result
    # of a lookup that finds nothing.
    t_ids3 = lookup_ids(conn, 'noactualgene')
    labelled_results = (
        ('t_ids1:', t_ids1),
        ('t_ids2:', t_ids2),
        ('t_ids3:', t_ids3),
    )
    for label, transcript_ids in labelled_results:
        print(label, format_and_color(transcript_ids))

## Getting exons from the database

### By transcript id

In [None]:
# Look up the exons of one transcript id and print them as colored JSON.
exons = compare_exons.get_exons_from_transcript_id(
    'data',
    'Mus_musculus',
    'uc009kzx.3',
)
colored_exons = format_and_color(exons)
print(colored_exons)

### By gene symbol

In [None]:
# Look up the exons for a gene symbol and print them as colored JSON.
exons = compare_exons.get_exons_from_gene_symbol(
    'data',
    'Mus_musculus',
    'Xkr5',
)
colored_exons = format_and_color(exons)
print(colored_exons)

## Getting domains from the database

In [None]:
# Look up the domains for a gene symbol and print them as colored JSON.
domains = compare_exons.get_domains_of_gene_symbol(
    'data',
    'Mus_musculus',
    'Xkr5',
)
colored_domains = format_and_color(domains)
print(colored_domains)

## Getting intersections between exons and domains

In [None]:
# Print, for every transcript of the gene and every domain variant, the
# intersections between that variant's domains and the transcript's exons.
gene_symbol = 'Tlr7'
exons_variants = compare_exons.get_exons_from_gene_symbol('data', 'Mus_musculus', gene_symbol)
domains_variants = compare_exons.get_domains_of_gene_symbol('data', 'Mus_musculus', gene_symbol)
print(f'intersections by transcript ids and domain variations for gene symbol {gene_symbol}:\n')
if not exons_variants:
    print(f'no exons for {gene_symbol}')
if not domains_variants:
    print(f'no domains for {gene_symbol}')
# Fall back to empty containers so that a missing result (e.g. None) that was
# just reported above does not crash the loops below with an AttributeError /
# TypeError. Non-empty results are iterated exactly as before.
for transcript_id, exons_variant in (exons_variants or {}).items():
    for domains_variant_index, domains_variant in enumerate(domains_variants or []):
        intersections = compare_exons.get_domains_intersections_in_exons(domains_variant, exons_variant)
        print(f'transcript_id:{transcript_id}, domain_variant:{domains_variant_index}\n', format_and_color(intersections))

# Parsing gtf files

In [None]:
from dochap_tool.gtf_utils import parser as gtf_parser
# Parse the test gtf file, then print only entries 10 through 19 of the result.
transcripts = gtf_parser.parse_gtf_file('gtf_testing/test.gtf')
items = list(transcripts.items())
sub_items = dict(items[10:20])
colored_sub_items = format_and_color(sub_items)
print(colored_sub_items)

# Displaying data and comparing

## Drawing exons and domains

In [None]:
from dochap_tool.draw_utils import draw_tool
# IPython.display is the public location of SVG/display; importing `display`
# from IPython.core.display is deprecated and fails on recent IPython releases.
from IPython.display import SVG, display
gene_symbol='rxrg'
# Exons from the database, keyed by transcript id.
exons_variants = compare_exons.get_exons_from_gene_symbol('data','Mus_musculus',gene_symbol)
# Transcripts parsed from the user's gtf file, narrowed to the same gene symbol.
other_exons = gtf_parser.parse_gtf_file('gtf_testing/transcripts.gtf')
other_exons = gtf_parser.get_transcripts_by_gene_symbol(other_exons,gene_symbol)
domains_variants = compare_exons.get_domains_of_gene_symbol('data','Mus_musculus',gene_symbol)
for t_id in exons_variants:
    if t_id in other_exons:
        # The user file contains this transcript too: draw it right before the
        # database version, then remove it so the final loop does not draw it again.
        t_id_text = f'user transcript id:{t_id}'
        display(SVG(data=draw_tool.draw_exons(other_exons[t_id],t_id_text)))
        del other_exons[t_id]
    t_id_text = f'transcript id: {t_id}'
    display(SVG(data=draw_tool.draw_exons(exons_variants[t_id],t_id_text)))
    # Every domain variant is drawn under each database transcript.
    for index,domain_variant in enumerate(domains_variants):
        variant_text = f'domain variant: {index+1}'
        display(SVG(data=draw_tool.draw_domains(domain_variant,variant_text)))
# Whatever is left in other_exons exists only in the user's file; here
# `exon_variant` iterates the remaining keys (transcript ids).
for exon_variant in other_exons:
    t_id_text = f'user transcript id:{exon_variant}'
    display(SVG(data=draw_tool.draw_exons(other_exons[exon_variant],t_id_text)))

# Downloading species data

## Downloading from NCBI

In [None]:
from dochap_tool.ncbi_utils import downloader as ncbi_downloader
# Download the NCBI data for the Mus_musculus species.
# NOTE(review): 'data' is presumably the target folder -- confirm against the downloader.
ncbi_downloader.download_specie_from_ncbi('data','Mus_musculus')

### Updating data from NCBI

In [None]:
from dochap_tool.ncbi_utils import updater
# Check for updates to the NCBI data.
# NOTE(review): 'data' is presumably the same folder passed to the downloader -- confirm.
updater.check_for_updates('data')

## Downloading from UCSC

In [None]:
from dochap_tool.ucsc_utils import downloader as ucsc_downloader
# Download the UCSC data for the Mus_musculus species.
# NOTE(review): 'data' is presumably the target folder -- confirm against the downloader.
ucsc_downloader.download_specie_from_ucsc('data','Mus_musculus')

# Creating the database

In [None]:
# Create the Mus_musculus database for testing functions.
from dochap_tool.db_utils import create_db
# create_db needs a folder in which to store the db files, and a species to
# create a db for.
create_db.create_db('data','Mus_musculus')