In [39]:
import numpy as np

import shephard
from shephard import Proteome
from shephard.tools import domain_tools

In [40]:
# Start from an empty proteome (built from an empty list).
new_prot = Proteome([])

# Add an example protein. The third argument ('seq_001') is the identifier
# later used to retrieve this protein via new_prot.protein(...);
# 'seq1' is presumably the human-readable protein name -- TODO confirm.
seq = 'NQGSNMGGGMNFGAFSINPAMMAAAQAALQSSWGMMGMLASQQNQSGPSGNNQNQGNMQREPNQAFGSGNNSYSGSNSGAAIGWGSASNAGSGSGFNGGFGSSMDSKSSGWGM'
new_prot.add_protein(seq, 'seq1', 'seq_001')

In [41]:
# Retrieve the example protein by its identifier, then annotate it.
protein = new_prot.protein('seq_001')

# One (start, end, label) triple per domain; each triple is unpacked
# positionally into add_domain, in the order listed.
domain_specs = [
    (1, 5, 'N-Terminus'),
    (23, 52, 'target_region'),
    (45, 101, 'other_region'),
    (100, 113, 'C-Terminus'),
]
for spec in domain_specs:
    protein.add_domain(*spec)

# Show the domain names the protein now reports.
print(protein.domain_names)

['N-Terminus_1_5', 'target_region_23_52', 'other_region_45_101', 'C-Terminus_100_113']


#### Evaluate domain overlap

In [49]:
## USE BUILT-IN DOMAIN FUNCTION ##

# Fetch the domains by the names printed by protein.domain_names.
target_dom = protein.domain('target_region_23_52')
cterm_dom = protein.domain('C-Terminus_100_113')

# C-Terminus vs. target_region
result1 = target_dom.domain_overlap(cterm_dom)
print('overlap of domains  C-terminus  and target_region:', result1)

# other_region vs. target_region
other_dom = protein.domain('other_region_45_101')
result2 = other_dom.domain_overlap(target_dom)
print('overlap of domains other_region and target_region:', result2)

overlap of domains  C-terminus  and target_region: False
overlap of domains other_region and target_region: True


In [53]:
## USE DOMAIN TOOLS DIRECTLY ##

# Look the three domains up once; both sections below reuse them.
domain1 = protein.domain('target_region_23_52')
domain2 = protein.domain('C-Terminus_100_113')
domain3 = protein.domain('other_region_45_101')

## by position ##

# C-Terminus vs. target_region, compared via start/end coordinates only
result1 = domain_tools.domain_overlap_by_position(domain1.start, domain1.end, domain2.start, domain2.end)
print('overlap of domains  C-terminus  and target_region:', result1)

# other_region vs. target_region, compared via start/end coordinates only
result2 = domain_tools.domain_overlap_by_position(domain3.start, domain3.end, domain1.start, domain1.end)
print('overlap of domains other_region and target_region:', result2)

## by overlap fraction ##

# C-Terminus vs. target_region, passing the domain objects themselves
result1 = domain_tools.domain_overlap_fraction(domain1, domain2)
print('overlap fraction of domains  C-terminus  and target_region:', result1)

# other_region vs. target_region, passing the domain objects themselves
# NOTE(review): the recorded output for this call is -0.2333...; a negative
# fraction is unexpected, so confirm against the domain_tools documentation
# whether the sign depends on argument order or indicates a library bug.
result2 = domain_tools.domain_overlap_fraction(domain3, domain1)
print('overlap fraction of domains other_region and target_region:', result2)

overlap of domains  C-terminus  and target_region: False
overlap of domains other_region and target_region: True
overlap fraction of domains  C-terminus  and target_region: 0.0
overlap fraction of domains other_region and target_region: -0.23333333333333334
