# MHC allele protein sequence provider

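- Protein FASTA files are downloaded per species and gene from the IPD-MHC database (https://www.ebi.ac.uk/ipd/mhc/). The per-species base URLs are listed in `SPECIES_FASTA_BASEURL_MAP`, and each downloaded file is cached locally as `datasets/mhcinfo/prot/<species>/<gene>.fa`.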

In [24]:
%run 'commons.ipynb'
%run 'mhcinfo.ipynb'

import os
import urllib.request
import ssl
from IPython.core.debugger import Tracer
 
class MHCAlleleSeqProvider(object):
    """Provide protein sequences of MHC alleles, loaded lazily gene by gene.

    The first request for an allele that is not cached loads the protein
    FASTA file of its gene from ``datasets/mhcinfo/prot/<species>/<gene>.fa``,
    downloading that file first when it does not exist locally.  Every
    sequence found in the file is stored in ``_allele_seq_map``, which is a
    class attribute and therefore shared by all instances.
    """

    class FastaSeqLoader(FastaSeqParseListener):
        """Parse listener that stores each parsed FASTA record in a provider."""

        def __init__(self, seq_provider):
            """Remember the provider that receives the parsed sequences."""
            self._seq_provider = seq_provider

        def on_seq_read(self, header=None, seq=None):
            """Store ``seq`` in the provider under the allele name from ``header``."""
            allele_name = self._get_allele_name(header)
            self._seq_provider._set_allele_seq(allele_name, seq)

        def _get_allele_name(self, header):
            """Extract the allele name from a FASTA header and normalize it.

            Headers starting with ``HLA`` are split on whitespace and the
            second token is prefixed with ``HLA-``; any other header is split
            on commas and its second field is used as is.  The result is
            passed through ``MHCAlleleName.std_name`` and then re-formatted
            with ``MHCAlleleName.parse(...).format()``.
            """
            if header.startswith('HLA'):
                tokens = header.split()
                allele_name = 'HLA-' + tokens[1].strip()
            else:
                tokens = header.split(',')
                allele_name = tokens[1].strip()

            allele_name = MHCAlleleName.std_name(allele_name)
            return MHCAlleleName.parse(allele_name).format()

    # Base download URL per species prefix; the gene name and
    # '?type=protein' are appended by _get_fasta_url().
    SPECIES_FASTA_BASEURL_MAP = {
        'Patr': 'https://www.ebi.ac.uk/ipd/mhc/group/NHP/download/Patr',
        'Mamu': 'https://www.ebi.ac.uk/ipd/mhc/group/NHP/download/Mamu',
        # Previously pointed at the Mamu download (copy-paste slip).
        'Gogo': 'https://www.ebi.ac.uk/ipd/mhc/group/NHP/download/Gogo',
        'Eqca': 'https://www.ebi.ac.uk/ipd/mhc/group/ELA/download/Eqca',
        'SLA': 'https://www.ebi.ac.uk/ipd/mhc/group/SLA/download/Susc',
        'BoLA': 'https://www.ebi.ac.uk/ipd/mhc/group/BoLA/download/BoLA',
        'Rano': 'https://www.ebi.ac.uk/ipd/mhc/group/RT1/download/Rano'
    }

    # Class variable: allele name -> protein sequence, shared by all instances.
    _allele_seq_map = {}

    def protein_seq(self, allele_name):
        """Return the protein sequence stored for ``allele_name``.

        When the allele is not cached yet, the FASTA file of its gene is
        loaded first (see ``_load_protein_fasta_by_gene``).

        Raises:
            KeyError: the allele is still unknown after loading its gene file.
            ValueError: the gene file is missing locally and no download URL
                is defined for the species.
        """
        if allele_name not in self._allele_seq_map:
            allele = MHCAlleleName.parse(allele_name)
            self._load_protein_fasta_by_gene(species=allele.species, gene=allele.gene)
        try:
            return self._allele_seq_map[allele_name]
        except KeyError:
            # Same exception type as before, but with a message that says what failed.
            raise KeyError('No protein sequence found for allele %s' % allele_name) from None

    def _load_protein_fasta_by_gene(self, species, gene):
        """Load all sequences of ``gene`` into the cache, downloading if needed.

        Raises:
            ValueError: the local file does not exist and ``species`` has no
                entry in ``SPECIES_FASTA_BASEURL_MAP``.
        """
        fn = 'datasets/mhcinfo/prot/%s/%s.fa' % (species, gene)

        if not os.path.exists(fn):
            url = self._get_fasta_url(species, gene)
            # Validate before announcing the download, so that we never
            # print "Downloading from None".
            if url is None:
                raise ValueError('Undefined URL for %s, %s' % (species, gene))
            print('Downloading from %s' % url)
            # Download and save as the file
            RemoteUtils.download_to(url, decode='utf-8', fnout=fn)

        parser = FastaSeqParser()
        parser.add_parse_listener(MHCAlleleSeqProvider.FastaSeqLoader(self))

        # Read with the same encoding that was requested for the download.
        with open(fn, 'r', encoding='utf-8') as fin:
            print('Loading from file %s' % fn)
            parser.parse(fin)

    def _set_allele_seq(self, allele, seq):
        """Cache ``seq`` for ``allele`` unless a sequence is already stored."""
        self._allele_seq_map.setdefault(allele, seq)

    def _get_fasta_url(self, species, gene):
        """Return the protein FASTA URL for ``gene``, or None for an unmapped species."""
        baseurl = self.SPECIES_FASTA_BASEURL_MAP.get(species)
        if baseurl is None:
            return None
        return '%s/%s?type=protein' % (baseurl, gene)
        
###################################################
from unittest import *

class MHCAlleleSeqProviderTest(TestCase):
    """Check that MHCAlleleSeqProvider returns a valid sequence per allele."""

    def setUp(self):
        """Define the class I and class II allele names used as fixtures."""
        self.classI_alleles = [
            'BoLA-1*023:01', 'BoLA-2*012:01', 'BoLA-3*001:01', 'BoLA-3*002:01',
            'BoLA-6*013:01', 'BoLA-6*041:01', 'BoLA-T2c', 'H2-Db', 'H2-Dd',
            'H2-Kb', 'H2-Kd', 'H2-Kk', 'H2-Ld', 'HLA-A*01:01', 'HLA-A*02:01',
            'HLA-A*02:02', 'HLA-A*02:03', 'HLA-A*02:05', 'HLA-A*02:06',
            'HLA-A*02:07', 'HLA-A*02:11', 'HLA-A*02:12', 'HLA-A*02:16',
            'HLA-A*02:17', 'HLA-A*02:19', 'HLA-A*02:50', 'HLA-A*03:01',
            'HLA-A*03:19', 'HLA-A*11:01', 'HLA-A*23:01', 'HLA-A*24:02',
            'HLA-A*24:03', 'HLA-A*25:01', 'HLA-A*26:01', 'HLA-A*26:02',
            'HLA-A*26:03', 'HLA-A*29:02', 'HLA-A*30:01', 'HLA-A*30:02',
            'HLA-A*31:01', 'HLA-A*32:01', 'HLA-A*32:07', 'HLA-A*32:15',
            'HLA-A*33:01', 'HLA-A*66:01', 'HLA-A*68:01', 'HLA-A*68:02',
            'HLA-A*68:23', 'HLA-A*69:01', 'HLA-A*80:01', 'HLA-B*07:02',
            'HLA-B*08:01', 'HLA-B*08:02', 'HLA-B*08:03', 'HLA-B*14:01',
            'HLA-B*14:02', 'HLA-B*15:01', 'HLA-B*15:02', 'HLA-B*15:03',
            'HLA-B*15:09', 'HLA-B*15:17', 'HLA-B*18:01', 'HLA-B*27:05',
            'HLA-B*27:20', 'HLA-B*35:01', 'HLA-B*35:03', 'HLA-B*37:01',
            'HLA-B*38:01', 'HLA-B*39:01', 'HLA-B*40:01', 'HLA-B*40:02',
            'HLA-B*40:13', 'HLA-B*42:01', 'HLA-B*44:02', 'HLA-B*44:03',
            'HLA-B*45:01', 'HLA-B*46:01', 'HLA-B*48:01', 'HLA-B*51:01',
            'HLA-B*53:01', 'HLA-B*54:01', 'HLA-B*57:01', 'HLA-B*57:03',
            'HLA-B*58:01', 'HLA-B*58:02', 'HLA-B*73:01', 'HLA-B*81:01',
            'HLA-B*83:01', 'HLA-C*03:03', 'HLA-C*04:01', 'HLA-C*05:01',
            'HLA-C*06:02', 'HLA-C*07:01', 'HLA-C*07:02', 'HLA-C*08:02',
            'HLA-C*12:03', 'HLA-C*14:02', 'HLA-C*15:02', 'HLA-E*01:01',
            'HLA-E*01:03', 'Mamu-A1*001:01', 'Mamu-A1*002:01', 'Mamu-A1*007:01',
            'Mamu-A1*011:01', 'Mamu-A1*022:01', 'Mamu-A1*026:01',
            'Mamu-A2*01:02', 'Mamu-A7*01:03', 'Mamu-B*001:01', 'Mamu-B*003:01',
            'Mamu-B*008:01', 'Mamu-B*010:01', 'Mamu-B*017:01', 'Mamu-B*039:01',
            'Mamu-B*052:01', 'Mamu-B*066:01', 'Mamu-B*084:01', 'Mamu-B*087:01',
            'Patr-A*01:01', 'Patr-A*03:01', 'Patr-A*04:01', 'Patr-A*07:01',
            'Patr-A*09:01', 'Patr-B*01:01', 'Patr-B*13:01', 'Patr-B*24:01',
            'Rano-A1*b', 'SLA-1*04:01', 'SLA-1*07:01', 'SLA-2*04:01','SLA-3*04:01']

        # Not exercised by test_protein_seq below; kept as a fixture.
        self.classII_alleles = [
            'HLA-DPA1*01:03/DPB1*02:01', 'HLA-DPA1*01:03/DPB1*04:01', 'HLA-DPA1*02:01/DPB1*01:01',
            'HLA-DPA1*02:01/DPB1*05:01', 'HLA-DPA1*03:01/DPB1*04:02', 'HLA-DQA1*01:01/DQB1*05:01',
            'HLA-DQA1*01:02/DQB1*06:02', 'HLA-DQA1*03:01/DQB1*03:02', 'HLA-DQA1*04:01/DQB1*04:02',
            'HLA-DQA1*05:01/DQB1*02:01', 'HLA-DQA1*05:01/DQB1*03:01', 'HLA-DRB1*01:01', 'HLA-DRB1*03:01',
            'HLA-DRB1*03:02', 'HLA-DRB1*04:01', 'HLA-DRB1*04:04', 'HLA-DRB1*04:05', 'HLA-DRB1*07:01',
            'HLA-DRB1*08:02', 'HLA-DRB1*08:06', 'HLA-DRB1*08:13', 'HLA-DRB1*08:19', 'HLA-DRB1*09:01',
            'HLA-DRB1*11:01', 'HLA-DRB1*12:01', 'HLA-DRB1*12:02', 'HLA-DRB1*13:02', 'HLA-DRB1*14:02',
            'HLA-DRB1*14:04', 'HLA-DRB1*14:12', 'HLA-DRB1*15:01', 'HLA-DRB3*01:01', 'HLA-DRB3*03:01',
            'HLA-DRB4*01:01', 'HLA-DRB5*01:01', 'H2-IAb', 'H2-IAd']

    def test_protein_seq(self):
        """Every class I allele must resolve to an all-amino-acid sequence."""
        seq_provider = MHCAlleleSeqProvider()
        for old_name in self.classI_alleles:
            # One sub-test per allele: a failure is reported with the allele
            # name and does not hide the results of the remaining alleles.
            with self.subTest(allele=old_name):
                std_name = MHCAlleleName.std_name(old_name)
                print('Protein seq for %s:' % (std_name))
                seq = seq_provider.protein_seq(std_name)
                self.assertIsNotNone(seq)
                self.assertTrue(SeqUtils.is_all_amino_acids(seq))
            
############
# suite = TestSuite()
# suite.addTests(TestLoader().loadTestsFromTestCase(MHCAlleleSeqProviderTest))
# TextTestRunner(verbosity=3).run(suite)

test_protein_seq (__main__.MHCAlleleSeqProviderTest) ... 

Protein seq for BoLA-1*023:01:
Loading from file datasets/mhcinfo/prot/BoLA/1.fa
Protein seq for BoLA-2*012:01:
Loading from file datasets/mhcinfo/prot/BoLA/2.fa
Protein seq for BoLA-3*001:01:
Loading from file datasets/mhcinfo/prot/BoLA/3.fa
Protein seq for BoLA-3*002:01:
Protein seq for BoLA-6*013:01:
Loading from file datasets/mhcinfo/prot/BoLA/6.fa
Protein seq for BoLA-6*041:01:
Protein seq for BoLA-T2c:
Loading from file datasets/mhcinfo/prot/BoLA/T2c.fa
Protein seq for H2-Db:
Loading from file datasets/mhcinfo/prot/H2/D.fa
Protein seq for H2-Dd:
Protein seq for H2-Kb:
Loading from file datasets/mhcinfo/prot/H2/K.fa
Protein seq for H2-Kd:
Protein seq for H2-Kk:
Protein seq for H2-Ld:
Loading from file datasets/mhcinfo/prot/H2/L.fa
Protein seq for HLA-A*01:01:
Loading from file datasets/mhcinfo/prot/HLA/A.fa
Protein seq for HLA-A*02:01:
Protein seq for HLA-A*02:02:
Protein seq for HLA-A*02:03:
Protein seq for HLA-A*02:05:
Protein seq for HLA-A*02:06:
Protein seq for HLA-A*02:07:
Pr

ok

----------------------------------------------------------------------
Ran 1 test in 0.253s

OK


<unittest.runner.TextTestResult run=1 errors=0 failures=0>

## Mouse MHC alleles
- Two mouse MHC class II alleles: H2-IAb and H2-IAd. The UniProtKB/Swiss-Prot entries for their alpha and beta chains are listed below.

<code>\>sp|P14434|HA2B_MOUSE H-2 class II histocompatibility antigen, A-B alpha chain OS=Mus musculus GN=H2-Aa PE=1 SV=2
MPRSRALILGVLALTTMLSLCGGEDDIEADHVGTYGISVYQSPGDIGQYTFEFDGDELFY
VDLDKKETVWMLPEFGQLASFDPQGGLQNIAVVKHNLGVLTKRSNSTPATNEAPQATVFP
KSPVLLGQPNTLICFVDNIFPPVINITWLRNSKSVADGVYETSFFVNRDYSFHKLSYLTF
IPSDDDIYDCKVEHWGLEEPVLKHWEPEIPAPMSELTETVVCALGLSVGLVGIVVGTIFI
IQGLRSGGTSRHPGPL
\>sp|P14483|HB2A_MOUSE H-2 class II histocompatibility antigen, A beta chain OS=Mus musculus GN=H2-Ab1 PE=1 SV=1
MALQIPSLLLSAAVVVLMVLSSPGTEGGDSERHFVYQFMGECYFTNGTQRIRYVTRYIYN
REEYVRYDSDVGEHRAVTELGRPDAEYWNSQPEILERTRAELDTVCRHNYEGPETHTSLR
RLEQPNVVISLSRTEALNHHNTLVCSVTDFYPAKIKVRWFRNGQEETVGVSSTQLIRNGD
WTFQVLVMLEMTPRRGEVYTCHVEHPSLKSPITVEWRAQSESAWSKMLSGIGGCVLGVIF
LGLGLFIRHRSQKGPRGPPPAGLLQ
\>sp|P04228|HA2D_MOUSE H-2 class II histocompatibility antigen, A-D alpha chain OS=Mus musculus GN=H2-Aa PE=1 SV=1
MPCSRALILGVLALNTMLSLCGGEDDIEADHVGFYGTTVYQSPGDIGQYTHEFDGDELFY
VDLDKKKTVWRLPEFGQLILFEPQGGLQNIAAEKHNLGILTKRSNFTPATNEAPQATVFP
KSPVLLGQPNTLICFVDNIFPPVINITWLRNSKSVTDGVYETSFLVNRDHSFHKLSYLTF
IPSDDDIYDCKVEHWGLEEPVLKHWEPEIPAPMSELTETVVCALGLSVGLVGIVVGTIFI
IQGLRSGGTSRHPGPL
\>sp|P01921|HB2D_MOUSE H-2 class II histocompatibility antigen, A-D beta chain OS=Mus musculus GN=H2-Ab1 PE=1 SV=1
MALQIPSLLLSAAVVVLMVLSSPRTEGGNSERHFVVQFKGECYYTNGTQRIRLVTRYIYN
REEYVRYDSDVGEYRAVTELGRPDAEYWNSQPEILERTRAEVDTACRHNYEGPETSTSLR
RLEQPNVAISLSRTEALNHHNTLVCSVTDFYPAKIKVRWFRNGQEETVGVSSTQLIRNGD
WTFQVLVMLEMTPHQGEVYTCHVEHPSLKSPITVEWRAQSESARSKMLSGIGGCVLGVIF
LGLGLFIRHRSQKGPRGPPPAGLLQ</code>

In [5]:
# MOUSE_ALLELE_SEQ_MAP = {
#     MHCAlleleName.H2_IAb: ('MPRSRALILGVLALTTMLSLCGGEDDIEADHVGTYGISVYQSPGDIGQYTFEFDGDELFYVDLDKKETVWMLPEFGQLASFDPQGGLQNIAVVKHNLGVLTKRSNSTPATNEAPQATVFPKSPVLLGQPNTLICFVDNIFPPVINITWLRNSKSVADGVYETSFFVNRDYSFHKLSYLTFIPSDDDIYDCKVEHWGLEEPVLKHWEPEIPAPMSELTETVVCALGLSVGLVGIVVGTIFIIQGLRSGGTSRHPGPL', 'MALQIPSLLLSAAVVVLMVLSSPGTEGGDSERHFVYQFMGECYFTNGTQRIRYVTRYIYNREEYVRYDSDVGEHRAVTELGRPDAEYWNSQPEILERTRAELDTVCRHNYEGPETHTSLRRLEQPNVVISLSRTEALNHHNTLVCSVTDFYPAKIKVRWFRNGQEETVGVSSTQLIRNGDWTFQVLVMLEMTPRRGEVYTCHVEHPSLKSPITVEWRAQSESAWSKMLSGIGGCVLGVIFLGLGLFIRHRSQKGPRGPPPAGLLQ'),
#     MHCAlleleName.H2_IAd: ('MPCSRALILGVLALNTMLSLCGGEDDIEADHVGFYGTTVYQSPGDIGQYTHEFDGDELFYVDLDKKKTVWRLPEFGQLILFEPQGGLQNIAAEKHNLGILTKRSNFTPATNEAPQATVFPKSPVLLGQPNTLICFVDNIFPPVINITWLRNSKSVTDGVYETSFLVNRDHSFHKLSYLTFIPSDDDIYDCKVEHWGLEEPVLKHWEPEIPAPMSELTETVVCALGLSVGLVGIVVGTIFIIQGLRSGGTSRHPGPL', 'MALQIPSLLLSAAVVVLMVLSSPRTEGGNSERHFVVQFKGECYYTNGTQRIRLVTRYIYNREEYVRYDSDVGEYRAVTELGRPDAEYWNSQPEILERTRAEVDTACRHNYEGPETSTSLRRLEQPNVAISLSRTEALNHHNTLVCSVTDFYPAKIKVRWFRNGQEETVGVSSTQLIRNGDWTFQVLVMLEMTPHQGEVYTCHVEHPSLKSPITVEWRAQSESARSKMLSGIGGCVLGVIFLGLGLFIRHRSQKGPRGPPPAGLLQ')}

# MOUSE_ALLELE_DOMAIN_OFFSET_MAP = {
#     MHCAlleleName.H2_IAb: (26, 27),
#     MHCAlleleName.H2_IAd: (26, 27)
# }

## MHC allele sequence provider 

In [6]:
# import warnings


# class MHCAlleleSeqProvider(object):
    
#     def allele_seq(self, allele, domain_offset=True):
#         if allele.startswith(MHCAlleleName.HLA):
#             if MHCAlleleName.is_hla_classI(allele):
#                 return self._get_hla_psa(allele).allele_seq(allele, domain_offset=domain_offset)
#             else: # HLA class II
#                 alleleA, alleleB = MHCAlleleName.split_hla2(allele)
#                 seqA = self._get_hla_psa(alleleA).allele_seq(alleleA, domain_offset=domain_offset)
#                 seqB = self._get_hla_psa(alleleB).allele_seq(alleleB, domain_offset=domain_offset)
#                 return seqA, seqB
                
#         elif allele.startswith(MHCAlleleName.H2): # mouse allele
#             seqA, seqB = MOUSE_ALLELE_SEQ_MAP[allele]
#             offsetA = offsetB = 0
#             if domain_offset:
#                 offsetA, offsetB = MOUSE_ALLELE_DOMAIN_OFFSET_MAP[allele]
#             return seqA[offsetA:], seqB[offsetB:]
#         else:
#             raise ValueError('Unknown allele: %s' % allele)
                    
#     def _get_hla_psa(self, allele):
#         hlagene = MHCAlleleName.sub_name(allele, level=1)
#         return IMGTHLAProteinSequenceAlignment.get_psa(hlagene)
        
# #         hlagene = MHCAlleleName.sub_name(allele, level=1)
# #         psa = MHCAlleleSeqProvider.get_psa(hlagene)
# #         seq = psa.allele_seq(allele)
# #         offset = MHCAlleleSeqProvider.DOMAIN_OFFSET_MAP[hlagene] if use_offset else 0
# #         return seq[offset:]
            

# #################################
# from unittest import *

# class MHCAlleleSeqProviderTest(TestCase):
#     def setUp(self):
#         self.seq_provider = MHCAlleleSeqProvider()
#         self.hlaI_alleles =  [
#             'HLA-A*01:01', 'HLA-A*02:01', 'HLA-A*02:02', 'HLA-A*02:03', 'HLA-A*02:05', 'HLA-A*02:06', 'HLA-A*02:07', 
#             'HLA-A*02:11', 'HLA-A*02:12', 'HLA-A*02:16', 'HLA-A*02:17', 'HLA-A*02:19', 'HLA-A*02:50', 'HLA-A*03:01', 
#             'HLA-A*03:19', 'HLA-A*11:01', 'HLA-A*23:01', 'HLA-A*24:02', 'HLA-A*24:03', 'HLA-A*25:01', 'HLA-A*26:01', 
#             'HLA-A*26:02', 'HLA-A*26:03', 'HLA-A*29:02', 'HLA-A*30:01', 'HLA-A*30:02', 'HLA-A*31:01', 'HLA-A*32:01', 
#             'HLA-A*32:07', 'HLA-A*32:15', 'HLA-A*33:01', 'HLA-A*66:01', 'HLA-A*68:01', 'HLA-A*68:02', 'HLA-A*68:23', 
#             'HLA-A*69:01', 'HLA-A*80:01', 'HLA-B*07:02', 'HLA-B*08:01', 'HLA-B*08:02', 'HLA-B*08:03', 'HLA-B*14:01', 
#             'HLA-B*14:02', 'HLA-B*15:01', 'HLA-B*15:02', 'HLA-B*15:03', 'HLA-B*15:09', 'HLA-B*15:17', 'HLA-B*18:01', 
#             'HLA-B*27:05', 'HLA-B*27:20', 'HLA-B*35:01', 'HLA-B*35:03', 'HLA-B*37:01', 'HLA-B*38:01', 'HLA-B*39:01', 
#             'HLA-B*40:01', 'HLA-B*40:02', 'HLA-B*40:13', 'HLA-B*42:01', 'HLA-B*44:02', 'HLA-B*44:03', 'HLA-B*45:01', 
#             'HLA-B*45:06', 'HLA-B*46:01', 'HLA-B*48:01', 'HLA-B*51:01', 'HLA-B*53:01', 'HLA-B*54:01', 'HLA-B*57:01', 
#             'HLA-B*57:03', 'HLA-B*58:01', 'HLA-B*58:02', 'HLA-B*73:01', 'HLA-B*81:01', 'HLA-B*83:01', 'HLA-C*01:02', 
#             'HLA-C*01:03', 'HLA-C*02:02']
#         self.hlaII_alleles = ['HLA-DPA1*01:03/DPB1*02:01', 'HLA-DPA1*01:03/DPB1*04:01', 'HLA-DPA1*02:01/DPB1*01:01', 
#                               'HLA-DPA1*02:01/DPB1*05:01', 'HLA-DPA1*03:01/DPB1*04:02', 'HLA-DQA1*01:01/DQB1*05:01', 
#                               'HLA-DQA1*01:02/DQB1*06:02', 'HLA-DQA1*03:01/DQB1*03:02', 'HLA-DQA1*04:01/DQB1*04:02', 
#                               'HLA-DQA1*05:01/DQB1*02:01', 'HLA-DQA1*05:01/DQB1*03:01', 'HLA-DRB1*01:01', 
#                               'HLA-DRB1*03:01', 'HLA-DRB1*03:02', 'HLA-DRB1*04:01', 'HLA-DRB1*04:04', 'HLA-DRB1*04:05', 
#                               'HLA-DRB1*07:01', 'HLA-DRB1*08:02', 'HLA-DRB1*08:06', 'HLA-DRB1*08:13', 'HLA-DRB1*08:19', 
#                               'HLA-DRB1*09:01', 'HLA-DRB1*11:01', 'HLA-DRB1*12:01', 'HLA-DRB1*12:02', 'HLA-DRB1*13:02', 
#                               'HLA-DRB1*14:02', 'HLA-DRB1*14:04', 'HLA-DRB1*14:12', 'HLA-DRB1*15:01', 'HLA-DRB3*01:01', 
#                               'HLA-DRB3*03:01', 'HLA-DRB4*01:01', 'HLA-DRB5*01:01']
#         self.mouse_alleles = ['H2-IAb', 'H2-IAd']
    
#     def test_allele_seq_for_hlaI(self):
#         for allele in self.hlaI_alleles:
#             seq = self.seq_provider.allele_seq(allele)
#             self.assertTrue(seq is not None)
#             self.assertTrue(len(seq) > 0)
#             self.assertTrue(SeqUtils.is_all_amino_acids(seq, allow_dummy=True))

#     def test_allele_seq_for_hlaII(self):
#         for allele in self.hlaII_alleles:
#             alpha_seq, beta_seq = self.seq_provider.allele_seq(allele)
#             self.assertTrue(alpha_seq is not None)
#             self.assertTrue(len(alpha_seq) > 0)
#             self.assertTrue(SeqUtils.is_all_amino_acids(alpha_seq, allow_dummy=True))
            
#             self.assertTrue(beta_seq is not None)
#             self.assertTrue(len(beta_seq) > 0)
#             self.assertTrue(SeqUtils.is_all_amino_acids(beta_seq, allow_dummy=True))
    
#     def test_allele_seq_for_mouse_alleles(self):
#         for allele in self.mouse_alleles:
#             alpha_seq, beta_seq = self.seq_provider.allele_seq(allele)
#             self.assertTrue(alpha_seq is not None)
#             self.assertTrue(len(alpha_seq) > 0)
#             self.assertTrue(SeqUtils.is_all_amino_acids(alpha_seq, allow_dummy=True))
            
#             self.assertTrue(beta_seq is not None)
#             self.assertTrue(len(beta_seq) > 0)
#             self.assertTrue(SeqUtils.is_all_amino_acids(beta_seq, allow_dummy=True))
        
# # ###########
# # suite = TestSuite()
# # suite.addTests(TestLoader().loadTestsFromTestCase(MHCAlleleSeqProviderTest))
# # TextTestRunner(verbosity=3).run(suite)

In [41]:
# seq_provider = MHCAlleleSeqProvider()
# allele = 'HLA-DRB1*01:01'
# seqA, seqB = seq_provider.allele_seq(allele)
# seqA = np.array(list(seqA))
# seqB = np.array(list(seqB))

# posA = [9, 22, 24, 32, 50, 51, 52, 53, 54, 58, 59, 62, 65, 66, 69, 72]
# refA = 'QFFFRFASFGANVDNI'

# posB =  [11, 13, 56, 57, 60, 61, 70, 71, 78, 81, 82, 85]
# refB = 'LFPDYWQRYHNV'

# posA = np.subtract(posA, 1)
# posB = np.subtract(posB, 1)

# print(''.join(seqA))
# print(''.join(seqA[posA]))
# print(refA, len(refA))
# print(''.join(seqB))
# print(''.join(seqB[posB]))
# print(refB, len(refB))

IKEEHVIIQAEFYLNPDQSGEFMFDFDGDEIFHVDMAKKETVWRLEEFGRFASFEAQGALANIAVDKANLEIMTKRSNYTPITNVPPEVTVLTNSPVELREPNVLICFIDKFTPPVVNVTWLRNGKPVTTGVSETVFLPREDHLFRKFHYLPFLPSTEDVYDCRVEHWGLDEPLLKHWEFDAPSPLPETTENVVCALGLTVGLVGIIIGTIFIIKGVRKSNAAERRGPL
QFFFRFASFGANVDNI
QFFFRFASFGANVDNI 16
GDTRPRFLWQLKFECHFFNGTERVRLLERCIYNQEESVRFDSDVGEYRAVTELGRPDAEYWNSQKDLLEQRRAAVDTYCRHNYGVGESFTVQRRVEPKVTVYPSKTQPLQHHNLLVCSVSGFYPGSIEVRWFRNGQEEKAGVVSTGLIQNGDWTFQTLVMLETVPRSGEVYTCQVEHPSVTSPLTVEWRARSESAQSKMLSGVGGFVLGLLFLGAGLFIYFRNQKGHSGLQPTGFLS
LFPDYWQRYHNV
LFPDYWQRYHNV 12


H-2-IAb	YSYFLASGGQVVHVLYFGYTYHDIRTETVHGPHT
H-2-IAd	YTYHLILGGQAEHILVFGLTYYDIRTETAHGPST
HLA-DPA10103-DPB10201	YAFFMFSGGAILNTLFGQFEYFDIEEVRMHLGMT

In [11]:
# import numpy as np

# allele = 'HLA-DPA1*01:03/DPB1*01:01'
# mhcseq_provider = MHCAlleleSeqProvider()
# alpha_seq, beta_seq = mhcseq_provider.allele_seq(allele)
# beta_seq = beta_seq[:22] + Const.DUMMY_AA + Const.DUMMY_AA + beta_seq[22:]

# alpha_seq = np.array(list(alpha_seq))
# beta_seq = np.array(list(beta_seq))

# refseq = 'YAFFMFSGGAILNTLYGQFEYFAIEKVRVHLDVT'
# posA = np.subtract([9, 11, 22, 24, 31, 52, 53, 58, 59, 61, 65, 66, 68, 72, 73], 1)
# posB = np.subtract([9, 11, 13, 26, 28, 30, 47, 57, 67, 70, 71, 74, 77, 78, 81, 85, 86, 89 ,90], 1)

# print('Allele:', allele)
# print('Alpha:', ''.join(alpha_seq))
# print('ref:', refseq[:15])
# print('chk:', ''.join(alpha_seq[posA]))

# print('Beta:', ''.join(beta_seq))
# print('ref:', refseq[15:])
# print('chk:', ''.join(beta_seq[posB]))

# H-2-IAb	YSYFLASGGQVVHVLYFGYTYHDIRTETVHGPHT
# H-2-IAd	YTYHLILGGQAEHILVFGLTYYDIRTETAHGPST

# allele = 'H2-IAb'
# alpha_seq, beta_seq = mhcseq_provider.allele_seq(allele)
# alpha_seq = np.array(list(alpha_seq))
# beta_seq = np.array(list(beta_seq))

# refseq = 'YSYFLASGGQVVHVLYFGYTYHDIRTETVHGPHT'
# posA = np.subtract([9, 11, 22, 24, 31, 52, 53, 58, 59, 61, 65, 66, 68, 72, 73], 1)
# posB = np.subtract([9, 11, 13, 26, 28, 30, 47, 57, 67, 70, 71, 74, 77, 78, 81, 85, 86, 89 ,90], 1)

# print('Allele:', allele)
# print('Alpha:', ''.join(alpha_seq))
# print('ref:', refseq[:15])
# print('chk:', ''.join(alpha_seq[posA]))

# print('Beta:', ''.join(beta_seq))
# print('ref:', refseq[15:])
# print('chk:', ''.join(beta_seq[posB]))

# allele = 'H2-IAd'
# alpha_seq, beta_seq = mhcseq_provider.allele_seq(allele)
# alpha_seq = np.array(list(alpha_seq))
# beta_seq = np.array(list(beta_seq))

# refseq = 'YTYHLILGGQAEHILVFGLTYYDIRTETAHGPST'
# posA = np.subtract([9, 11, 22, 24, 31, 52, 53, 58, 59, 61, 65, 66, 68, 72, 73], 1)
# posB = np.subtract([9, 11, 13, 26, 28, 30, 47, 57, 67, 70, 71, 74, 77, 78, 81, 85, 86, 89 ,90], 1)

# print('Allele:', allele)
# print('Alpha:', ''.join(alpha_seq))
# print('ref:', refseq[:15])
# print('chk:', ''.join(alpha_seq[posA]))

# print('Beta:', ''.join(beta_seq))
# print('ref:', refseq[15:])
# print('chk:', ''.join(beta_seq[posB]))


"\nimport numpy as np\n\nallele = 'HLA-DPA1*01:03/DPB1*01:01'\nmhcseq_provider = MHCAlleleSeqProvider()\nalpha_seq, beta_seq = mhcseq_provider.allele_seq(allele)\nbeta_seq = beta_seq[:22] + Const.DUMMY_AA + Const.DUMMY_AA + beta_seq[22:]\n\nalpha_seq = np.array(list(alpha_seq))\nbeta_seq = np.array(list(beta_seq))\n\nrefseq = 'YAFFMFSGGAILNTLYGQFEYFAIEKVRVHLDVT'\nposA = np.subtract([9, 11, 22, 24, 31, 52, 53, 58, 59, 61, 65, 66, 68, 72, 73], 1)\nposB = np.subtract([9, 11, 13, 26, 28, 30, 47, 57, 67, 70, 71, 74, 77, 78, 81, 85, 86, 89 ,90], 1)\n\nprint('Allele:', allele)\nprint('Alpha:', ''.join(alpha_seq))\nprint('ref:', refseq[:15])\nprint('chk:', ''.join(alpha_seq[posA]))\n\nprint('Beta:', ''.join(beta_seq))\nprint('ref:', refseq[15:])\nprint('chk:', ''.join(beta_seq[posB]))\n\n# H-2-IAb\tYSYFLASGGQVVHVLYFGYTYHDIRTETVHGPHT\n# H-2-IAd\tYTYHLILGGQAEHILVFGLTYYDIRTETAHGPST\n\n# allele = 'H2-IAb'\n# alpha_seq, beta_seq = mhcseq_provider.allele_seq(allele)\n# alpha_seq = np.array(list(alph

In [145]:
'''
allele = 'H2-IAd'
alpha_seq, beta_seq = mhcseq_provider.allele_seq(allele)
print(alpha_seq)
newseq = alpha_seq[:9] + alpha_seq[10:]
print(newseq)

allele = 'HLA-DPA1*01:03/DPB1*04:01'
alpha_seq, beta_seq = mhcseq_provider.allele_seq(allele)
print(beta_seq[21:])
newseq = beta_seq[:22] + Const.DUMMY_AA + Const.DUMMY_AA + beta_seq[22:]
print(newseq)
'''

IEADHVGFYGTTVYQSPGDIGQYTHEFDGDELFYVDLDKKKTVWRLPEFGQLILFEPQGGLQNIAAEKHNLGILTKRSNFTPATNEAPQATVFPKSPVLLGQPNTLICFVDNIFPPVINITWLRNSKSVTDGVYETSFLVNRDHSFHKLSYLTFIPSDDDIYDCKVEHWGLEEPVLKHWEPEIPAPMSELTETVVCALGLSVGLVGIVVGTIFIIQGLRSGGTSRHPGPL
IEADHVGFYTTVYQSPGDIGQYTHEFDGDELFYVDLDKKKTVWRLPEFGQLILFEPQGGLQNIAAEKHNLGILTKRSNFTPATNEAPQATVFPKSPVLLGQPNTLICFVDNIFPPVINITWLRNSKSVTDGVYETSFLVNRDHSFHKLSYLTFIPSDDDIYDCKVEHWGLEEPVLKHWEPEIPAPMSELTETVVCALGLSVGLVGIVVGTIFIIQGLRSGGTSRHPGPL
QRFLERYIYNREEFARFDSDVGEFRAVTELGRPAAEYWNSQKDILEEKRAVPDRMCRHNYELGGPMTLQRRVQPRVNVSPSKKGPLQHHNLLVCHVTDFYPGSIQVRWFLNGQEETAGVVSTNLIRNGDWTFQILVMLEMTPQQGDVYTCQVEHTSLDSPVTVEWKAQSDSARSKTLTGAGGFVLGLIICGVGIFMHRRSKKVQRGSA
RATPENYLFQGRQECYAFNGTQXXRFLERYIYNREEFARFDSDVGEFRAVTELGRPAAEYWNSQKDILEEKRAVPDRMCRHNYELGGPMTLQRRVQPRVNVSPSKKGPLQHHNLLVCHVTDFYPGSIQVRWFLNGQEETAGVVSTNLIRNGDWTFQILVMLEMTPQQGDVYTCQVEHTSLDSPVTVEWKAQSDSARSKTLTGAGGFVLGLIICGVGIFMHRRSKKVQRGSA
