In [1]:
import sys
sys.path.append("../")
from mir.common import parser, Repertoire, SegmentLibrary
from mir.distances import ClonotypeAligner, AlignGermline
from mir.comparative import DenseMatch

In [2]:
lib = SegmentLibrary.load_default()
db = Repertoire(parser.parse_olga('assets/olga_humanTRB.txt', lib=lib, n=1000))
print(db)

Repertoire of 1000 clonotypes and 1000 cells:
Clonotype 0 CASSWGKGRGLRTDTQYF TGCGCCAGCAGCTGGGGAAAGGGGAGGGGCCTCCGCACAGATACGCAGTATTTT
Clonotype 1 CASSIIVRGIQNTEAFF TGTGCCAGTAGTATTATCGTCAGGGGGATTCAGAACACTGAAGCTTTCTTT
Clonotype 2 CASSLAWGPRNQPQHF TGTGCCAGCAGTTTAGCTTGGGGACCCCGCAATCAGCCCCAGCATTTT
Clonotype 3 CASSLARGAYEQYF TGCGCCAGCAGCTTGGCTCGGGGGGCCTACGAGCAGTACTTC
Clonotype 4 CASSQAVLYEKLFF TGCGCCAGCAGCCAGGCGGTACTTTATGAAAAACTGTTTTTT
...


In [11]:
tenxref = "https://www.10xgenomics.com/resources/application-notes/a-new-way-of-exploring-immunity-linking-highly-multiplexed-antigen-recognition-to-immune-repertoire-and-phenotype/#"
data = parser.parse_vdjdb_slim('assets/vdjdb.slim.txt', lib=lib, gene='TRB', species='HomoSapiens', 
                               warn=False, 
                               filter=lambda x: x[x['reference.id'] != tenxref])
print(data)
len(data)

[Clonotype 7 CASSAFPCREGRNNEQFF, Clonotype 13 CASSHGVGQGPYEQYF, Clonotype 14 CASSLTTESGEQYF, Clonotype 26 CASSLGTLEETQYF, Clonotype 34 CASIPEGGRETQYF, Clonotype 43 CASSLDSLNTIYF, Clonotype 44 CASSQDTASSYEQYF, Clonotype 48 CASSLSYRGNSPLHF, Clonotype 58 CSADGLPISSYNEQFF, Clonotype 59 CASTRADTGELFF, Clonotype 61 CASSFRQGAFGDTQYF, Clonotype 63 CASSFGPRAGTTGELFF, Clonotype 64 CASSKDRNQPQHF, Clonotype 68 CASSTQGSPDEQYF, Clonotype 69 CASSLVDREELFF, Clonotype 70 CSASTTTGLAPVEQYF, Clonotype 71 CASEDSSDGANYGYTF, Clonotype 75 CASRTGASNEQFF, Clonotype 79 CASSVVGNEQFF, Clonotype 92 CASSWGPGSLYGYTF, Clonotype 94 CASSELGARVYEQYF, Clonotype 95 CSASPLLEQYF, Clonotype 98 CASSYGTGKDFEQFF, Clonotype 102 CASSLDLAGITSYNEQFF, Clonotype 103 CASSGASGANVLTF, Clonotype 105 CASSEDRRGSYEQYF, Clonotype 115 CASSEGRISPGELFF, Clonotype 119 CASSIRSGWEQYF, Clonotype 125 CSAENYRLAGITDTQYF, Clonotype 128 CASSLGQAYEQYF, Clonotype 130 CAAGVNTGELFF, Clonotype 131 CASSQEGQQWGRDGYTF, Clonotype 138 CASSFGQGGYEQYF, Clonotype 139

23732

In [12]:
valign = AlignGermline.from_seqs(lib.get_seqaas(gene='TRB', stype='V'))
jalign = AlignGermline.from_seqs(lib.get_seqaas(gene='TRB', stype='J'))
aligner = ClonotypeAligner(v_aligner=valign, j_aligner=jalign)

In [13]:
matcher = DenseMatch(db, aligner)
print(''.join(str(x) for x in matcher.match_single(data[0])))
print([m.__dict__() for m in matcher.match_single(data[0])])
print([m.__dict__() for m in matcher.match(data[0:1])])
print(matcher.match_to_df(data[0:10]))

(v:168.0,j:62.0,cdr3:-2.0)(v:269.0,j:40.0,cdr3:-9.0)(v:303.0,j:44.0,cdr3:-9.0)(v:168.0,j:59.0,cdr3:-9.0)(v:168.0,j:40.0,cdr3:-1.0)(v:266.0,j:48.0,cdr3:-8.0)(v:150.0,j:34.0,cdr3:-13.0)(v:148.0,j:42.0,cdr3:-11.0)(v:283.0,j:48.0,cdr3:-5.0)(v:145.0,j:50.0,cdr3:-12.0)(v:162.0,j:85.0,cdr3:-9.0)(v:283.0,j:59.0,cdr3:-15.0)(v:148.0,j:59.0,cdr3:-2.0)(v:165.0,j:34.0,cdr3:14.0)(v:171.0,j:62.0,cdr3:-3.0)(v:266.0,j:59.0,cdr3:-14.0)(v:269.0,j:44.0,cdr3:9.0)(v:107.0,j:48.0,cdr3:-4.0)(v:107.0,j:40.0,cdr3:-1.0)(v:283.0,j:59.0,cdr3:-6.0)(v:413.0,j:40.0,cdr3:2.0)(v:135.0,j:40.0,cdr3:1.0)(v:145.0,j:62.0,cdr3:-6.0)(v:339.0,j:34.0,cdr3:-14.0)(v:145.0,j:41.0,cdr3:-13.0)(v:144.0,j:40.0,cdr3:-8.0)(v:135.0,j:44.0,cdr3:-7.0)(v:171.0,j:41.0,cdr3:-1.0)(v:145.0,j:35.0,cdr3:7.0)(v:107.0,j:59.0,cdr3:-12.0)(v:142.0,j:85.0,cdr3:-5.0)(v:501.0,j:59.0,cdr3:-23.0)(v:153.0,j:59.0,cdr3:-2.0)(v:144.0,j:59.0,cdr3:1.0)(v:501.0,j:40.0,cdr3:-9.0)(v:283.0,j:48.0,cdr3:-11.0)(v:151.0,j:62.0,cdr3:-17.0)(v:154.0,j:40.0,cdr3:-2.0)(v:303

In [15]:
res = matcher.match_to_df(data, 10, 10)