In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
import itertools
import networkx as nx
from scipy.stats import poisson

%matplotlib inline

In [2]:
# Load the merged TCR table and keep only human paired alpha/beta records.
data_pd = pd.read_csv('data/MergedData.csv', index_col=0)
keep_rows = (data_pd['species'] == 'HomoSapiens') & (data_pd['database'] == 'tcr_ab_pairs')
# .copy() detaches the filtered frame from the raw table, so assigning a column
# to it later in the notebook cannot raise SettingWithCopyWarning.
data_pd = data_pd.loc[keep_rows].copy()
# The labels read from the CSV are kept as the index (it is not reset to 0..n-1).
# data_pd.index = np.arange(data_pd.shape[0])

In [None]:
# antiindex[label] -> positional row number of that label inside data_pd.
# Assumes the index holds non-negative integers (they are used as array offsets).
row_labels = data_pd.index.to_numpy()
# Size the table from the largest label rather than the last one, so it is still
# valid when the index is not sorted. Labels absent from data_pd map to -1.
antiindex = np.full(row_labels.max() + 1, -1, dtype=int)
antiindex[row_labels] = np.arange(row_labels.shape[0])

In [3]:
# Alpha V genes that are collapsed into a single family label.
vgene_grouper = {'TRAV12-1':'TRAV12', 'TRAV12-2':'TRAV12', 'TRAV12-3':'TRAV12'}


def _group_vgenes(vgene_field):
    """Map every gene of a comma-separated list through `vgene_grouper`.

    Duplicates produced by the grouping are dropped and the remaining genes are
    sorted, so the same set of genes always yields the same string. Values that
    are not strings (e.g. missing entries) are returned unchanged.
    """
    if not isinstance(vgene_field, str):
        return vgene_field
    grouped = {vgene_grouper.get(gene, gene) for gene in vgene_field.split(',')}
    return ','.join(sorted(grouped))


# Build a fresh array instead of writing into `.values`, which may be a
# (possibly read-only) view of the frame's own data.
vgene_lists = np.array([_group_vgenes(v) for v in data_pd['alpha.v'].values], dtype=object)

data_pd['alpha.v'] = vgene_lists

In [4]:
# CDR3 strings of both chains, taken straight from the filtered table.
acdr3 = data_pd['alpha.cdr3'].values
bcdr3 = data_pd['beta.cdr3'].values
# Length of every sequence, aligned position by position with acdr3 / bcdr3.
acdr3_length = np.array(list(map(len, acdr3)))
bcdr3_length = np.array(list(map(len, bcdr3)))

In [5]:
# CDR3 length -> 0-based positions that get replaced by 'X' (alpha chain).
auseless_positions = {
    7: [6],
    8: [2],
    9: [1],
    10: [4, 9],
    11: [2, 4, 10],
    12: [2, 11],
    13: [4],
    14: [4, 13],
    15: [4],
    16: [4, 5, 15],
    17: [4, 6, 16],
    18: [5, 6, 17],
    19: [4],
}

# CDR3 length -> 0-based positions that get replaced by 'X' (beta chain).
buseless_positions = {
    7: [],
    8: [],
    9: [2],
    10: [4, 9],
    11: [4, 6],
    12: [4, 11],
    13: [4],
    14: [4, 5, 6, 13],
    15: [4, 14],
    16: [4, 15],
    17: [4, 16],
    18: [7, 8, 11, 17],
    19: [],
}

In [6]:
def _mask_useless(sequences, useless_positions):
    """Return a copy of `sequences` with the listed positions replaced by 'X'.

    `useless_positions` maps a sequence length to the positions to mask;
    sequences whose length is not a key are copied unchanged.
    """
    masked = np.zeros((sequences.shape[0]), dtype=object)
    for row, seq in enumerate(sequences):
        letters = np.array(list(seq))
        if len(letters) in useless_positions:
            letters[useless_positions[len(letters)]] = 'X'
        masked[row] = ''.join(letters)
    return masked


acdr3f = _mask_useless(acdr3, auseless_positions)
bcdr3f = _mask_useless(bcdr3, buseless_positions)

# From here on the masked sequences replace the raw ones.
acdr3, bcdr3 = acdr3f, bcdr3f

In [33]:
def aapClusters(sequences, indexing, mmm=1, delimeter='*'):
    """Group equal-length sequences that coincide after hiding `mmm` positions.

    For every choice of `mmm` positions, each sequence is reduced to a key in
    which those positions are replaced by `delimeter`; sequences sharing a key
    form one cluster.

    Parameters
    ----------
    sequences : sequence of str
        Sequences to compare; the length of the first one is used for all.
    indexing : sequence
        Identifier of each sequence, aligned with `sequences`.
    mmm : int
        Number of positions hidden at a time.
    delimeter : str
        Placeholder written in place of a hidden position.

    Returns
    -------
    list of list
        One list of identifiers per cluster with at least two members, ordered
        by mask and then by first occurrence of the key. Empty input gives [].
    """
    # Nothing to cluster; also avoids reading sequences[0] from an empty input.
    if len(sequences) == 0:
        return []
    l = len(sequences[0])
    clusters = []
    for mask in itertools.combinations(range(l), mmm):
        # Sentinels at both ends turn the hidden positions into slice borders.
        bounds = [-1] + list(mask) + [l]
        groups = {}
        for pos, seq in enumerate(sequences):
            key = delimeter.join(seq[bounds[k] + 1:bounds[k + 1]] for k in range(mmm + 1))
            groups.setdefault(key, []).append(indexing[pos])
        # dicts keep insertion order, so the cluster order is reproducible.
        clusters.extend(members for members in groups.values() if len(members) > 1)
    return clusters

def EdgeListfromClusters(n, clusters):
    """Return every distinct pair of members that share a cluster.

    `n` is accepted but not used. Each pair keeps the order in which its two
    members appear inside the cluster; the order of the returned list is
    unspecified.
    """
    unique_pairs = {
        pair
        for members in clusters
        for pair in itertools.combinations(members, 2)
    }
    return list(unique_pairs)

In [34]:
def _length_class_edges(cdr3, lengths, length_range=range(6, 20)):
    """Collect the cluster edges found inside every CDR3 length class.

    Sequences are grouped by length, clustered with aapClusters within each
    group, and the resulting pairs (positions in `cdr3`) are concatenated.
    """
    edges = []
    positions = np.arange(cdr3.shape[0])
    for l in length_range:
        indexes = positions[lengths == l]
        # aapClusters reads sequences[0], so a length with no sequences must be
        # skipped rather than passed through.
        if indexes.shape[0] == 0:
            continue
        clusters = aapClusters(cdr3[indexes], indexes)
        edges += EdgeListfromClusters(cdr3.shape[0], clusters)
    return edges


aedges = _length_class_edges(acdr3, acdr3_length)
bedges = _length_class_edges(bcdr3, bcdr3_length)

In [35]:
# One neighbour list per row, for the alpha graph and for the beta graph.
n_pairs = acdr3.shape[0]
aadj = [[] for _ in range(n_pairs)]
badj = [[] for _ in range(n_pairs)]

# Every edge is recorded in both directions.
for adjacency, edge_list in ((aadj, aedges), (badj, bedges)):
    for u, v in edge_list:
        adjacency[u].append(v)
        adjacency[v].append(u)

In [36]:
# Neighbours a row has in the alpha graph AND in the beta graph.
superadj = [list(set(alpha_nb) & set(beta_nb)) for alpha_nb, beta_nb in zip(aadj, badj)]

In [37]:
# Number of shared neighbours per row.
superdeg = np.array(list(map(len, superadj)))
# Product of the alpha degree and the beta degree per row.
multdeg = np.array([len(alpha_nb) * len(beta_nb) for alpha_nb, beta_nb in zip(aadj, badj)])

In [38]:
# Largest degree product and largest shared-neighbour count.
multdeg.max(), superdeg.max()

(1012860, 9)

In [39]:
# Overall ratio of shared neighbours to degree products.
superratio = superdeg.sum() / multdeg.sum()

In [49]:
# Poisson mean per row: its degree product scaled by the global ratio, plus one.
e = 1 + multdeg * superratio
# P(X <= observed shared neighbours) under that mean.
cdf = poisson.cdf(superdeg, e)

In [7]:
# The 20 amino-acid letters plus 'X', the placeholder written by the masking step.
aa = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', 'X']
# Letter -> column number in the weight matrix.
aa2num = {letter: column for column, letter in enumerate(aa)}

class PWM:
    """Position weight matrix built from equal-length sequences.

    Attributes
    ----------
    l : int
        Common length of the sequences.
    PWM : ndarray of shape (l, 21)
        log2 of (1 + number of times each letter occurs at each position).
    """

    def __init__(self, sequences):
        """Count letters per position over `sequences` and take log2.

        Fails an assertion when `sequences` is empty or the lengths differ.
        """
        assert len(sequences) > 0
        self.l = len(sequences[0])
        # Every cell starts at one, so unseen letters get log2(1) = 0.
        counts = np.ones((self.l, 21), dtype=float)
        for seq in sequences:
            assert len(seq) == self.l
            for pos, letter in enumerate(seq):
                counts[pos, aa2num[letter]] += 1
        self.PWM = np.log2(counts)

    def Score(self, seq):
        """Return the sum over positions of the weight of `seq`'s letter there."""
        assert self.l == len(seq)
        total = 0
        for pos, letter in enumerate(seq):
            total += self.PWM[pos, aa2num[letter]]
        return total

In [134]:
# Number of anchors to keep.
m = 14

# Positions of the m largest cdf values, in ascending cdf order.
cdf_order = np.argsort(cdf)
anchor_indexes = cdf_order[-m:]
anchor_indexes

array([ 13671, 124947, 168881, 160368, 107865,  47074, 118403, 157116,
        35720,  18048,  35001,  64398,  26329,  89920])

In [135]:
# Per-anchor result buffers, filled by the loop in the next cell.
scores = np.zeros(m)
ship_indexes = np.zeros(m, dtype=object)
a_count = np.zeros(m)
b_count = np.zeros(m)

In [143]:
def _threshold_overlap(ascores, bscores, athr, bthr):
    """Evaluate one (alpha, beta) threshold pair.

    Returns, in order: the positions whose alpha score exceeds `athr` (a), the
    positions whose beta score exceeds `bthr` (b), the positions passing both
    (indx), the overlap size expected if the two selections were independent
    (mu), and the Poisson CDF of the observed overlap under that mean (score).
    """
    a, b = np.where(ascores > athr)[0], np.where(bscores > bthr)[0]
    # dtype=int keeps an empty overlap usable as an index array
    # (np.array([]) alone would be float and fail when indexing).
    indx = np.array(list(set(a) & set(b)), dtype=int)
    mu = a.shape[0] * b.shape[0] / ascores.shape[0]
    score = poisson.cdf(k=indx.shape[0], mu=mu)
    return a, b, indx, mu, score


# Sequence lengths do not depend on the anchor, so measure them once instead of
# once per anchor.
alpha_lengths = np.array([len(seq) for seq in acdr3])
beta_lengths = np.array([len(seq) for seq in bcdr3])
threshold_grid = np.arange(-10, 0)

for i in range(m):
    center = anchor_indexes[i]

    # The anchor together with its neighbours in both chain graphs. An explicit
    # integer array stays a valid index even when the anchor has no neighbours.
    core = np.array(list(superadj[center]) + [center], dtype=int)

    pwm_a = PWM(acdr3[core])
    pwm_b = PWM(bcdr3[core])

    # Candidates: every row whose alpha and beta CDR3 lengths match the anchor's.
    indexes_bool = ((alpha_lengths == len(acdr3[center])) &
                    (beta_lengths == len(bcdr3[center])))
    indexes = np.where(indexes_bool)[0]
    acdr3i = acdr3[indexes]
    bcdr3i = bcdr3[indexes]

    ascores = np.array([pwm_a.Score(a) for a in acdr3i])
    bscores = np.array([pwm_b.Score(b) for b in bcdr3i])

    # Shift so the best candidate scores 0; thresholds are distances below it.
    ascores = ascores - max(ascores)
    bscores = bscores - max(bscores)

    # Grid search for the threshold pair with the highest Poisson CDF.
    maxscore = 0
    maxathr, maxbthr = 0, 0
    for athr, bthr in itertools.product(threshold_grid, threshold_grid):
        a, b, indx, mu, score = _threshold_overlap(ascores, bscores, athr, bthr)
        if maxscore < score:
            maxathr, maxbthr = athr, bthr
            maxscore = score

    # Re-evaluate at the winning thresholds to get the final selections.
    athr, bthr = maxathr, maxbthr
    a, b, indx, mu, score = _threshold_overlap(ascores, bscores, athr, bthr)

    scores[i] = score
    # Positions (within acdr3 / bcdr3) of the rows that pass both thresholds.
    ship_indexes[i] = indexes[indx]
    a_count[i] = a.shape[0]
    b_count[i] = b.shape[0]

In [145]:
# Rows selected for the anchor stored at position 1.
ship_labels = data_pd.index[ship_indexes[1]]
data_pd.loc[ship_labels]

Unnamed: 0,database,species,sample,epitope,antigen,tissue,cell_subset,alpha.v,alpha.j,alpha.cdr3,beta.v,beta.d,beta.j,beta.cdr3,index
155298,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPVNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF,155298
47694,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV25,TRAJ11,CAGPSNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF,47694
20520,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPINSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF,20520
159057,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGGVNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF,159057
127941,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV25,TRAJ11,CAGPVNSGYSTLTF,TRBV7-9,"TRBD2,TRBD1",TRBJ2-1,CASSLAGTYNEQFF,127941
162439,tcr_ab_pairs,HomoSapiens,2,,,,,TRAV25,TRAJ11,CAGPTNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLSGSYNEQFF,162439
167053,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV25,TRAJ11,CAGPQNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF,167053
168937,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPGNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF,168937


In [19]:
# Row labels of the eight records shown in the table above.
ind = [
    155298, 47694, 20520, 159057,
    127941, 162439, 167053, 168937,
]
# Translate the labels into positions usable with acdr3 / bcdr3.
core = antiindex[np.asarray(ind)]

In [21]:
# Weight matrices built from the hand-picked core rows.
pwm_a = PWM(acdr3[core])
pwm_b = PWM(bcdr3[core])

# Candidates: rows whose two CDR3 lengths equal those of the first core member.
ref_alpha_len = len(acdr3[core[0]])
ref_beta_len = len(bcdr3[core[0]])
indexes_bool = [(len(alpha) == ref_alpha_len) & (len(beta) == ref_beta_len)
                for alpha, beta in zip(acdr3, bcdr3)]
indexes = np.flatnonzero(indexes_bool)
acdr3i, bcdr3i = acdr3[indexes], bcdr3[indexes]

ascores = np.array(list(map(pwm_a.Score, acdr3i)))
bscores = np.array(list(map(pwm_b.Score, bcdr3i)))

# Shift so the best candidate scores 0.
ascores = ascores - max(ascores)
bscores = bscores - max(bscores)

# Try every threshold pair and remember the one with the highest Poisson CDF.
maxscore = 0
maxathr, maxbthr = 0, 0
thresholds = np.arange(-10, 0)
for athr in thresholds:
    for bthr in thresholds:
        a = np.flatnonzero(ascores > athr)
        b = np.flatnonzero(bscores > bthr)
        indx = np.array(list(set(a) & set(b)))
        mu = len(a) * len(b) / len(acdr3i)
        score = poisson.cdf(k=len(indx), mu=mu)
        if score > maxscore:
            maxathr, maxbthr = athr, bthr
            maxscore = score

# Recompute the selections at the winning thresholds.
athr, bthr = maxathr, maxbthr
a = np.flatnonzero(ascores > athr)
b = np.flatnonzero(bscores > bthr)
indx = np.array(list(set(a) & set(b)))
mu = len(a) * len(b) / len(acdr3i)
score = poisson.cdf(k=len(indx), mu=mu)

In [None]:
# Positions of the length-matched candidate rows computed in the previous cell.
indexes

In [27]:
# Sizes of the alpha selection and the beta selection, the expected overlap,
# the observed overlap, and the number of candidates.
len(a), len(b), mu, len(indx), len(acdr3i)

(9, 69, 0.06700474751834268, 8, 9268)

In [35]:
# Print the labels of rows that pass the alpha threshold but not the beta one.
indx_set = set(indx)
for pos in a:
    if pos not in indx_set:
        print(data_pd.index[indexes[pos]])

161003


In [37]:
# Rows that pass the alpha threshold.
alpha_labels = data_pd.index[indexes[a]]
data_pd.loc[alpha_labels]

Unnamed: 0,database,species,sample,epitope,antigen,tissue,cell_subset,alpha.v,alpha.j,alpha.cdr3,beta.v,beta.d,beta.j,beta.cdr3
20520,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPINSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
47694,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV25,TRAJ11,CAGPSNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
127941,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV25,TRAJ11,CAGPVNSGYSTLTF,TRBV7-9,"TRBD2,TRBD1",TRBJ2-1,CASSLAGTYNEQFF
155298,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPVNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
159057,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGGVNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
161003,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV27,TRAJ11,CAGGGNSGYSTLTF,"TRBV5-6,TRBV5-7,TRBV5-5","TRBD2,TRBD1",TRBJ1-2,CASSLGTAIYGYTF
162439,tcr_ab_pairs,HomoSapiens,2,,,,,TRAV25,TRAJ11,CAGPTNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLSGSYNEQFF
167053,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV25,TRAJ11,CAGPQNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
168937,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPGNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF


In [47]:
# Boolean row masks for the V/J genes shared by the group shown above.
trbv79 = data_pd['beta.v'].eq('TRBV7-9')
trav25 = data_pd['alpha.v'].eq('TRAV25')
traj11 = data_pd['alpha.j'].eq('TRAJ11')
n_trbv79, n_trav25 = trbv79.sum(), trav25.sum()
# Co-occurrence count expected if the two genes were chosen independently.
e = n_trbv79 * n_trav25 / data_pd.shape[0]
print(n_trbv79, n_trav25, (trbv79 & trav25).sum(), data_pd.shape[0], e)
# data_pd.loc[trav25 & trbv79 & traj11]

5251 3459 120 169568 107.1146029911304


Unnamed: 0,database,species,sample,epitope,antigen,tissue,cell_subset,alpha.v,alpha.j,alpha.cdr3,beta.v,beta.d,beta.j,beta.cdr3
20520,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPINSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
47694,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV25,TRAJ11,CAGPSNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
64808,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPSEGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
127941,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV25,TRAJ11,CAGPVNSGYSTLTF,TRBV7-9,"TRBD2,TRBD1",TRBJ2-1,CASSLAGTYNEQFF
155298,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPVNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
159057,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGGVNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
162439,tcr_ab_pairs,HomoSapiens,2,,,,,TRAV25,TRAJ11,CAGPTNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLSGSYNEQFF
167053,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV25,TRAJ11,CAGPQNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
168937,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPGNSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF


In [58]:
n_rows = data_pd.shape[0]
n_trav25, n_traj11, n_trbv79 = np.sum(trav25), np.sum(traj11), np.sum(trbv79)

print('trav25', n_trav25)
print('traj11', n_traj11)
print('trbv79', n_trbv79)

# Observed pairwise co-occurrence next to the count expected under independence.
e = n_trav25 * n_traj11 / n_rows
print('trav25 & traj11', np.sum(trav25 & traj11), ';expected=', e)
e = n_trbv79 * n_traj11 / n_rows
print('traj11 & trbv79', np.sum(traj11 & trbv79), ';expected=', e)
e = n_trav25 * n_trbv79 / n_rows
print('trbv79 & trav25', np.sum(trbv79 & trav25), ';expected=', e)

# The same comparison for all three genes at once.
e = n_trav25 * n_trbv79 * n_traj11 / n_rows ** 2
print('trbv79 & trav25 & traj11', np.sum(trav25 & trbv79 & traj11), ';expected=', e)

trav25 3459
traj11 2694
trbv79 5251
trav25 & traj11 61 ;expected= 54.95462587280619
traj11 & trbv79 91 ;expected= 83.42490328363843
trbv79 & trav25 120 ;expected= 107.1146029911304
trbv79 & trav25 & traj11 9 ;expected= 1.7017759274043764


Note also that the beta J gene (TRBJ) turns out to be fixed within this group as well.

In [36]:
# Rows that pass the beta threshold.
beta_labels = data_pd.index[indexes[b]]
data_pd.loc[beta_labels]

Unnamed: 0,database,species,sample,epitope,antigen,tissue,cell_subset,alpha.v,alpha.j,alpha.cdr3,beta.v,beta.d,beta.j,beta.cdr3
1263,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV13-2,TRAJ12,CAEPHLDSSYKLIF,TRBV19,TRBD1,TRBJ2-1,CASSRGDSYNEQFF
4119,tcr_ab_pairs,HomoSapiens,2,,,,,TRAV12,TRAJ24,CVVNFDDSWGKLQF,"TRBV4-2,TRBV4-3",TRBD2,TRBJ2-1,CASSGGSSYNEQFF
5252,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV6,TRAJ49,CALGGGRAGNQFYF,"TRBV4-3,TRBV4-2,TRBV4-1",,TRBJ2-1,CASSQDLSYNEQFF
5377,tcr_ab_pairs,HomoSapiens,5,,,,,TRAV13-2,TRAJ23,CAENGNNQGGKLIF,"TRBV12-4,TRBV12-3",,TRBJ2-1,CASSTRSSYNEQFF
9525,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV8-1,TRAJ16,CAVIGRSDGQKLLF,"TRBV6-1,TRBV6-6,TRBV6-9,TRBV6-8,TRBV6-5",,TRBJ2-1,CASSYSGSYNEQFF
14606,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV1-1,TRAJ11,CAVRSILGYSTLTF,TRBV5-1,,TRBJ2-1,CASSLDSSYNEQFF
20194,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV3,TRAJ11,CAVRDLTGYSTLTF,"TRBV6-1,TRBV6-6,TRBV6-8,TRBV6-5,TRBV6-9",,TRBJ2-1,CASSDRGSYNEQFF
20304,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV35,TRAJ52,CAGHGGTSYGKLTF,TRBV5-1,,TRBJ2-1,CASSRDITYNEQFF
20520,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV25,TRAJ11,CAGPINSGYSTLTF,TRBV7-9,,TRBJ2-1,CASSLAGSYNEQFF
22774,tcr_ab_pairs,HomoSapiens,5,,,,,TRAV12,TRAJ15,CAMSARQAGTALIF,TRBV27,,TRBJ2-1,CASSRQSSYNEQFF


In [155]:
# Rows selected for the anchor stored at position 5.
ship_labels = data_pd.index[ship_indexes[5]]
data_pd.loc[ship_labels]

Unnamed: 0,database,species,sample,epitope,antigen,tissue,cell_subset,alpha.v,alpha.j,alpha.cdr3,beta.v,beta.d,beta.j,beta.cdr3,index
164239,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV13-1,TRAJ52,CAASLSGGTSYGKLTF,TRBV27,TRBD1,TRBJ1-2,CASSFDRQYGYTF,164239
123359,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV13-1,TRAJ52,CAASFSGGTSYGKLTF,TRBV27,TRBD1,TRBJ1-2,CASSFDRADGYTF,123359
155197,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV13-1,TRAJ52,CAASIFGGTSYGKLTF,TRBV27,TRBD1,TRBJ1-2,CASSVDRNYGYTF,155197
13985,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV13-1,TRAJ52,CAASLAGGTSYGKLTF,TRBV27,TRBD1,TRBJ1-2,CASSFDRSYGYTF,13985
48185,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV13-1,TRAJ52,CAASLAGGTSYGKLTF,TRBV27,TRBD1,TRBJ1-2,CASSFDRAYGYTF,48185
21210,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV13-1,TRAJ52,CAASLSGGTSYGKLTF,TRBV27,TRBD1,TRBJ1-2,CASSFDRNYGYTF,21210
110435,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV13-1,TRAJ52,CAAVLRGGTSYGKLTF,TRBV27,TRBD1,TRBJ1-2,CASSFDRSYGYTF,110435
13043,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV13-1,TRAJ52,CAASLNGGTSYGKLTF,TRBV27,TRBD1,TRBJ1-2,CASSFDRNYGYTF,13043
172951,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV13-1,TRAJ52,CAASLYGGTSYGKLTF,TRBV27,,TRBJ1-2,CASSFDRTYGYTF,172951


In [157]:
# Rows selected for the anchor stored at position 6.
ship_labels = data_pd.index[ship_indexes[6]]
data_pd.loc[ship_labels]

Unnamed: 0,database,species,sample,epitope,antigen,tissue,cell_subset,alpha.v,alpha.j,alpha.cdr3,beta.v,beta.d,beta.j,beta.cdr3,index
65932,tcr_ab_pairs,HomoSapiens,2,,,,,TRAV12,TRAJ26,CVVGYGQNFVF,TRBV9,,TRBJ1-2,CASSVDPNYGYTF,65932
35812,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV12,TRAJ26,CVEVYGQNFVF,"TRBV4-1,TRBV4-3,TRBV4-2,TRBV4-1,TRBV4-3",,TRBJ1-2,CASSQDGNYGYTF,35812
160904,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV12,TRAJ26,CVVSYGQNFVF,"TRBV4-1,TRBV4-3",,TRBJ1-2,CASSQDGNYGYTF,160904
26923,tcr_ab_pairs,HomoSapiens,32,,,,,TRAV12,TRAJ26,CVSSYGQNFVF,TRBV9,,TRBJ1-2,CASSVDPNYGYTF,26923
92065,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV12,TRAJ26,CVGVYGQNFVF,TRBV9,,TRBJ1-2,CASSEDPNYGYTF,92065
36547,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV12,TRAJ26,CVAPYGQNFVF,TRBV9,,TRBJ1-2,CASSVDGNYGYTF,36547
18458,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV12,TRAJ26,CVVLYGQNFVF,"TRBV5-4,TRBV5-8",,TRBJ1-2,CASSPDGNYGYTF,18458
121246,tcr_ab_pairs,HomoSapiens,3,,,,,TRAV12,TRAJ26,CVGNYGQNFVF,TRBV4-1,,TRBJ1-2,CASSQDGNYGYTF,121246


The other 11 groups are clones of these three.