In [22]:
import argparse
import glob
import json
import numpy as np
import pandas as pd
import tqdm
from pythainlp.ulmfit import rm_useless_spaces

import tensorflow_hub as hub
import tensorflow_text
import tensorflow as tf #tensorflow 2.1.0

#debug
class A:
    """Hard-coded stand-in for parsed command-line arguments while running in the notebook."""

    def __init__(self):
        # Same attribute names, values and assignment order as a real argument namespace would carry.
        settings = (
            ('max_n', 3),                # largest number of consecutive sentences to stitch
            ('use_thres', 0.7),          # minimum score for a matched pair to be kept
            ('bs', 10000),
            ('en_dir', 'data/en_docs/'),
            ('th_dir', 'data/th_docs/'),
            ('output_path', 'data/wiki_sentences.csv'),
        )
        for attr_name, attr_value in settings:
            setattr(self, attr_name, attr_value)


# Module-level configuration object read by the cells below.
args = A()

def stitch_sentences(sent, max_n = 3):
    """Build every run of 1..max_n consecutive sentences, joined into a single string.

    Args:
        sent: list of sentence strings (e.g. the lines of a document).
        max_n: largest number of consecutive sentences to join together.

    Returns:
        A list of ``(start_index, text)`` tuples, ordered by run length first
        (all single sentences, then all pairs, ...) and by start index within
        each run length. ``text`` is the run joined with single spaces, with
        newlines replaced by spaces, stripped, and passed through
        ``rm_useless_spaces`` (pythainlp helper; presumably collapses repeated
        spaces -- confirm against pythainlp).
    """
    stitched = []
    for width in range(1, max_n + 1):
        # A window of `width` sentences can start at 0 .. len(sent) - width.
        last_start = len(sent) - width
        for start in range(last_start + 1):
            window = sent[start:start + width]
            text = ' '.join(window).replace('\n', ' ').strip()
            stitched.append((start, rm_useless_spaces(text)))
    return stitched

def match_sentences(lang1_sentences, lang2_sentences, model):
    """For each lang1 sentence, pick the lang2 sentence whose embedding has the largest dot product.

    Args:
        lang1_sentences: list of strings to find a match for.
        lang2_sentences: list of candidate strings to match against.
        model: callable that maps a list of strings to a 2-D embedding tensor
            with one row per string.

    Returns:
        ``(matched_sentences_lang2, scores)``: two lists aligned with
        ``lang1_sentences``. ``matched_sentences_lang2[i]`` is the best-scoring
        entry of ``lang2_sentences`` for ``lang1_sentences[i]`` and
        ``scores[i]`` is that pair's dot-product score. Both lists are empty
        when ``lang1_sentences`` is empty.

    Raises:
        ValueError: if ``lang1_sentences`` is non-empty but ``lang2_sentences``
            is empty (there is nothing to match against).
    """
    if len(lang1_sentences) == 0:
        return [], []
    if len(lang2_sentences) == 0:
        raise ValueError('lang2_sentences must not be empty')

    embedding_1 = model(lang1_sentences)
    embedding_2 = model(lang2_sentences)
    # Despite the original "distance" naming this is a similarity: larger is better.
    # NOTE(review): it equals cosine similarity only if the model returns
    # unit-norm embeddings -- confirm for the encoder in use.
    similarity_matrix_12 = tf.matmul(embedding_1, embedding_2, transpose_b=True)
    print(embedding_1.shape, embedding_2.shape, similarity_matrix_12.shape)
    best_lang2_idx = tf.argmax(similarity_matrix_12, axis=1).numpy()

    # Pull the matrix out of TensorFlow once and index it with NumPy instead of
    # issuing one eager tensor lookup per row.
    similarity = similarity_matrix_12.numpy()
    row_idx = np.arange(len(best_lang2_idx))
    scores = list(similarity[row_idx, best_lang2_idx])
    matched_sentences_lang2 = [lang2_sentences[j] for j in best_lang2_idx]
    return matched_sentences_lang2, scores




In [2]:
# Load the multilingual Universal Sentence Encoder from TF Hub.
# The "large" variant is kept below as a commented-out alternative.
print('loading model...')
#_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3")
_model = hub.load('https://tfhub.dev/google/universal-sentence-encoder-multilingual/3')
print('model loaded')

# Collect the *.sent files of both languages. The two sorted lists are paired
# up positionally further down, so they must at least have the same length.
en_paths, th_paths = (
    sorted(glob.glob(f'{doc_dir}*.sent'))
    for doc_dir in (args.en_dir, args.th_dir)
)
if len(th_paths) != len(en_paths):
    raise ValueError('must have equal number of documents')
print(f'there are {len(en_paths)} parallel docs')

# `tqdm.tqdm_notebook` is deprecated (it emits a warning pointing at
# `tqdm.notebook.tqdm`), so import the notebook progress bar directly.
from tqdm.notebook import tqdm as notebook_tqdm

# One DataFrame of above-threshold en/th candidate pairs per document pair.
res_en_ths = []
# zip() has no len(), so pass the total explicitly to get a determinate bar.
for en_path, th_path in notebook_tqdm(zip(en_paths,th_paths), total=len(en_paths)):
    print(en_path)
    print(th_path)
    # Explicit encoding so the result does not depend on the platform default
    # (assumes the .sent files are UTF-8 -- confirm against the extraction step).
    with open(en_path,'r',encoding='utf-8') as f:
        sent_en = f.readlines()
    with open(th_path,'r',encoding='utf-8') as f:
        sent_th = f.readlines()

    # Documents with a single line are skipped as before; empty documents are
    # skipped too because there is nothing to embed or match against.
    if len(sent_en)<=1 or len(sent_th)<=1:
        print('skipping...')
        continue

    # Candidate texts are all runs of 1..max_n consecutive sentences;
    # id_* is the index of the first sentence of each run.
    tup_en = stitch_sentences(sent_en,args.max_n)
    sent_en2 = [i[1] for i in tup_en]
    id_en = [i[0] for i in tup_en]
    tup_th = stitch_sentences(sent_th,args.max_n)
    sent_th2 = [i[1] for i in tup_th]
    id_th = [i[0] for i in tup_th]

    matched_sentences_th, scores = match_sentences(sent_en2, sent_th2, _model)
    res_en_th = pd.DataFrame({'en_text':sent_en2,'th_text':matched_sentences_th,'use_score':scores,'id_en':id_en})
    # .assign() returns a new frame, avoiding a SettingWithCopyWarning from
    # adding a column to the filtered slice.
    res_en_th = res_en_th[(res_en_th.use_score>args.use_thres)].assign(src=en_path)
    res_en_ths.append(res_en_th)
    print(f'{res_en_th.shape[0]} sentences above {args.use_thres} threshold')

# Merge the per-document candidates, keep the best-scoring candidate for each
# English start sentence of each document, and save the result.
if not res_en_ths:
    raise ValueError('no document pairs produced any candidates; nothing to save')
df = pd.concat(res_en_ths).dropna().drop_duplicates().reset_index(drop=True)
# Dump of all above-threshold candidates before ranking (hard-coded path in the working directory).
df.to_csv('x.csv',index=False)
# id_en is the start index *within one document* and restarts at 0 for every
# document, so the rank must be computed per (src, id_en). Grouping on id_en
# alone would keep a single row per index value across the whole corpus.
df['rnk'] = df.sort_values('use_score',ascending=False).groupby(['src','id_en']).cumcount()+1
df = df[df.rnk==1]
print(f'saving {df.shape} to {args.output_path}')
df.to_csv(args.output_path,index=False)

loading model...
model loaded
there are 13852 parallel docs


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  del sys.path[0]


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

data/en_docs/id_0.sent
data/th_docs/id_0.sent
(15, 512) (3, 512) (15, 3)
5 sentences above 0.7 threshold
data/en_docs/id_1.sent
data/th_docs/id_1.sent
(15, 512) (3, 512) (15, 3)
5 sentences above 0.7 threshold
data/en_docs/id_10.sent
data/th_docs/id_10.sent
(21, 512) (3, 512) (21, 3)
5 sentences above 0.7 threshold
data/en_docs/id_100.sent
data/th_docs/id_100.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1000.sent
data/th_docs/id_1000.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10000.sent
data/th_docs/id_10000.sent
(9, 512) (3, 512) (9, 3)
1 sentences above 0.7 threshold
data/en_docs/id_10001.sent
data/th_docs/id_10001.sent
(9, 512) (3, 512) (9, 3)
1 sentences above 0.7 threshold
data/en_docs/id_10002.sent
data/th_docs/id_10002.sent
(6, 512) (3, 512) (6, 3)
1 sentences above 0.7 threshold
data/en_docs/id_10003.sent
data/th_docs/id_10003.sent
(123, 512) (54, 512) (123, 54)
36 sentences above 0.7 threshold
data/en_docs/id_

1 sentences above 0.7 threshold
data/en_docs/id_10063.sent
data/th_docs/id_10063.sent
(18, 512) (15, 512) (18, 15)
1 sentences above 0.7 threshold
data/en_docs/id_10064.sent
data/th_docs/id_10064.sent
(18, 512) (102, 512) (18, 102)
1 sentences above 0.7 threshold
data/en_docs/id_10065.sent
data/th_docs/id_10065.sent
(18, 512) (66, 512) (18, 66)
1 sentences above 0.7 threshold
data/en_docs/id_10066.sent
data/th_docs/id_10066.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10067.sent
data/th_docs/id_10067.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10068.sent
data/th_docs/id_10068.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10069.sent
data/th_docs/id_10069.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1007.sent
data/th_docs/id_1007.sent
(165, 512) (21, 512) (165, 21)
26 sentences above 0.7 threshold
data/en_docs/id_10070.sent
data/th_docs/id_10070.sent
(3, 512

13 sentences above 0.7 threshold
data/en_docs/id_10130.sent
data/th_docs/id_10130.sent
(9, 512) (3, 512) (9, 3)
5 sentences above 0.7 threshold
data/en_docs/id_10131.sent
data/th_docs/id_10131.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10132.sent
data/th_docs/id_10132.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10133.sent
data/th_docs/id_10133.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10134.sent
data/th_docs/id_10134.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10135.sent
data/th_docs/id_10135.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10136.sent
data/th_docs/id_10136.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10137.sent
data/th_docs/id_10137.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10138.sent
data/th_docs/id_10138.sent
(3, 512) (3, 512) (3, 3)


(399, 512) (120, 512) (399, 120)
73 sentences above 0.7 threshold
data/en_docs/id_10197.sent
data/th_docs/id_10197.sent
(33, 512) (3, 512) (33, 3)
1 sentences above 0.7 threshold
data/en_docs/id_10198.sent
data/th_docs/id_10198.sent
(711, 512) (12, 512) (711, 12)
11 sentences above 0.7 threshold
data/en_docs/id_10199.sent
data/th_docs/id_10199.sent
(3, 512) (21, 512) (3, 21)
1 sentences above 0.7 threshold
data/en_docs/id_102.sent
data/th_docs/id_102.sent
(3, 512) (21, 512) (3, 21)
1 sentences above 0.7 threshold
data/en_docs/id_1020.sent
data/th_docs/id_1020.sent
(111, 512) (24, 512) (111, 24)
1 sentences above 0.7 threshold
data/en_docs/id_10200.sent
data/th_docs/id_10200.sent
(540, 512) (87, 512) (540, 87)
15 sentences above 0.7 threshold
data/en_docs/id_10201.sent
data/th_docs/id_10201.sent
(36, 512) (45, 512) (36, 45)
2 sentences above 0.7 threshold
data/en_docs/id_10202.sent
data/th_docs/id_10202.sent
(15, 512) (6, 512) (15, 6)
3 sentences above 0.7 threshold
data/en_docs/id_1020

1 sentences above 0.7 threshold
data/en_docs/id_10268.sent
data/th_docs/id_10268.sent
(24, 512) (168, 512) (24, 168)
1 sentences above 0.7 threshold
data/en_docs/id_10269.sent
data/th_docs/id_10269.sent
(69, 512) (63, 512) (69, 63)
46 sentences above 0.7 threshold
data/en_docs/id_1027.sent
data/th_docs/id_1027.sent
(324, 512) (15, 512) (324, 15)
11 sentences above 0.7 threshold
data/en_docs/id_10270.sent
data/th_docs/id_10270.sent
(3, 512) (6, 512) (3, 6)
3 sentences above 0.7 threshold
data/en_docs/id_10271.sent
data/th_docs/id_10271.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10272.sent
data/th_docs/id_10272.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10273.sent
data/th_docs/id_10273.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10274.sent
data/th_docs/id_10274.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10275.sent
data/th_docs/id_10275.sent
(3, 512) (

(399, 512) (33, 512) (399, 33)
7 sentences above 0.7 threshold
data/en_docs/id_10334.sent
data/th_docs/id_10334.sent
(72, 512) (3, 512) (72, 3)
4 sentences above 0.7 threshold
data/en_docs/id_10335.sent
data/th_docs/id_10335.sent
(3, 512) (15, 512) (3, 15)
1 sentences above 0.7 threshold
data/en_docs/id_10336.sent
data/th_docs/id_10336.sent
(3, 512) (27, 512) (3, 27)
1 sentences above 0.7 threshold
data/en_docs/id_10337.sent
data/th_docs/id_10337.sent
(66, 512) (9, 512) (66, 9)
7 sentences above 0.7 threshold
data/en_docs/id_10338.sent
data/th_docs/id_10338.sent
(15, 512) (108, 512) (15, 108)
1 sentences above 0.7 threshold
data/en_docs/id_10339.sent
data/th_docs/id_10339.sent
skipping...
data/en_docs/id_1034.sent
data/th_docs/id_1034.sent
(447, 512) (9, 512) (447, 9)
6 sentences above 0.7 threshold
data/en_docs/id_10340.sent
data/th_docs/id_10340.sent
(63, 512) (36, 512) (63, 36)
1 sentences above 0.7 threshold
data/en_docs/id_10341.sent
data/th_docs/id_10341.sent
(687, 512) (6, 512) 

(426, 512) (33, 512) (426, 33)
58 sentences above 0.7 threshold
data/en_docs/id_104.sent
data/th_docs/id_104.sent
(846, 512) (3, 512) (846, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1040.sent
data/th_docs/id_1040.sent
(105, 512) (3, 512) (105, 3)
1 sentences above 0.7 threshold
data/en_docs/id_10400.sent
data/th_docs/id_10400.sent
(297, 512) (18, 512) (297, 18)
5 sentences above 0.7 threshold
data/en_docs/id_10401.sent
data/th_docs/id_10401.sent
(171, 512) (12, 512) (171, 12)
6 sentences above 0.7 threshold
data/en_docs/id_10402.sent
data/th_docs/id_10402.sent
(399, 512) (6, 512) (399, 6)
8 sentences above 0.7 threshold
data/en_docs/id_10403.sent
data/th_docs/id_10403.sent
(168, 512) (6, 512) (168, 6)
6 sentences above 0.7 threshold
data/en_docs/id_10404.sent
data/th_docs/id_10404.sent
(279, 512) (24, 512) (279, 24)
28 sentences above 0.7 threshold
data/en_docs/id_10405.sent
data/th_docs/id_10405.sent
(666, 512) (12, 512) (666, 12)
16 sentences above 0.7 threshold
data/en_docs

(66, 512) (3, 512) (66, 3)
5 sentences above 0.7 threshold
data/en_docs/id_10466.sent
data/th_docs/id_10466.sent
(717, 512) (3, 512) (717, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10467.sent
data/th_docs/id_10467.sent
(78, 512) (3, 512) (78, 3)
6 sentences above 0.7 threshold
data/en_docs/id_10468.sent
data/th_docs/id_10468.sent
(93, 512) (3, 512) (93, 3)
6 sentences above 0.7 threshold
data/en_docs/id_10469.sent
data/th_docs/id_10469.sent
(30, 512) (3, 512) (30, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1047.sent
data/th_docs/id_1047.sent
(279, 512) (3, 512) (279, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10470.sent
data/th_docs/id_10470.sent
(174, 512) (3, 512) (174, 3)
4 sentences above 0.7 threshold
data/en_docs/id_10471.sent
data/th_docs/id_10471.sent
(18, 512) (9, 512) (18, 9)
13 sentences above 0.7 threshold
data/en_docs/id_10472.sent
data/th_docs/id_10472.sent
(42, 512) (30, 512) (42, 30)
14 sentences above 0.7 threshold
data/en_docs/id_10473.sent
da

3 sentences above 0.7 threshold
data/en_docs/id_10530.sent
data/th_docs/id_10530.sent
(222, 512) (45, 512) (222, 45)
16 sentences above 0.7 threshold
data/en_docs/id_10531.sent
data/th_docs/id_10531.sent
(312, 512) (24, 512) (312, 24)
37 sentences above 0.7 threshold
data/en_docs/id_10532.sent
data/th_docs/id_10532.sent
(171, 512) (12, 512) (171, 12)
21 sentences above 0.7 threshold
data/en_docs/id_10533.sent
data/th_docs/id_10533.sent
(87, 512) (18, 512) (87, 18)
9 sentences above 0.7 threshold
data/en_docs/id_10534.sent
data/th_docs/id_10534.sent
(153, 512) (33, 512) (153, 33)
1 sentences above 0.7 threshold
data/en_docs/id_10535.sent
data/th_docs/id_10535.sent
(6, 512) (3, 512) (6, 3)
4 sentences above 0.7 threshold
data/en_docs/id_10536.sent
data/th_docs/id_10536.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_10537.sent
data/th_docs/id_10537.sent
(975, 512) (66, 512) (975, 66)
96 sentences above 0.7 threshold
data/en_docs/id_10538.sent
data/th_docs/id

(30, 512) (3, 512) (30, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1060.sent
data/th_docs/id_1060.sent
(12, 512) (57, 512) (12, 57)
1 sentences above 0.7 threshold
data/en_docs/id_10600.sent
data/th_docs/id_10600.sent
(24, 512) (12, 512) (24, 12)
6 sentences above 0.7 threshold
data/en_docs/id_10601.sent
data/th_docs/id_10601.sent
(36, 512) (12, 512) (36, 12)
3 sentences above 0.7 threshold
data/en_docs/id_10602.sent
data/th_docs/id_10602.sent
(897, 512) (9, 512) (897, 9)
13 sentences above 0.7 threshold
data/en_docs/id_10603.sent
data/th_docs/id_10603.sent
(33, 512) (12, 512) (33, 12)
7 sentences above 0.7 threshold
data/en_docs/id_10604.sent
data/th_docs/id_10604.sent
(78, 512) (12, 512) (78, 12)
13 sentences above 0.7 threshold
data/en_docs/id_10605.sent
data/th_docs/id_10605.sent
(183, 512) (24, 512) (183, 24)
7 sentences above 0.7 threshold
data/en_docs/id_10606.sent
data/th_docs/id_10606.sent
(210, 512) (3, 512) (210, 3)
5 sentences above 0.7 threshold
data/en_docs/id_106

(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10665.sent
data/th_docs/id_10665.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10666.sent
data/th_docs/id_10666.sent
(405, 512) (6, 512) (405, 6)
1 sentences above 0.7 threshold
data/en_docs/id_10667.sent
data/th_docs/id_10667.sent
(1257, 512) (78, 512) (1257, 78)
20 sentences above 0.7 threshold
data/en_docs/id_10668.sent
data/th_docs/id_10668.sent
(387, 512) (84, 512) (387, 84)
96 sentences above 0.7 threshold
data/en_docs/id_10669.sent
data/th_docs/id_10669.sent
(387, 512) (3, 512) (387, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1067.sent
data/th_docs/id_1067.sent
(30, 512) (3, 512) (30, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10670.sent
data/th_docs/id_10670.sent
(120, 512) (18, 512) (120, 18)
10 sentences above 0.7 threshold
data/en_docs/id_10671.sent
data/th_docs/id_10671.sent
(105, 512) (12, 512) (105, 12)
9 sentences above 0.7 threshold
data/en_docs/id_10

3 sentences above 0.7 threshold
data/en_docs/id_10733.sent
data/th_docs/id_10733.sent
(525, 512) (144, 512) (525, 144)
25 sentences above 0.7 threshold
data/en_docs/id_10734.sent
data/th_docs/id_10734.sent
(90, 512) (15, 512) (90, 15)
5 sentences above 0.7 threshold
data/en_docs/id_10735.sent
data/th_docs/id_10735.sent
(3, 512) (6, 512) (3, 6)
1 sentences above 0.7 threshold
data/en_docs/id_10736.sent
data/th_docs/id_10736.sent
(3, 512) (42, 512) (3, 42)
1 sentences above 0.7 threshold
data/en_docs/id_10737.sent
data/th_docs/id_10737.sent
(57, 512) (3, 512) (57, 3)
1 sentences above 0.7 threshold
data/en_docs/id_10738.sent
data/th_docs/id_10738.sent
(90, 512) (3, 512) (90, 3)
1 sentences above 0.7 threshold
data/en_docs/id_10739.sent
data/th_docs/id_10739.sent
(639, 512) (6, 512) (639, 6)
19 sentences above 0.7 threshold
data/en_docs/id_1074.sent
data/th_docs/id_1074.sent
(60, 512) (117, 512) (60, 117)
4 sentences above 0.7 threshold
data/en_docs/id_10740.sent
data/th_docs/id_10740.sen

(99, 512) (3, 512) (99, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10802.sent
data/th_docs/id_10802.sent
(51, 512) (3, 512) (51, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10803.sent
data/th_docs/id_10803.sent
(12, 512) (3, 512) (12, 3)
4 sentences above 0.7 threshold
data/en_docs/id_10804.sent
data/th_docs/id_10804.sent
(12, 512) (3, 512) (12, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10805.sent
data/th_docs/id_10805.sent
(18, 512) (6, 512) (18, 6)
3 sentences above 0.7 threshold
data/en_docs/id_10806.sent
data/th_docs/id_10806.sent
(21, 512) (3, 512) (21, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10807.sent
data/th_docs/id_10807.sent
(21, 512) (3, 512) (21, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10808.sent
data/th_docs/id_10808.sent
(51, 512) (3, 512) (51, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10809.sent
data/th_docs/id_10809.sent
(12, 512) (3, 512) (12, 3)
4 sentences above 0.7 threshold
data/en_docs/id_1081.sent
data/th_doc

6 sentences above 0.7 threshold
data/en_docs/id_10869.sent
data/th_docs/id_10869.sent
(33, 512) (72, 512) (33, 72)
1 sentences above 0.7 threshold
data/en_docs/id_1087.sent
data/th_docs/id_1087.sent
(252, 512) (3, 512) (252, 3)
4 sentences above 0.7 threshold
data/en_docs/id_10870.sent
data/th_docs/id_10870.sent
(3, 512) (189, 512) (3, 189)
1 sentences above 0.7 threshold
data/en_docs/id_10871.sent
data/th_docs/id_10871.sent
(117, 512) (9, 512) (117, 9)
9 sentences above 0.7 threshold
data/en_docs/id_10872.sent
data/th_docs/id_10872.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_10873.sent
data/th_docs/id_10873.sent
(105, 512) (3, 512) (105, 3)
1 sentences above 0.7 threshold
data/en_docs/id_10874.sent
data/th_docs/id_10874.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_10875.sent
data/th_docs/id_10875.sent
(54, 512) (9, 512) (54, 9)
4 sentences above 0.7 threshold
data/en_docs/id_10876.sent
data/th_docs/id_10876.sent
(3, 51

(534, 512) (6, 512) (534, 6)
5 sentences above 0.7 threshold
data/en_docs/id_10934.sent
data/th_docs/id_10934.sent
(6, 512) (6, 512) (6, 6)
2 sentences above 0.7 threshold
data/en_docs/id_10935.sent
data/th_docs/id_10935.sent
(3, 512) (42, 512) (3, 42)
1 sentences above 0.7 threshold
data/en_docs/id_10936.sent
data/th_docs/id_10936.sent
(3, 512) (3, 512) (3, 3)
2 sentences above 0.7 threshold
data/en_docs/id_10937.sent
data/th_docs/id_10937.sent
(150, 512) (60, 512) (150, 60)
20 sentences above 0.7 threshold
data/en_docs/id_10938.sent
data/th_docs/id_10938.sent
(15, 512) (45, 512) (15, 45)
5 sentences above 0.7 threshold
data/en_docs/id_10939.sent
data/th_docs/id_10939.sent
(30, 512) (54, 512) (30, 54)
9 sentences above 0.7 threshold
data/en_docs/id_1094.sent
data/th_docs/id_1094.sent
(192, 512) (9, 512) (192, 9)
10 sentences above 0.7 threshold
data/en_docs/id_10940.sent
data/th_docs/id_10940.sent
(6, 512) (51, 512) (6, 51)
1 sentences above 0.7 threshold
data/en_docs/id_10941.sent
da

(24, 512) (3, 512) (24, 3)
6 sentences above 0.7 threshold
data/en_docs/id_110.sent
data/th_docs/id_110.sent
(402, 512) (78, 512) (402, 78)
77 sentences above 0.7 threshold
data/en_docs/id_1100.sent
data/th_docs/id_1100.sent
(75, 512) (3, 512) (75, 3)
5 sentences above 0.7 threshold
data/en_docs/id_11000.sent
data/th_docs/id_11000.sent
(78, 512) (3, 512) (78, 3)
4 sentences above 0.7 threshold
data/en_docs/id_11001.sent
data/th_docs/id_11001.sent
(78, 512) (3, 512) (78, 3)
4 sentences above 0.7 threshold
data/en_docs/id_11002.sent
data/th_docs/id_11002.sent
(54, 512) (3, 512) (54, 3)
4 sentences above 0.7 threshold
data/en_docs/id_11003.sent
data/th_docs/id_11003.sent
(24, 512) (3, 512) (24, 3)
6 sentences above 0.7 threshold
data/en_docs/id_11004.sent
data/th_docs/id_11004.sent
(402, 512) (3, 512) (402, 3)
4 sentences above 0.7 threshold
data/en_docs/id_11005.sent
data/th_docs/id_11005.sent
(15, 512) (9, 512) (15, 9)
8 sentences above 0.7 threshold
data/en_docs/id_11006.sent
data/th_d

(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11073.sent
data/th_docs/id_11073.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11074.sent
data/th_docs/id_11074.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11075.sent
data/th_docs/id_11075.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11076.sent
data/th_docs/id_11076.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11077.sent
data/th_docs/id_11077.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11078.sent
data/th_docs/id_11078.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11079.sent
data/th_docs/id_11079.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1108.sent
data/th_docs/id_1108.sent
(54, 512) (24, 512) (54, 24)
1 sentences above 0.7 threshold
data/en_docs/id_11080.sent
data/th_docs/id_11080.sent

9 sentences above 0.7 threshold
data/en_docs/id_11139.sent
data/th_docs/id_11139.sent
(762, 512) (327, 512) (762, 327)
253 sentences above 0.7 threshold
data/en_docs/id_1114.sent
data/th_docs/id_1114.sent
(165, 512) (3, 512) (165, 3)
4 sentences above 0.7 threshold
data/en_docs/id_11140.sent
data/th_docs/id_11140.sent
(111, 512) (468, 512) (111, 468)
9 sentences above 0.7 threshold
data/en_docs/id_11141.sent
data/th_docs/id_11141.sent
(3, 512) (15, 512) (3, 15)
1 sentences above 0.7 threshold
data/en_docs/id_11142.sent
data/th_docs/id_11142.sent
(24, 512) (6, 512) (24, 6)
1 sentences above 0.7 threshold
data/en_docs/id_11143.sent
data/th_docs/id_11143.sent
(237, 512) (6, 512) (237, 6)
22 sentences above 0.7 threshold
data/en_docs/id_11144.sent
data/th_docs/id_11144.sent
(36, 512) (3, 512) (36, 3)
6 sentences above 0.7 threshold
data/en_docs/id_11145.sent
data/th_docs/id_11145.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_11146.sent
data/th_docs/id_11146.

(18, 512) (21, 512) (18, 21)
2 sentences above 0.7 threshold
data/en_docs/id_11207.sent
data/th_docs/id_11207.sent
(105, 512) (12, 512) (105, 12)
13 sentences above 0.7 threshold
data/en_docs/id_11208.sent
data/th_docs/id_11208.sent
(12, 512) (3, 512) (12, 3)
4 sentences above 0.7 threshold
data/en_docs/id_11209.sent
data/th_docs/id_11209.sent
(12, 512) (3, 512) (12, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1121.sent
data/th_docs/id_1121.sent
(33, 512) (9, 512) (33, 9)
8 sentences above 0.7 threshold
data/en_docs/id_11210.sent
data/th_docs/id_11210.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11211.sent
data/th_docs/id_11211.sent
(342, 512) (69, 512) (342, 69)
15 sentences above 0.7 threshold
data/en_docs/id_11212.sent
data/th_docs/id_11212.sent
(117, 512) (54, 512) (117, 54)
2 sentences above 0.7 threshold
data/en_docs/id_11213.sent
data/th_docs/id_11213.sent
(63, 512) (141, 512) (63, 141)
2 sentences above 0.7 threshold
data/en_docs/id_11214

45 sentences above 0.7 threshold
data/en_docs/id_11274.sent
data/th_docs/id_11274.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11275.sent
data/th_docs/id_11275.sent
(9, 512) (3, 512) (9, 3)
4 sentences above 0.7 threshold
data/en_docs/id_11276.sent
data/th_docs/id_11276.sent
(15, 512) (3, 512) (15, 3)
5 sentences above 0.7 threshold
data/en_docs/id_11277.sent
data/th_docs/id_11277.sent
(285, 512) (15, 512) (285, 15)
6 sentences above 0.7 threshold
data/en_docs/id_11278.sent
data/th_docs/id_11278.sent
(3, 512) (90, 512) (3, 90)
1 sentences above 0.7 threshold
data/en_docs/id_11279.sent
data/th_docs/id_11279.sent
(24, 512) (39, 512) (24, 39)
1 sentences above 0.7 threshold
data/en_docs/id_1128.sent
data/th_docs/id_1128.sent
(168, 512) (9, 512) (168, 9)
10 sentences above 0.7 threshold
data/en_docs/id_11280.sent
data/th_docs/id_11280.sent
(84, 512) (27, 512) (84, 27)
25 sentences above 0.7 threshold
data/en_docs/id_11281.sent
data/th_docs/id_11281.sent
(63

17 sentences above 0.7 threshold
data/en_docs/id_1134.sent
data/th_docs/id_1134.sent
(60, 512) (3, 512) (60, 3)
6 sentences above 0.7 threshold
data/en_docs/id_11340.sent
data/th_docs/id_11340.sent
(3, 512) (9, 512) (3, 9)
1 sentences above 0.7 threshold
data/en_docs/id_11341.sent
data/th_docs/id_11341.sent
(141, 512) (6, 512) (141, 6)
1 sentences above 0.7 threshold
data/en_docs/id_11342.sent
data/th_docs/id_11342.sent
(1269, 512) (6, 512) (1269, 6)
4 sentences above 0.7 threshold
data/en_docs/id_11343.sent
data/th_docs/id_11343.sent
(891, 512) (24, 512) (891, 24)
4 sentences above 0.7 threshold
data/en_docs/id_11344.sent
data/th_docs/id_11344.sent
(249, 512) (21, 512) (249, 21)
5 sentences above 0.7 threshold
data/en_docs/id_11345.sent
data/th_docs/id_11345.sent
(75, 512) (9, 512) (75, 9)
1 sentences above 0.7 threshold
data/en_docs/id_11346.sent
data/th_docs/id_11346.sent
(150, 512) (12, 512) (150, 12)
10 sentences above 0.7 threshold
data/en_docs/id_11347.sent
data/th_docs/id_11347

(297, 512) (33, 512) (297, 33)
17 sentences above 0.7 threshold
data/en_docs/id_11406.sent
data/th_docs/id_11406.sent
(447, 512) (6, 512) (447, 6)
5 sentences above 0.7 threshold
data/en_docs/id_11407.sent
data/th_docs/id_11407.sent
(24, 512) (54, 512) (24, 54)
5 sentences above 0.7 threshold
data/en_docs/id_11408.sent
data/th_docs/id_11408.sent
(687, 512) (9, 512) (687, 9)
16 sentences above 0.7 threshold
data/en_docs/id_11409.sent
data/th_docs/id_11409.sent
(690, 512) (12, 512) (690, 12)
16 sentences above 0.7 threshold
data/en_docs/id_1141.sent
data/th_docs/id_1141.sent
(48, 512) (9, 512) (48, 9)
6 sentences above 0.7 threshold
data/en_docs/id_11410.sent
data/th_docs/id_11410.sent
(33, 512) (3, 512) (33, 3)
1 sentences above 0.7 threshold
data/en_docs/id_11411.sent
data/th_docs/id_11411.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11412.sent
data/th_docs/id_11412.sent
(81, 512) (15, 512) (81, 15)
1 sentences above 0.7 threshold
data/en_docs/id_11413.

4 sentences above 0.7 threshold
data/en_docs/id_11472.sent
data/th_docs/id_11472.sent
(234, 512) (12, 512) (234, 12)
1 sentences above 0.7 threshold
data/en_docs/id_11473.sent
data/th_docs/id_11473.sent
(3, 512) (132, 512) (3, 132)
1 sentences above 0.7 threshold
data/en_docs/id_11474.sent
data/th_docs/id_11474.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11475.sent
data/th_docs/id_11475.sent
(27, 512) (66, 512) (27, 66)
3 sentences above 0.7 threshold
data/en_docs/id_11476.sent
data/th_docs/id_11476.sent
(108, 512) (51, 512) (108, 51)
23 sentences above 0.7 threshold
data/en_docs/id_11477.sent
data/th_docs/id_11477.sent
(3, 512) (12, 512) (3, 12)
1 sentences above 0.7 threshold
data/en_docs/id_11478.sent
data/th_docs/id_11478.sent
(51, 512) (6, 512) (51, 6)
6 sentences above 0.7 threshold
data/en_docs/id_11479.sent
data/th_docs/id_11479.sent
(222, 512) (150, 512) (222, 150)
107 sentences above 0.7 threshold
data/en_docs/id_1148.sent
data/th_docs/id_114

1 sentences above 0.7 threshold
data/en_docs/id_11537.sent
data/th_docs/id_11537.sent
(126, 512) (3, 512) (126, 3)
1 sentences above 0.7 threshold
data/en_docs/id_11538.sent
data/th_docs/id_11538.sent
(126, 512) (6, 512) (126, 6)
2 sentences above 0.7 threshold
data/en_docs/id_11539.sent
data/th_docs/id_11539.sent
(60, 512) (3, 512) (60, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1154.sent
data/th_docs/id_1154.sent
(2844, 512) (57, 512) (2844, 57)
80 sentences above 0.7 threshold
data/en_docs/id_11540.sent
data/th_docs/id_11540.sent
(126, 512) (21, 512) (126, 21)
4 sentences above 0.7 threshold
data/en_docs/id_11541.sent
data/th_docs/id_11541.sent
(1572, 512) (6, 512) (1572, 6)
1 sentences above 0.7 threshold
data/en_docs/id_11542.sent
data/th_docs/id_11542.sent
(99, 512) (3, 512) (99, 3)
6 sentences above 0.7 threshold
data/en_docs/id_11543.sent
data/th_docs/id_11543.sent
(186, 512) (9, 512) (186, 9)
11 sentences above 0.7 threshold
data/en_docs/id_11544.sent
data/th_docs/id_1

19 sentences above 0.7 threshold
data/en_docs/id_11602.sent
data/th_docs/id_11602.sent
(318, 512) (3, 512) (318, 3)
1 sentences above 0.7 threshold
data/en_docs/id_11603.sent
data/th_docs/id_11603.sent
(90, 512) (93, 512) (90, 93)
1 sentences above 0.7 threshold
data/en_docs/id_11604.sent
data/th_docs/id_11604.sent
(126, 512) (27, 512) (126, 27)
9 sentences above 0.7 threshold
data/en_docs/id_11605.sent
data/th_docs/id_11605.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11606.sent
data/th_docs/id_11606.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11607.sent
data/th_docs/id_11607.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11608.sent
data/th_docs/id_11608.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11609.sent
data/th_docs/id_11609.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1161.sent
data/th_docs/id_1161.sent
(99, 512) (33

61 sentences above 0.7 threshold
data/en_docs/id_11669.sent
data/th_docs/id_11669.sent
(2046, 512) (129, 512) (2046, 129)
1 sentences above 0.7 threshold
data/en_docs/id_1167.sent
data/th_docs/id_1167.sent
(609, 512) (18, 512) (609, 18)
1 sentences above 0.7 threshold
data/en_docs/id_11670.sent
data/th_docs/id_11670.sent
(135, 512) (15, 512) (135, 15)
16 sentences above 0.7 threshold
data/en_docs/id_11671.sent
data/th_docs/id_11671.sent
(525, 512) (84, 512) (525, 84)
36 sentences above 0.7 threshold
data/en_docs/id_11672.sent
data/th_docs/id_11672.sent
(30, 512) (123, 512) (30, 123)
8 sentences above 0.7 threshold
data/en_docs/id_11673.sent
data/th_docs/id_11673.sent
(150, 512) (15, 512) (150, 15)
39 sentences above 0.7 threshold
data/en_docs/id_11674.sent
data/th_docs/id_11674.sent
(726, 512) (3, 512) (726, 3)
6 sentences above 0.7 threshold
data/en_docs/id_11675.sent
data/th_docs/id_11675.sent
(3, 512) (18, 512) (3, 18)
1 sentences above 0.7 threshold
data/en_docs/id_11676.sent
data/

(291, 512) (426, 512) (291, 426)
113 sentences above 0.7 threshold
data/en_docs/id_11735.sent
data/th_docs/id_11735.sent
(3, 512) (21, 512) (3, 21)
1 sentences above 0.7 threshold
data/en_docs/id_11736.sent
data/th_docs/id_11736.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11737.sent
data/th_docs/id_11737.sent
(69, 512) (18, 512) (69, 18)
2 sentences above 0.7 threshold
data/en_docs/id_11738.sent
data/th_docs/id_11738.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11739.sent
data/th_docs/id_11739.sent
(45, 512) (6, 512) (45, 6)
6 sentences above 0.7 threshold
data/en_docs/id_1174.sent
data/th_docs/id_1174.sent
(1284, 512) (585, 512) (1284, 585)
513 sentences above 0.7 threshold
data/en_docs/id_11740.sent
data/th_docs/id_11740.sent
(129, 512) (9, 512) (129, 9)
1 sentences above 0.7 threshold
data/en_docs/id_11741.sent
data/th_docs/id_11741.sent
(3, 512) (12, 512) (3, 12)
1 sentences above 0.7 threshold
data/en_docs/id_11742

20 sentences above 0.7 threshold
data/en_docs/id_1180.sent
data/th_docs/id_1180.sent
(6, 512) (3, 512) (6, 3)
4 sentences above 0.7 threshold
data/en_docs/id_11800.sent
data/th_docs/id_11800.sent
(108, 512) (33, 512) (108, 33)
9 sentences above 0.7 threshold
data/en_docs/id_11801.sent
data/th_docs/id_11801.sent
(243, 512) (15, 512) (243, 15)
9 sentences above 0.7 threshold
data/en_docs/id_11802.sent
data/th_docs/id_11802.sent
(75, 512) (12, 512) (75, 12)
6 sentences above 0.7 threshold
data/en_docs/id_11803.sent
data/th_docs/id_11803.sent
(267, 512) (36, 512) (267, 36)
32 sentences above 0.7 threshold
data/en_docs/id_11804.sent
data/th_docs/id_11804.sent
(192, 512) (27, 512) (192, 27)
24 sentences above 0.7 threshold
data/en_docs/id_11805.sent
data/th_docs/id_11805.sent
(156, 512) (75, 512) (156, 75)
10 sentences above 0.7 threshold
data/en_docs/id_11806.sent
data/th_docs/id_11806.sent
(378, 512) (54, 512) (378, 54)
28 sentences above 0.7 threshold
data/en_docs/id_11807.sent
data/th_do

4 sentences above 0.7 threshold
data/en_docs/id_11866.sent
data/th_docs/id_11866.sent
(24, 512) (45, 512) (24, 45)
4 sentences above 0.7 threshold
data/en_docs/id_11867.sent
data/th_docs/id_11867.sent
(33, 512) (27, 512) (33, 27)
2 sentences above 0.7 threshold
data/en_docs/id_11868.sent
data/th_docs/id_11868.sent
(78, 512) (72, 512) (78, 72)
7 sentences above 0.7 threshold
data/en_docs/id_11869.sent
data/th_docs/id_11869.sent
(1425, 512) (9, 512) (1425, 9)
7 sentences above 0.7 threshold
data/en_docs/id_1187.sent
data/th_docs/id_1187.sent
(111, 512) (12, 512) (111, 12)
1 sentences above 0.7 threshold
data/en_docs/id_11870.sent
data/th_docs/id_11870.sent
(366, 512) (3, 512) (366, 3)
5 sentences above 0.7 threshold
data/en_docs/id_11871.sent
data/th_docs/id_11871.sent
(174, 512) (18, 512) (174, 18)
12 sentences above 0.7 threshold
data/en_docs/id_11872.sent
data/th_docs/id_11872.sent
(735, 512) (57, 512) (735, 57)
93 sentences above 0.7 threshold
data/en_docs/id_11873.sent
data/th_docs/

(36, 512) (72, 512) (36, 72)
1 sentences above 0.7 threshold
data/en_docs/id_11930.sent
data/th_docs/id_11930.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_11931.sent
data/th_docs/id_11931.sent
(105, 512) (120, 512) (105, 120)
62 sentences above 0.7 threshold
data/en_docs/id_11932.sent
data/th_docs/id_11932.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_11933.sent
data/th_docs/id_11933.sent
(156, 512) (51, 512) (156, 51)
26 sentences above 0.7 threshold
data/en_docs/id_11934.sent
data/th_docs/id_11934.sent
(237, 512) (18, 512) (237, 18)
5 sentences above 0.7 threshold
data/en_docs/id_11935.sent
data/th_docs/id_11935.sent
(222, 512) (30, 512) (222, 30)
10 sentences above 0.7 threshold
data/en_docs/id_11936.sent
data/th_docs/id_11936.sent
(1176, 512) (36, 512) (1176, 36)
101 sentences above 0.7 threshold
data/en_docs/id_11937.sent
data/th_docs/id_11937.sent
(3, 512) (27, 512) (3, 27)
1 sentences above 0.7 threshold
data/en_do

1 sentences above 0.7 threshold
data/en_docs/id_11995.sent
data/th_docs/id_11995.sent
(156, 512) (6, 512) (156, 6)
12 sentences above 0.7 threshold
data/en_docs/id_11996.sent
data/th_docs/id_11996.sent
(84, 512) (12, 512) (84, 12)
6 sentences above 0.7 threshold
data/en_docs/id_11997.sent
data/th_docs/id_11997.sent
(33, 512) (45, 512) (33, 45)
14 sentences above 0.7 threshold
data/en_docs/id_11998.sent
data/th_docs/id_11998.sent
(102, 512) (57, 512) (102, 57)
4 sentences above 0.7 threshold
data/en_docs/id_11999.sent
data/th_docs/id_11999.sent
(117, 512) (30, 512) (117, 30)
34 sentences above 0.7 threshold
data/en_docs/id_12.sent
data/th_docs/id_12.sent
(42, 512) (3, 512) (42, 3)
4 sentences above 0.7 threshold
data/en_docs/id_120.sent
data/th_docs/id_120.sent
(921, 512) (15, 512) (921, 15)
52 sentences above 0.7 threshold
data/en_docs/id_1200.sent
data/th_docs/id_1200.sent
(57, 512) (9, 512) (57, 9)
3 sentences above 0.7 threshold
data/en_docs/id_12000.sent
data/th_docs/id_12000.sent


(153, 512) (9, 512) (153, 9)
3 sentences above 0.7 threshold
data/en_docs/id_12060.sent
data/th_docs/id_12060.sent
(402, 512) (15, 512) (402, 15)
13 sentences above 0.7 threshold
data/en_docs/id_12061.sent
data/th_docs/id_12061.sent
(90, 512) (3, 512) (90, 3)
6 sentences above 0.7 threshold
data/en_docs/id_12062.sent
data/th_docs/id_12062.sent
(63, 512) (3, 512) (63, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12063.sent
data/th_docs/id_12063.sent
(21, 512) (3, 512) (21, 3)
1 sentences above 0.7 threshold
data/en_docs/id_12064.sent
data/th_docs/id_12064.sent
(18, 512) (3, 512) (18, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12065.sent
data/th_docs/id_12065.sent
(9, 512) (3, 512) (9, 3)
4 sentences above 0.7 threshold
data/en_docs/id_12066.sent
data/th_docs/id_12066.sent
(108, 512) (3, 512) (108, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12067.sent
data/th_docs/id_12067.sent
(33, 512) (3, 512) (33, 3)
5 sentences above 0.7 threshold
data/en_docs/id_12068.sent
dat

1 sentences above 0.7 threshold
data/en_docs/id_12129.sent
data/th_docs/id_12129.sent
(9, 512) (12, 512) (9, 12)
1 sentences above 0.7 threshold
data/en_docs/id_1213.sent
data/th_docs/id_1213.sent
(6, 512) (12, 512) (6, 12)
5 sentences above 0.7 threshold
data/en_docs/id_12130.sent
data/th_docs/id_12130.sent
(12, 512) (12, 512) (12, 12)
1 sentences above 0.7 threshold
data/en_docs/id_12131.sent
data/th_docs/id_12131.sent
(1215, 512) (45, 512) (1215, 45)
23 sentences above 0.7 threshold
data/en_docs/id_12132.sent
data/th_docs/id_12132.sent
(216, 512) (21, 512) (216, 21)
13 sentences above 0.7 threshold
data/en_docs/id_12133.sent
data/th_docs/id_12133.sent
(60, 512) (123, 512) (60, 123)
21 sentences above 0.7 threshold
data/en_docs/id_12134.sent
data/th_docs/id_12134.sent
(210, 512) (42, 512) (210, 42)
1 sentences above 0.7 threshold
data/en_docs/id_12135.sent
data/th_docs/id_12135.sent
(87, 512) (6, 512) (87, 6)
10 sentences above 0.7 threshold
data/en_docs/id_12136.sent
data/th_docs/id

3 sentences above 0.7 threshold
data/en_docs/id_122.sent
data/th_docs/id_122.sent
(99, 512) (6, 512) (99, 6)
1 sentences above 0.7 threshold
data/en_docs/id_1220.sent
data/th_docs/id_1220.sent
(1386, 512) (342, 512) (1386, 342)
165 sentences above 0.7 threshold
data/en_docs/id_12200.sent
data/th_docs/id_12200.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12201.sent
data/th_docs/id_12201.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12202.sent
data/th_docs/id_12202.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12203.sent
data/th_docs/id_12203.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12204.sent
data/th_docs/id_12204.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12205.sent
data/th_docs/id_12205.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12206.sent
data/th_docs/id_12206.sent
(3, 512) (3, 512) 

12 sentences above 0.7 threshold
data/en_docs/id_12270.sent
data/th_docs/id_12270.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12271.sent
data/th_docs/id_12271.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12272.sent
data/th_docs/id_12272.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12273.sent
data/th_docs/id_12273.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12274.sent
data/th_docs/id_12274.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12275.sent
data/th_docs/id_12275.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12276.sent
data/th_docs/id_12276.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12277.sent
data/th_docs/id_12277.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12278.sent
data/th_docs/id_12278.sent
(3, 512) (3, 512) (3, 3)


(183, 512) (3, 512) (183, 3)
5 sentences above 0.7 threshold
data/en_docs/id_1234.sent
data/th_docs/id_1234.sent
(9, 512) (12, 512) (9, 12)
1 sentences above 0.7 threshold
data/en_docs/id_12340.sent
data/th_docs/id_12340.sent
(135, 512) (18, 512) (135, 18)
2 sentences above 0.7 threshold
data/en_docs/id_12341.sent
data/th_docs/id_12341.sent
(165, 512) (15, 512) (165, 15)
2 sentences above 0.7 threshold
data/en_docs/id_12342.sent
data/th_docs/id_12342.sent
(720, 512) (18, 512) (720, 18)
13 sentences above 0.7 threshold
data/en_docs/id_12343.sent
data/th_docs/id_12343.sent
(87, 512) (120, 512) (87, 120)
2 sentences above 0.7 threshold
data/en_docs/id_12344.sent
data/th_docs/id_12344.sent
(54, 512) (21, 512) (54, 21)
2 sentences above 0.7 threshold
data/en_docs/id_12345.sent
data/th_docs/id_12345.sent
(21, 512) (36, 512) (21, 36)
1 sentences above 0.7 threshold
data/en_docs/id_12346.sent
data/th_docs/id_12346.sent
(9, 512) (36, 512) (9, 36)
1 sentences above 0.7 threshold
data/en_docs/id_

(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_12406.sent
data/th_docs/id_12406.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_12407.sent
data/th_docs/id_12407.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_12408.sent
data/th_docs/id_12408.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_12409.sent
data/th_docs/id_12409.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1241.sent
data/th_docs/id_1241.sent
(66, 512) (45, 512) (66, 45)
28 sentences above 0.7 threshold
data/en_docs/id_12410.sent
data/th_docs/id_12410.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_12411.sent
data/th_docs/id_12411.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_12412.sent
data/th_docs/id_12412.sent
(6, 512) (3, 512) (6, 3)
4 sentences above 0.7 threshold
data/en_docs/id_12413.sent
data/th_docs/id_12413.sen

3 sentences above 0.7 threshold
data/en_docs/id_12476.sent
data/th_docs/id_12476.sent
(15, 512) (9, 512) (15, 9)
5 sentences above 0.7 threshold
data/en_docs/id_12477.sent
data/th_docs/id_12477.sent
(195, 512) (138, 512) (195, 138)
53 sentences above 0.7 threshold
data/en_docs/id_12478.sent
data/th_docs/id_12478.sent
(249, 512) (132, 512) (249, 132)
114 sentences above 0.7 threshold
data/en_docs/id_12479.sent
data/th_docs/id_12479.sent
(6, 512) (3, 512) (6, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1248.sent
data/th_docs/id_1248.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12480.sent
data/th_docs/id_12480.sent
(192, 512) (168, 512) (192, 168)
86 sentences above 0.7 threshold
data/en_docs/id_12481.sent
data/th_docs/id_12481.sent
(300, 512) (117, 512) (300, 117)
16 sentences above 0.7 threshold
data/en_docs/id_12482.sent
data/th_docs/id_12482.sent
(42, 512) (12, 512) (42, 12)
1 sentences above 0.7 threshold
data/en_docs/id_12483.sent
data/th_docs

2 sentences above 0.7 threshold
data/en_docs/id_12546.sent
data/th_docs/id_12546.sent
(9, 512) (3, 512) (9, 3)
6 sentences above 0.7 threshold
data/en_docs/id_12547.sent
data/th_docs/id_12547.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_12548.sent
data/th_docs/id_12548.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12549.sent
data/th_docs/id_12549.sent
(219, 512) (162, 512) (219, 162)
96 sentences above 0.7 threshold
data/en_docs/id_1255.sent
data/th_docs/id_1255.sent
(12, 512) (57, 512) (12, 57)
7 sentences above 0.7 threshold
data/en_docs/id_12550.sent
data/th_docs/id_12550.sent
(120, 512) (6, 512) (120, 6)
1 sentences above 0.7 threshold
data/en_docs/id_12551.sent
data/th_docs/id_12551.sent
(63, 512) (27, 512) (63, 27)
28 sentences above 0.7 threshold
data/en_docs/id_12552.sent
data/th_docs/id_12552.sent
(48, 512) (9, 512) (48, 9)
5 sentences above 0.7 threshold
data/en_docs/id_12553.sent
data/th_docs/id_12553.sent
(63,

1 sentences above 0.7 threshold
data/en_docs/id_12617.sent
data/th_docs/id_12617.sent
(54, 512) (30, 512) (54, 30)
8 sentences above 0.7 threshold
data/en_docs/id_12618.sent
data/th_docs/id_12618.sent
(57, 512) (21, 512) (57, 21)
4 sentences above 0.7 threshold
data/en_docs/id_12619.sent
data/th_docs/id_12619.sent
(24, 512) (3, 512) (24, 3)
6 sentences above 0.7 threshold
data/en_docs/id_1262.sent
data/th_docs/id_1262.sent
(3, 512) (21, 512) (3, 21)
1 sentences above 0.7 threshold
data/en_docs/id_12620.sent
data/th_docs/id_12620.sent
(66, 512) (12, 512) (66, 12)
1 sentences above 0.7 threshold
data/en_docs/id_12621.sent
data/th_docs/id_12621.sent
(9, 512) (3, 512) (9, 3)
5 sentences above 0.7 threshold
data/en_docs/id_12622.sent
data/th_docs/id_12622.sent
skipping...
data/en_docs/id_12623.sent
data/th_docs/id_12623.sent
(66, 512) (42, 512) (66, 42)
3 sentences above 0.7 threshold
data/en_docs/id_12624.sent
data/th_docs/id_12624.sent
(66, 512) (354, 512) (66, 354)
3 sentences above 0.7 

26 sentences above 0.7 threshold
data/en_docs/id_12684.sent
data/th_docs/id_12684.sent
(24, 512) (3, 512) (24, 3)
6 sentences above 0.7 threshold
data/en_docs/id_12685.sent
data/th_docs/id_12685.sent
(6, 512) (39, 512) (6, 39)
1 sentences above 0.7 threshold
data/en_docs/id_12686.sent
data/th_docs/id_12686.sent
(3, 512) (102, 512) (3, 102)
1 sentences above 0.7 threshold
data/en_docs/id_12687.sent
data/th_docs/id_12687.sent
(30, 512) (24, 512) (30, 24)
1 sentences above 0.7 threshold
data/en_docs/id_12688.sent
data/th_docs/id_12688.sent
(51, 512) (12, 512) (51, 12)
3 sentences above 0.7 threshold
data/en_docs/id_12689.sent
data/th_docs/id_12689.sent
(255, 512) (27, 512) (255, 27)
18 sentences above 0.7 threshold
data/en_docs/id_1269.sent
data/th_docs/id_1269.sent
(36, 512) (3, 512) (36, 3)
4 sentences above 0.7 threshold
data/en_docs/id_12690.sent
data/th_docs/id_12690.sent
(66, 512) (6, 512) (66, 6)
7 sentences above 0.7 threshold
data/en_docs/id_12691.sent
data/th_docs/id_12691.sent


118 sentences above 0.7 threshold
data/en_docs/id_12751.sent
data/th_docs/id_12751.sent
(63, 512) (9, 512) (63, 9)
6 sentences above 0.7 threshold
data/en_docs/id_12752.sent
data/th_docs/id_12752.sent
(87, 512) (9, 512) (87, 9)
6 sentences above 0.7 threshold
data/en_docs/id_12753.sent
data/th_docs/id_12753.sent
(84, 512) (9, 512) (84, 9)
4 sentences above 0.7 threshold
data/en_docs/id_12754.sent
data/th_docs/id_12754.sent
(27, 512) (33, 512) (27, 33)
21 sentences above 0.7 threshold
data/en_docs/id_12755.sent
data/th_docs/id_12755.sent
(3, 512) (3, 512) (3, 3)
2 sentences above 0.7 threshold
data/en_docs/id_12756.sent
data/th_docs/id_12756.sent
(219, 512) (165, 512) (219, 165)
99 sentences above 0.7 threshold
data/en_docs/id_12757.sent
data/th_docs/id_12757.sent
(6, 512) (3, 512) (6, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12758.sent
data/th_docs/id_12758.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_12759.sent
data/th_docs/id_12759.sent
(3, 

(135, 512) (3, 512) (135, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12822.sent
data/th_docs/id_12822.sent
(51, 512) (27, 512) (51, 27)
9 sentences above 0.7 threshold
data/en_docs/id_12823.sent
data/th_docs/id_12823.sent
(219, 512) (111, 512) (219, 111)
39 sentences above 0.7 threshold
data/en_docs/id_12824.sent
data/th_docs/id_12824.sent
(225, 512) (6, 512) (225, 6)
8 sentences above 0.7 threshold
data/en_docs/id_12825.sent
data/th_docs/id_12825.sent
(24, 512) (27, 512) (24, 27)
1 sentences above 0.7 threshold
data/en_docs/id_12826.sent
data/th_docs/id_12826.sent
(309, 512) (30, 512) (309, 30)
22 sentences above 0.7 threshold
data/en_docs/id_12827.sent
data/th_docs/id_12827.sent
(102, 512) (129, 512) (102, 129)
4 sentences above 0.7 threshold
data/en_docs/id_12828.sent
data/th_docs/id_12828.sent
(27, 512) (3, 512) (27, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12829.sent
data/th_docs/id_12829.sent
(51, 512) (39, 512) (51, 39)
4 sentences above 0.7 threshold
data/en_d

(36, 512) (3, 512) (36, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12888.sent
data/th_docs/id_12888.sent
(12, 512) (9, 512) (12, 9)
1 sentences above 0.7 threshold
data/en_docs/id_12889.sent
data/th_docs/id_12889.sent
(15, 512) (3, 512) (15, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1289.sent
data/th_docs/id_1289.sent
(57, 512) (60, 512) (57, 60)
33 sentences above 0.7 threshold
data/en_docs/id_12890.sent
data/th_docs/id_12890.sent
(282, 512) (15, 512) (282, 15)
6 sentences above 0.7 threshold
data/en_docs/id_12891.sent
data/th_docs/id_12891.sent
(12, 512) (6, 512) (12, 6)
9 sentences above 0.7 threshold
data/en_docs/id_12892.sent
data/th_docs/id_12892.sent
(21, 512) (6, 512) (21, 6)
12 sentences above 0.7 threshold
data/en_docs/id_12893.sent
data/th_docs/id_12893.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12894.sent
data/th_docs/id_12894.sent
(66, 512) (12, 512) (66, 12)
9 sentences above 0.7 threshold
data/en_docs/id_12895.sent
data

1 sentences above 0.7 threshold
data/en_docs/id_12952.sent
data/th_docs/id_12952.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12953.sent
data/th_docs/id_12953.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_12954.sent
data/th_docs/id_12954.sent
(33, 512) (12, 512) (33, 12)
9 sentences above 0.7 threshold
data/en_docs/id_12955.sent
data/th_docs/id_12955.sent
(60, 512) (45, 512) (60, 45)
2 sentences above 0.7 threshold
data/en_docs/id_12956.sent
data/th_docs/id_12956.sent
(9, 512) (15, 512) (9, 15)
1 sentences above 0.7 threshold
data/en_docs/id_12957.sent
data/th_docs/id_12957.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_12958.sent
data/th_docs/id_12958.sent
(3, 512) (9, 512) (3, 9)
1 sentences above 0.7 threshold
data/en_docs/id_12959.sent
data/th_docs/id_12959.sent
(126, 512) (36, 512) (126, 36)
4 sentences above 0.7 threshold
data/en_docs/id_1296.sent
data/th_docs/id_1296.sent
(252, 512) (

(291, 512) (6, 512) (291, 6)
13 sentences above 0.7 threshold
data/en_docs/id_13020.sent
data/th_docs/id_13020.sent
(3, 512) (99, 512) (3, 99)
1 sentences above 0.7 threshold
data/en_docs/id_13021.sent
data/th_docs/id_13021.sent
(3, 512) (45, 512) (3, 45)
1 sentences above 0.7 threshold
data/en_docs/id_13022.sent
data/th_docs/id_13022.sent
(3, 512) (36, 512) (3, 36)
1 sentences above 0.7 threshold
data/en_docs/id_13023.sent
data/th_docs/id_13023.sent
(12, 512) (6, 512) (12, 6)
1 sentences above 0.7 threshold
data/en_docs/id_13024.sent
data/th_docs/id_13024.sent
(321, 512) (30, 512) (321, 30)
1 sentences above 0.7 threshold
data/en_docs/id_13025.sent
data/th_docs/id_13025.sent
(9, 512) (3, 512) (9, 3)
6 sentences above 0.7 threshold
data/en_docs/id_13026.sent
data/th_docs/id_13026.sent
(9, 512) (3, 512) (9, 3)
1 sentences above 0.7 threshold
data/en_docs/id_13027.sent
data/th_docs/id_13027.sent
(15, 512) (3, 512) (15, 3)
7 sentences above 0.7 threshold
data/en_docs/id_13028.sent
data/th

(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13087.sent
data/th_docs/id_13087.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13088.sent
data/th_docs/id_13088.sent
(51, 512) (9, 512) (51, 9)
9 sentences above 0.7 threshold
data/en_docs/id_13089.sent
data/th_docs/id_13089.sent
(72, 512) (3, 512) (72, 3)
5 sentences above 0.7 threshold
data/en_docs/id_1309.sent
data/th_docs/id_1309.sent
(93, 512) (3, 512) (93, 3)
5 sentences above 0.7 threshold
data/en_docs/id_13090.sent
data/th_docs/id_13090.sent
(9, 512) (3, 512) (9, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13091.sent
data/th_docs/id_13091.sent
(9, 512) (9, 512) (9, 9)
7 sentences above 0.7 threshold
data/en_docs/id_13092.sent
data/th_docs/id_13092.sent
(12, 512) (9, 512) (12, 9)
7 sentences above 0.7 threshold
data/en_docs/id_13093.sent
data/th_docs/id_13093.sent
(9, 512) (12, 512) (9, 12)
9 sentences above 0.7 threshold
data/en_docs/id_13094.sent
data/th_docs/id_1309

8 sentences above 0.7 threshold
data/en_docs/id_13156.sent
data/th_docs/id_13156.sent
(18, 512) (9, 512) (18, 9)
1 sentences above 0.7 threshold
data/en_docs/id_13157.sent
data/th_docs/id_13157.sent
(39, 512) (9, 512) (39, 9)
11 sentences above 0.7 threshold
data/en_docs/id_13158.sent
data/th_docs/id_13158.sent
(12, 512) (3, 512) (12, 3)
6 sentences above 0.7 threshold
data/en_docs/id_13159.sent
data/th_docs/id_13159.sent
(3, 512) (6, 512) (3, 6)
3 sentences above 0.7 threshold
data/en_docs/id_1316.sent
data/th_docs/id_1316.sent
(291, 512) (3, 512) (291, 3)
5 sentences above 0.7 threshold
data/en_docs/id_13160.sent
data/th_docs/id_13160.sent
(45, 512) (21, 512) (45, 21)
3 sentences above 0.7 threshold
data/en_docs/id_13161.sent
data/th_docs/id_13161.sent
(15, 512) (3, 512) (15, 3)
1 sentences above 0.7 threshold
data/en_docs/id_13162.sent
data/th_docs/id_13162.sent
(21, 512) (120, 512) (21, 120)
1 sentences above 0.7 threshold
data/en_docs/id_13163.sent
data/th_docs/id_13163.sent
(318,

(333, 512) (3, 512) (333, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13222.sent
data/th_docs/id_13222.sent
(480, 512) (3, 512) (480, 3)
5 sentences above 0.7 threshold
data/en_docs/id_13223.sent
data/th_docs/id_13223.sent
(105, 512) (3, 512) (105, 3)
6 sentences above 0.7 threshold
data/en_docs/id_13224.sent
data/th_docs/id_13224.sent
(36, 512) (3, 512) (36, 3)
6 sentences above 0.7 threshold
data/en_docs/id_13225.sent
data/th_docs/id_13225.sent
(342, 512) (321, 512) (342, 321)
165 sentences above 0.7 threshold
data/en_docs/id_13226.sent
data/th_docs/id_13226.sent
(27, 512) (54, 512) (27, 54)
8 sentences above 0.7 threshold
data/en_docs/id_13227.sent
data/th_docs/id_13227.sent
(18, 512) (9, 512) (18, 9)
1 sentences above 0.7 threshold
data/en_docs/id_13228.sent
data/th_docs/id_13228.sent
(279, 512) (24, 512) (279, 24)
32 sentences above 0.7 threshold
data/en_docs/id_13229.sent
data/th_docs/id_13229.sent
(27, 512) (33, 512) (27, 33)
15 sentences above 0.7 threshold
data/en_docs/

(12, 512) (18, 512) (12, 18)
1 sentences above 0.7 threshold
data/en_docs/id_13291.sent
data/th_docs/id_13291.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13292.sent
data/th_docs/id_13292.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13293.sent
data/th_docs/id_13293.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13294.sent
data/th_docs/id_13294.sent
(66, 512) (21, 512) (66, 21)
3 sentences above 0.7 threshold
data/en_docs/id_13295.sent
data/th_docs/id_13295.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13296.sent
data/th_docs/id_13296.sent
(6, 512) (3, 512) (6, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13297.sent
data/th_docs/id_13297.sent
(87, 512) (6, 512) (87, 6)
6 sentences above 0.7 threshold
data/en_docs/id_13298.sent
data/th_docs/id_13298.sent
(57, 512) (6, 512) (57, 6)
1 sentences above 0.7 threshold
data/en_docs/id_13299.sent
data/th_docs/id_

(255, 512) (156, 512) (255, 156)
138 sentences above 0.7 threshold
data/en_docs/id_13357.sent
data/th_docs/id_13357.sent
(66, 512) (6, 512) (66, 6)
1 sentences above 0.7 threshold
data/en_docs/id_13358.sent
data/th_docs/id_13358.sent
(54, 512) (102, 512) (54, 102)
1 sentences above 0.7 threshold
data/en_docs/id_13359.sent
data/th_docs/id_13359.sent
(42, 512) (12, 512) (42, 12)
1 sentences above 0.7 threshold
data/en_docs/id_1336.sent
data/th_docs/id_1336.sent
(528, 512) (9, 512) (528, 9)
7 sentences above 0.7 threshold
data/en_docs/id_13360.sent
data/th_docs/id_13360.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_13361.sent
data/th_docs/id_13361.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_13362.sent
data/th_docs/id_13362.sent
(30, 512) (12, 512) (30, 12)
1 sentences above 0.7 threshold
data/en_docs/id_13363.sent
data/th_docs/id_13363.sent
(174, 512) (9, 512) (174, 9)
11 sentences above 0.7 threshold
data/en_docs/id_13364.

1 sentences above 0.7 threshold
data/en_docs/id_13421.sent
data/th_docs/id_13421.sent
(9, 512) (3, 512) (9, 3)
1 sentences above 0.7 threshold
data/en_docs/id_13422.sent
data/th_docs/id_13422.sent
(15, 512) (6, 512) (15, 6)
1 sentences above 0.7 threshold
data/en_docs/id_13423.sent
data/th_docs/id_13423.sent
(45, 512) (12, 512) (45, 12)
3 sentences above 0.7 threshold
data/en_docs/id_13424.sent
data/th_docs/id_13424.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13425.sent
data/th_docs/id_13425.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13426.sent
data/th_docs/id_13426.sent
(234, 512) (33, 512) (234, 33)
10 sentences above 0.7 threshold
data/en_docs/id_13427.sent
data/th_docs/id_13427.sent
(33, 512) (114, 512) (33, 114)
5 sentences above 0.7 threshold
data/en_docs/id_13428.sent
data/th_docs/id_13428.sent
(24, 512) (3, 512) (24, 3)
6 sentences above 0.7 threshold
data/en_docs/id_13429.sent
data/th_docs/id_13429.sent
(3, 5

2 sentences above 0.7 threshold
data/en_docs/id_13493.sent
data/th_docs/id_13493.sent
(15, 512) (12, 512) (15, 12)
7 sentences above 0.7 threshold
data/en_docs/id_13494.sent
data/th_docs/id_13494.sent
(18, 512) (63, 512) (18, 63)
1 sentences above 0.7 threshold
data/en_docs/id_13495.sent
data/th_docs/id_13495.sent
skipping...
data/en_docs/id_13496.sent
data/th_docs/id_13496.sent
(24, 512) (3, 512) (24, 3)
1 sentences above 0.7 threshold
data/en_docs/id_13497.sent
data/th_docs/id_13497.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13498.sent
data/th_docs/id_13498.sent
(117, 512) (66, 512) (117, 66)
71 sentences above 0.7 threshold
data/en_docs/id_13499.sent
data/th_docs/id_13499.sent
(9, 512) (3, 512) (9, 3)
1 sentences above 0.7 threshold
data/en_docs/id_135.sent
data/th_docs/id_135.sent
(249, 512) (99, 512) (249, 99)
70 sentences above 0.7 threshold
data/en_docs/id_1350.sent
data/th_docs/id_1350.sent
skipping...
data/en_docs/id_13500.sent
data/th_docs/i

(129, 512) (30, 512) (129, 30)
7 sentences above 0.7 threshold
data/en_docs/id_13571.sent
data/th_docs/id_13571.sent
(129, 512) (30, 512) (129, 30)
7 sentences above 0.7 threshold
data/en_docs/id_13572.sent
data/th_docs/id_13572.sent
(72, 512) (57, 512) (72, 57)
54 sentences above 0.7 threshold
data/en_docs/id_13573.sent
data/th_docs/id_13573.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13574.sent
data/th_docs/id_13574.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13575.sent
data/th_docs/id_13575.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13576.sent
data/th_docs/id_13576.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13577.sent
data/th_docs/id_13577.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13578.sent
data/th_docs/id_13578.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13579.sent
data/th_doc

6 sentences above 0.7 threshold
data/en_docs/id_13641.sent
data/th_docs/id_13641.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13642.sent
data/th_docs/id_13642.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13643.sent
data/th_docs/id_13643.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13644.sent
data/th_docs/id_13644.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13645.sent
data/th_docs/id_13645.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13646.sent
data/th_docs/id_13646.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_13647.sent
data/th_docs/id_13647.sent
(15, 512) (9, 512) (15, 9)
10 sentences above 0.7 threshold
data/en_docs/id_13648.sent
data/th_docs/id_13648.sent
(3, 512) (6, 512) (3, 6)
1 sentences above 0.7 threshold
data/en_docs/id_13649.sent
data/th_docs/id_13649.sent
(9, 512) (9, 512) (9, 9

2 sentences above 0.7 threshold
data/en_docs/id_13709.sent
data/th_docs/id_13709.sent
(42, 512) (27, 512) (42, 27)
1 sentences above 0.7 threshold
data/en_docs/id_1371.sent
data/th_docs/id_1371.sent
(1944, 512) (3, 512) (1944, 3)
8 sentences above 0.7 threshold
data/en_docs/id_13710.sent
data/th_docs/id_13710.sent
(18, 512) (258, 512) (18, 258)
1 sentences above 0.7 threshold
data/en_docs/id_13711.sent
data/th_docs/id_13711.sent
(102, 512) (276, 512) (102, 276)
2 sentences above 0.7 threshold
data/en_docs/id_13712.sent
data/th_docs/id_13712.sent
(90, 512) (3, 512) (90, 3)
1 sentences above 0.7 threshold
data/en_docs/id_13713.sent
data/th_docs/id_13713.sent
(3, 512) (15, 512) (3, 15)
1 sentences above 0.7 threshold
data/en_docs/id_13714.sent
data/th_docs/id_13714.sent
(54, 512) (33, 512) (54, 33)
2 sentences above 0.7 threshold
data/en_docs/id_13715.sent
data/th_docs/id_13715.sent
skipping...
data/en_docs/id_13716.sent
data/th_docs/id_13716.sent
(84, 512) (12, 512) (84, 12)
2 sentences 

(12, 512) (30, 512) (12, 30)
1 sentences above 0.7 threshold
data/en_docs/id_13775.sent
data/th_docs/id_13775.sent
(12, 512) (84, 512) (12, 84)
1 sentences above 0.7 threshold
data/en_docs/id_13776.sent
data/th_docs/id_13776.sent
(3, 512) (24, 512) (3, 24)
1 sentences above 0.7 threshold
data/en_docs/id_13777.sent
data/th_docs/id_13777.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_13778.sent
data/th_docs/id_13778.sent
(6, 512) (3, 512) (6, 3)
1 sentences above 0.7 threshold
data/en_docs/id_13779.sent
data/th_docs/id_13779.sent
(36, 512) (81, 512) (36, 81)
4 sentences above 0.7 threshold
data/en_docs/id_1378.sent
data/th_docs/id_1378.sent
(759, 512) (3, 512) (759, 3)
2 sentences above 0.7 threshold
data/en_docs/id_13780.sent
data/th_docs/id_13780.sent
(21, 512) (24, 512) (21, 24)
1 sentences above 0.7 threshold
data/en_docs/id_13781.sent
data/th_docs/id_13781.sent
(3, 512) (138, 512) (3, 138)
1 sentences above 0.7 threshold
data/en_docs/id_13782.sent
data

(555, 512) (9, 512) (555, 9)
9 sentences above 0.7 threshold
data/en_docs/id_13843.sent
data/th_docs/id_13843.sent
(12, 512) (15, 512) (12, 15)
9 sentences above 0.7 threshold
data/en_docs/id_13844.sent
data/th_docs/id_13844.sent
(12, 512) (24, 512) (12, 24)
10 sentences above 0.7 threshold
data/en_docs/id_13845.sent
data/th_docs/id_13845.sent
(42, 512) (12, 512) (42, 12)
17 sentences above 0.7 threshold
data/en_docs/id_13846.sent
data/th_docs/id_13846.sent
(12, 512) (12, 512) (12, 12)
9 sentences above 0.7 threshold
data/en_docs/id_13847.sent
data/th_docs/id_13847.sent
(9, 512) (18, 512) (9, 18)
6 sentences above 0.7 threshold
data/en_docs/id_13848.sent
data/th_docs/id_13848.sent
(45, 512) (6, 512) (45, 6)
1 sentences above 0.7 threshold
data/en_docs/id_13849.sent
data/th_docs/id_13849.sent
(15, 512) (33, 512) (15, 33)
1 sentences above 0.7 threshold
data/en_docs/id_1385.sent
data/th_docs/id_1385.sent
(33, 512) (18, 512) (33, 18)
2 sentences above 0.7 threshold
data/en_docs/id_13850.s

6 sentences above 0.7 threshold
data/en_docs/id_1444.sent
data/th_docs/id_1444.sent
(87, 512) (48, 512) (87, 48)
22 sentences above 0.7 threshold
data/en_docs/id_1445.sent
data/th_docs/id_1445.sent
(603, 512) (51, 512) (603, 51)
2 sentences above 0.7 threshold
data/en_docs/id_1446.sent
data/th_docs/id_1446.sent
(1098, 512) (237, 512) (1098, 237)
95 sentences above 0.7 threshold
data/en_docs/id_1447.sent
data/th_docs/id_1447.sent
(111, 512) (12, 512) (111, 12)
4 sentences above 0.7 threshold
data/en_docs/id_1448.sent
data/th_docs/id_1448.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1449.sent
data/th_docs/id_1449.sent
(1440, 512) (57, 512) (1440, 57)
111 sentences above 0.7 threshold
data/en_docs/id_145.sent
data/th_docs/id_145.sent
(78, 512) (9, 512) (78, 9)
7 sentences above 0.7 threshold
data/en_docs/id_1450.sent
data/th_docs/id_1450.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1451.sent
data/th_docs/id_1451.sent
(9, 51

3 sentences above 0.7 threshold
data/en_docs/id_1515.sent
data/th_docs/id_1515.sent
(51, 512) (6, 512) (51, 6)
4 sentences above 0.7 threshold
data/en_docs/id_1516.sent
data/th_docs/id_1516.sent
(3, 512) (6, 512) (3, 6)
3 sentences above 0.7 threshold
data/en_docs/id_1517.sent
data/th_docs/id_1517.sent
(15, 512) (6, 512) (15, 6)
5 sentences above 0.7 threshold
data/en_docs/id_1518.sent
data/th_docs/id_1518.sent
(3, 512) (6, 512) (3, 6)
3 sentences above 0.7 threshold
data/en_docs/id_1519.sent
data/th_docs/id_1519.sent
skipping...
data/en_docs/id_152.sent
data/th_docs/id_152.sent
(36, 512) (12, 512) (36, 12)
1 sentences above 0.7 threshold
data/en_docs/id_1520.sent
data/th_docs/id_1520.sent
(54, 512) (6, 512) (54, 6)
5 sentences above 0.7 threshold
data/en_docs/id_1521.sent
data/th_docs/id_1521.sent
(6, 512) (6, 512) (6, 6)
4 sentences above 0.7 threshold
data/en_docs/id_1522.sent
data/th_docs/id_1522.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_1523.sen

(48, 512) (6, 512) (48, 6)
7 sentences above 0.7 threshold
data/en_docs/id_1587.sent
data/th_docs/id_1587.sent
(12, 512) (9, 512) (12, 9)
10 sentences above 0.7 threshold
data/en_docs/id_1588.sent
data/th_docs/id_1588.sent
(414, 512) (6, 512) (414, 6)
6 sentences above 0.7 threshold
data/en_docs/id_1589.sent
data/th_docs/id_1589.sent
(18, 512) (6, 512) (18, 6)
4 sentences above 0.7 threshold
data/en_docs/id_159.sent
data/th_docs/id_159.sent
(18, 512) (9, 512) (18, 9)
12 sentences above 0.7 threshold
data/en_docs/id_1590.sent
data/th_docs/id_1590.sent
(75, 512) (36, 512) (75, 36)
31 sentences above 0.7 threshold
data/en_docs/id_1591.sent
data/th_docs/id_1591.sent
(492, 512) (102, 512) (492, 102)
20 sentences above 0.7 threshold
data/en_docs/id_1592.sent
data/th_docs/id_1592.sent
(360, 512) (57, 512) (360, 57)
37 sentences above 0.7 threshold
data/en_docs/id_1593.sent
data/th_docs/id_1593.sent
(258, 512) (51, 512) (258, 51)
40 sentences above 0.7 threshold
data/en_docs/id_1594.sent
data/

54 sentences above 0.7 threshold
data/en_docs/id_1654.sent
data/th_docs/id_1654.sent
(546, 512) (99, 512) (546, 99)
5 sentences above 0.7 threshold
data/en_docs/id_1655.sent
data/th_docs/id_1655.sent
(702, 512) (3, 512) (702, 3)
8 sentences above 0.7 threshold
data/en_docs/id_1656.sent
data/th_docs/id_1656.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1657.sent
data/th_docs/id_1657.sent
(21, 512) (3, 512) (21, 3)
5 sentences above 0.7 threshold
data/en_docs/id_1658.sent
data/th_docs/id_1658.sent
(177, 512) (15, 512) (177, 15)
16 sentences above 0.7 threshold
data/en_docs/id_1659.sent
data/th_docs/id_1659.sent
(528, 512) (6, 512) (528, 6)
1 sentences above 0.7 threshold
data/en_docs/id_166.sent
data/th_docs/id_166.sent
(3, 512) (114, 512) (3, 114)
1 sentences above 0.7 threshold
data/en_docs/id_1660.sent
data/th_docs/id_1660.sent
(48, 512) (9, 512) (48, 9)
5 sentences above 0.7 threshold
data/en_docs/id_1661.sent
data/th_docs/id_1661.sent
(228, 512) (183,

1 sentences above 0.7 threshold
data/en_docs/id_1720.sent
data/th_docs/id_1720.sent
(345, 512) (33, 512) (345, 33)
32 sentences above 0.7 threshold
data/en_docs/id_1721.sent
data/th_docs/id_1721.sent
(84, 512) (24, 512) (84, 24)
15 sentences above 0.7 threshold
data/en_docs/id_1722.sent
data/th_docs/id_1722.sent
(495, 512) (57, 512) (495, 57)
49 sentences above 0.7 threshold
data/en_docs/id_1723.sent
data/th_docs/id_1723.sent
(579, 512) (6, 512) (579, 6)
7 sentences above 0.7 threshold
data/en_docs/id_1724.sent
data/th_docs/id_1724.sent
(57, 512) (3, 512) (57, 3)
14 sentences above 0.7 threshold
data/en_docs/id_1725.sent
data/th_docs/id_1725.sent
(177, 512) (15, 512) (177, 15)
17 sentences above 0.7 threshold
data/en_docs/id_1726.sent
data/th_docs/id_1726.sent
(1002, 512) (54, 512) (1002, 54)
5 sentences above 0.7 threshold
data/en_docs/id_1727.sent
data/th_docs/id_1727.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1728.sent
data/th_docs/id_1728.sent
(66

36 sentences above 0.7 threshold
data/en_docs/id_1787.sent
data/th_docs/id_1787.sent
(189, 512) (9, 512) (189, 9)
12 sentences above 0.7 threshold
data/en_docs/id_1788.sent
data/th_docs/id_1788.sent
(735, 512) (63, 512) (735, 63)
24 sentences above 0.7 threshold
data/en_docs/id_1789.sent
data/th_docs/id_1789.sent
(228, 512) (3, 512) (228, 3)
3 sentences above 0.7 threshold
data/en_docs/id_179.sent
data/th_docs/id_179.sent
(12, 512) (3, 512) (12, 3)
10 sentences above 0.7 threshold
data/en_docs/id_1790.sent
data/th_docs/id_1790.sent
(159, 512) (57, 512) (159, 57)
8 sentences above 0.7 threshold
data/en_docs/id_1791.sent
data/th_docs/id_1791.sent
(111, 512) (18, 512) (111, 18)
1 sentences above 0.7 threshold
data/en_docs/id_1792.sent
data/th_docs/id_1792.sent
(636, 512) (42, 512) (636, 42)
11 sentences above 0.7 threshold
data/en_docs/id_1793.sent
data/th_docs/id_1793.sent
(108, 512) (3, 512) (108, 3)
4 sentences above 0.7 threshold
data/en_docs/id_1794.sent
data/th_docs/id_1794.sent
(60

(675, 512) (3, 512) (675, 3)
8 sentences above 0.7 threshold
data/en_docs/id_1853.sent
data/th_docs/id_1853.sent
(210, 512) (9, 512) (210, 9)
9 sentences above 0.7 threshold
data/en_docs/id_1854.sent
data/th_docs/id_1854.sent
(516, 512) (24, 512) (516, 24)
9 sentences above 0.7 threshold
data/en_docs/id_1855.sent
data/th_docs/id_1855.sent
(42, 512) (9, 512) (42, 9)
3 sentences above 0.7 threshold
data/en_docs/id_1856.sent
data/th_docs/id_1856.sent
(399, 512) (18, 512) (399, 18)
3 sentences above 0.7 threshold
data/en_docs/id_1857.sent
data/th_docs/id_1857.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1858.sent
data/th_docs/id_1858.sent
(51, 512) (48, 512) (51, 48)
1 sentences above 0.7 threshold
data/en_docs/id_1859.sent
data/th_docs/id_1859.sent
(141, 512) (15, 512) (141, 15)
5 sentences above 0.7 threshold
data/en_docs/id_186.sent
data/th_docs/id_186.sent
(45, 512) (3, 512) (45, 3)
2 sentences above 0.7 threshold
data/en_docs/id_1860.sent
data/th_docs/

2 sentences above 0.7 threshold
data/en_docs/id_1921.sent
data/th_docs/id_1921.sent
(12, 512) (12, 512) (12, 12)
1 sentences above 0.7 threshold
data/en_docs/id_1922.sent
data/th_docs/id_1922.sent
(12, 512) (9, 512) (12, 9)
2 sentences above 0.7 threshold
data/en_docs/id_1923.sent
data/th_docs/id_1923.sent
(21, 512) (9, 512) (21, 9)
3 sentences above 0.7 threshold
data/en_docs/id_1924.sent
data/th_docs/id_1924.sent
(21, 512) (9, 512) (21, 9)
3 sentences above 0.7 threshold
data/en_docs/id_1925.sent
data/th_docs/id_1925.sent
(3, 512) (9, 512) (3, 9)
1 sentences above 0.7 threshold
data/en_docs/id_1926.sent
data/th_docs/id_1926.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1927.sent
data/th_docs/id_1927.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1928.sent
data/th_docs/id_1928.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_1929.sent
data/th_docs/id_1929.sent
(3, 512) (3, 512) (3, 3)
3 sentenc

(18, 512) (15, 512) (18, 15)
2 sentences above 0.7 threshold
data/en_docs/id_1991.sent
data/th_docs/id_1991.sent
(18, 512) (15, 512) (18, 15)
2 sentences above 0.7 threshold
data/en_docs/id_1992.sent
data/th_docs/id_1992.sent
(3, 512) (15, 512) (3, 15)
3 sentences above 0.7 threshold
data/en_docs/id_1993.sent
data/th_docs/id_1993.sent
(57, 512) (3, 512) (57, 3)
1 sentences above 0.7 threshold
data/en_docs/id_1994.sent
data/th_docs/id_1994.sent
(69, 512) (24, 512) (69, 24)
26 sentences above 0.7 threshold
data/en_docs/id_1995.sent
data/th_docs/id_1995.sent
(96, 512) (24, 512) (96, 24)
14 sentences above 0.7 threshold
data/en_docs/id_1996.sent
data/th_docs/id_1996.sent
(6, 512) (24, 512) (6, 24)
1 sentences above 0.7 threshold
data/en_docs/id_1997.sent
data/th_docs/id_1997.sent
(1104, 512) (15, 512) (1104, 15)
17 sentences above 0.7 threshold
data/en_docs/id_1998.sent
data/th_docs/id_1998.sent
(114, 512) (81, 512) (114, 81)
17 sentences above 0.7 threshold
data/en_docs/id_1999.sent
data/

5 sentences above 0.7 threshold
data/en_docs/id_2059.sent
data/th_docs/id_2059.sent
(726, 512) (3, 512) (726, 3)
7 sentences above 0.7 threshold
data/en_docs/id_206.sent
data/th_docs/id_206.sent
(9, 512) (123, 512) (9, 123)
3 sentences above 0.7 threshold
data/en_docs/id_2060.sent
data/th_docs/id_2060.sent
(726, 512) (3, 512) (726, 3)
6 sentences above 0.7 threshold
data/en_docs/id_2061.sent
data/th_docs/id_2061.sent
(3, 512) (6, 512) (3, 6)
2 sentences above 0.7 threshold
data/en_docs/id_2062.sent
data/th_docs/id_2062.sent
(3, 512) (6, 512) (3, 6)
3 sentences above 0.7 threshold
data/en_docs/id_2063.sent
data/th_docs/id_2063.sent
(6, 512) (3, 512) (6, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2064.sent
data/th_docs/id_2064.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_2065.sent
data/th_docs/id_2065.sent
(6, 512) (96, 512) (6, 96)
1 sentences above 0.7 threshold
data/en_docs/id_2066.sent
data/th_docs/id_2066.sent
(3, 512) (3, 512) (3, 3)
1 sente

31 sentences above 0.7 threshold
data/en_docs/id_2125.sent
data/th_docs/id_2125.sent
(1251, 512) (381, 512) (1251, 381)
141 sentences above 0.7 threshold
data/en_docs/id_2126.sent
data/th_docs/id_2126.sent
(1449, 512) (3, 512) (1449, 3)
5 sentences above 0.7 threshold
data/en_docs/id_2127.sent
data/th_docs/id_2127.sent
(309, 512) (78, 512) (309, 78)
15 sentences above 0.7 threshold
data/en_docs/id_2128.sent
data/th_docs/id_2128.sent
(81, 512) (45, 512) (81, 45)
23 sentences above 0.7 threshold
data/en_docs/id_2129.sent
data/th_docs/id_2129.sent
(309, 512) (15, 512) (309, 15)
36 sentences above 0.7 threshold
data/en_docs/id_213.sent
data/th_docs/id_213.sent
(126, 512) (15, 512) (126, 15)
3 sentences above 0.7 threshold
data/en_docs/id_2130.sent
data/th_docs/id_2130.sent
(9, 512) (6, 512) (9, 6)
2 sentences above 0.7 threshold
data/en_docs/id_2131.sent
data/th_docs/id_2131.sent
(888, 512) (126, 512) (888, 126)
26 sentences above 0.7 threshold
data/en_docs/id_2132.sent
data/th_docs/id_213

(129, 512) (3, 512) (129, 3)
1 sentences above 0.7 threshold
data/en_docs/id_2192.sent
data/th_docs/id_2192.sent
(375, 512) (60, 512) (375, 60)
28 sentences above 0.7 threshold
data/en_docs/id_2193.sent
data/th_docs/id_2193.sent
(483, 512) (12, 512) (483, 12)
11 sentences above 0.7 threshold
data/en_docs/id_2194.sent
data/th_docs/id_2194.sent
(519, 512) (33, 512) (519, 33)
1 sentences above 0.7 threshold
data/en_docs/id_2195.sent
data/th_docs/id_2195.sent
(810, 512) (15, 512) (810, 15)
36 sentences above 0.7 threshold
data/en_docs/id_2196.sent
data/th_docs/id_2196.sent
(150, 512) (30, 512) (150, 30)
27 sentences above 0.7 threshold
data/en_docs/id_2197.sent
data/th_docs/id_2197.sent
(354, 512) (105, 512) (354, 105)
74 sentences above 0.7 threshold
data/en_docs/id_2198.sent
data/th_docs/id_2198.sent
(54, 512) (6, 512) (54, 6)
5 sentences above 0.7 threshold
data/en_docs/id_2199.sent
data/th_docs/id_2199.sent
(18, 512) (15, 512) (18, 15)
7 sentences above 0.7 threshold
data/en_docs/id_22

1 sentences above 0.7 threshold
data/en_docs/id_2257.sent
data/th_docs/id_2257.sent
(24, 512) (18, 512) (24, 18)
2 sentences above 0.7 threshold
data/en_docs/id_2258.sent
data/th_docs/id_2258.sent
(432, 512) (213, 512) (432, 213)
71 sentences above 0.7 threshold
data/en_docs/id_2259.sent
data/th_docs/id_2259.sent
(183, 512) (99, 512) (183, 99)
9 sentences above 0.7 threshold
data/en_docs/id_226.sent
data/th_docs/id_226.sent
(36, 512) (15, 512) (36, 15)
5 sentences above 0.7 threshold
data/en_docs/id_2260.sent
data/th_docs/id_2260.sent
(24, 512) (258, 512) (24, 258)
2 sentences above 0.7 threshold
data/en_docs/id_2261.sent
data/th_docs/id_2261.sent
(90, 512) (3, 512) (90, 3)
7 sentences above 0.7 threshold
data/en_docs/id_2262.sent
data/th_docs/id_2262.sent
(24, 512) (60, 512) (24, 60)
2 sentences above 0.7 threshold
data/en_docs/id_2263.sent
data/th_docs/id_2263.sent
(2196, 512) (15, 512) (2196, 15)
7 sentences above 0.7 threshold
data/en_docs/id_2264.sent
data/th_docs/id_2264.sent
(6,

49 sentences above 0.7 threshold
data/en_docs/id_2321.sent
data/th_docs/id_2321.sent
(240, 512) (57, 512) (240, 57)
12 sentences above 0.7 threshold
data/en_docs/id_2322.sent
data/th_docs/id_2322.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2323.sent
data/th_docs/id_2323.sent
(339, 512) (24, 512) (339, 24)
3 sentences above 0.7 threshold
data/en_docs/id_2324.sent
data/th_docs/id_2324.sent
(57, 512) (51, 512) (57, 51)
3 sentences above 0.7 threshold
data/en_docs/id_2325.sent
data/th_docs/id_2325.sent
(390, 512) (3, 512) (390, 3)
7 sentences above 0.7 threshold
data/en_docs/id_2326.sent
data/th_docs/id_2326.sent
(99, 512) (9, 512) (99, 9)
3 sentences above 0.7 threshold
data/en_docs/id_2327.sent
data/th_docs/id_2327.sent
(3, 512) (9, 512) (3, 9)
1 sentences above 0.7 threshold
data/en_docs/id_2328.sent
data/th_docs/id_2328.sent
(504, 512) (186, 512) (504, 186)
1 sentences above 0.7 threshold
data/en_docs/id_2329.sent
data/th_docs/id_2329.sent
(699, 512) (

49 sentences above 0.7 threshold
data/en_docs/id_2391.sent
data/th_docs/id_2391.sent
(36, 512) (6, 512) (36, 6)
9 sentences above 0.7 threshold
data/en_docs/id_2392.sent
data/th_docs/id_2392.sent
(15, 512) (33, 512) (15, 33)
1 sentences above 0.7 threshold
data/en_docs/id_2393.sent
data/th_docs/id_2393.sent
(3, 512) (12, 512) (3, 12)
1 sentences above 0.7 threshold
data/en_docs/id_2394.sent
data/th_docs/id_2394.sent
(291, 512) (126, 512) (291, 126)
2 sentences above 0.7 threshold
data/en_docs/id_2395.sent
data/th_docs/id_2395.sent
(156, 512) (33, 512) (156, 33)
6 sentences above 0.7 threshold
data/en_docs/id_2396.sent
data/th_docs/id_2396.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_2397.sent
data/th_docs/id_2397.sent
(99, 512) (30, 512) (99, 30)
5 sentences above 0.7 threshold
data/en_docs/id_2398.sent
data/th_docs/id_2398.sent
(369, 512) (24, 512) (369, 24)
22 sentences above 0.7 threshold
data/en_docs/id_2399.sent
data/th_docs/id_2399.sent
(105, 512)

(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2462.sent
data/th_docs/id_2462.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2463.sent
data/th_docs/id_2463.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2464.sent
data/th_docs/id_2464.sent
(3, 512) (6, 512) (3, 6)
3 sentences above 0.7 threshold
data/en_docs/id_2465.sent
data/th_docs/id_2465.sent
(3, 512) (6, 512) (3, 6)
3 sentences above 0.7 threshold
data/en_docs/id_2466.sent
data/th_docs/id_2466.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2467.sent
data/th_docs/id_2467.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2468.sent
data/th_docs/id_2468.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2469.sent
data/th_docs/id_2469.sent
(3, 512) (3, 512) (3, 3)
2 sentences above 0.7 threshold
data/en_docs/id_247.sent
data/th_docs/id_247.sent
(27, 512) (12, 512) (

13 sentences above 0.7 threshold
data/en_docs/id_2533.sent
data/th_docs/id_2533.sent
(1137, 512) (117, 512) (1137, 117)
34 sentences above 0.7 threshold
data/en_docs/id_2534.sent
data/th_docs/id_2534.sent
(24, 512) (9, 512) (24, 9)
1 sentences above 0.7 threshold
data/en_docs/id_2535.sent
data/th_docs/id_2535.sent
(12, 512) (9, 512) (12, 9)
3 sentences above 0.7 threshold
data/en_docs/id_2536.sent
data/th_docs/id_2536.sent
(3, 512) (15, 512) (3, 15)
1 sentences above 0.7 threshold
data/en_docs/id_2537.sent
data/th_docs/id_2537.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2538.sent
data/th_docs/id_2538.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2539.sent
data/th_docs/id_2539.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_254.sent
data/th_docs/id_254.sent
(45, 512) (15, 512) (45, 15)
1 sentences above 0.7 threshold
data/en_docs/id_2540.sent
data/th_docs/id_2540.sent
(48, 512) (3, 512) (48, 

(774, 512) (12, 512) (774, 12)
49 sentences above 0.7 threshold
data/en_docs/id_2605.sent
data/th_docs/id_2605.sent
(774, 512) (63, 512) (774, 63)
38 sentences above 0.7 threshold
data/en_docs/id_2606.sent
data/th_docs/id_2606.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2607.sent
data/th_docs/id_2607.sent
(774, 512) (42, 512) (774, 42)
5 sentences above 0.7 threshold
data/en_docs/id_2608.sent
data/th_docs/id_2608.sent
(3, 512) (24, 512) (3, 24)
3 sentences above 0.7 threshold
data/en_docs/id_2609.sent
data/th_docs/id_2609.sent
(288, 512) (3, 512) (288, 3)
3 sentences above 0.7 threshold
data/en_docs/id_261.sent
data/th_docs/id_261.sent
(12, 512) (3, 512) (12, 3)
1 sentences above 0.7 threshold
data/en_docs/id_2610.sent
data/th_docs/id_2610.sent
(57, 512) (3, 512) (57, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2611.sent
data/th_docs/id_2611.sent
(207, 512) (3, 512) (207, 3)
4 sentences above 0.7 threshold
data/en_docs/id_2612.sent
data/th_docs/

(156, 512) (3, 512) (156, 3)
6 sentences above 0.7 threshold
data/en_docs/id_2672.sent
data/th_docs/id_2672.sent
(30, 512) (9, 512) (30, 9)
1 sentences above 0.7 threshold
data/en_docs/id_2673.sent
data/th_docs/id_2673.sent
(30, 512) (3, 512) (30, 3)
1 sentences above 0.7 threshold
data/en_docs/id_2674.sent
data/th_docs/id_2674.sent
(30, 512) (3, 512) (30, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2675.sent
data/th_docs/id_2675.sent
(39, 512) (6, 512) (39, 6)
1 sentences above 0.7 threshold
data/en_docs/id_2676.sent
data/th_docs/id_2676.sent
(30, 512) (9, 512) (30, 9)
1 sentences above 0.7 threshold
data/en_docs/id_2677.sent
data/th_docs/id_2677.sent
(228, 512) (117, 512) (228, 117)
2 sentences above 0.7 threshold
data/en_docs/id_2678.sent
data/th_docs/id_2678.sent
(39, 512) (6, 512) (39, 6)
1 sentences above 0.7 threshold
data/en_docs/id_2679.sent
data/th_docs/id_2679.sent
(15, 512) (6, 512) (15, 6)
2 sentences above 0.7 threshold
data/en_docs/id_268.sent
data/th_docs/id_268.

12 sentences above 0.7 threshold
data/en_docs/id_274.sent
data/th_docs/id_274.sent
(69, 512) (78, 512) (69, 78)
24 sentences above 0.7 threshold
data/en_docs/id_2740.sent
data/th_docs/id_2740.sent
(18, 512) (69, 512) (18, 69)
7 sentences above 0.7 threshold
data/en_docs/id_2741.sent
data/th_docs/id_2741.sent
(63, 512) (39, 512) (63, 39)
1 sentences above 0.7 threshold
data/en_docs/id_2742.sent
data/th_docs/id_2742.sent
(162, 512) (39, 512) (162, 39)
4 sentences above 0.7 threshold
data/en_docs/id_2743.sent
data/th_docs/id_2743.sent
(87, 512) (18, 512) (87, 18)
16 sentences above 0.7 threshold
data/en_docs/id_2744.sent
data/th_docs/id_2744.sent
(6, 512) (6, 512) (6, 6)
1 sentences above 0.7 threshold
data/en_docs/id_2745.sent
data/th_docs/id_2745.sent
(279, 512) (18, 512) (279, 18)
2 sentences above 0.7 threshold
data/en_docs/id_2746.sent
data/th_docs/id_2746.sent
(60, 512) (3, 512) (60, 3)
1 sentences above 0.7 threshold
data/en_docs/id_2747.sent
data/th_docs/id_2747.sent
(9, 512) (3, 

(9, 512) (12, 512) (9, 12)
2 sentences above 0.7 threshold
data/en_docs/id_281.sent
data/th_docs/id_281.sent
(285, 512) (54, 512) (285, 54)
14 sentences above 0.7 threshold
data/en_docs/id_2810.sent
data/th_docs/id_2810.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2811.sent
data/th_docs/id_2811.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2812.sent
data/th_docs/id_2812.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2813.sent
data/th_docs/id_2813.sent
(3, 512) (18, 512) (3, 18)
3 sentences above 0.7 threshold
data/en_docs/id_2814.sent
data/th_docs/id_2814.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2815.sent
data/th_docs/id_2815.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2816.sent
data/th_docs/id_2816.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2817.sent
data/th_docs/id_2817.sent
(3, 512) (

(21, 512) (27, 512) (21, 27)
6 sentences above 0.7 threshold
data/en_docs/id_2878.sent
data/th_docs/id_2878.sent
(213, 512) (546, 512) (213, 546)
28 sentences above 0.7 threshold
data/en_docs/id_2879.sent
data/th_docs/id_2879.sent
(12, 512) (6, 512) (12, 6)
5 sentences above 0.7 threshold
data/en_docs/id_288.sent
data/th_docs/id_288.sent
(54, 512) (9, 512) (54, 9)
6 sentences above 0.7 threshold
data/en_docs/id_2880.sent
data/th_docs/id_2880.sent
(72, 512) (6, 512) (72, 6)
5 sentences above 0.7 threshold
data/en_docs/id_2881.sent
data/th_docs/id_2881.sent
(3, 512) (6, 512) (3, 6)
3 sentences above 0.7 threshold
data/en_docs/id_2882.sent
data/th_docs/id_2882.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2883.sent
data/th_docs/id_2883.sent
(90, 512) (3, 512) (90, 3)
5 sentences above 0.7 threshold
data/en_docs/id_2884.sent
data/th_docs/id_2884.sent
(72, 512) (3, 512) (72, 3)
5 sentences above 0.7 threshold
data/en_docs/id_2885.sent
data/th_docs/id_2885.sen

7 sentences above 0.7 threshold
data/en_docs/id_2945.sent
data/th_docs/id_2945.sent
(15, 512) (18, 512) (15, 18)
10 sentences above 0.7 threshold
data/en_docs/id_2946.sent
data/th_docs/id_2946.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_2947.sent
data/th_docs/id_2947.sent
(9, 512) (6, 512) (9, 6)
1 sentences above 0.7 threshold
data/en_docs/id_2948.sent
data/th_docs/id_2948.sent
(705, 512) (12, 512) (705, 12)
30 sentences above 0.7 threshold
data/en_docs/id_2949.sent
data/th_docs/id_2949.sent
(261, 512) (3, 512) (261, 3)
1 sentences above 0.7 threshold
data/en_docs/id_295.sent
data/th_docs/id_295.sent
(42, 512) (24, 512) (42, 24)
2 sentences above 0.7 threshold
data/en_docs/id_2950.sent
data/th_docs/id_2950.sent
(6, 512) (3, 512) (6, 3)
4 sentences above 0.7 threshold
data/en_docs/id_2951.sent
data/th_docs/id_2951.sent
(57, 512) (3, 512) (57, 3)
5 sentences above 0.7 threshold
data/en_docs/id_2952.sent
data/th_docs/id_2952.sent
(24, 512) (3, 512) (24, 

6 sentences above 0.7 threshold
data/en_docs/id_3014.sent
data/th_docs/id_3014.sent
(15, 512) (9, 512) (15, 9)
2 sentences above 0.7 threshold
data/en_docs/id_3015.sent
data/th_docs/id_3015.sent
(9, 512) (36, 512) (9, 36)
3 sentences above 0.7 threshold
data/en_docs/id_3016.sent
data/th_docs/id_3016.sent
(162, 512) (54, 512) (162, 54)
4 sentences above 0.7 threshold
data/en_docs/id_3017.sent
data/th_docs/id_3017.sent
(603, 512) (132, 512) (603, 132)
35 sentences above 0.7 threshold
data/en_docs/id_3018.sent
data/th_docs/id_3018.sent
(342, 512) (27, 512) (342, 27)
6 sentences above 0.7 threshold
data/en_docs/id_3019.sent
data/th_docs/id_3019.sent
(54, 512) (3, 512) (54, 3)
2 sentences above 0.7 threshold
data/en_docs/id_302.sent
data/th_docs/id_302.sent
(42, 512) (3, 512) (42, 3)
3 sentences above 0.7 threshold
data/en_docs/id_3020.sent
data/th_docs/id_3020.sent
(30, 512) (90, 512) (30, 90)
2 sentences above 0.7 threshold
data/en_docs/id_3021.sent
data/th_docs/id_3021.sent
(21, 512) (45

(27, 512) (12, 512) (27, 12)
4 sentences above 0.7 threshold
data/en_docs/id_3086.sent
data/th_docs/id_3086.sent
(174, 512) (12, 512) (174, 12)
2 sentences above 0.7 threshold
data/en_docs/id_3087.sent
data/th_docs/id_3087.sent
(162, 512) (42, 512) (162, 42)
38 sentences above 0.7 threshold
data/en_docs/id_3088.sent
data/th_docs/id_3088.sent
(3, 512) (24, 512) (3, 24)
1 sentences above 0.7 threshold
data/en_docs/id_3089.sent
data/th_docs/id_3089.sent
(27, 512) (186, 512) (27, 186)
15 sentences above 0.7 threshold
data/en_docs/id_309.sent
data/th_docs/id_309.sent
skipping...
data/en_docs/id_3090.sent
data/th_docs/id_3090.sent
(3, 512) (9, 512) (3, 9)
1 sentences above 0.7 threshold
data/en_docs/id_3091.sent
data/th_docs/id_3091.sent
(3, 512) (18, 512) (3, 18)
1 sentences above 0.7 threshold
data/en_docs/id_3092.sent
data/th_docs/id_3092.sent
(84, 512) (33, 512) (84, 33)
4 sentences above 0.7 threshold
data/en_docs/id_3093.sent
data/th_docs/id_3093.sent
(24, 512) (3, 512) (24, 3)
3 sente

(45, 512) (9, 512) (45, 9)
1 sentences above 0.7 threshold
data/en_docs/id_3161.sent
data/th_docs/id_3161.sent
(45, 512) (30, 512) (45, 30)
2 sentences above 0.7 threshold
data/en_docs/id_3162.sent
data/th_docs/id_3162.sent
(3, 512) (18, 512) (3, 18)
1 sentences above 0.7 threshold
data/en_docs/id_3163.sent
data/th_docs/id_3163.sent
(24, 512) (87, 512) (24, 87)
1 sentences above 0.7 threshold
data/en_docs/id_3164.sent
data/th_docs/id_3164.sent
(12, 512) (6, 512) (12, 6)
1 sentences above 0.7 threshold
data/en_docs/id_3165.sent
data/th_docs/id_3165.sent
(45, 512) (27, 512) (45, 27)
2 sentences above 0.7 threshold
data/en_docs/id_3166.sent
data/th_docs/id_3166.sent
(759, 512) (3, 512) (759, 3)
16 sentences above 0.7 threshold
data/en_docs/id_3167.sent
data/th_docs/id_3167.sent
(6, 512) (3, 512) (6, 3)
3 sentences above 0.7 threshold
data/en_docs/id_3168.sent
data/th_docs/id_3168.sent
(9, 512) (12, 512) (9, 12)
1 sentences above 0.7 threshold
data/en_docs/id_3169.sent
data/th_docs/id_3169

(15, 512) (3, 512) (15, 3)
1 sentences above 0.7 threshold
data/en_docs/id_3234.sent
data/th_docs/id_3234.sent
(78, 512) (3, 512) (78, 3)
1 sentences above 0.7 threshold
data/en_docs/id_3235.sent
data/th_docs/id_3235.sent
(3, 512) (42, 512) (3, 42)
1 sentences above 0.7 threshold
data/en_docs/id_3236.sent
data/th_docs/id_3236.sent
(171, 512) (6, 512) (171, 6)
1 sentences above 0.7 threshold
data/en_docs/id_3237.sent
data/th_docs/id_3237.sent
(27, 512) (6, 512) (27, 6)
1 sentences above 0.7 threshold
data/en_docs/id_3238.sent
data/th_docs/id_3238.sent
(102, 512) (90, 512) (102, 90)
5 sentences above 0.7 threshold
data/en_docs/id_3239.sent
data/th_docs/id_3239.sent
(198, 512) (60, 512) (198, 60)
5 sentences above 0.7 threshold
data/en_docs/id_324.sent
data/th_docs/id_324.sent
skipping...
data/en_docs/id_3240.sent
data/th_docs/id_3240.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_3241.sent
data/th_docs/id_3241.sent
(3, 512) (3, 512) (3, 3)
3 sentences above

(114, 512) (36, 512) (114, 36)
7 sentences above 0.7 threshold
data/en_docs/id_3307.sent
data/th_docs/id_3307.sent
(81, 512) (78, 512) (81, 78)
6 sentences above 0.7 threshold
data/en_docs/id_3308.sent
data/th_docs/id_3308.sent
(3, 512) (63, 512) (3, 63)
1 sentences above 0.7 threshold
data/en_docs/id_3309.sent
data/th_docs/id_3309.sent
(24, 512) (18, 512) (24, 18)
1 sentences above 0.7 threshold
data/en_docs/id_331.sent
data/th_docs/id_331.sent
skipping...
data/en_docs/id_3310.sent
data/th_docs/id_3310.sent
(24, 512) (45, 512) (24, 45)
1 sentences above 0.7 threshold
data/en_docs/id_3311.sent
data/th_docs/id_3311.sent
(3, 512) (69, 512) (3, 69)
1 sentences above 0.7 threshold
data/en_docs/id_3312.sent
data/th_docs/id_3312.sent
(24, 512) (150, 512) (24, 150)
1 sentences above 0.7 threshold
data/en_docs/id_3313.sent
data/th_docs/id_3313.sent
(21, 512) (114, 512) (21, 114)
2 sentences above 0.7 threshold
data/en_docs/id_3314.sent
data/th_docs/id_3314.sent
(24, 512) (90, 512) (24, 90)
1 s

(54, 512) (3, 512) (54, 3)
1 sentences above 0.7 threshold
data/en_docs/id_3376.sent
data/th_docs/id_3376.sent
(81, 512) (27, 512) (81, 27)
6 sentences above 0.7 threshold
data/en_docs/id_3377.sent
data/th_docs/id_3377.sent
(51, 512) (3, 512) (51, 3)
1 sentences above 0.7 threshold
data/en_docs/id_3378.sent
data/th_docs/id_3378.sent
(33, 512) (39, 512) (33, 39)
1 sentences above 0.7 threshold
data/en_docs/id_3379.sent
data/th_docs/id_3379.sent
(156, 512) (24, 512) (156, 24)
5 sentences above 0.7 threshold
data/en_docs/id_338.sent
data/th_docs/id_338.sent
skipping...
data/en_docs/id_3380.sent
data/th_docs/id_3380.sent
(27, 512) (12, 512) (27, 12)
1 sentences above 0.7 threshold
data/en_docs/id_3381.sent
data/th_docs/id_3381.sent
(81, 512) (18, 512) (81, 18)
1 sentences above 0.7 threshold
data/en_docs/id_3382.sent
data/th_docs/id_3382.sent
(81, 512) (9, 512) (81, 9)
1 sentences above 0.7 threshold
data/en_docs/id_3383.sent
data/th_docs/id_3383.sent
(30, 512) (12, 512) (30, 12)
1 sentenc

(201, 512) (372, 512) (201, 372)
7 sentences above 0.7 threshold
data/en_docs/id_3445.sent
data/th_docs/id_3445.sent
(27, 512) (36, 512) (27, 36)
1 sentences above 0.7 threshold
data/en_docs/id_3446.sent
data/th_docs/id_3446.sent
(72, 512) (18, 512) (72, 18)
1 sentences above 0.7 threshold
data/en_docs/id_3447.sent
data/th_docs/id_3447.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_3448.sent
data/th_docs/id_3448.sent
(3, 512) (42, 512) (3, 42)
1 sentences above 0.7 threshold
data/en_docs/id_3449.sent
data/th_docs/id_3449.sent
(6, 512) (3, 512) (6, 3)
4 sentences above 0.7 threshold
data/en_docs/id_345.sent
data/th_docs/id_345.sent
skipping...
data/en_docs/id_3450.sent
data/th_docs/id_3450.sent
(1797, 512) (45, 512) (1797, 45)
34 sentences above 0.7 threshold
data/en_docs/id_3451.sent
data/th_docs/id_3451.sent
(252, 512) (3, 512) (252, 3)
6 sentences above 0.7 threshold
data/en_docs/id_3452.sent
data/th_docs/id_3452.sent
(3, 512) (3, 512) (3, 3)
3 sentence

10 sentences above 0.7 threshold
data/en_docs/id_3516.sent
data/th_docs/id_3516.sent
(303, 512) (3, 512) (303, 3)
17 sentences above 0.7 threshold
data/en_docs/id_3517.sent
data/th_docs/id_3517.sent
(48, 512) (3, 512) (48, 3)
3 sentences above 0.7 threshold
data/en_docs/id_3518.sent
data/th_docs/id_3518.sent
(39, 512) (3, 512) (39, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3519.sent
data/th_docs/id_3519.sent
(30, 512) (3, 512) (30, 3)
4 sentences above 0.7 threshold
data/en_docs/id_352.sent
data/th_docs/id_352.sent
skipping...
data/en_docs/id_3520.sent
data/th_docs/id_3520.sent
(72, 512) (3, 512) (72, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3521.sent
data/th_docs/id_3521.sent
(618, 512) (39, 512) (618, 39)
86 sentences above 0.7 threshold
data/en_docs/id_3522.sent
data/th_docs/id_3522.sent
(447, 512) (18, 512) (447, 18)
16 sentences above 0.7 threshold
data/en_docs/id_3523.sent
data/th_docs/id_3523.sent
(48, 512) (12, 512) (48, 12)
1 sentences above 0.7 threshold
da

1 sentences above 0.7 threshold
data/en_docs/id_3589.sent
data/th_docs/id_3589.sent
(783, 512) (6, 512) (783, 6)
11 sentences above 0.7 threshold
data/en_docs/id_359.sent
data/th_docs/id_359.sent
skipping...
data/en_docs/id_3590.sent
data/th_docs/id_3590.sent
(27, 512) (3, 512) (27, 3)
1 sentences above 0.7 threshold
data/en_docs/id_3591.sent
data/th_docs/id_3591.sent
(57, 512) (3, 512) (57, 3)
6 sentences above 0.7 threshold
data/en_docs/id_3592.sent
data/th_docs/id_3592.sent
(6, 512) (3, 512) (6, 3)
3 sentences above 0.7 threshold
data/en_docs/id_3593.sent
data/th_docs/id_3593.sent
(48, 512) (3, 512) (48, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3594.sent
data/th_docs/id_3594.sent
(39, 512) (6, 512) (39, 6)
3 sentences above 0.7 threshold
data/en_docs/id_3595.sent
data/th_docs/id_3595.sent
(78, 512) (3, 512) (78, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3596.sent
data/th_docs/id_3596.sent
(12, 512) (3, 512) (12, 3)
3 sentences above 0.7 threshold
data/en_docs/id_3

(72, 512) (9, 512) (72, 9)
5 sentences above 0.7 threshold
data/en_docs/id_3659.sent
data/th_docs/id_3659.sent
(21, 512) (9, 512) (21, 9)
4 sentences above 0.7 threshold
data/en_docs/id_366.sent
data/th_docs/id_366.sent
(18, 512) (9, 512) (18, 9)
4 sentences above 0.7 threshold
data/en_docs/id_3660.sent
data/th_docs/id_3660.sent
(3, 512) (9, 512) (3, 9)
3 sentences above 0.7 threshold
data/en_docs/id_3661.sent
data/th_docs/id_3661.sent
(99, 512) (9, 512) (99, 9)
8 sentences above 0.7 threshold
data/en_docs/id_3662.sent
data/th_docs/id_3662.sent
(12, 512) (3, 512) (12, 3)
2 sentences above 0.7 threshold
data/en_docs/id_3663.sent
data/th_docs/id_3663.sent
(54, 512) (3, 512) (54, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3664.sent
data/th_docs/id_3664.sent
(126, 512) (3, 512) (126, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3665.sent
data/th_docs/id_3665.sent
(42, 512) (3, 512) (42, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3666.sent
data/th_docs/id_3666.sent
(54

(2463, 512) (1332, 512) (2463, 1332)
521 sentences above 0.7 threshold
data/en_docs/id_3726.sent
data/th_docs/id_3726.sent
(237, 512) (27, 512) (237, 27)
1 sentences above 0.7 threshold
data/en_docs/id_3727.sent
data/th_docs/id_3727.sent
(588, 512) (18, 512) (588, 18)
3 sentences above 0.7 threshold
data/en_docs/id_3728.sent
data/th_docs/id_3728.sent
(942, 512) (3, 512) (942, 3)
12 sentences above 0.7 threshold
data/en_docs/id_3729.sent
data/th_docs/id_3729.sent
(540, 512) (30, 512) (540, 30)
13 sentences above 0.7 threshold
data/en_docs/id_373.sent
data/th_docs/id_373.sent
(36, 512) (18, 512) (36, 18)
1 sentences above 0.7 threshold
data/en_docs/id_3730.sent
data/th_docs/id_3730.sent
(252, 512) (9, 512) (252, 9)
1 sentences above 0.7 threshold
data/en_docs/id_3731.sent
data/th_docs/id_3731.sent
(6, 512) (24, 512) (6, 24)
1 sentences above 0.7 threshold
data/en_docs/id_3732.sent
data/th_docs/id_3732.sent
(390, 512) (60, 512) (390, 60)
63 sentences above 0.7 threshold
data/en_docs/id_37

5 sentences above 0.7 threshold
data/en_docs/id_3792.sent
data/th_docs/id_3792.sent
(96, 512) (3, 512) (96, 3)
3 sentences above 0.7 threshold
data/en_docs/id_3793.sent
data/th_docs/id_3793.sent
(216, 512) (3, 512) (216, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3794.sent
data/th_docs/id_3794.sent
(213, 512) (3, 512) (213, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3795.sent
data/th_docs/id_3795.sent
(219, 512) (3, 512) (219, 3)
5 sentences above 0.7 threshold
data/en_docs/id_3796.sent
data/th_docs/id_3796.sent
(48, 512) (3, 512) (48, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3797.sent
data/th_docs/id_3797.sent
(213, 512) (3, 512) (213, 3)
4 sentences above 0.7 threshold
data/en_docs/id_3798.sent
data/th_docs/id_3798.sent
(15, 512) (3, 512) (15, 3)
5 sentences above 0.7 threshold
data/en_docs/id_3799.sent
data/th_docs/id_3799.sent
(189, 512) (3, 512) (189, 3)
4 sentences above 0.7 threshold
data/en_docs/id_38.sent
data/th_docs/id_38.sent
(33, 512) (3, 512) (33

(111, 512) (48, 512) (111, 48)
5 sentences above 0.7 threshold
data/en_docs/id_386.sent
data/th_docs/id_386.sent
(39, 512) (6, 512) (39, 6)
1 sentences above 0.7 threshold
data/en_docs/id_3860.sent
data/th_docs/id_3860.sent
(579, 512) (30, 512) (579, 30)
2 sentences above 0.7 threshold
data/en_docs/id_3861.sent
data/th_docs/id_3861.sent
(99, 512) (15, 512) (99, 15)
11 sentences above 0.7 threshold
data/en_docs/id_3862.sent
data/th_docs/id_3862.sent
(12, 512) (6, 512) (12, 6)
1 sentences above 0.7 threshold
data/en_docs/id_3863.sent
data/th_docs/id_3863.sent
(249, 512) (120, 512) (249, 120)
40 sentences above 0.7 threshold
data/en_docs/id_3864.sent
data/th_docs/id_3864.sent
(84, 512) (6, 512) (84, 6)
6 sentences above 0.7 threshold
data/en_docs/id_3865.sent
data/th_docs/id_3865.sent
(6, 512) (15, 512) (6, 15)
1 sentences above 0.7 threshold
data/en_docs/id_3866.sent
data/th_docs/id_3866.sent
(6, 512) (36, 512) (6, 36)
6 sentences above 0.7 threshold
data/en_docs/id_3867.sent
data/th_doc

35 sentences above 0.7 threshold
data/en_docs/id_3926.sent
data/th_docs/id_3926.sent
(183, 512) (6, 512) (183, 6)
1 sentences above 0.7 threshold
data/en_docs/id_3927.sent
data/th_docs/id_3927.sent
(45, 512) (12, 512) (45, 12)
1 sentences above 0.7 threshold
data/en_docs/id_3928.sent
data/th_docs/id_3928.sent
(39, 512) (3, 512) (39, 3)
6 sentences above 0.7 threshold
data/en_docs/id_3929.sent
data/th_docs/id_3929.sent
(93, 512) (270, 512) (93, 270)
18 sentences above 0.7 threshold
data/en_docs/id_393.sent
data/th_docs/id_393.sent
(99, 512) (6, 512) (99, 6)
9 sentences above 0.7 threshold
data/en_docs/id_3930.sent
data/th_docs/id_3930.sent
(63, 512) (9, 512) (63, 9)
2 sentences above 0.7 threshold
data/en_docs/id_3931.sent
data/th_docs/id_3931.sent
(21, 512) (27, 512) (21, 27)
1 sentences above 0.7 threshold
data/en_docs/id_3932.sent
data/th_docs/id_3932.sent
(6, 512) (24, 512) (6, 24)
1 sentences above 0.7 threshold
data/en_docs/id_3933.sent
data/th_docs/id_3933.sent
(765, 512) (45, 51

(24, 512) (15, 512) (24, 15)
5 sentences above 0.7 threshold
data/en_docs/id_3996.sent
data/th_docs/id_3996.sent
(27, 512) (84, 512) (27, 84)
2 sentences above 0.7 threshold
data/en_docs/id_3997.sent
data/th_docs/id_3997.sent
(6, 512) (3, 512) (6, 3)
2 sentences above 0.7 threshold
data/en_docs/id_3998.sent
data/th_docs/id_3998.sent
(9, 512) (18, 512) (9, 18)
7 sentences above 0.7 threshold
data/en_docs/id_3999.sent
data/th_docs/id_3999.sent
(21, 512) (6, 512) (21, 6)
4 sentences above 0.7 threshold
data/en_docs/id_4.sent
data/th_docs/id_4.sent
(21, 512) (3, 512) (21, 3)
5 sentences above 0.7 threshold
data/en_docs/id_40.sent
data/th_docs/id_40.sent
(15, 512) (3, 512) (15, 3)
5 sentences above 0.7 threshold
data/en_docs/id_400.sent
data/th_docs/id_400.sent
(66, 512) (21, 512) (66, 21)
2 sentences above 0.7 threshold
data/en_docs/id_4000.sent
data/th_docs/id_4000.sent
(24, 512) (3, 512) (24, 3)
4 sentences above 0.7 threshold
data/en_docs/id_4001.sent
data/th_docs/id_4001.sent
(102, 512

5 sentences above 0.7 threshold
data/en_docs/id_4064.sent
data/th_docs/id_4064.sent
(60, 512) (3, 512) (60, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4065.sent
data/th_docs/id_4065.sent
(15, 512) (3, 512) (15, 3)
1 sentences above 0.7 threshold
data/en_docs/id_4066.sent
data/th_docs/id_4066.sent
(6, 512) (3, 512) (6, 3)
1 sentences above 0.7 threshold
data/en_docs/id_4067.sent
data/th_docs/id_4067.sent
(69, 512) (3, 512) (69, 3)
6 sentences above 0.7 threshold
data/en_docs/id_4068.sent
data/th_docs/id_4068.sent
(54, 512) (3, 512) (54, 3)
4 sentences above 0.7 threshold
data/en_docs/id_4069.sent
data/th_docs/id_4069.sent
(48, 512) (3, 512) (48, 3)
4 sentences above 0.7 threshold
data/en_docs/id_407.sent
data/th_docs/id_407.sent
(39, 512) (3, 512) (39, 3)
1 sentences above 0.7 threshold
data/en_docs/id_4070.sent
data/th_docs/id_4070.sent
(36, 512) (3, 512) (36, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4071.sent
data/th_docs/id_4071.sent
(51, 512) (3, 512) (51, 3)
5 sen

59 sentences above 0.7 threshold
data/en_docs/id_4130.sent
data/th_docs/id_4130.sent
(75, 512) (3, 512) (75, 3)
2 sentences above 0.7 threshold
data/en_docs/id_4131.sent
data/th_docs/id_4131.sent
(12, 512) (3, 512) (12, 3)
1 sentences above 0.7 threshold
data/en_docs/id_4132.sent
data/th_docs/id_4132.sent
(48, 512) (9, 512) (48, 9)
4 sentences above 0.7 threshold
data/en_docs/id_4133.sent
data/th_docs/id_4133.sent
(36, 512) (6, 512) (36, 6)
2 sentences above 0.7 threshold
data/en_docs/id_4134.sent
data/th_docs/id_4134.sent
(12, 512) (255, 512) (12, 255)
1 sentences above 0.7 threshold
data/en_docs/id_4135.sent
data/th_docs/id_4135.sent
(27, 512) (6, 512) (27, 6)
6 sentences above 0.7 threshold
data/en_docs/id_4136.sent
data/th_docs/id_4136.sent
(72, 512) (3, 512) (72, 3)
6 sentences above 0.7 threshold
data/en_docs/id_4137.sent
data/th_docs/id_4137.sent
(72, 512) (3, 512) (72, 3)
5 sentences above 0.7 threshold
data/en_docs/id_4138.sent
data/th_docs/id_4138.sent
(66, 512) (3, 512) (66,

(48, 512) (3, 512) (48, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4198.sent
data/th_docs/id_4198.sent
(12, 512) (168, 512) (12, 168)
1 sentences above 0.7 threshold
data/en_docs/id_4199.sent
data/th_docs/id_4199.sent
(54, 512) (144, 512) (54, 144)
3 sentences above 0.7 threshold
data/en_docs/id_42.sent
data/th_docs/id_42.sent
(9, 512) (3, 512) (9, 3)
5 sentences above 0.7 threshold
data/en_docs/id_420.sent
data/th_docs/id_420.sent
(915, 512) (60, 512) (915, 60)
23 sentences above 0.7 threshold
data/en_docs/id_4200.sent
data/th_docs/id_4200.sent
(390, 512) (27, 512) (390, 27)
3 sentences above 0.7 threshold
data/en_docs/id_4201.sent
data/th_docs/id_4201.sent
(24, 512) (9, 512) (24, 9)
7 sentences above 0.7 threshold
data/en_docs/id_4202.sent
data/th_docs/id_4202.sent
(54, 512) (3, 512) (54, 3)
5 sentences above 0.7 threshold
data/en_docs/id_4203.sent
data/th_docs/id_4203.sent
(24, 512) (3, 512) (24, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4204.sent
data/th_docs/id_42

13 sentences above 0.7 threshold
data/en_docs/id_4263.sent
data/th_docs/id_4263.sent
(72, 512) (21, 512) (72, 21)
17 sentences above 0.7 threshold
data/en_docs/id_4264.sent
data/th_docs/id_4264.sent
(12, 512) (3, 512) (12, 3)
6 sentences above 0.7 threshold
data/en_docs/id_4265.sent
data/th_docs/id_4265.sent
(393, 512) (6, 512) (393, 6)
1 sentences above 0.7 threshold
data/en_docs/id_4266.sent
data/th_docs/id_4266.sent
(1704, 512) (69, 512) (1704, 69)
112 sentences above 0.7 threshold
data/en_docs/id_4267.sent
data/th_docs/id_4267.sent
(24, 512) (12, 512) (24, 12)
2 sentences above 0.7 threshold
data/en_docs/id_4268.sent
data/th_docs/id_4268.sent
(1404, 512) (27, 512) (1404, 27)
5 sentences above 0.7 threshold
data/en_docs/id_4269.sent
data/th_docs/id_4269.sent
(33, 512) (9, 512) (33, 9)
2 sentences above 0.7 threshold
data/en_docs/id_427.sent
data/th_docs/id_427.sent
(84, 512) (15, 512) (84, 15)
8 sentences above 0.7 threshold
data/en_docs/id_4270.sent
data/th_docs/id_4270.sent
(240, 

(3, 512) (39, 512) (3, 39)
1 sentences above 0.7 threshold
data/en_docs/id_4329.sent
data/th_docs/id_4329.sent
(633, 512) (90, 512) (633, 90)
7 sentences above 0.7 threshold
data/en_docs/id_433.sent
data/th_docs/id_433.sent
(3, 512) (15, 512) (3, 15)
1 sentences above 0.7 threshold
data/en_docs/id_4330.sent
data/th_docs/id_4330.sent
(57, 512) (9, 512) (57, 9)
2 sentences above 0.7 threshold
data/en_docs/id_4331.sent
data/th_docs/id_4331.sent
(6, 512) (48, 512) (6, 48)
1 sentences above 0.7 threshold
data/en_docs/id_4332.sent
data/th_docs/id_4332.sent
(3, 512) (21, 512) (3, 21)
1 sentences above 0.7 threshold
data/en_docs/id_4333.sent
data/th_docs/id_4333.sent
(63, 512) (6, 512) (63, 6)
1 sentences above 0.7 threshold
data/en_docs/id_4334.sent
data/th_docs/id_4334.sent
(57, 512) (6, 512) (57, 6)
1 sentences above 0.7 threshold
data/en_docs/id_4335.sent
data/th_docs/id_4335.sent
(42, 512) (18, 512) (42, 18)
1 sentences above 0.7 threshold
data/en_docs/id_4336.sent
data/th_docs/id_4336.se

39 sentences above 0.7 threshold
data/en_docs/id_4398.sent
data/th_docs/id_4398.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_4399.sent
data/th_docs/id_4399.sent
(234, 512) (27, 512) (234, 27)
17 sentences above 0.7 threshold
data/en_docs/id_44.sent
data/th_docs/id_44.sent
(294, 512) (21, 512) (294, 21)
17 sentences above 0.7 threshold
data/en_docs/id_440.sent
data/th_docs/id_440.sent
(24, 512) (45, 512) (24, 45)
1 sentences above 0.7 threshold
data/en_docs/id_4400.sent
data/th_docs/id_4400.sent
(6, 512) (99, 512) (6, 99)
1 sentences above 0.7 threshold
data/en_docs/id_4401.sent
data/th_docs/id_4401.sent
(288, 512) (12, 512) (288, 12)
8 sentences above 0.7 threshold
data/en_docs/id_4402.sent
data/th_docs/id_4402.sent
(546, 512) (15, 512) (546, 15)
9 sentences above 0.7 threshold
data/en_docs/id_4403.sent
data/th_docs/id_4403.sent
(180, 512) (18, 512) (180, 18)
2 sentences above 0.7 threshold
data/en_docs/id_4404.sent
data/th_docs/id_4404.sent
(93, 512) (

(984, 512) (387, 512) (984, 387)
161 sentences above 0.7 threshold
data/en_docs/id_4464.sent
data/th_docs/id_4464.sent
(195, 512) (21, 512) (195, 21)
1 sentences above 0.7 threshold
data/en_docs/id_4465.sent
data/th_docs/id_4465.sent
(276, 512) (9, 512) (276, 9)
25 sentences above 0.7 threshold
data/en_docs/id_4466.sent
data/th_docs/id_4466.sent
(36, 512) (9, 512) (36, 9)
4 sentences above 0.7 threshold
data/en_docs/id_4467.sent
data/th_docs/id_4467.sent
(417, 512) (9, 512) (417, 9)
2 sentences above 0.7 threshold
data/en_docs/id_4468.sent
data/th_docs/id_4468.sent
(186, 512) (15, 512) (186, 15)
2 sentences above 0.7 threshold
data/en_docs/id_4469.sent
data/th_docs/id_4469.sent
(3, 512) (6, 512) (3, 6)
1 sentences above 0.7 threshold
data/en_docs/id_447.sent
data/th_docs/id_447.sent
(549, 512) (3, 512) (549, 3)
1 sentences above 0.7 threshold
data/en_docs/id_4470.sent
data/th_docs/id_4470.sent
(111, 512) (39, 512) (111, 39)
13 sentences above 0.7 threshold
data/en_docs/id_4471.sent
dat

3 sentences above 0.7 threshold
data/en_docs/id_4531.sent
data/th_docs/id_4531.sent
(30, 512) (3, 512) (30, 3)
2 sentences above 0.7 threshold
data/en_docs/id_4532.sent
data/th_docs/id_4532.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_4533.sent
data/th_docs/id_4533.sent
(66, 512) (12, 512) (66, 12)
1 sentences above 0.7 threshold
data/en_docs/id_4534.sent
data/th_docs/id_4534.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4535.sent
data/th_docs/id_4535.sent
(6, 512) (3, 512) (6, 3)
4 sentences above 0.7 threshold
data/en_docs/id_4536.sent
data/th_docs/id_4536.sent
(24, 512) (3, 512) (24, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4537.sent
data/th_docs/id_4537.sent
(216, 512) (6, 512) (216, 6)
1 sentences above 0.7 threshold
data/en_docs/id_4538.sent
data/th_docs/id_4538.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4539.sent
data/th_docs/id_4539.sent
(45, 512) (45, 512) (45, 45)
3 s

3 sentences above 0.7 threshold
data/en_docs/id_46.sent
data/th_docs/id_46.sent
(3, 512) (21, 512) (3, 21)
1 sentences above 0.7 threshold
data/en_docs/id_460.sent
data/th_docs/id_460.sent
(24, 512) (12, 512) (24, 12)
1 sentences above 0.7 threshold
data/en_docs/id_4600.sent
data/th_docs/id_4600.sent
(45, 512) (96, 512) (45, 96)
11 sentences above 0.7 threshold
data/en_docs/id_4601.sent
data/th_docs/id_4601.sent
(36, 512) (24, 512) (36, 24)
1 sentences above 0.7 threshold
data/en_docs/id_4602.sent
data/th_docs/id_4602.sent
(15, 512) (3, 512) (15, 3)
4 sentences above 0.7 threshold
data/en_docs/id_4603.sent
data/th_docs/id_4603.sent
(24, 512) (3, 512) (24, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4604.sent
data/th_docs/id_4604.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_4605.sent
data/th_docs/id_4605.sent
(108, 512) (21, 512) (108, 21)
1 sentences above 0.7 threshold
data/en_docs/id_4606.sent
data/th_docs/id_4606.sent
(24, 512) (3, 512) (24, 3

3 sentences above 0.7 threshold
data/en_docs/id_4665.sent
data/th_docs/id_4665.sent
(6, 512) (60, 512) (6, 60)
1 sentences above 0.7 threshold
data/en_docs/id_4666.sent
data/th_docs/id_4666.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4667.sent
data/th_docs/id_4667.sent
(78, 512) (3, 512) (78, 3)
4 sentences above 0.7 threshold
data/en_docs/id_4668.sent
data/th_docs/id_4668.sent
(27, 512) (3, 512) (27, 3)
2 sentences above 0.7 threshold
data/en_docs/id_4669.sent
data/th_docs/id_4669.sent
(39, 512) (3, 512) (39, 3)
4 sentences above 0.7 threshold
data/en_docs/id_467.sent
data/th_docs/id_467.sent
(987, 512) (27, 512) (987, 27)
16 sentences above 0.7 threshold
data/en_docs/id_4670.sent
data/th_docs/id_4670.sent
(111, 512) (3, 512) (111, 3)
4 sentences above 0.7 threshold
data/en_docs/id_4671.sent
data/th_docs/id_4671.sent
(63, 512) (3, 512) (63, 3)
3 sentences above 0.7 threshold
data/en_docs/id_4672.sent
data/th_docs/id_4672.sent
(147, 512) (3, 512) (147,

(300, 512) (3, 512) (300, 3)
6 sentences above 0.7 threshold
data/en_docs/id_4732.sent
data/th_docs/id_4732.sent
(750, 512) (141, 512) (750, 141)
159 sentences above 0.7 threshold
data/en_docs/id_4733.sent
data/th_docs/id_4733.sent
(153, 512) (18, 512) (153, 18)
1 sentences above 0.7 threshold
data/en_docs/id_4734.sent
data/th_docs/id_4734.sent
(84, 512) (51, 512) (84, 51)
26 sentences above 0.7 threshold
data/en_docs/id_4735.sent
data/th_docs/id_4735.sent
(12, 512) (57, 512) (12, 57)
1 sentences above 0.7 threshold
data/en_docs/id_4736.sent
data/th_docs/id_4736.sent
(63, 512) (6, 512) (63, 6)
1 sentences above 0.7 threshold
data/en_docs/id_4737.sent
data/th_docs/id_4737.sent
(174, 512) (72, 512) (174, 72)
31 sentences above 0.7 threshold
data/en_docs/id_4738.sent
data/th_docs/id_4738.sent
(630, 512) (144, 512) (630, 144)
40 sentences above 0.7 threshold
data/en_docs/id_4739.sent
data/th_docs/id_4739.sent
(1644, 512) (27, 512) (1644, 27)
19 sentences above 0.7 threshold
data/en_docs/id

(132, 512) (21, 512) (132, 21)
17 sentences above 0.7 threshold
data/en_docs/id_48.sent
data/th_docs/id_48.sent
(36, 512) (12, 512) (36, 12)
1 sentences above 0.7 threshold
data/en_docs/id_480.sent
data/th_docs/id_480.sent
(195, 512) (69, 512) (195, 69)
11 sentences above 0.7 threshold
data/en_docs/id_4800.sent
data/th_docs/id_4800.sent
(375, 512) (6, 512) (375, 6)
27 sentences above 0.7 threshold
data/en_docs/id_4801.sent
data/th_docs/id_4801.sent
(183, 512) (18, 512) (183, 18)
3 sentences above 0.7 threshold
data/en_docs/id_4802.sent
data/th_docs/id_4802.sent
(531, 512) (18, 512) (531, 18)
17 sentences above 0.7 threshold
data/en_docs/id_4803.sent
data/th_docs/id_4803.sent
(3, 512) (48, 512) (3, 48)
3 sentences above 0.7 threshold
data/en_docs/id_4804.sent
data/th_docs/id_4804.sent
(297, 512) (12, 512) (297, 12)
29 sentences above 0.7 threshold
data/en_docs/id_4805.sent
data/th_docs/id_4805.sent
(1413, 512) (591, 512) (1413, 591)
211 sentences above 0.7 threshold
data/en_docs/id_4806

(888, 512) (51, 512) (888, 51)
37 sentences above 0.7 threshold
data/en_docs/id_4865.sent
data/th_docs/id_4865.sent
(285, 512) (3, 512) (285, 3)
5 sentences above 0.7 threshold
data/en_docs/id_4866.sent
data/th_docs/id_4866.sent
(3, 512) (9, 512) (3, 9)
1 sentences above 0.7 threshold
data/en_docs/id_4867.sent
data/th_docs/id_4867.sent
(72, 512) (21, 512) (72, 21)
1 sentences above 0.7 threshold
data/en_docs/id_4868.sent
data/th_docs/id_4868.sent
(33, 512) (42, 512) (33, 42)
1 sentences above 0.7 threshold
data/en_docs/id_4869.sent
data/th_docs/id_4869.sent
(624, 512) (9, 512) (624, 9)
9 sentences above 0.7 threshold
data/en_docs/id_487.sent
data/th_docs/id_487.sent
(300, 512) (6, 512) (300, 6)
1 sentences above 0.7 threshold
data/en_docs/id_4870.sent
data/th_docs/id_4870.sent
(345, 512) (45, 512) (345, 45)
16 sentences above 0.7 threshold
data/en_docs/id_4871.sent
data/th_docs/id_4871.sent
(33, 512) (99, 512) (33, 99)
1 sentences above 0.7 threshold
data/en_docs/id_4872.sent
data/th_d

11 sentences above 0.7 threshold
data/en_docs/id_4930.sent
data/th_docs/id_4930.sent
(657, 512) (21, 512) (657, 21)
17 sentences above 0.7 threshold
data/en_docs/id_4931.sent
data/th_docs/id_4931.sent
(294, 512) (12, 512) (294, 12)
17 sentences above 0.7 threshold
data/en_docs/id_4932.sent
data/th_docs/id_4932.sent
(159, 512) (33, 512) (159, 33)
3 sentences above 0.7 threshold
data/en_docs/id_4933.sent
data/th_docs/id_4933.sent
(81, 512) (6, 512) (81, 6)
8 sentences above 0.7 threshold
data/en_docs/id_4934.sent
data/th_docs/id_4934.sent
(51, 512) (12, 512) (51, 12)
10 sentences above 0.7 threshold
data/en_docs/id_4935.sent
data/th_docs/id_4935.sent
(39, 512) (6, 512) (39, 6)
11 sentences above 0.7 threshold
data/en_docs/id_4936.sent
data/th_docs/id_4936.sent
(60, 512) (33, 512) (60, 33)
17 sentences above 0.7 threshold
data/en_docs/id_4937.sent
data/th_docs/id_4937.sent
(342, 512) (12, 512) (342, 12)
10 sentences above 0.7 threshold
data/en_docs/id_4938.sent
data/th_docs/id_4938.sent
(

15 sentences above 0.7 threshold
data/en_docs/id_4999.sent
data/th_docs/id_4999.sent
(3, 512) (36, 512) (3, 36)
1 sentences above 0.7 threshold
data/en_docs/id_5.sent
data/th_docs/id_5.sent
(87, 512) (3, 512) (87, 3)
5 sentences above 0.7 threshold
data/en_docs/id_50.sent
data/th_docs/id_50.sent
(54, 512) (3, 512) (54, 3)
6 sentences above 0.7 threshold
data/en_docs/id_500.sent
data/th_docs/id_500.sent
skipping...
data/en_docs/id_5000.sent
data/th_docs/id_5000.sent
(786, 512) (417, 512) (786, 417)
364 sentences above 0.7 threshold
data/en_docs/id_5001.sent
data/th_docs/id_5001.sent
(3, 512) (33, 512) (3, 33)
1 sentences above 0.7 threshold
data/en_docs/id_5002.sent
data/th_docs/id_5002.sent
(63, 512) (48, 512) (63, 48)
3 sentences above 0.7 threshold
data/en_docs/id_5003.sent
data/th_docs/id_5003.sent
(93, 512) (3, 512) (93, 3)
2 sentences above 0.7 threshold
data/en_docs/id_5004.sent
data/th_docs/id_5004.sent
(99, 512) (3, 512) (99, 3)
7 sentences above 0.7 threshold
data/en_docs/id_5

(183, 512) (63, 512) (183, 63)
14 sentences above 0.7 threshold
data/en_docs/id_5063.sent
data/th_docs/id_5063.sent
(24, 512) (3, 512) (24, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5064.sent
data/th_docs/id_5064.sent
(57, 512) (3, 512) (57, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5065.sent
data/th_docs/id_5065.sent
(93, 512) (18, 512) (93, 18)
1 sentences above 0.7 threshold
data/en_docs/id_5066.sent
data/th_docs/id_5066.sent
(24, 512) (57, 512) (24, 57)
1 sentences above 0.7 threshold
data/en_docs/id_5067.sent
data/th_docs/id_5067.sent
(174, 512) (39, 512) (174, 39)
11 sentences above 0.7 threshold
data/en_docs/id_5068.sent
data/th_docs/id_5068.sent
(675, 512) (27, 512) (675, 27)
10 sentences above 0.7 threshold
data/en_docs/id_5069.sent
data/th_docs/id_5069.sent
(255, 512) (132, 512) (255, 132)
12 sentences above 0.7 threshold
data/en_docs/id_507.sent
data/th_docs/id_507.sent
(396, 512) (18, 512) (396, 18)
18 sentences above 0.7 threshold
data/en_docs/id_5070.sen

1 sentences above 0.7 threshold
data/en_docs/id_5133.sent
data/th_docs/id_5133.sent
(9, 512) (3, 512) (9, 3)
6 sentences above 0.7 threshold
data/en_docs/id_5134.sent
data/th_docs/id_5134.sent
(27, 512) (18, 512) (27, 18)
9 sentences above 0.7 threshold
data/en_docs/id_5135.sent
data/th_docs/id_5135.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_5136.sent
data/th_docs/id_5136.sent
(258, 512) (6, 512) (258, 6)
11 sentences above 0.7 threshold
data/en_docs/id_5137.sent
data/th_docs/id_5137.sent
(24, 512) (24, 512) (24, 24)
1 sentences above 0.7 threshold
data/en_docs/id_5138.sent
data/th_docs/id_5138.sent
(39, 512) (15, 512) (39, 15)
12 sentences above 0.7 threshold
data/en_docs/id_5139.sent
data/th_docs/id_5139.sent
(39, 512) (12, 512) (39, 12)
1 sentences above 0.7 threshold
data/en_docs/id_514.sent
data/th_docs/id_514.sent
(24, 512) (3, 512) (24, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5140.sent
data/th_docs/id_5140.sent
(111, 512) (3, 512) (1

3 sentences above 0.7 threshold
data/en_docs/id_5200.sent
data/th_docs/id_5200.sent
(3, 512) (3, 512) (3, 3)
2 sentences above 0.7 threshold
data/en_docs/id_5201.sent
data/th_docs/id_5201.sent
(3, 512) (42, 512) (3, 42)
1 sentences above 0.7 threshold
data/en_docs/id_5202.sent
data/th_docs/id_5202.sent
(30, 512) (15, 512) (30, 15)
7 sentences above 0.7 threshold
data/en_docs/id_5203.sent
data/th_docs/id_5203.sent
(15, 512) (6, 512) (15, 6)
1 sentences above 0.7 threshold
data/en_docs/id_5204.sent
data/th_docs/id_5204.sent
(261, 512) (12, 512) (261, 12)
2 sentences above 0.7 threshold
data/en_docs/id_5205.sent
data/th_docs/id_5205.sent
(51, 512) (24, 512) (51, 24)
1 sentences above 0.7 threshold
data/en_docs/id_5206.sent
data/th_docs/id_5206.sent
(261, 512) (9, 512) (261, 9)
1 sentences above 0.7 threshold
data/en_docs/id_5207.sent
data/th_docs/id_5207.sent
(15, 512) (18, 512) (15, 18)
1 sentences above 0.7 threshold
data/en_docs/id_5208.sent
data/th_docs/id_5208.sent
(3, 512) (39, 512)

8 sentences above 0.7 threshold
data/en_docs/id_5268.sent
data/th_docs/id_5268.sent
(21, 512) (6, 512) (21, 6)
1 sentences above 0.7 threshold
data/en_docs/id_5269.sent
data/th_docs/id_5269.sent
(33, 512) (54, 512) (33, 54)
2 sentences above 0.7 threshold
data/en_docs/id_527.sent
data/th_docs/id_527.sent
(3, 512) (39, 512) (3, 39)
1 sentences above 0.7 threshold
data/en_docs/id_5270.sent
data/th_docs/id_5270.sent
(12, 512) (6, 512) (12, 6)
5 sentences above 0.7 threshold
data/en_docs/id_5271.sent
data/th_docs/id_5271.sent
(12, 512) (6, 512) (12, 6)
5 sentences above 0.7 threshold
data/en_docs/id_5272.sent
data/th_docs/id_5272.sent
(864, 512) (810, 512) (864, 810)
319 sentences above 0.7 threshold
data/en_docs/id_5273.sent
data/th_docs/id_5273.sent
(15, 512) (6, 512) (15, 6)
1 sentences above 0.7 threshold
data/en_docs/id_5274.sent
data/th_docs/id_5274.sent
(15, 512) (6, 512) (15, 6)
6 sentences above 0.7 threshold
data/en_docs/id_5275.sent
data/th_docs/id_5275.sent
(72, 512) (6, 512) (

(66, 512) (24, 512) (66, 24)
34 sentences above 0.7 threshold
data/en_docs/id_5340.sent
data/th_docs/id_5340.sent
(3, 512) (45, 512) (3, 45)
2 sentences above 0.7 threshold
data/en_docs/id_5341.sent
data/th_docs/id_5341.sent
(51, 512) (9, 512) (51, 9)
1 sentences above 0.7 threshold
data/en_docs/id_5342.sent
data/th_docs/id_5342.sent
(9, 512) (6, 512) (9, 6)
2 sentences above 0.7 threshold
data/en_docs/id_5343.sent
data/th_docs/id_5343.sent
(15, 512) (18, 512) (15, 18)
5 sentences above 0.7 threshold
data/en_docs/id_5344.sent
data/th_docs/id_5344.sent
(348, 512) (12, 512) (348, 12)
32 sentences above 0.7 threshold
data/en_docs/id_5345.sent
data/th_docs/id_5345.sent
(27, 512) (36, 512) (27, 36)
1 sentences above 0.7 threshold
data/en_docs/id_5346.sent
data/th_docs/id_5346.sent
(30, 512) (30, 512) (30, 30)
19 sentences above 0.7 threshold
data/en_docs/id_5347.sent
data/th_docs/id_5347.sent
(264, 512) (3, 512) (264, 3)
1 sentences above 0.7 threshold
data/en_docs/id_5348.sent
data/th_docs

4 sentences above 0.7 threshold
data/en_docs/id_541.sent
data/th_docs/id_541.sent
(9, 512) (153, 512) (9, 153)
1 sentences above 0.7 threshold
data/en_docs/id_5410.sent
data/th_docs/id_5410.sent
(9, 512) (3, 512) (9, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5411.sent
data/th_docs/id_5411.sent
(21, 512) (3, 512) (21, 3)
2 sentences above 0.7 threshold
data/en_docs/id_5412.sent
data/th_docs/id_5412.sent
(9, 512) (3, 512) (9, 3)
2 sentences above 0.7 threshold
data/en_docs/id_5413.sent
data/th_docs/id_5413.sent
(9, 512) (3, 512) (9, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5414.sent
data/th_docs/id_5414.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5415.sent
data/th_docs/id_5415.sent
(195, 512) (3, 512) (195, 3)
6 sentences above 0.7 threshold
data/en_docs/id_5416.sent
data/th_docs/id_5416.sent
(45, 512) (6, 512) (45, 6)
4 sentences above 0.7 threshold
data/en_docs/id_5417.sent
data/th_docs/id_5417.sent
(24, 512) (3, 512) (24, 3)
6 sente

(456, 512) (237, 512) (456, 237)
63 sentences above 0.7 threshold
data/en_docs/id_5480.sent
data/th_docs/id_5480.sent
(6, 512) (102, 512) (6, 102)
1 sentences above 0.7 threshold
data/en_docs/id_5481.sent
data/th_docs/id_5481.sent
(12, 512) (9, 512) (12, 9)
1 sentences above 0.7 threshold
data/en_docs/id_5482.sent
data/th_docs/id_5482.sent
(207, 512) (6, 512) (207, 6)
21 sentences above 0.7 threshold
data/en_docs/id_5483.sent
data/th_docs/id_5483.sent
(12, 512) (6, 512) (12, 6)
1 sentences above 0.7 threshold
data/en_docs/id_5484.sent
data/th_docs/id_5484.sent
(15, 512) (3, 512) (15, 3)
1 sentences above 0.7 threshold
data/en_docs/id_5485.sent
data/th_docs/id_5485.sent
(219, 512) (141, 512) (219, 141)
5 sentences above 0.7 threshold
data/en_docs/id_5486.sent
data/th_docs/id_5486.sent
(6, 512) (3, 512) (6, 3)
1 sentences above 0.7 threshold
data/en_docs/id_5487.sent
data/th_docs/id_5487.sent
(48, 512) (33, 512) (48, 33)
4 sentences above 0.7 threshold
data/en_docs/id_5488.sent
data/th_d

(216, 512) (342, 512) (216, 342)
10 sentences above 0.7 threshold
data/en_docs/id_5548.sent
data/th_docs/id_5548.sent
(51, 512) (21, 512) (51, 21)
3 sentences above 0.7 threshold
data/en_docs/id_5549.sent
data/th_docs/id_5549.sent
(426, 512) (99, 512) (426, 99)
22 sentences above 0.7 threshold
data/en_docs/id_555.sent
data/th_docs/id_555.sent
(780, 512) (60, 512) (780, 60)
20 sentences above 0.7 threshold
data/en_docs/id_5550.sent
data/th_docs/id_5550.sent
(18, 512) (15, 512) (18, 15)
3 sentences above 0.7 threshold
data/en_docs/id_5551.sent
data/th_docs/id_5551.sent
(3, 512) (24, 512) (3, 24)
1 sentences above 0.7 threshold
data/en_docs/id_5552.sent
data/th_docs/id_5552.sent
(3, 512) (27, 512) (3, 27)
1 sentences above 0.7 threshold
data/en_docs/id_5553.sent
data/th_docs/id_5553.sent
(279, 512) (3, 512) (279, 3)
21 sentences above 0.7 threshold
data/en_docs/id_5554.sent
data/th_docs/id_5554.sent
(267, 512) (6, 512) (267, 6)
4 sentences above 0.7 threshold
data/en_docs/id_5555.sent
dat

4 sentences above 0.7 threshold
data/en_docs/id_5617.sent
data/th_docs/id_5617.sent
(6, 512) (3, 512) (6, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5618.sent
data/th_docs/id_5618.sent
(9, 512) (3, 512) (9, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5619.sent
data/th_docs/id_5619.sent
(6, 512) (3, 512) (6, 3)
3 sentences above 0.7 threshold
data/en_docs/id_562.sent
data/th_docs/id_562.sent
(459, 512) (36, 512) (459, 36)
9 sentences above 0.7 threshold
data/en_docs/id_5620.sent
data/th_docs/id_5620.sent
(6, 512) (3, 512) (6, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5621.sent
data/th_docs/id_5621.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5622.sent
data/th_docs/id_5622.sent
(48, 512) (3, 512) (48, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5623.sent
data/th_docs/id_5623.sent
(33, 512) (3, 512) (33, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5624.sent
data/th_docs/id_5624.sent
(36, 512) (3, 512) (36, 3)
4 sentenc

(24, 512) (3, 512) (24, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5686.sent
data/th_docs/id_5686.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5687.sent
data/th_docs/id_5687.sent
(12, 512) (3, 512) (12, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5688.sent
data/th_docs/id_5688.sent
(36, 512) (3, 512) (36, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5689.sent
data/th_docs/id_5689.sent
(21, 512) (3, 512) (21, 3)
3 sentences above 0.7 threshold
data/en_docs/id_569.sent
data/th_docs/id_569.sent
(84, 512) (3, 512) (84, 3)
1 sentences above 0.7 threshold
data/en_docs/id_5690.sent
data/th_docs/id_5690.sent
(36, 512) (3, 512) (36, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5691.sent
data/th_docs/id_5691.sent
(9, 512) (3, 512) (9, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5692.sent
data/th_docs/id_5692.sent
(21, 512) (3, 512) (21, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5693.sent
data/th_docs/id_5693.sent
(36, 51

(12, 512) (3, 512) (12, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5754.sent
data/th_docs/id_5754.sent
(27, 512) (3, 512) (27, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5755.sent
data/th_docs/id_5755.sent
(21, 512) (3, 512) (21, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5756.sent
data/th_docs/id_5756.sent
(21, 512) (3, 512) (21, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5757.sent
data/th_docs/id_5757.sent
(12, 512) (3, 512) (12, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5758.sent
data/th_docs/id_5758.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5759.sent
data/th_docs/id_5759.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_576.sent
data/th_docs/id_576.sent
(69, 512) (3, 512) (69, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5760.sent
data/th_docs/id_5760.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5761.sent
data/th_docs/id_5761.sent
(9, 512) 

49 sentences above 0.7 threshold
data/en_docs/id_5820.sent
data/th_docs/id_5820.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5821.sent
data/th_docs/id_5821.sent
(15, 512) (3, 512) (15, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5822.sent
data/th_docs/id_5822.sent
(30, 512) (3, 512) (30, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5823.sent
data/th_docs/id_5823.sent
(24, 512) (3, 512) (24, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5824.sent
data/th_docs/id_5824.sent
(30, 512) (3, 512) (30, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5825.sent
data/th_docs/id_5825.sent
(18, 512) (3, 512) (18, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5826.sent
data/th_docs/id_5826.sent
(9, 512) (3, 512) (9, 3)
6 sentences above 0.7 threshold
data/en_docs/id_5827.sent
data/th_docs/id_5827.sent
(24, 512) (3, 512) (24, 3)
1 sentences above 0.7 threshold
data/en_docs/id_5828.sent
data/th_docs/id_5828.sent
(3, 512) (3, 512) (3, 3)
3 sent

(285, 512) (9, 512) (285, 9)
1 sentences above 0.7 threshold
data/en_docs/id_5889.sent
data/th_docs/id_5889.sent
(12, 512) (12, 512) (12, 12)
1 sentences above 0.7 threshold
data/en_docs/id_589.sent
data/th_docs/id_589.sent
(3, 512) (18, 512) (3, 18)
1 sentences above 0.7 threshold
data/en_docs/id_5890.sent
data/th_docs/id_5890.sent
(51, 512) (36, 512) (51, 36)
2 sentences above 0.7 threshold
data/en_docs/id_5891.sent
data/th_docs/id_5891.sent
(51, 512) (42, 512) (51, 42)
2 sentences above 0.7 threshold
data/en_docs/id_5892.sent
data/th_docs/id_5892.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5893.sent
data/th_docs/id_5893.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_5894.sent
data/th_docs/id_5894.sent
(3, 512) (12, 512) (3, 12)
1 sentences above 0.7 threshold
data/en_docs/id_5895.sent
data/th_docs/id_5895.sent
(63, 512) (15, 512) (63, 15)
41 sentences above 0.7 threshold
data/en_docs/id_5896.sent
data/th_docs/id_5896.s

5 sentences above 0.7 threshold
data/en_docs/id_5955.sent
data/th_docs/id_5955.sent
(15, 512) (3, 512) (15, 3)
1 sentences above 0.7 threshold
data/en_docs/id_5956.sent
data/th_docs/id_5956.sent
(18, 512) (129, 512) (18, 129)
1 sentences above 0.7 threshold
data/en_docs/id_5957.sent
data/th_docs/id_5957.sent
(12, 512) (12, 512) (12, 12)
5 sentences above 0.7 threshold
data/en_docs/id_5958.sent
data/th_docs/id_5958.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5959.sent
data/th_docs/id_5959.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_596.sent
data/th_docs/id_596.sent
(222, 512) (3, 512) (222, 3)
1 sentences above 0.7 threshold
data/en_docs/id_5960.sent
data/th_docs/id_5960.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_5961.sent
data/th_docs/id_5961.sent
(6, 512) (3, 512) (6, 3)
4 sentences above 0.7 threshold
data/en_docs/id_5962.sent
data/th_docs/id_5962.sent
(12, 512) (72, 512) (12, 72)
1

1 sentences above 0.7 threshold
data/en_docs/id_6020.sent
data/th_docs/id_6020.sent
(36, 512) (45, 512) (36, 45)
1 sentences above 0.7 threshold
data/en_docs/id_6021.sent
data/th_docs/id_6021.sent
(33, 512) (18, 512) (33, 18)
1 sentences above 0.7 threshold
data/en_docs/id_6022.sent
data/th_docs/id_6022.sent
(372, 512) (30, 512) (372, 30)
4 sentences above 0.7 threshold
data/en_docs/id_6023.sent
data/th_docs/id_6023.sent
(51, 512) (24, 512) (51, 24)
5 sentences above 0.7 threshold
data/en_docs/id_6024.sent
data/th_docs/id_6024.sent
(6, 512) (57, 512) (6, 57)
1 sentences above 0.7 threshold
data/en_docs/id_6025.sent
data/th_docs/id_6025.sent
(3, 512) (6, 512) (3, 6)
2 sentences above 0.7 threshold
data/en_docs/id_6026.sent
data/th_docs/id_6026.sent
(18, 512) (9, 512) (18, 9)
8 sentences above 0.7 threshold
data/en_docs/id_6027.sent
data/th_docs/id_6027.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_6028.sent
data/th_docs/id_6028.sent
(90, 512) (102, 512) (

3 sentences above 0.7 threshold
data/en_docs/id_6088.sent
data/th_docs/id_6088.sent
(12, 512) (66, 512) (12, 66)
1 sentences above 0.7 threshold
data/en_docs/id_6089.sent
data/th_docs/id_6089.sent
(213, 512) (3, 512) (213, 3)
17 sentences above 0.7 threshold
data/en_docs/id_609.sent
data/th_docs/id_609.sent
(78, 512) (3, 512) (78, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6090.sent
data/th_docs/id_6090.sent
(18, 512) (30, 512) (18, 30)
1 sentences above 0.7 threshold
data/en_docs/id_6091.sent
data/th_docs/id_6091.sent
(27, 512) (6, 512) (27, 6)
11 sentences above 0.7 threshold
data/en_docs/id_6092.sent
data/th_docs/id_6092.sent
(57, 512) (12, 512) (57, 12)
1 sentences above 0.7 threshold
data/en_docs/id_6093.sent
data/th_docs/id_6093.sent
(24, 512) (27, 512) (24, 27)
9 sentences above 0.7 threshold
data/en_docs/id_6094.sent
data/th_docs/id_6094.sent
(48, 512) (102, 512) (48, 102)
19 sentences above 0.7 threshold
data/en_docs/id_6095.sent
data/th_docs/id_6095.sent
skipping...
d

(18, 512) (60, 512) (18, 60)
10 sentences above 0.7 threshold
data/en_docs/id_6155.sent
data/th_docs/id_6155.sent
(3, 512) (24, 512) (3, 24)
1 sentences above 0.7 threshold
data/en_docs/id_6156.sent
data/th_docs/id_6156.sent
(24, 512) (141, 512) (24, 141)
15 sentences above 0.7 threshold
data/en_docs/id_6157.sent
data/th_docs/id_6157.sent
(6, 512) (3, 512) (6, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6158.sent
data/th_docs/id_6158.sent
(438, 512) (51, 512) (438, 51)
17 sentences above 0.7 threshold
data/en_docs/id_6159.sent
data/th_docs/id_6159.sent
(6, 512) (6, 512) (6, 6)
1 sentences above 0.7 threshold
data/en_docs/id_616.sent
data/th_docs/id_616.sent
(144, 512) (237, 512) (144, 237)
18 sentences above 0.7 threshold
data/en_docs/id_6160.sent
data/th_docs/id_6160.sent
(6, 512) (78, 512) (6, 78)
1 sentences above 0.7 threshold
data/en_docs/id_6161.sent
data/th_docs/id_6161.sent
(18, 512) (15, 512) (18, 15)
1 sentences above 0.7 threshold
data/en_docs/id_6162.sent
data/th_doc

1 sentences above 0.7 threshold
data/en_docs/id_6221.sent
data/th_docs/id_6221.sent
(6, 512) (27, 512) (6, 27)
1 sentences above 0.7 threshold
data/en_docs/id_6222.sent
data/th_docs/id_6222.sent
(9, 512) (72, 512) (9, 72)
1 sentences above 0.7 threshold
data/en_docs/id_6223.sent
data/th_docs/id_6223.sent
(84, 512) (45, 512) (84, 45)
5 sentences above 0.7 threshold
data/en_docs/id_6224.sent
data/th_docs/id_6224.sent
(57, 512) (6, 512) (57, 6)
5 sentences above 0.7 threshold
data/en_docs/id_6225.sent
data/th_docs/id_6225.sent
(42, 512) (72, 512) (42, 72)
9 sentences above 0.7 threshold
data/en_docs/id_6226.sent
data/th_docs/id_6226.sent
(48, 512) (36, 512) (48, 36)
1 sentences above 0.7 threshold
data/en_docs/id_6227.sent
data/th_docs/id_6227.sent
(72, 512) (81, 512) (72, 81)
5 sentences above 0.7 threshold
data/en_docs/id_6228.sent
data/th_docs/id_6228.sent
(72, 512) (3, 512) (72, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6229.sent
data/th_docs/id_6229.sent
(114, 512) (3, 512) 

(63, 512) (3, 512) (63, 3)
3 sentences above 0.7 threshold
data/en_docs/id_6291.sent
data/th_docs/id_6291.sent
(21, 512) (6, 512) (21, 6)
3 sentences above 0.7 threshold
data/en_docs/id_6292.sent
data/th_docs/id_6292.sent
(120, 512) (6, 512) (120, 6)
8 sentences above 0.7 threshold
data/en_docs/id_6293.sent
data/th_docs/id_6293.sent
(15, 512) (12, 512) (15, 12)
8 sentences above 0.7 threshold
data/en_docs/id_6294.sent
data/th_docs/id_6294.sent
(15, 512) (6, 512) (15, 6)
8 sentences above 0.7 threshold
data/en_docs/id_6295.sent
data/th_docs/id_6295.sent
(15, 512) (15, 512) (15, 15)
1 sentences above 0.7 threshold
data/en_docs/id_6296.sent
data/th_docs/id_6296.sent
(603, 512) (18, 512) (603, 18)
2 sentences above 0.7 threshold
data/en_docs/id_6297.sent
data/th_docs/id_6297.sent
(60, 512) (30, 512) (60, 30)
1 sentences above 0.7 threshold
data/en_docs/id_6298.sent
data/th_docs/id_6298.sent
(27, 512) (6, 512) (27, 6)
1 sentences above 0.7 threshold
data/en_docs/id_6299.sent
data/th_docs/id

5 sentences above 0.7 threshold
data/en_docs/id_6359.sent
data/th_docs/id_6359.sent
(195, 512) (108, 512) (195, 108)
46 sentences above 0.7 threshold
data/en_docs/id_636.sent
data/th_docs/id_636.sent
(159, 512) (63, 512) (159, 63)
20 sentences above 0.7 threshold
data/en_docs/id_6360.sent
data/th_docs/id_6360.sent
(54, 512) (3, 512) (54, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6361.sent
data/th_docs/id_6361.sent
(144, 512) (24, 512) (144, 24)
7 sentences above 0.7 threshold
data/en_docs/id_6362.sent
data/th_docs/id_6362.sent
(252, 512) (36, 512) (252, 36)
35 sentences above 0.7 threshold
data/en_docs/id_6363.sent
data/th_docs/id_6363.sent
(234, 512) (45, 512) (234, 45)
8 sentences above 0.7 threshold
data/en_docs/id_6364.sent
data/th_docs/id_6364.sent
(66, 512) (3, 512) (66, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6365.sent
data/th_docs/id_6365.sent
(90, 512) (9, 512) (90, 9)
1 sentences above 0.7 threshold
data/en_docs/id_6366.sent
data/th_docs/id_6366.sent
(12, 

52 sentences above 0.7 threshold
data/en_docs/id_6427.sent
data/th_docs/id_6427.sent
(138, 512) (63, 512) (138, 63)
23 sentences above 0.7 threshold
data/en_docs/id_6428.sent
data/th_docs/id_6428.sent
(279, 512) (93, 512) (279, 93)
55 sentences above 0.7 threshold
data/en_docs/id_6429.sent
data/th_docs/id_6429.sent
(357, 512) (30, 512) (357, 30)
2 sentences above 0.7 threshold
data/en_docs/id_643.sent
data/th_docs/id_643.sent
(441, 512) (9, 512) (441, 9)
14 sentences above 0.7 threshold
data/en_docs/id_6430.sent
data/th_docs/id_6430.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6431.sent
data/th_docs/id_6431.sent
(111, 512) (39, 512) (111, 39)
25 sentences above 0.7 threshold
data/en_docs/id_6432.sent
data/th_docs/id_6432.sent
(138, 512) (63, 512) (138, 63)
4 sentences above 0.7 threshold
data/en_docs/id_6433.sent
data/th_docs/id_6433.sent
(78, 512) (12, 512) (78, 12)
4 sentences above 0.7 threshold
data/en_docs/id_6434.sent
data/th_docs/id_6434.sent
(15

1 sentences above 0.7 threshold
data/en_docs/id_6496.sent
data/th_docs/id_6496.sent
(51, 512) (3, 512) (51, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6497.sent
data/th_docs/id_6497.sent
(78, 512) (12, 512) (78, 12)
1 sentences above 0.7 threshold
data/en_docs/id_6498.sent
data/th_docs/id_6498.sent
(126, 512) (12, 512) (126, 12)
4 sentences above 0.7 threshold
data/en_docs/id_6499.sent
data/th_docs/id_6499.sent
(33, 512) (6, 512) (33, 6)
6 sentences above 0.7 threshold
data/en_docs/id_65.sent
data/th_docs/id_65.sent
(3, 512) (3, 512) (3, 3)
2 sentences above 0.7 threshold
data/en_docs/id_650.sent
data/th_docs/id_650.sent
(1746, 512) (21, 512) (1746, 21)
26 sentences above 0.7 threshold
data/en_docs/id_6500.sent
data/th_docs/id_6500.sent
(21, 512) (9, 512) (21, 9)
1 sentences above 0.7 threshold
data/en_docs/id_6501.sent
data/th_docs/id_6501.sent
(48, 512) (3, 512) (48, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6502.sent
data/th_docs/id_6502.sent
(36, 512) (3, 512) (36,

(3, 512) (6, 512) (3, 6)
1 sentences above 0.7 threshold
data/en_docs/id_6564.sent
data/th_docs/id_6564.sent
(891, 512) (12, 512) (891, 12)
14 sentences above 0.7 threshold
data/en_docs/id_6565.sent
data/th_docs/id_6565.sent
(3, 512) (42, 512) (3, 42)
1 sentences above 0.7 threshold
data/en_docs/id_6566.sent
data/th_docs/id_6566.sent
(6, 512) (36, 512) (6, 36)
1 sentences above 0.7 threshold
data/en_docs/id_6567.sent
data/th_docs/id_6567.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_6568.sent
data/th_docs/id_6568.sent
(33, 512) (24, 512) (33, 24)
14 sentences above 0.7 threshold
data/en_docs/id_6569.sent
data/th_docs/id_6569.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_657.sent
data/th_docs/id_657.sent
(741, 512) (15, 512) (741, 15)
15 sentences above 0.7 threshold
data/en_docs/id_6570.sent
data/th_docs/id_6570.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_6571.sent
data/th_docs/id_6571.sen

7 sentences above 0.7 threshold
data/en_docs/id_663.sent
data/th_docs/id_663.sent
(3, 512) (3, 512) (3, 3)
2 sentences above 0.7 threshold
data/en_docs/id_6630.sent
data/th_docs/id_6630.sent
(3, 512) (6, 512) (3, 6)
1 sentences above 0.7 threshold
data/en_docs/id_6631.sent
data/th_docs/id_6631.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_6632.sent
data/th_docs/id_6632.sent
(54, 512) (12, 512) (54, 12)
7 sentences above 0.7 threshold
data/en_docs/id_6633.sent
data/th_docs/id_6633.sent
(75, 512) (9, 512) (75, 9)
6 sentences above 0.7 threshold
data/en_docs/id_6634.sent
data/th_docs/id_6634.sent
(3, 512) (30, 512) (3, 30)
1 sentences above 0.7 threshold
data/en_docs/id_6635.sent
data/th_docs/id_6635.sent
(57, 512) (129, 512) (57, 129)
9 sentences above 0.7 threshold
data/en_docs/id_6636.sent
data/th_docs/id_6636.sent
(486, 512) (189, 512) (486, 189)
51 sentences above 0.7 threshold
data/en_docs/id_6637.sent
data/th_docs/id_6637.sent
(450, 512) (159, 512) (

107 sentences above 0.7 threshold
data/en_docs/id_6697.sent
data/th_docs/id_6697.sent
(339, 512) (24, 512) (339, 24)
18 sentences above 0.7 threshold
data/en_docs/id_6698.sent
data/th_docs/id_6698.sent
(36, 512) (12, 512) (36, 12)
14 sentences above 0.7 threshold
data/en_docs/id_6699.sent
data/th_docs/id_6699.sent
(9, 512) (21, 512) (9, 21)
7 sentences above 0.7 threshold
data/en_docs/id_67.sent
data/th_docs/id_67.sent
(132, 512) (18, 512) (132, 18)
6 sentences above 0.7 threshold
data/en_docs/id_670.sent
data/th_docs/id_670.sent
(162, 512) (111, 512) (162, 111)
12 sentences above 0.7 threshold
data/en_docs/id_6700.sent
data/th_docs/id_6700.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_6701.sent
data/th_docs/id_6701.sent
(51, 512) (9, 512) (51, 9)
11 sentences above 0.7 threshold
data/en_docs/id_6702.sent
data/th_docs/id_6702.sent
(39, 512) (3, 512) (39, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6703.sent
data/th_docs/id_6703.sent
(213, 512) (93

(243, 512) (3, 512) (243, 3)
3 sentences above 0.7 threshold
data/en_docs/id_6762.sent
data/th_docs/id_6762.sent
(9, 512) (3, 512) (9, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6763.sent
data/th_docs/id_6763.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_6764.sent
data/th_docs/id_6764.sent
(15, 512) (9, 512) (15, 9)
11 sentences above 0.7 threshold
data/en_docs/id_6765.sent
data/th_docs/id_6765.sent
(3, 512) (78, 512) (3, 78)
1 sentences above 0.7 threshold
data/en_docs/id_6766.sent
data/th_docs/id_6766.sent
(1101, 512) (318, 512) (1101, 318)
126 sentences above 0.7 threshold
data/en_docs/id_6767.sent
data/th_docs/id_6767.sent
(105, 512) (39, 512) (105, 39)
28 sentences above 0.7 threshold
data/en_docs/id_6768.sent
data/th_docs/id_6768.sent
(3, 512) (108, 512) (3, 108)
1 sentences above 0.7 threshold
data/en_docs/id_6769.sent
data/th_docs/id_6769.sent
(81, 512) (30, 512) (81, 30)
6 sentences above 0.7 threshold
data/en_docs/id_677.sent
data/th_do

(183, 512) (42, 512) (183, 42)
22 sentences above 0.7 threshold
data/en_docs/id_6832.sent
data/th_docs/id_6832.sent
(18, 512) (6, 512) (18, 6)
1 sentences above 0.7 threshold
data/en_docs/id_6833.sent
data/th_docs/id_6833.sent
(336, 512) (66, 512) (336, 66)
39 sentences above 0.7 threshold
data/en_docs/id_6834.sent
data/th_docs/id_6834.sent
(105, 512) (6, 512) (105, 6)
7 sentences above 0.7 threshold
data/en_docs/id_6835.sent
data/th_docs/id_6835.sent
(285, 512) (15, 512) (285, 15)
13 sentences above 0.7 threshold
data/en_docs/id_6836.sent
data/th_docs/id_6836.sent
(240, 512) (93, 512) (240, 93)
56 sentences above 0.7 threshold
data/en_docs/id_6837.sent
data/th_docs/id_6837.sent
(240, 512) (312, 512) (240, 312)
226 sentences above 0.7 threshold
data/en_docs/id_6838.sent
data/th_docs/id_6838.sent
(96, 512) (48, 512) (96, 48)
5 sentences above 0.7 threshold
data/en_docs/id_6839.sent
data/th_docs/id_6839.sent
(189, 512) (66, 512) (189, 66)
23 sentences above 0.7 threshold
data/en_docs/id_

(162, 512) (39, 512) (162, 39)
66 sentences above 0.7 threshold
data/en_docs/id_6902.sent
data/th_docs/id_6902.sent
(78, 512) (18, 512) (78, 18)
2 sentences above 0.7 threshold
data/en_docs/id_6903.sent
data/th_docs/id_6903.sent
(1395, 512) (162, 512) (1395, 162)
114 sentences above 0.7 threshold
data/en_docs/id_6904.sent
data/th_docs/id_6904.sent
(111, 512) (12, 512) (111, 12)
1 sentences above 0.7 threshold
data/en_docs/id_6905.sent
data/th_docs/id_6905.sent
(93, 512) (15, 512) (93, 15)
10 sentences above 0.7 threshold
data/en_docs/id_6906.sent
data/th_docs/id_6906.sent
(1092, 512) (12, 512) (1092, 12)
29 sentences above 0.7 threshold
data/en_docs/id_6907.sent
data/th_docs/id_6907.sent
(54, 512) (12, 512) (54, 12)
11 sentences above 0.7 threshold
data/en_docs/id_6908.sent
data/th_docs/id_6908.sent
(12, 512) (3, 512) (12, 3)
4 sentences above 0.7 threshold
data/en_docs/id_6909.sent
data/th_docs/id_6909.sent
(2667, 512) (87, 512) (2667, 87)
38 sentences above 0.7 threshold
data/en_docs

(753, 512) (3, 512) (753, 3)
2 sentences above 0.7 threshold
data/en_docs/id_6970.sent
data/th_docs/id_6970.sent
(114, 512) (9, 512) (114, 9)
1 sentences above 0.7 threshold
data/en_docs/id_6971.sent
data/th_docs/id_6971.sent
(3, 512) (54, 512) (3, 54)
1 sentences above 0.7 threshold
data/en_docs/id_6972.sent
data/th_docs/id_6972.sent
(465, 512) (12, 512) (465, 12)
8 sentences above 0.7 threshold
data/en_docs/id_6973.sent
data/th_docs/id_6973.sent
(132, 512) (3, 512) (132, 3)
1 sentences above 0.7 threshold
data/en_docs/id_6974.sent
data/th_docs/id_6974.sent
(297, 512) (9, 512) (297, 9)
13 sentences above 0.7 threshold
data/en_docs/id_6975.sent
data/th_docs/id_6975.sent
(531, 512) (54, 512) (531, 54)
85 sentences above 0.7 threshold
data/en_docs/id_6976.sent
data/th_docs/id_6976.sent
(111, 512) (12, 512) (111, 12)
24 sentences above 0.7 threshold
data/en_docs/id_6977.sent
data/th_docs/id_6977.sent
(3, 512) (6, 512) (3, 6)
2 sentences above 0.7 threshold
data/en_docs/id_6978.sent
data/t

88 sentences above 0.7 threshold
data/en_docs/id_7035.sent
data/th_docs/id_7035.sent
(699, 512) (12, 512) (699, 12)
15 sentences above 0.7 threshold
data/en_docs/id_7036.sent
data/th_docs/id_7036.sent
(66, 512) (327, 512) (66, 327)
22 sentences above 0.7 threshold
data/en_docs/id_7037.sent
data/th_docs/id_7037.sent
(807, 512) (102, 512) (807, 102)
29 sentences above 0.7 threshold
data/en_docs/id_7038.sent
data/th_docs/id_7038.sent
(900, 512) (6, 512) (900, 6)
1 sentences above 0.7 threshold
data/en_docs/id_7039.sent
data/th_docs/id_7039.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_704.sent
data/th_docs/id_704.sent
(3, 512) (27, 512) (3, 27)
1 sentences above 0.7 threshold
data/en_docs/id_7040.sent
data/th_docs/id_7040.sent
(189, 512) (6, 512) (189, 6)
4 sentences above 0.7 threshold
data/en_docs/id_7041.sent
data/th_docs/id_7041.sent
(21, 512) (6, 512) (21, 6)
3 sentences above 0.7 threshold
data/en_docs/id_7042.sent
data/th_docs/id_7042.sent
(3, 512) (

48 sentences above 0.7 threshold
data/en_docs/id_7103.sent
data/th_docs/id_7103.sent
(753, 512) (21, 512) (753, 21)
2 sentences above 0.7 threshold
data/en_docs/id_7104.sent
data/th_docs/id_7104.sent
(171, 512) (36, 512) (171, 36)
1 sentences above 0.7 threshold
data/en_docs/id_7105.sent
data/th_docs/id_7105.sent
(9, 512) (12, 512) (9, 12)
1 sentences above 0.7 threshold
data/en_docs/id_7106.sent
data/th_docs/id_7106.sent
(333, 512) (66, 512) (333, 66)
12 sentences above 0.7 threshold
data/en_docs/id_7107.sent
data/th_docs/id_7107.sent
(12, 512) (3, 512) (12, 3)
1 sentences above 0.7 threshold
data/en_docs/id_7108.sent
data/th_docs/id_7108.sent
(1536, 512) (60, 512) (1536, 60)
27 sentences above 0.7 threshold
data/en_docs/id_7109.sent
data/th_docs/id_7109.sent
(12, 512) (78, 512) (12, 78)
1 sentences above 0.7 threshold
data/en_docs/id_711.sent
data/th_docs/id_711.sent
(57, 512) (3, 512) (57, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7110.sent
data/th_docs/id_7110.sent
(3, 512

(552, 512) (12, 512) (552, 12)
21 sentences above 0.7 threshold
data/en_docs/id_7171.sent
data/th_docs/id_7171.sent
(48, 512) (18, 512) (48, 18)
13 sentences above 0.7 threshold
data/en_docs/id_7172.sent
data/th_docs/id_7172.sent
(6, 512) (18, 512) (6, 18)
1 sentences above 0.7 threshold
data/en_docs/id_7173.sent
data/th_docs/id_7173.sent
(120, 512) (9, 512) (120, 9)
10 sentences above 0.7 threshold
data/en_docs/id_7174.sent
data/th_docs/id_7174.sent
(1242, 512) (108, 512) (1242, 108)
8 sentences above 0.7 threshold
data/en_docs/id_7175.sent
data/th_docs/id_7175.sent
(369, 512) (15, 512) (369, 15)
1 sentences above 0.7 threshold
data/en_docs/id_7176.sent
data/th_docs/id_7176.sent
(198, 512) (9, 512) (198, 9)
1 sentences above 0.7 threshold
data/en_docs/id_7177.sent
data/th_docs/id_7177.sent
(141, 512) (81, 512) (141, 81)
96 sentences above 0.7 threshold
data/en_docs/id_7178.sent
data/th_docs/id_7178.sent
(102, 512) (36, 512) (102, 36)
7 sentences above 0.7 threshold
data/en_docs/id_717

96 sentences above 0.7 threshold
data/en_docs/id_7237.sent
data/th_docs/id_7237.sent
(258, 512) (42, 512) (258, 42)
51 sentences above 0.7 threshold
data/en_docs/id_7238.sent
data/th_docs/id_7238.sent
(3, 512) (12, 512) (3, 12)
1 sentences above 0.7 threshold
data/en_docs/id_7239.sent
data/th_docs/id_7239.sent
(270, 512) (33, 512) (270, 33)
14 sentences above 0.7 threshold
data/en_docs/id_724.sent
data/th_docs/id_724.sent
skipping...
data/en_docs/id_7240.sent
data/th_docs/id_7240.sent
(114, 512) (3, 512) (114, 3)
11 sentences above 0.7 threshold
data/en_docs/id_7241.sent
data/th_docs/id_7241.sent
(501, 512) (9, 512) (501, 9)
10 sentences above 0.7 threshold
data/en_docs/id_7242.sent
data/th_docs/id_7242.sent
(3, 512) (33, 512) (3, 33)
1 sentences above 0.7 threshold
data/en_docs/id_7243.sent
data/th_docs/id_7243.sent
(1194, 512) (15, 512) (1194, 15)
4 sentences above 0.7 threshold
data/en_docs/id_7244.sent
data/th_docs/id_7244.sent
(504, 512) (15, 512) (504, 15)
10 sentences above 0.7 

30 sentences above 0.7 threshold
data/en_docs/id_7303.sent
data/th_docs/id_7303.sent
(294, 512) (6, 512) (294, 6)
1 sentences above 0.7 threshold
data/en_docs/id_7304.sent
data/th_docs/id_7304.sent
(471, 512) (3, 512) (471, 3)
7 sentences above 0.7 threshold
data/en_docs/id_7305.sent
data/th_docs/id_7305.sent
(27, 512) (15, 512) (27, 15)
6 sentences above 0.7 threshold
data/en_docs/id_7306.sent
data/th_docs/id_7306.sent
(102, 512) (3, 512) (102, 3)
1 sentences above 0.7 threshold
data/en_docs/id_7307.sent
data/th_docs/id_7307.sent
(423, 512) (9, 512) (423, 9)
16 sentences above 0.7 threshold
data/en_docs/id_7308.sent
data/th_docs/id_7308.sent
(402, 512) (144, 512) (402, 144)
4 sentences above 0.7 threshold
data/en_docs/id_7309.sent
data/th_docs/id_7309.sent
(270, 512) (69, 512) (270, 69)
24 sentences above 0.7 threshold
data/en_docs/id_731.sent
data/th_docs/id_731.sent
(6, 512) (24, 512) (6, 24)
1 sentences above 0.7 threshold
data/en_docs/id_7310.sent
data/th_docs/id_7310.sent
(105, 5

(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7373.sent
data/th_docs/id_7373.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7374.sent
data/th_docs/id_7374.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7375.sent
data/th_docs/id_7375.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7376.sent
data/th_docs/id_7376.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7377.sent
data/th_docs/id_7377.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_7378.sent
data/th_docs/id_7378.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7379.sent
data/th_docs/id_7379.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_738.sent
data/th_docs/id_738.sent
(81, 512) (3, 512) (81, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7380.sent
data/th_docs/id_7380.sent
(3, 512) (3, 512) (

5 sentences above 0.7 threshold
data/en_docs/id_7440.sent
data/th_docs/id_7440.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7441.sent
data/th_docs/id_7441.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7442.sent
data/th_docs/id_7442.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7443.sent
data/th_docs/id_7443.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7444.sent
data/th_docs/id_7444.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7445.sent
data/th_docs/id_7445.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7446.sent
data/th_docs/id_7446.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7447.sent
data/th_docs/id_7447.sent
(9, 512) (3, 512) (9, 3)
6 sentences above 0.7 threshold
data/en_docs/id_7448.sent
data/th_docs/id_7448.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0

(120, 512) (3, 512) (120, 3)
2 sentences above 0.7 threshold
data/en_docs/id_7510.sent
data/th_docs/id_7510.sent
(90, 512) (3, 512) (90, 3)
4 sentences above 0.7 threshold
data/en_docs/id_7511.sent
data/th_docs/id_7511.sent
(669, 512) (27, 512) (669, 27)
16 sentences above 0.7 threshold
data/en_docs/id_7512.sent
data/th_docs/id_7512.sent
(159, 512) (54, 512) (159, 54)
27 sentences above 0.7 threshold
data/en_docs/id_7513.sent
data/th_docs/id_7513.sent
(153, 512) (36, 512) (153, 36)
35 sentences above 0.7 threshold
data/en_docs/id_7514.sent
data/th_docs/id_7514.sent
(180, 512) (18, 512) (180, 18)
18 sentences above 0.7 threshold
data/en_docs/id_7515.sent
data/th_docs/id_7515.sent
(792, 512) (48, 512) (792, 48)
60 sentences above 0.7 threshold
data/en_docs/id_7516.sent
data/th_docs/id_7516.sent
(84, 512) (27, 512) (84, 27)
3 sentences above 0.7 threshold
data/en_docs/id_7517.sent
data/th_docs/id_7517.sent
(78, 512) (3, 512) (78, 3)
1 sentences above 0.7 threshold
data/en_docs/id_7518.sen

18 sentences above 0.7 threshold
data/en_docs/id_7577.sent
data/th_docs/id_7577.sent
(345, 512) (3, 512) (345, 3)
6 sentences above 0.7 threshold
data/en_docs/id_7578.sent
data/th_docs/id_7578.sent
(6, 512) (54, 512) (6, 54)
1 sentences above 0.7 threshold
data/en_docs/id_7579.sent
data/th_docs/id_7579.sent
(222, 512) (12, 512) (222, 12)
1 sentences above 0.7 threshold
data/en_docs/id_758.sent
data/th_docs/id_758.sent
(3, 512) (3, 512) (3, 3)
2 sentences above 0.7 threshold
data/en_docs/id_7580.sent
data/th_docs/id_7580.sent
(78, 512) (36, 512) (78, 36)
5 sentences above 0.7 threshold
data/en_docs/id_7581.sent
data/th_docs/id_7581.sent
(171, 512) (159, 512) (171, 159)
7 sentences above 0.7 threshold
data/en_docs/id_7582.sent
data/th_docs/id_7582.sent
(408, 512) (9, 512) (408, 9)
5 sentences above 0.7 threshold
data/en_docs/id_7583.sent
data/th_docs/id_7583.sent
(27, 512) (3, 512) (27, 3)
4 sentences above 0.7 threshold
data/en_docs/id_7584.sent
data/th_docs/id_7584.sent
(108, 512) (18,

(168, 512) (6, 512) (168, 6)
7 sentences above 0.7 threshold
data/en_docs/id_7646.sent
data/th_docs/id_7646.sent
(177, 512) (3, 512) (177, 3)
5 sentences above 0.7 threshold
data/en_docs/id_7647.sent
data/th_docs/id_7647.sent
(75, 512) (3, 512) (75, 3)
4 sentences above 0.7 threshold
data/en_docs/id_7648.sent
data/th_docs/id_7648.sent
(126, 512) (3, 512) (126, 3)
5 sentences above 0.7 threshold
data/en_docs/id_7649.sent
data/th_docs/id_7649.sent
skipping...
data/en_docs/id_765.sent
data/th_docs/id_765.sent
(120, 512) (3, 512) (120, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7650.sent
data/th_docs/id_7650.sent
skipping...
data/en_docs/id_7651.sent
data/th_docs/id_7651.sent
(3, 512) (36, 512) (3, 36)
1 sentences above 0.7 threshold
data/en_docs/id_7652.sent
data/th_docs/id_7652.sent
(267, 512) (18, 512) (267, 18)
18 sentences above 0.7 threshold
data/en_docs/id_7653.sent
data/th_docs/id_7653.sent
(51, 512) (3, 512) (51, 3)
1 sentences above 0.7 threshold
data/en_docs/id_7654.sent

(24, 512) (3, 512) (24, 3)
1 sentences above 0.7 threshold
data/en_docs/id_7716.sent
data/th_docs/id_7716.sent
(3, 512) (105, 512) (3, 105)
1 sentences above 0.7 threshold
data/en_docs/id_7717.sent
data/th_docs/id_7717.sent
(27, 512) (39, 512) (27, 39)
25 sentences above 0.7 threshold
data/en_docs/id_7718.sent
data/th_docs/id_7718.sent
(117, 512) (3, 512) (117, 3)
6 sentences above 0.7 threshold
data/en_docs/id_7719.sent
data/th_docs/id_7719.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_772.sent
data/th_docs/id_772.sent
(72, 512) (3, 512) (72, 3)
2 sentences above 0.7 threshold
data/en_docs/id_7720.sent
data/th_docs/id_7720.sent
(24, 512) (3, 512) (24, 3)
1 sentences above 0.7 threshold
data/en_docs/id_7721.sent
data/th_docs/id_7721.sent
(243, 512) (36, 512) (243, 36)
34 sentences above 0.7 threshold
data/en_docs/id_7722.sent
data/th_docs/id_7722.sent
(18, 512) (18, 512) (18, 18)
6 sentences above 0.7 threshold
data/en_docs/id_7723.sent
data/th_docs/id_7

56 sentences above 0.7 threshold
data/en_docs/id_7784.sent
data/th_docs/id_7784.sent
(153, 512) (15, 512) (153, 15)
11 sentences above 0.7 threshold
data/en_docs/id_7785.sent
data/th_docs/id_7785.sent
(75, 512) (12, 512) (75, 12)
23 sentences above 0.7 threshold
data/en_docs/id_7786.sent
data/th_docs/id_7786.sent
(969, 512) (3, 512) (969, 3)
5 sentences above 0.7 threshold
data/en_docs/id_7787.sent
data/th_docs/id_7787.sent
(33, 512) (3, 512) (33, 3)
4 sentences above 0.7 threshold
data/en_docs/id_7788.sent
data/th_docs/id_7788.sent
(15, 512) (183, 512) (15, 183)
1 sentences above 0.7 threshold
data/en_docs/id_7789.sent
data/th_docs/id_7789.sent
(393, 512) (12, 512) (393, 12)
8 sentences above 0.7 threshold
data/en_docs/id_779.sent
data/th_docs/id_779.sent
(1176, 512) (3, 512) (1176, 3)
2 sentences above 0.7 threshold
data/en_docs/id_7790.sent
data/th_docs/id_7790.sent
(111, 512) (18, 512) (111, 18)
6 sentences above 0.7 threshold
data/en_docs/id_7791.sent
data/th_docs/id_7791.sent
(67

(387, 512) (99, 512) (387, 99)
32 sentences above 0.7 threshold
data/en_docs/id_7850.sent
data/th_docs/id_7850.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7851.sent
data/th_docs/id_7851.sent
(252, 512) (27, 512) (252, 27)
22 sentences above 0.7 threshold
data/en_docs/id_7852.sent
data/th_docs/id_7852.sent
(3, 512) (15, 512) (3, 15)
1 sentences above 0.7 threshold
data/en_docs/id_7853.sent
data/th_docs/id_7853.sent
(123, 512) (3, 512) (123, 3)
5 sentences above 0.7 threshold
data/en_docs/id_7854.sent
data/th_docs/id_7854.sent
(15, 512) (3, 512) (15, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7855.sent
data/th_docs/id_7855.sent
(30, 512) (3, 512) (30, 3)
4 sentences above 0.7 threshold
data/en_docs/id_7856.sent
data/th_docs/id_7856.sent
(384, 512) (216, 512) (384, 216)
53 sentences above 0.7 threshold
data/en_docs/id_7857.sent
data/th_docs/id_7857.sent
(57, 512) (21, 512) (57, 21)
1 sentences above 0.7 threshold
data/en_docs/id_7858.sent
data/th_

3 sentences above 0.7 threshold
data/en_docs/id_7918.sent
data/th_docs/id_7918.sent
(12, 512) (3, 512) (12, 3)
5 sentences above 0.7 threshold
data/en_docs/id_7919.sent
data/th_docs/id_7919.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_792.sent
data/th_docs/id_792.sent
(1200, 512) (51, 512) (1200, 51)
19 sentences above 0.7 threshold
data/en_docs/id_7920.sent
data/th_docs/id_7920.sent
(27, 512) (3, 512) (27, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7921.sent
data/th_docs/id_7921.sent
(9, 512) (3, 512) (9, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7922.sent
data/th_docs/id_7922.sent
(24, 512) (3, 512) (24, 3)
1 sentences above 0.7 threshold
data/en_docs/id_7923.sent
data/th_docs/id_7923.sent
(12, 512) (3, 512) (12, 3)
1 sentences above 0.7 threshold
data/en_docs/id_7924.sent
data/th_docs/id_7924.sent
(18, 512) (3, 512) (18, 3)
3 sentences above 0.7 threshold
data/en_docs/id_7925.sent
data/th_docs/id_7925.sent
(15, 512) (3, 512) (15, 3)


(1044, 512) (9, 512) (1044, 9)
26 sentences above 0.7 threshold
data/en_docs/id_7987.sent
data/th_docs/id_7987.sent
(255, 512) (3, 512) (255, 3)
2 sentences above 0.7 threshold
data/en_docs/id_7988.sent
data/th_docs/id_7988.sent
(411, 512) (12, 512) (411, 12)
12 sentences above 0.7 threshold
data/en_docs/id_7989.sent
data/th_docs/id_7989.sent
(264, 512) (9, 512) (264, 9)
7 sentences above 0.7 threshold
data/en_docs/id_799.sent
data/th_docs/id_799.sent
(9, 512) (3, 512) (9, 3)
6 sentences above 0.7 threshold
data/en_docs/id_7990.sent
data/th_docs/id_7990.sent
(129, 512) (18, 512) (129, 18)
1 sentences above 0.7 threshold
data/en_docs/id_7991.sent
data/th_docs/id_7991.sent
(288, 512) (18, 512) (288, 18)
19 sentences above 0.7 threshold
data/en_docs/id_7992.sent
data/th_docs/id_7992.sent
(579, 512) (27, 512) (579, 27)
9 sentences above 0.7 threshold
data/en_docs/id_7993.sent
data/th_docs/id_7993.sent
(303, 512) (78, 512) (303, 78)
33 sentences above 0.7 threshold
data/en_docs/id_7994.sent

62 sentences above 0.7 threshold
data/en_docs/id_8057.sent
data/th_docs/id_8057.sent
(516, 512) (18, 512) (516, 18)
37 sentences above 0.7 threshold
data/en_docs/id_8058.sent
data/th_docs/id_8058.sent
(189, 512) (357, 512) (189, 357)
81 sentences above 0.7 threshold
data/en_docs/id_8059.sent
data/th_docs/id_8059.sent
(231, 512) (183, 512) (231, 183)
59 sentences above 0.7 threshold
data/en_docs/id_806.sent
data/th_docs/id_806.sent
(6, 512) (3, 512) (6, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8060.sent
data/th_docs/id_8060.sent
(135, 512) (6, 512) (135, 6)
22 sentences above 0.7 threshold
data/en_docs/id_8061.sent
data/th_docs/id_8061.sent
(30, 512) (9, 512) (30, 9)
9 sentences above 0.7 threshold
data/en_docs/id_8062.sent
data/th_docs/id_8062.sent
(288, 512) (9, 512) (288, 9)
19 sentences above 0.7 threshold
data/en_docs/id_8063.sent
data/th_docs/id_8063.sent
(504, 512) (18, 512) (504, 18)
86 sentences above 0.7 threshold
data/en_docs/id_8064.sent
data/th_docs/id_8064.sent
(

18 sentences above 0.7 threshold
data/en_docs/id_8121.sent
data/th_docs/id_8121.sent
(513, 512) (60, 512) (513, 60)
13 sentences above 0.7 threshold
data/en_docs/id_8122.sent
data/th_docs/id_8122.sent
(471, 512) (18, 512) (471, 18)
21 sentences above 0.7 threshold
data/en_docs/id_8123.sent
data/th_docs/id_8123.sent
(3, 512) (18, 512) (3, 18)
1 sentences above 0.7 threshold
data/en_docs/id_8124.sent
data/th_docs/id_8124.sent
skipping...
data/en_docs/id_8125.sent
data/th_docs/id_8125.sent
(957, 512) (150, 512) (957, 150)
134 sentences above 0.7 threshold
data/en_docs/id_8126.sent
data/th_docs/id_8126.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8127.sent
data/th_docs/id_8127.sent
(3, 512) (15, 512) (3, 15)
1 sentences above 0.7 threshold
data/en_docs/id_8128.sent
data/th_docs/id_8128.sent
(564, 512) (81, 512) (564, 81)
21 sentences above 0.7 threshold
data/en_docs/id_8129.sent
data/th_docs/id_8129.sent
(123, 512) (57, 512) (123, 57)
1 sentences above 0.7 

102 sentences above 0.7 threshold
data/en_docs/id_8190.sent
data/th_docs/id_8190.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_8191.sent
data/th_docs/id_8191.sent
(168, 512) (6, 512) (168, 6)
5 sentences above 0.7 threshold
data/en_docs/id_8192.sent
data/th_docs/id_8192.sent
(9, 512) (3, 512) (9, 3)
5 sentences above 0.7 threshold
data/en_docs/id_8193.sent
data/th_docs/id_8193.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8194.sent
data/th_docs/id_8194.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8195.sent
data/th_docs/id_8195.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8196.sent
data/th_docs/id_8196.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8197.sent
data/th_docs/id_8197.sent
(15, 512) (3, 512) (15, 3)
12 sentences above 0.7 threshold
data/en_docs/id_8198.sent
data/th_docs/id_8198.sent
(3, 512) (3, 512) (3, 3)
3 sentence

(1671, 512) (3, 512) (1671, 3)
7 sentences above 0.7 threshold
data/en_docs/id_8260.sent
data/th_docs/id_8260.sent
(282, 512) (69, 512) (282, 69)
40 sentences above 0.7 threshold
data/en_docs/id_8261.sent
data/th_docs/id_8261.sent
(12, 512) (6, 512) (12, 6)
9 sentences above 0.7 threshold
data/en_docs/id_8262.sent
data/th_docs/id_8262.sent
(24, 512) (6, 512) (24, 6)
4 sentences above 0.7 threshold
data/en_docs/id_8263.sent
data/th_docs/id_8263.sent
(36, 512) (21, 512) (36, 21)
10 sentences above 0.7 threshold
data/en_docs/id_8264.sent
data/th_docs/id_8264.sent
(54, 512) (9, 512) (54, 9)
1 sentences above 0.7 threshold
data/en_docs/id_8265.sent
data/th_docs/id_8265.sent
(54, 512) (12, 512) (54, 12)
22 sentences above 0.7 threshold
data/en_docs/id_8266.sent
data/th_docs/id_8266.sent
(12, 512) (3, 512) (12, 3)
2 sentences above 0.7 threshold
data/en_docs/id_8267.sent
data/th_docs/id_8267.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8268.sent
data/th_docs/i

24 sentences above 0.7 threshold
data/en_docs/id_8327.sent
data/th_docs/id_8327.sent
(57, 512) (36, 512) (57, 36)
4 sentences above 0.7 threshold
data/en_docs/id_8328.sent
data/th_docs/id_8328.sent
(81, 512) (6, 512) (81, 6)
6 sentences above 0.7 threshold
data/en_docs/id_8329.sent
data/th_docs/id_8329.sent
(39, 512) (6, 512) (39, 6)
10 sentences above 0.7 threshold
data/en_docs/id_833.sent
data/th_docs/id_833.sent
(99, 512) (42, 512) (99, 42)
31 sentences above 0.7 threshold
data/en_docs/id_8330.sent
data/th_docs/id_8330.sent
(96, 512) (6, 512) (96, 6)
19 sentences above 0.7 threshold
data/en_docs/id_8331.sent
data/th_docs/id_8331.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_8332.sent
data/th_docs/id_8332.sent
(1161, 512) (132, 512) (1161, 132)
53 sentences above 0.7 threshold
data/en_docs/id_8333.sent
data/th_docs/id_8333.sent
(213, 512) (24, 512) (213, 24)
5 sentences above 0.7 threshold
data/en_docs/id_8334.sent
data/th_docs/id_8334.sent
(3, 512) (4

1 sentences above 0.7 threshold
data/en_docs/id_8393.sent
data/th_docs/id_8393.sent
(606, 512) (3, 512) (606, 3)
1 sentences above 0.7 threshold
data/en_docs/id_8394.sent
data/th_docs/id_8394.sent
(3, 512) (24, 512) (3, 24)
1 sentences above 0.7 threshold
data/en_docs/id_8395.sent
data/th_docs/id_8395.sent
(45, 512) (6, 512) (45, 6)
5 sentences above 0.7 threshold
data/en_docs/id_8396.sent
data/th_docs/id_8396.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8397.sent
data/th_docs/id_8397.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8398.sent
data/th_docs/id_8398.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8399.sent
data/th_docs/id_8399.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_84.sent
data/th_docs/id_84.sent
(846, 512) (15, 512) (846, 15)
1 sentences above 0.7 threshold
data/en_docs/id_840.sent
data/th_docs/id_840.sent
(6, 512) (9, 512) (6, 9)
1 sentences

16 sentences above 0.7 threshold
data/en_docs/id_846.sent
data/th_docs/id_846.sent
(597, 512) (66, 512) (597, 66)
34 sentences above 0.7 threshold
data/en_docs/id_8460.sent
data/th_docs/id_8460.sent
(171, 512) (12, 512) (171, 12)
4 sentences above 0.7 threshold
data/en_docs/id_8461.sent
data/th_docs/id_8461.sent
(714, 512) (36, 512) (714, 36)
40 sentences above 0.7 threshold
data/en_docs/id_8462.sent
data/th_docs/id_8462.sent
skipping...
data/en_docs/id_8463.sent
data/th_docs/id_8463.sent
(792, 512) (459, 512) (792, 459)
273 sentences above 0.7 threshold
data/en_docs/id_8464.sent
data/th_docs/id_8464.sent
(555, 512) (45, 512) (555, 45)
14 sentences above 0.7 threshold
data/en_docs/id_8465.sent
data/th_docs/id_8465.sent
(255, 512) (18, 512) (255, 18)
25 sentences above 0.7 threshold
data/en_docs/id_8466.sent
data/th_docs/id_8466.sent
(906, 512) (69, 512) (906, 69)
22 sentences above 0.7 threshold
data/en_docs/id_8467.sent
data/th_docs/id_8467.sent
(219, 512) (33, 512) (219, 33)
1 senten

61 sentences above 0.7 threshold
data/en_docs/id_8525.sent
data/th_docs/id_8525.sent
(168, 512) (39, 512) (168, 39)
12 sentences above 0.7 threshold
data/en_docs/id_8526.sent
data/th_docs/id_8526.sent
(6, 512) (15, 512) (6, 15)
1 sentences above 0.7 threshold
data/en_docs/id_8527.sent
data/th_docs/id_8527.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8528.sent
data/th_docs/id_8528.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8529.sent
data/th_docs/id_8529.sent
(231, 512) (213, 512) (231, 213)
25 sentences above 0.7 threshold
data/en_docs/id_853.sent
data/th_docs/id_853.sent
(78, 512) (39, 512) (78, 39)
12 sentences above 0.7 threshold
data/en_docs/id_8530.sent
data/th_docs/id_8530.sent
(114, 512) (15, 512) (114, 15)
9 sentences above 0.7 threshold
data/en_docs/id_8531.sent
data/th_docs/id_8531.sent
(69, 512) (18, 512) (69, 18)
3 sentences above 0.7 threshold
data/en_docs/id_8532.sent
data/th_docs/id_8532.sent
(762, 512) (

(660, 512) (6, 512) (660, 6)
1 sentences above 0.7 threshold
data/en_docs/id_8592.sent
data/th_docs/id_8592.sent
(1143, 512) (3, 512) (1143, 3)
1 sentences above 0.7 threshold
data/en_docs/id_8593.sent
data/th_docs/id_8593.sent
(21, 512) (18, 512) (21, 18)
4 sentences above 0.7 threshold
data/en_docs/id_8594.sent
data/th_docs/id_8594.sent
(18, 512) (6, 512) (18, 6)
6 sentences above 0.7 threshold
data/en_docs/id_8595.sent
data/th_docs/id_8595.sent
(342, 512) (297, 512) (342, 297)
4 sentences above 0.7 threshold
data/en_docs/id_8596.sent
data/th_docs/id_8596.sent
(2001, 512) (12, 512) (2001, 12)
11 sentences above 0.7 threshold
data/en_docs/id_8597.sent
data/th_docs/id_8597.sent
(87, 512) (141, 512) (87, 141)
3 sentences above 0.7 threshold
data/en_docs/id_8598.sent
data/th_docs/id_8598.sent
(240, 512) (3, 512) (240, 3)
1 sentences above 0.7 threshold
data/en_docs/id_8599.sent
data/th_docs/id_8599.sent
(420, 512) (3, 512) (420, 3)
1 sentences above 0.7 threshold
data/en_docs/id_86.sent


(417, 512) (24, 512) (417, 24)
24 sentences above 0.7 threshold
data/en_docs/id_8660.sent
data/th_docs/id_8660.sent
(138, 512) (6, 512) (138, 6)
1 sentences above 0.7 threshold
data/en_docs/id_8661.sent
data/th_docs/id_8661.sent
(48, 512) (3, 512) (48, 3)
4 sentences above 0.7 threshold
data/en_docs/id_8662.sent
data/th_docs/id_8662.sent
(45, 512) (90, 512) (45, 90)
4 sentences above 0.7 threshold
data/en_docs/id_8663.sent
data/th_docs/id_8663.sent
(6, 512) (66, 512) (6, 66)
1 sentences above 0.7 threshold
data/en_docs/id_8664.sent
data/th_docs/id_8664.sent
(42, 512) (24, 512) (42, 24)
4 sentences above 0.7 threshold
data/en_docs/id_8665.sent
data/th_docs/id_8665.sent
(159, 512) (87, 512) (159, 87)
47 sentences above 0.7 threshold
data/en_docs/id_8666.sent
data/th_docs/id_8666.sent
(3, 512) (3, 512) (3, 3)
2 sentences above 0.7 threshold
data/en_docs/id_8667.sent
data/th_docs/id_8667.sent
(3, 512) (12, 512) (3, 12)
2 sentences above 0.7 threshold
data/en_docs/id_8668.sent
data/th_docs/

1 sentences above 0.7 threshold
data/en_docs/id_873.sent
data/th_docs/id_873.sent
(261, 512) (3, 512) (261, 3)
6 sentences above 0.7 threshold
data/en_docs/id_8730.sent
data/th_docs/id_8730.sent
(9, 512) (3, 512) (9, 3)
1 sentences above 0.7 threshold
data/en_docs/id_8731.sent
data/th_docs/id_8731.sent
(3, 512) (24, 512) (3, 24)
1 sentences above 0.7 threshold
data/en_docs/id_8732.sent
data/th_docs/id_8732.sent
(81, 512) (27, 512) (81, 27)
1 sentences above 0.7 threshold
data/en_docs/id_8733.sent
data/th_docs/id_8733.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_8734.sent
data/th_docs/id_8734.sent
(9, 512) (12, 512) (9, 12)
1 sentences above 0.7 threshold
data/en_docs/id_8735.sent
data/th_docs/id_8735.sent
(27, 512) (42, 512) (27, 42)
2 sentences above 0.7 threshold
data/en_docs/id_8736.sent
data/th_docs/id_8736.sent
(36, 512) (27, 512) (36, 27)
1 sentences above 0.7 threshold
data/en_docs/id_8737.sent
data/th_docs/id_8737.sent
(36, 512) (6, 512) (36, 6)

14 sentences above 0.7 threshold
data/en_docs/id_8797.sent
data/th_docs/id_8797.sent
(24, 512) (27, 512) (24, 27)
3 sentences above 0.7 threshold
data/en_docs/id_8798.sent
data/th_docs/id_8798.sent
(24, 512) (39, 512) (24, 39)
3 sentences above 0.7 threshold
data/en_docs/id_8799.sent
data/th_docs/id_8799.sent
(129, 512) (57, 512) (129, 57)
2 sentences above 0.7 threshold
data/en_docs/id_88.sent
data/th_docs/id_88.sent
(9, 512) (9, 512) (9, 9)
4 sentences above 0.7 threshold
data/en_docs/id_880.sent
data/th_docs/id_880.sent
(933, 512) (3, 512) (933, 3)
7 sentences above 0.7 threshold
data/en_docs/id_8800.sent
data/th_docs/id_8800.sent
(6, 512) (3, 512) (6, 3)
6 sentences above 0.7 threshold
data/en_docs/id_8801.sent
data/th_docs/id_8801.sent
(336, 512) (21, 512) (336, 21)
8 sentences above 0.7 threshold
data/en_docs/id_8802.sent
data/th_docs/id_8802.sent
(6, 512) (33, 512) (6, 33)
1 sentences above 0.7 threshold
data/en_docs/id_8803.sent
data/th_docs/id_8803.sent
(3, 512) (3, 512) (3, 3

3 sentences above 0.7 threshold
data/en_docs/id_8865.sent
data/th_docs/id_8865.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8866.sent
data/th_docs/id_8866.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8867.sent
data/th_docs/id_8867.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8868.sent
data/th_docs/id_8868.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8869.sent
data/th_docs/id_8869.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_887.sent
data/th_docs/id_887.sent
(351, 512) (3, 512) (351, 3)
2 sentences above 0.7 threshold
data/en_docs/id_8870.sent
data/th_docs/id_8870.sent
(81, 512) (3, 512) (81, 3)
6 sentences above 0.7 threshold
data/en_docs/id_8871.sent
data/th_docs/id_8871.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8872.sent
data/th_docs/id_8872.sent
(3, 512) (3, 512) (3, 3)
3 sentences abo

(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8938.sent
data/th_docs/id_8938.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_8939.sent
data/th_docs/id_8939.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_894.sent
data/th_docs/id_894.sent
(105, 512) (36, 512) (105, 36)
1 sentences above 0.7 threshold
data/en_docs/id_8940.sent
data/th_docs/id_8940.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8941.sent
data/th_docs/id_8941.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8942.sent
data/th_docs/id_8942.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8943.sent
data/th_docs/id_8943.sent
(6, 512) (3, 512) (6, 3)
4 sentences above 0.7 threshold
data/en_docs/id_8944.sent
data/th_docs/id_8944.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_8945.sent
data/th_docs/id_8945.sent
(3, 512) (3, 51

(84, 512) (174, 512) (84, 174)
3 sentences above 0.7 threshold
data/en_docs/id_9010.sent
data/th_docs/id_9010.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9011.sent
data/th_docs/id_9011.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9012.sent
data/th_docs/id_9012.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9013.sent
data/th_docs/id_9013.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9014.sent
data/th_docs/id_9014.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9015.sent
data/th_docs/id_9015.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9016.sent
data/th_docs/id_9016.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9017.sent
data/th_docs/id_9017.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9018.sent
data/th_docs/id_9018.sent
(3, 512) (24,

(9, 512) (9, 512) (9, 9)
9 sentences above 0.7 threshold
data/en_docs/id_908.sent
data/th_docs/id_908.sent
(126, 512) (12, 512) (126, 12)
3 sentences above 0.7 threshold
data/en_docs/id_9080.sent
data/th_docs/id_9080.sent
(9, 512) (9, 512) (9, 9)
9 sentences above 0.7 threshold
data/en_docs/id_9081.sent
data/th_docs/id_9081.sent
(9, 512) (9, 512) (9, 9)
9 sentences above 0.7 threshold
data/en_docs/id_9082.sent
data/th_docs/id_9082.sent
(21, 512) (3, 512) (21, 3)
1 sentences above 0.7 threshold
data/en_docs/id_9083.sent
data/th_docs/id_9083.sent
(15, 512) (27, 512) (15, 27)
1 sentences above 0.7 threshold
data/en_docs/id_9084.sent
data/th_docs/id_9084.sent
(3, 512) (27, 512) (3, 27)
1 sentences above 0.7 threshold
data/en_docs/id_9085.sent
data/th_docs/id_9085.sent
(33, 512) (21, 512) (33, 21)
1 sentences above 0.7 threshold
data/en_docs/id_9086.sent
data/th_docs/id_9086.sent
(63, 512) (9, 512) (63, 9)
1 sentences above 0.7 threshold
data/en_docs/id_9087.sent
data/th_docs/id_9087.sent
(

22 sentences above 0.7 threshold
data/en_docs/id_915.sent
data/th_docs/id_915.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9150.sent
data/th_docs/id_9150.sent
(210, 512) (60, 512) (210, 60)
23 sentences above 0.7 threshold
data/en_docs/id_9151.sent
data/th_docs/id_9151.sent
(120, 512) (15, 512) (120, 15)
16 sentences above 0.7 threshold
data/en_docs/id_9152.sent
data/th_docs/id_9152.sent
(333, 512) (15, 512) (333, 15)
1 sentences above 0.7 threshold
data/en_docs/id_9153.sent
data/th_docs/id_9153.sent
(333, 512) (117, 512) (333, 117)
5 sentences above 0.7 threshold
data/en_docs/id_9154.sent
data/th_docs/id_9154.sent
(333, 512) (24, 512) (333, 24)
1 sentences above 0.7 threshold
data/en_docs/id_9155.sent
data/th_docs/id_9155.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9156.sent
data/th_docs/id_9156.sent
(6, 512) (3, 512) (6, 3)
1 sentences above 0.7 threshold
data/en_docs/id_9157.sent
data/th_docs/id_9157.sent
(12, 512) (

(216, 512) (24, 512) (216, 24)
7 sentences above 0.7 threshold
data/en_docs/id_9220.sent
data/th_docs/id_9220.sent
(18, 512) (45, 512) (18, 45)
2 sentences above 0.7 threshold
data/en_docs/id_9221.sent
data/th_docs/id_9221.sent
(18, 512) (138, 512) (18, 138)
2 sentences above 0.7 threshold
data/en_docs/id_9222.sent
data/th_docs/id_9222.sent
skipping...
data/en_docs/id_9223.sent
data/th_docs/id_9223.sent
(21, 512) (24, 512) (21, 24)
4 sentences above 0.7 threshold
data/en_docs/id_9224.sent
data/th_docs/id_9224.sent
(18, 512) (45, 512) (18, 45)
2 sentences above 0.7 threshold
data/en_docs/id_9225.sent
data/th_docs/id_9225.sent
(78, 512) (141, 512) (78, 141)
3 sentences above 0.7 threshold
data/en_docs/id_9226.sent
data/th_docs/id_9226.sent
(18, 512) (45, 512) (18, 45)
2 sentences above 0.7 threshold
data/en_docs/id_9227.sent
data/th_docs/id_9227.sent
(6, 512) (3, 512) (6, 3)
5 sentences above 0.7 threshold
data/en_docs/id_9228.sent
data/th_docs/id_9228.sent
(3, 512) (21, 512) (3, 21)
2 s

(546, 512) (195, 512) (546, 195)
71 sentences above 0.7 threshold
data/en_docs/id_9289.sent
data/th_docs/id_9289.sent
(15, 512) (24, 512) (15, 24)
11 sentences above 0.7 threshold
data/en_docs/id_929.sent
data/th_docs/id_929.sent
(291, 512) (27, 512) (291, 27)
1 sentences above 0.7 threshold
data/en_docs/id_9290.sent
data/th_docs/id_9290.sent
(30, 512) (51, 512) (30, 51)
1 sentences above 0.7 threshold
data/en_docs/id_9291.sent
data/th_docs/id_9291.sent
(9, 512) (3, 512) (9, 3)
6 sentences above 0.7 threshold
data/en_docs/id_9292.sent
data/th_docs/id_9292.sent
(18, 512) (9, 512) (18, 9)
1 sentences above 0.7 threshold
data/en_docs/id_9293.sent
data/th_docs/id_9293.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9294.sent
data/th_docs/id_9294.sent
(102, 512) (102, 512) (102, 102)
45 sentences above 0.7 threshold
data/en_docs/id_9295.sent
data/th_docs/id_9295.sent
skipping...
data/en_docs/id_9296.sent
data/th_docs/id_9296.sent
(9, 512) (3, 512) (9, 3)
1 sent

7 sentences above 0.7 threshold
data/en_docs/id_9357.sent
data/th_docs/id_9357.sent
(90, 512) (126, 512) (90, 126)
15 sentences above 0.7 threshold
data/en_docs/id_9358.sent
data/th_docs/id_9358.sent
(12, 512) (6, 512) (12, 6)
2 sentences above 0.7 threshold
data/en_docs/id_9359.sent
data/th_docs/id_9359.sent
(6, 512) (27, 512) (6, 27)
1 sentences above 0.7 threshold
data/en_docs/id_936.sent
data/th_docs/id_936.sent
(894, 512) (24, 512) (894, 24)
18 sentences above 0.7 threshold
data/en_docs/id_9360.sent
data/th_docs/id_9360.sent
(39, 512) (6, 512) (39, 6)
6 sentences above 0.7 threshold
data/en_docs/id_9361.sent
data/th_docs/id_9361.sent
(210, 512) (33, 512) (210, 33)
28 sentences above 0.7 threshold
data/en_docs/id_9362.sent
data/th_docs/id_9362.sent
(39, 512) (33, 512) (39, 33)
2 sentences above 0.7 threshold
data/en_docs/id_9363.sent
data/th_docs/id_9363.sent
(9, 512) (18, 512) (9, 18)
2 sentences above 0.7 threshold
data/en_docs/id_9364.sent
data/th_docs/id_9364.sent
(36, 512) (3,

(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9429.sent
data/th_docs/id_9429.sent
(192, 512) (9, 512) (192, 9)
6 sentences above 0.7 threshold
data/en_docs/id_943.sent
data/th_docs/id_943.sent
(1197, 512) (12, 512) (1197, 12)
27 sentences above 0.7 threshold
data/en_docs/id_9430.sent
data/th_docs/id_9430.sent
(6, 512) (6, 512) (6, 6)
6 sentences above 0.7 threshold
data/en_docs/id_9431.sent
data/th_docs/id_9431.sent
(3, 512) (87, 512) (3, 87)
1 sentences above 0.7 threshold
data/en_docs/id_9432.sent
data/th_docs/id_9432.sent
(36, 512) (3, 512) (36, 3)
5 sentences above 0.7 threshold
data/en_docs/id_9433.sent
data/th_docs/id_9433.sent
(42, 512) (3, 512) (42, 3)
5 sentences above 0.7 threshold
data/en_docs/id_9434.sent
data/th_docs/id_9434.sent
(30, 512) (3, 512) (30, 3)
6 sentences above 0.7 threshold
data/en_docs/id_9435.sent
data/th_docs/id_9435.sent
(6, 512) (18, 512) (6, 18)
2 sentences above 0.7 threshold
data/en_docs/id_9436.sent
data/th_docs/id_9436.sen

6 sentences above 0.7 threshold
data/en_docs/id_9497.sent
data/th_docs/id_9497.sent
(99, 512) (63, 512) (99, 63)
60 sentences above 0.7 threshold
data/en_docs/id_9498.sent
data/th_docs/id_9498.sent
(3, 512) (6, 512) (3, 6)
1 sentences above 0.7 threshold
data/en_docs/id_9499.sent
data/th_docs/id_9499.sent
(15, 512) (12, 512) (15, 12)
7 sentences above 0.7 threshold
data/en_docs/id_95.sent
data/th_docs/id_95.sent
(48, 512) (9, 512) (48, 9)
8 sentences above 0.7 threshold
data/en_docs/id_950.sent
data/th_docs/id_950.sent
(2055, 512) (15, 512) (2055, 15)
20 sentences above 0.7 threshold
data/en_docs/id_9500.sent
data/th_docs/id_9500.sent
(9, 512) (3, 512) (9, 3)
4 sentences above 0.7 threshold
data/en_docs/id_9501.sent
data/th_docs/id_9501.sent
(15, 512) (9, 512) (15, 9)
6 sentences above 0.7 threshold
data/en_docs/id_9502.sent
data/th_docs/id_9502.sent
(15, 512) (3, 512) (15, 3)
6 sentences above 0.7 threshold
data/en_docs/id_9503.sent
data/th_docs/id_9503.sent
(3, 512) (3, 512) (3, 3)
3

(15, 512) (39, 512) (15, 39)
1 sentences above 0.7 threshold
data/en_docs/id_9569.sent
data/th_docs/id_9569.sent
(6, 512) (12, 512) (6, 12)
1 sentences above 0.7 threshold
data/en_docs/id_957.sent
data/th_docs/id_957.sent
(33, 512) (93, 512) (33, 93)
5 sentences above 0.7 threshold
data/en_docs/id_9570.sent
data/th_docs/id_9570.sent
(6, 512) (6, 512) (6, 6)
1 sentences above 0.7 threshold
data/en_docs/id_9571.sent
data/th_docs/id_9571.sent
(66, 512) (48, 512) (66, 48)
1 sentences above 0.7 threshold
data/en_docs/id_9572.sent
data/th_docs/id_9572.sent
(30, 512) (6, 512) (30, 6)
1 sentences above 0.7 threshold
data/en_docs/id_9573.sent
data/th_docs/id_9573.sent
(6, 512) (9, 512) (6, 9)
1 sentences above 0.7 threshold
data/en_docs/id_9574.sent
data/th_docs/id_9574.sent
(6, 512) (21, 512) (6, 21)
1 sentences above 0.7 threshold
data/en_docs/id_9575.sent
data/th_docs/id_9575.sent
(6, 512) (15, 512) (6, 15)
1 sentences above 0.7 threshold
data/en_docs/id_9576.sent
data/th_docs/id_9576.sent
(

(186, 512) (6, 512) (186, 6)
10 sentences above 0.7 threshold
data/en_docs/id_9639.sent
data/th_docs/id_9639.sent
(36, 512) (66, 512) (36, 66)
1 sentences above 0.7 threshold
data/en_docs/id_964.sent
data/th_docs/id_964.sent
(207, 512) (33, 512) (207, 33)
20 sentences above 0.7 threshold
data/en_docs/id_9640.sent
data/th_docs/id_9640.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9641.sent
data/th_docs/id_9641.sent
(81, 512) (9, 512) (81, 9)
5 sentences above 0.7 threshold
data/en_docs/id_9642.sent
data/th_docs/id_9642.sent
(90, 512) (9, 512) (90, 9)
28 sentences above 0.7 threshold
data/en_docs/id_9643.sent
data/th_docs/id_9643.sent
(27, 512) (12, 512) (27, 12)
1 sentences above 0.7 threshold
data/en_docs/id_9644.sent
data/th_docs/id_9644.sent
(363, 512) (15, 512) (363, 15)
3 sentences above 0.7 threshold
data/en_docs/id_9645.sent
data/th_docs/id_9645.sent
(363, 512) (36, 512) (363, 36)
48 sentences above 0.7 threshold
data/en_docs/id_9646.sent
data/th_d

8 sentences above 0.7 threshold
data/en_docs/id_9705.sent
data/th_docs/id_9705.sent
(6, 512) (9, 512) (6, 9)
3 sentences above 0.7 threshold
data/en_docs/id_9706.sent
data/th_docs/id_9706.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9707.sent
data/th_docs/id_9707.sent
(9, 512) (3, 512) (9, 3)
5 sentences above 0.7 threshold
data/en_docs/id_9708.sent
data/th_docs/id_9708.sent
(18, 512) (3, 512) (18, 3)
5 sentences above 0.7 threshold
data/en_docs/id_9709.sent
data/th_docs/id_9709.sent
(24, 512) (3, 512) (24, 3)
5 sentences above 0.7 threshold
data/en_docs/id_971.sent
data/th_docs/id_971.sent
(150, 512) (30, 512) (150, 30)
20 sentences above 0.7 threshold
data/en_docs/id_9710.sent
data/th_docs/id_9710.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9711.sent
data/th_docs/id_9711.sent
(18, 512) (3, 512) (18, 3)
4 sentences above 0.7 threshold
data/en_docs/id_9712.sent
data/th_docs/id_9712.sent
(24, 512) (3, 512) (24, 3)
4 sent

(63, 512) (105, 512) (63, 105)
12 sentences above 0.7 threshold
data/en_docs/id_9775.sent
data/th_docs/id_9775.sent
(12, 512) (3, 512) (12, 3)
1 sentences above 0.7 threshold
data/en_docs/id_9776.sent
data/th_docs/id_9776.sent
(15, 512) (3, 512) (15, 3)
1 sentences above 0.7 threshold
data/en_docs/id_9777.sent
data/th_docs/id_9777.sent
(6, 512) (45, 512) (6, 45)
1 sentences above 0.7 threshold
data/en_docs/id_9778.sent
data/th_docs/id_9778.sent
(606, 512) (162, 512) (606, 162)
171 sentences above 0.7 threshold
data/en_docs/id_9779.sent
data/th_docs/id_9779.sent
(549, 512) (21, 512) (549, 21)
7 sentences above 0.7 threshold
data/en_docs/id_978.sent
data/th_docs/id_978.sent
(681, 512) (21, 512) (681, 21)
39 sentences above 0.7 threshold
data/en_docs/id_9780.sent
data/th_docs/id_9780.sent
(9, 512) (3, 512) (9, 3)
2 sentences above 0.7 threshold
data/en_docs/id_9781.sent
data/th_docs/id_9781.sent
(12, 512) (72, 512) (12, 72)
1 sentences above 0.7 threshold
data/en_docs/id_9782.sent
data/th

2 sentences above 0.7 threshold
data/en_docs/id_9842.sent
data/th_docs/id_9842.sent
(48, 512) (15, 512) (48, 15)
3 sentences above 0.7 threshold
data/en_docs/id_9843.sent
data/th_docs/id_9843.sent
(72, 512) (12, 512) (72, 12)
1 sentences above 0.7 threshold
data/en_docs/id_9844.sent
data/th_docs/id_9844.sent
(3, 512) (39, 512) (3, 39)
1 sentences above 0.7 threshold
data/en_docs/id_9845.sent
data/th_docs/id_9845.sent
(27, 512) (27, 512) (27, 27)
1 sentences above 0.7 threshold
data/en_docs/id_9846.sent
data/th_docs/id_9846.sent
(3, 512) (12, 512) (3, 12)
1 sentences above 0.7 threshold
data/en_docs/id_9847.sent
data/th_docs/id_9847.sent
(24, 512) (30, 512) (24, 30)
2 sentences above 0.7 threshold
data/en_docs/id_9848.sent
data/th_docs/id_9848.sent
(72, 512) (27, 512) (72, 27)
1 sentences above 0.7 threshold
data/en_docs/id_9849.sent
data/th_docs/id_9849.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_985.sent
data/th_docs/id_985.sent
(51, 512) (12, 512) (5

(1545, 512) (6, 512) (1545, 6)
9 sentences above 0.7 threshold
data/en_docs/id_9910.sent
data/th_docs/id_9910.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_9911.sent
data/th_docs/id_9911.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_9912.sent
data/th_docs/id_9912.sent
(3, 512) (3, 512) (3, 3)
1 sentences above 0.7 threshold
data/en_docs/id_9913.sent
data/th_docs/id_9913.sent
(45, 512) (33, 512) (45, 33)
14 sentences above 0.7 threshold
data/en_docs/id_9914.sent
data/th_docs/id_9914.sent
(243, 512) (18, 512) (243, 18)
10 sentences above 0.7 threshold
data/en_docs/id_9915.sent
data/th_docs/id_9915.sent
(138, 512) (78, 512) (138, 78)
79 sentences above 0.7 threshold
data/en_docs/id_9916.sent
data/th_docs/id_9916.sent
(66, 512) (18, 512) (66, 18)
9 sentences above 0.7 threshold
data/en_docs/id_9917.sent
data/th_docs/id_9917.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9918.sent
data/th_docs/id_

(3, 512) (24, 512) (3, 24)
1 sentences above 0.7 threshold
data/en_docs/id_9986.sent
data/th_docs/id_9986.sent
(144, 512) (60, 512) (144, 60)
50 sentences above 0.7 threshold
data/en_docs/id_9987.sent
data/th_docs/id_9987.sent
(18, 512) (3, 512) (18, 3)
1 sentences above 0.7 threshold
data/en_docs/id_9988.sent
data/th_docs/id_9988.sent
(93, 512) (18, 512) (93, 18)
30 sentences above 0.7 threshold
data/en_docs/id_9989.sent
data/th_docs/id_9989.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_999.sent
data/th_docs/id_999.sent
(105, 512) (6, 512) (105, 6)
4 sentences above 0.7 threshold
data/en_docs/id_9990.sent
data/th_docs/id_9990.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9991.sent
data/th_docs/id_9991.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9992.sent
data/th_docs/id_9992.sent
(3, 512) (3, 512) (3, 3)
3 sentences above 0.7 threshold
data/en_docs/id_9993.sent
data/th_docs/id_9993.sent
(

In [13]:
# Rank rows by descending use_score inside every (src, id_en) group
# (rank 1 = highest score of the group) and keep only the rank-1 row.
df['rnk'] = (
    df.sort_values('use_score', ascending=False)
      .groupby(['src', 'id_en'])
      .cumcount() + 1
)
# .copy() detaches the filtered frame from the unfiltered one, so the column
# assignments made on df in later cells do not raise SettingWithCopyWarning.
df = df[df.rnk == 1].copy()
df.shape

(88539, 6)

In [20]:
# Inspect the current frame; the notebook renders a truncated preview of it.
df

Unnamed: 0,en_text,th_text,use_score,id_en,src
0,Palestine at the 2004 Summer Olympics,ปาเลสไตน์ในโอลิมปิกฤดูร้อน 2004,0.857068,0,data/en_docs/id_0.sent
1,Palestine competed at the 2004 Summer Olympics...,ปาเลสไตน์ในโอลิมปิกฤดูร้อน 2004 ปาเลสไตน์ เข้า...,0.808070,1,data/en_docs/id_0.sent
5,Pakistan at the 2004 Summer Olympics,ประเทศปากีสถานในโอลิมปิกฤดูร้อน 2004,0.911841,0,data/en_docs/id_1.sent
6,Pakistan competed at the 2004 Summer Olympics ...,ประเทศปากีสถานในโอลิมปิกฤดูร้อน 2004 ประเทศปาก...,0.840949,1,data/en_docs/id_1.sent
10,Sri Lanka at the 2004 Summer Olympics,ประเทศศรีลังกาในโอลิมปิกฤดูร้อน 2004,0.898206,0,data/en_docs/id_10.sent
...,...,...,...,...,...
159462,1998 in Portugal Events in the year 1998 in Po...,ประเทศโปรตุเกสใน ค.ศ. 1998 เหตุการณ์ที่เกิดขึ้...,0.888258,0,data/en_docs/id_9997.sent
159464,Sultan Al Kuwari,รัฐสุลต่านนัจญด์,0.855637,0,data/en_docs/id_9998.sent
159465,Hemiorchis,Hemiorchis,1.000000,0,data/en_docs/id_9999.sent
159466,"It contains three recognized species, native t...",Hemiorchis เป็นสกุลของพืชในวงศ์ขิง มีสมาชิก 3 ...,0.717094,2,data/en_docs/id_9999.sent


In [5]:
# Stack the frames accumulated in res_en_ths into a single frame, then drop
# rows with missing values, drop exact duplicate rows, and renumber from 0.
df = pd.concat(res_en_ths)
df = df.dropna()
df = df.drop_duplicates()
df = df.reset_index(drop=True)

In [11]:
# Preview the first 20 rows after ordering by ascending id_en.
df.sort_values(by='id_en').iloc[:20]

Unnamed: 0,en_text,th_text,use_score,id_en,src
0,Palestine at the 2004 Summer Olympics,ปาเลสไตน์ในโอลิมปิกฤดูร้อน 2004,0.857068,0,data/en_docs/id_0.sent
66133,Nepenthes surigaoensis Nepenthes surigaoensis ...,Nepenthes surigaoensis Nepenthes surigaoensis ...,0.764689,0,data/en_docs/id_2868.sent
66138,Nepenthes surigaoensis Nepenthes surigaoensis ...,Nepenthes surigaoensis Nepenthes surigaoensis ...,0.800336,0,data/en_docs/id_2868.sent
66140,Nepenthes chaniana,Nepenthes chaniana,1.0,0,data/en_docs/id_2869.sent
66143,Nepenthes chaniana Nepenthes chaniana (; after...,Nepenthes chaniana Nepenthes chaniana ( ได้ชื่...,0.702826,0,data/en_docs/id_2869.sent
66148,Nepenthes chaniana Nepenthes chaniana (; after...,Nepenthes chaniana Nepenthes chaniana ( ได้ชื่...,0.768615,0,data/en_docs/id_2869.sent
66155,Millennium Development Goals,เป้าหมายการพัฒนาสหัสวรรษ,0.890555,0,data/en_docs/id_287.sent
66159,Millennium Development Goals The Millennium De...,เป้าหมายการพัฒนาสหัสวรรษ เป้าหมายการพัฒนาสหัสว...,0.811786,0,data/en_docs/id_287.sent
66163,Millennium Development Goals The Millennium De...,เป้าหมายการพัฒนาสหัสวรรษ เป้าหมายการพัฒนาสหัสว...,0.767815,0,data/en_docs/id_287.sent
66167,British Virgin Islands at the 1988 Summer Olym...,หมู่เกาะบริติชเวอร์จินในโอลิมปิกฤดูร้อน 1988,0.90567,0,data/en_docs/id_2870.sent


In [27]:
import re
# remove special tokens
def _tidy_text(raw):
    """Flatten one cell to a single line and pass it through rm_useless_spaces.

    The value is converted with str(); newlines and tabs become spaces,
    carriage returns are removed, and leading/trailing whitespace is stripped
    before rm_useless_spaces is applied.
    """
    flattened = str(raw).replace('\n', ' ').replace('\t', ' ').replace('\r', '')
    return rm_useless_spaces(flattened.strip())

# Both language columns get exactly the same treatment.
for text_col in ('en_text', 'th_text'):
    df[text_col] = df[text_col].map(_tidy_text)

# filter by percentage of characters and number of tokens
# en_tokens = number of whitespace-separated tokens in the cleaned English text
df['en_tokens'] = df['en_text'].map(lambda sentence: len(sentence.split()))
def char_percent(pattern,text):
    """Return the number of regex matches of ``pattern`` in ``text`` per character.

    The count of non-overlapping matches is divided by ``len(text) + 0.01``;
    the small constant keeps the division defined for an empty string, for
    which the result is 0.0.
    """
    hits = sum(1 for _ in re.finditer(pattern, text))
    return hits / (len(text) + 0.01)
# per_en / per_th: char_percent of ASCII letters+digits in the English text and
# of characters in the ก-๙ range plus digits in the Thai text.
df['per_en'] = df.en_text.map(lambda x: char_percent(r'[a-zA-Z0-9]', x))
df['per_th'] = df.th_text.map(lambda x: char_percent(r'[ก-๙0-9]', x))
# th_in_en: 1 when the English text contains at least one character in ก-๙.
df['th_in_en'] = df.en_text.map(lambda x: 1 if char_percent(r'[ก-๙]', x) else 0)

# Keep rows whose English side has no Thai characters, has 6-149 tokens, and
# where both ratios exceed 0.5. The two original filters are combined into one
# mask; .copy() detaches the result so later cells can add columns to df
# without raising SettingWithCopyWarning.
df = df[
    (df.th_in_en == 0)
    & (df.en_tokens > 5) & (df.en_tokens < 150)
    & (df.per_en > 0.5) & (df.per_th > 0.5)
].copy()

# #groupby to get unique en and th texts
# df = df.groupby('en_text').max().sort_values('en_tokens').reset_index()
# df = df.groupby('th_text').max().sort_values('en_tokens').reset_index()

In [21]:
# Write the current frame to the configured output path, omitting the index.
df.to_csv(path_or_buf=args.output_path, index=False)

In [47]:
# Rank rows by descending use_score within each distinct th_text, keep only
# the top-ranked row per th_text, and renumber the index from 0.
df['rnk'] = (
    df.sort_values(by='use_score', ascending=False)
      .groupby('th_text')
      .cumcount()
      .add(1)
)
df = df.loc[df['rnk'] == 1].reset_index(drop=True)

In [52]:
# Persist the final pairs, keeping only the five listed columns. The path is
# taken from args.output_path (the setting the earlier export cell already
# uses) instead of repeating the same location as a string literal.
df[['en_text','th_text','src','use_score','id_en']].to_csv(args.output_path,index=False)

In [50]:
# Spot-check one pair: the first two columns of the row at position 41136.
df.iat[41136, 0], df.iat[41136, 1]

('1994 in Portugal Events in the year 1994 in Portugal.',
 'ประเทศโปรตุเกสใน ค.ศ. 1994 เหตุการณ์ที่เกิดขึ้นในประเทศโปรตุเกส')

In [53]:
# (rows, columns) of the current frame.
df.shape

(41139, 10)