### Generate & combine vectors (Transformer)

In [1]:
from DeezyMatch import inference as trans_inference
from DeezyMatch import combine_vecs_trans

def generate_vectors_trans(model_name, query_path, candidates_path, scenario="trans", inference=trans_inference):
    """Run vector-mode inference on the query set, then on the candidate set.

    Args:
        model_name: Name of the model folder under ``./models/``; also the
            stem of the ``.model`` and ``.vocab`` files inside that folder.
        query_path: Dataset path handed to the first inference call.
        candidates_path: Dataset path handed to the second inference call.
        scenario: Name appended to ``./queries/`` and ``./candidates/`` to
            build the ``scenario`` argument of each call.
        inference: Callable doing the actual work; defaults to the
            ``inference`` function imported from ``DeezyMatch``.
    """
    model_dir = "./models/" + model_name
    config_file = model_dir + "/input_dfm_rnn.yaml"
    model_file = model_dir + "/" + model_name + ".model"
    vocab_file = model_dir + "/" + model_name + ".vocab"

    # Queries are processed first, candidates second; both calls share the
    # same model files and differ only in dataset and output scenario.
    for dataset_path, scenario_root in (
        (query_path, "./queries/"),
        (candidates_path, "./candidates/"),
    ):
        inference(
            input_file_path=config_file,
            dataset_path=dataset_path,
            pretrained_model_path=model_file,
            pretrained_vocab_path=vocab_file,
            inference_mode="vect",
            scenario=scenario_root + scenario,
        )

def combine_vector_trans(scenario, model_name):
    """Combine the generated vectors for queries, then for candidates.

    Args:
        scenario: Name appended to ``./queries/`` / ``./candidates/`` for the
            input scenario and to ``./combined/queries_`` /
            ``./combined/candidates_`` for the output scenario.
        model_name: Forwarded unchanged to ``combine_vecs_trans``.
    """
    # Queries first, then candidates -- same order as vector generation.
    for side in ("queries", "candidates"):
        combine_vecs_trans(
            model_name=model_name,
            input_scenario="./" + side + "/" + scenario,
            output_scenario="./combined/" + side + "_" + scenario,
            print_every=10,
        )

### Generate & combine vectors (GRU)

In [2]:
from DeezyMatchBase.DeezyMatch import inference as gru_inference
from DeezyMatchBase.DeezyMatch import combine_vecs

def generate_vectors_gru(model_name, query_path, candidates_path, scenario="gru", inference=gru_inference):
    """Run vector-mode inference on the query set, then on the candidate set.

    Args:
        model_name: Name of the model folder under ``./models/``; also the
            stem of the ``.model`` and ``.vocab`` files inside that folder.
        query_path: Dataset path handed to the first inference call.
        candidates_path: Dataset path handed to the second inference call.
        scenario: Name appended to ``./queries/`` and ``./candidates/`` to
            build the ``scenario`` argument of each call.
        inference: Callable doing the actual work; defaults to the
            ``inference`` function imported from ``DeezyMatchBase.DeezyMatch``.
    """
    model_root = "./models/" + model_name
    yaml_path = model_root + "/input_dfm_rnn.yaml"
    weights_path = model_root + "/" + model_name + ".model"
    vocab_path = model_root + "/" + model_name + ".vocab"

    # (dataset, output scenario prefix) pairs, processed in this order.
    jobs = [
        (query_path, "./queries/"),
        (candidates_path, "./candidates/"),
    ]
    for dataset, prefix in jobs:
        inference(
            input_file_path=yaml_path,
            dataset_path=dataset,
            pretrained_model_path=weights_path,
            pretrained_vocab_path=vocab_path,
            inference_mode="vect",
            scenario=prefix + scenario,
        )

def combine_vector_gru(scenario):
    """Combine the generated vectors for queries, then for candidates.

    Args:
        scenario: Name appended to ``./queries/`` / ``./candidates/`` for the
            input scenario and to ``./combined/queries_`` /
            ``./combined/candidates_`` for the output scenario.
    """
    # Queries first, then candidates -- same order as vector generation.
    for group in ("queries", "candidates"):
        combine_vecs(
            input_scenario="./" + group + "/" + scenario,
            output_scenario="./combined/" + group + "_" + scenario,
            print_every=10,
        )

## Demo

In [3]:
# --- Dataset -----------------------------------------------------------------
# A single query name and the name variants it is compared against.
queries = "Michael Jackson"
candidates = [
    "Michael Jackson",
    "Micheal Jackson",
    "MichaelJackson",
    "Michael-Jackson",
    "Michael Joseph Jackson",
    "Michael Joe Jackson",
    "Jackson, Michael",
    "Jackson, Michael Joseph",
    "Майкл Джексон",
    "Μάϊκλ Τζάκσον",
    "マイケルジャクソン",
    "M. J.",
    "M. Jackson",
    "Michael J. Jackson",
    "M. J. Jackson",
    "Mr. Jackson",
    "MJ",
    "King of Pop",
]

# Dataset files read by the neural pipelines below.
queries_path = "./dataset/queries.txt"
candidates_path = "./dataset/candidates.txt"
more_candidates_path = "./dataset/more-candidates.txt"

# --- Parameters --------------------------------------------------------------
# Shared by the Monge-Elkan filter and the candidate rankers.
threshold = 0.8
num_candidates = 20
search_size = 10
number_test_rows = 1

### Monge-Elkan

In [4]:
import DeezyMatch.traditional_string_matching as traditional_method

# Score every candidate against the query with Monge-Elkan and keep the ones
# at or above `threshold`.
#   result     : candidate -> score truncated to 4 characters (display form)
#   raw_scores : candidate -> untruncated numeric score (used for ranking)
result = {}
raw_scores = {}
for cand in candidates:
    score = traditional_method.monge_elkan(queries, cand)
    if score >= threshold:
        raw_scores[cand] = score
        result[cand] = str(score)[:4]

# Rank on the numeric score, not on its truncated string form: comparing the
# strings orders near-equal scores arbitrarily and compares text rather than
# magnitude. The ranking is kept in `ranked` so the name `re` (the stdlib
# regex module) is no longer shadowed.
ranked = sorted(raw_scores, key=raw_scores.get, reverse=True)

print("Monge-Elkan:", len(ranked), "results")
for cand in ranked:
    print(result[cand], cand)

Monge-Elkan: 7 results
1.0 Michael Jackson
0.98 Micheal Jackson
0.98 Jackson, Michael
0.92 Michael Joseph Jackson
0.92 Michael J. Jackson
0.91 Michael Joe Jackson
0.91 Jackson, Michael Joseph


### BiGRU

In [5]:
from DeezyMatch import candidate_ranker

# BiGRU pipeline: generate vectors for both datasets, combine them, then rank
# the candidates for each query.
model_name = "finetuned_gru_wikidata_23072022"
scenario = "gru"

generate_vectors_gru(model_name, queries_path, candidates_path, scenario=scenario)
combine_vector_gru(scenario)

# Common stem of the pretrained .model / .vocab files.
_model_stem = "./models/" + model_name + "/" + model_name

candidates_pd = candidate_ranker(
    query_scenario="./combined/queries_" + scenario,
    candidate_scenario="./combined/candidates_" + scenario,
    ranking_metric="conf",
    selection_threshold=threshold,
    num_candidates=num_candidates,
    search_size=search_size,
    output_path="./ranker_results/" + scenario,
    pretrained_model_path=_model_stem + ".model",
    pretrained_vocab_path=_model_stem + ".vocab",
    number_test_rows=number_test_rows,
)

[92m2022-08-29 14:09:49[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./models/finetuned_gru_wikidata_23072022/input_dfm_rnn.yaml[0m
[92m2022-08-29 14:09:50[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m
[92m2022-08-29 14:09:54[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread CSV file: ./dataset/queries.txt[0m
[92m2022-08-29 14:09:54[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mnumber of labels, True: 1 and False: 0[0m
[92m2022-08-29 14:09:54[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mskipping 0 lines[0m


                                                 

[92m2022-08-29 14:09:54[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32msave test-data-class: d:\KLTN\DeezyMatch-master\queries\gru\dataframe.df[0m


                                                   

[92m2022-08-29 14:09:54[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mh1_shape[0,1] torch.Size([1, 64])[0m
--- 4.0658605098724365 seconds ---
[92m2022-08-29 14:09:54[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./models/finetuned_gru_wikidata_23072022/input_dfm_rnn.yaml[0m
[92m2022-08-29 14:09:54[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m
[92m2022-08-29 14:09:56[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread CSV file: ./dataset/candidates.txt[0m
[92m2022-08-29 14:09:56[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mnumber of labels, True: 23 and False: 0[0m
[92m2022-08-29 14:09:56[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mskipping 0 lines[0m


                                                  

[92m2022-08-29 14:09:56[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32msave test-data-class: d:\KLTN\DeezyMatch-master\candidates\gru\dataframe.df[0m


                                           

[92m2022-08-29 14:09:56[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mh1_shape[0,1] torch.Size([23, 64])[0m




--- 1.657121181488037 seconds ---
[92m2022-08-29 14:09:56[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./queries/gru\input_dfm_rnn.yaml[0m
[92m2022-08-29 14:09:56[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m


-- Combine vectors
Reading vectors from ./queries/gru\embeddings\rnn_fwd*
0000000 ./queries/gru\embeddings\rnn_fwd_0


-- Combine IDs

0000000 ./queries/gru\embeddings\rnn_indxs_0


-- Combine vectors
Reading vectors from ./queries/gru\embeddings\rnn_bwd*
0000000 ./queries/gru\embeddings\rnn_bwd_0


-- Combine IDs

0000000 ./queries/gru\embeddings\rnn_indxs_0
--- 15.810302495956421 seconds ---
[92m2022-08-29 14:09:56[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./candidates/gru\input_dfm_rnn.yaml[0m
[92m2022-08-29 14:09:56[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m


-- Combine vectors
Reading vectors from ./candidates/gru\embeddings\rn

                                                   

ID: 1/1 -- Number of found candidates so far: 9, searched: 10


                                                   

ID: 1/1 -- Number of found candidates so far: 14, searched: 20


                                                 

ID: 1/1 -- Number of found candidates so far: 15, searched: 23




TOTAL TIME: 2.4386560916900635


In [6]:
# Show one ranked row: print its query, then display its pred_score mapping
# as the cell output.
pos = 0
selected_row = candidates_pd.iloc[pos]
print(selected_row["query"])
selected_row["pred_score"]

Michael Jackson


OrderedDict([('Michael Jackson', 0.9972),
             ('Michael-Jackson', 0.9961),
             ('Michael Joe Jackson', 0.9951),
             ('Micheal Jackson', 0.9947),
             ('Michael J. Jackson', 0.9935),
             ('MichaelJackson', 0.9935),
             ('Майкл Джексон', 0.9891),
             ('M. J. Jackson', 0.9762),
             ('M. Jackson', 0.9723),
             ('Μάϊκλ Τζάκσον', 0.9631),
             ('Mr. Jackson', 0.9477),
             ('マイケルジャクソン', 0.9059),
             ('mary jackson', 0.9046),
             ('Michael Joseph Jackson', 0.8855),
             ('Jackson, Michael', 0.8672)])

### Transformer

In [7]:
from DeezyMatch import candidate_ranker_trans

# Transformer pipeline: generate vectors for both datasets, combine them, then
# rank the candidates for each query.
model_name = "finetuned_transformer_wikidata_28072022"
scenario = "trans"
# scenario = "morecand"  # alternative scenario name left disabled

generate_vectors_trans(model_name, queries_path, candidates_path, scenario=scenario)
combine_vector_trans(scenario, model_name)

# Common stem of the pretrained .model / .vocab files.
_model_stem = "./models/" + model_name + "/" + model_name

candidates_pd = candidate_ranker_trans(
    model_name=model_name,
    query_scenario="./combined/queries_" + scenario,
    candidate_scenario="./combined/candidates_" + scenario,
    ranking_metric="conf",
    selection_threshold=threshold,
    num_candidates=num_candidates,
    search_size=search_size,
    output_path="./ranker_results/" + scenario,
    pretrained_model_path=_model_stem + ".model",
    pretrained_vocab_path=_model_stem + ".vocab",
    number_test_rows=number_test_rows,
)

[92m2022-08-29 14:10:03[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./models/finetuned_transformer_wikidata_28072022/input_dfm_rnn.yaml[0m
[92m2022-08-29 14:10:03[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m
[92m2022-08-29 14:10:07[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread CSV file: ./dataset/queries.txt[0m
[92m2022-08-29 14:10:07[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mnumber of labels, True: 1 and False: 0[0m
[92m2022-08-29 14:10:07[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mskipping 0 lines[0m


                                                 

[92m2022-08-29 14:10:07[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32msave test-data-class: d:\KLTN\DeezyMatch-master\queries\trans\dataframe.df[0m


                                           

--- 3.4216957092285156 seconds ---




[92m2022-08-29 14:10:07[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./models/finetuned_transformer_wikidata_28072022/input_dfm_rnn.yaml[0m
[92m2022-08-29 14:10:07[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m
[92m2022-08-29 14:10:08[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread CSV file: ./dataset/candidates.txt[0m
[92m2022-08-29 14:10:08[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mnumber of labels, True: 23 and False: 0[0m
[92m2022-08-29 14:10:08[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mskipping 0 lines[0m


                                                  

[92m2022-08-29 14:10:09[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32msave test-data-class: d:\KLTN\DeezyMatch-master\candidates\trans\dataframe.df[0m


                                                   

--- 1.664137363433838 seconds ---




[92m2022-08-29 14:10:09[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./queries/trans\input_dfm_rnn.yaml[0m
[92m2022-08-29 14:10:09[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m


-- Combine vectors
Reading vectors from ./queries/trans\embeddings\finetuned_transformer_wikidata_28072022_vecs_*
list_files:  ['./queries/trans\\embeddings\\finetuned_transformer_wikidata_28072022_vecs_0']
0000000 ./queries/trans\embeddings\finetuned_transformer_wikidata_28072022_vecs_0
0


-- Combine IDs
0000000 ./queries/trans\embeddings\finetuned_transformer_wikidata_28072022_indxs_0

--- 31.385404586791992 seconds ---
[92m2022-08-29 14:10:09[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./candidates/trans\input_dfm_rnn.yaml[0m
[92m2022-08-29 14:10:09[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m


-- Combine vectors
Reading vectors from ./candidates/trans\embeddings\

                                                   

ID: 1/1 -- Number of found candidates so far: 10, searched: 10


                                                  

ID: 1/1 -- Number of found candidates so far: 16, searched: 20


                                                 

ID: 1/1 -- Number of found candidates so far: 18, searched: 23
TOTAL TIME: 1.9621846675872803


In [8]:
# Show one ranked row: print its query, then display its pred_score mapping
# as the cell output.
pos = 0
selected_row = candidates_pd.iloc[pos]
print(selected_row["query"])
selected_row["pred_score"]

Michael Jackson


OrderedDict([('Майкл Джексон', 0.9973),
             ('Michael J. Jackson', 0.9972),
             ('Michael Jackson', 0.9957),
             ('Micheal Jackson', 0.9954),
             ('Michael Joe Jackson', 0.9947),
             ('MichaelJackson', 0.9893),
             ('Jackson, Michael', 0.9882),
             ('Michael-Jackson', 0.9871),
             ('Michael Joseph Jackson', 0.9775),
             ('M. Jackson', 0.9774),
             ('M. J. Jackson', 0.9747),
             ('Μάϊκλ Τζάκσον', 0.946),
             ('Jackson, Michael Joseph', 0.9397),
             ('マイケルジャクソン', 0.9297),
             ('mary jackson', 0.8918),
             ('Mr. Jackson', 0.8912),
             ('M. J.', 0.8871),
             ('MJ', 0.8149)])

### Matching Demo

In [9]:
from DeezyMatch import candidate_ranker_trans

# Matching demo: treat database A as the queries and database B as the
# candidates, then rank B's entries for every entry of A.
databaseA = "./dataset/database-a.txt"
databaseB = "./dataset/database-b.txt"

model_name = "finetuned_transformer_wikidata_28072022"
scenario = "matching"

generate_vectors_trans(model_name, databaseA, databaseB, scenario=scenario)
combine_vector_trans(scenario, model_name)

# Common stem of the pretrained .model / .vocab files.
_model_stem = "./models/" + model_name + "/" + model_name

# This cell passes its own literal ranker settings (0.85 / 3 / 10 / 20)
# instead of the shared parameters defined in the demo configuration.
candidates_pd = candidate_ranker_trans(
    model_name=model_name,
    query_scenario="./combined/queries_" + scenario,
    candidate_scenario="./combined/candidates_" + scenario,
    ranking_metric="conf",
    selection_threshold=0.85,
    num_candidates=3,
    search_size=10,
    output_path="./ranker_results/" + scenario,
    pretrained_model_path=_model_stem + ".model",
    pretrained_vocab_path=_model_stem + ".vocab",
    number_test_rows=20,
)


[92m2022-08-29 14:10:17[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./models/finetuned_transformer_wikidata_28072022/input_dfm_rnn.yaml[0m
[92m2022-08-29 14:10:17[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m
[92m2022-08-29 14:10:19[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread CSV file: ./dataset/database-a.txt[0m
[92m2022-08-29 14:10:19[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mnumber of labels, True: 15 and False: 0[0m
[92m2022-08-29 14:10:19[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mskipping 0 lines[0m


                                                  

[92m2022-08-29 14:10:19[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32msave test-data-class: d:\KLTN\DeezyMatch-master\queries\matching\dataframe.df[0m


                                           

--- 1.80362868309021 seconds ---




[92m2022-08-29 14:10:19[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./models/finetuned_transformer_wikidata_28072022/input_dfm_rnn.yaml[0m
[92m2022-08-29 14:10:19[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m
[92m2022-08-29 14:10:21[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread CSV file: ./dataset/database-b.txt[0m
[92m2022-08-29 14:10:21[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mnumber of labels, True: 41 and False: 0[0m
[92m2022-08-29 14:10:21[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mskipping 0 lines[0m


                                                  

[92m2022-08-29 14:10:21[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32msave test-data-class: d:\KLTN\DeezyMatch-master\candidates\matching\dataframe.df[0m


                                                   

--- 1.99454927444458 seconds ---
[92m2022-08-29 14:10:21[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./queries/matching\input_dfm_rnn.yaml[0m
[92m2022-08-29 14:10:21[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m


-- Combine vectors
Reading vectors from ./queries/matching\embeddings\finetuned_transformer_wikidata_28072022_vecs_*
list_files:  ['./queries/matching\\embeddings\\finetuned_transformer_wikidata_28072022_vecs_0']
0000000 ./queries/matching\embeddings\finetuned_transformer_wikidata_28072022_vecs_0
0


-- Combine IDs
0000000 ./queries/matching\embeddings\finetuned_transformer_wikidata_28072022_indxs_0

--- 43.69518852233887 seconds ---
[92m2022-08-29 14:10:21[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [2;32mread input file: ./candidates/matching\input_dfm_rnn.yaml[0m
[92m2022-08-29 14:10:21[0m [95mHOANG-MINH-LAPTOP[0m [1m[90m[INFO][0m [1;32mpytorch will use: cpu[0m


-- Combine vectors
R

                                                  

ID: 1/15 -- Number of found candidates so far: 2, searched: 10


                                                  

ID: 1/15 -- Number of found candidates so far: 2, searched: 20


                                                  

ID: 1/15 -- Number of found candidates so far: 2, searched: 30


                                                  

ID: 1/15 -- Number of found candidates so far: 2, searched: 40


                                                 

ID: 1/15 -- Number of found candidates so far: 2, searched: 41


                                                   

ID: 2/15 -- Number of found candidates so far: 2, searched: 10


                                                   

ID: 2/15 -- Number of found candidates so far: 2, searched: 20


                                                   

ID: 2/15 -- Number of found candidates so far: 2, searched: 30


                                                  

ID: 2/15 -- Number of found candidates so far: 2, searched: 40


                                                 

ID: 2/15 -- Number of found candidates so far: 2, searched: 41


                                                   

ID: 3/15 -- Number of found candidates so far: 1, searched: 10


                                                  

ID: 3/15 -- Number of found candidates so far: 1, searched: 20


                                                  

ID: 3/15 -- Number of found candidates so far: 1, searched: 30


                                                   

ID: 3/15 -- Number of found candidates so far: 1, searched: 40


                                                 

ID: 3/15 -- Number of found candidates so far: 1, searched: 41


                                                   

ID: 4/15 -- Number of found candidates so far: 1, searched: 10


                                                  

ID: 4/15 -- Number of found candidates so far: 1, searched: 20


                                                  

ID: 4/15 -- Number of found candidates so far: 1, searched: 30


                                                  

ID: 4/15 -- Number of found candidates so far: 1, searched: 40


                                                 

ID: 4/15 -- Number of found candidates so far: 1, searched: 41


                                                  

ID: 5/15 -- Number of found candidates so far: 1, searched: 10


                                                   

ID: 5/15 -- Number of found candidates so far: 1, searched: 20


                                                  

ID: 5/15 -- Number of found candidates so far: 1, searched: 30


                                                  

ID: 5/15 -- Number of found candidates so far: 1, searched: 40


                                                 

ID: 5/15 -- Number of found candidates so far: 1, searched: 41


                                                  

ID: 6/15 -- Number of found candidates so far: 1, searched: 10


                                                  

ID: 6/15 -- Number of found candidates so far: 1, searched: 20


                                                  

ID: 6/15 -- Number of found candidates so far: 1, searched: 30


                                                  

ID: 6/15 -- Number of found candidates so far: 1, searched: 40


                                                 

ID: 6/15 -- Number of found candidates so far: 1, searched: 41


                                                  

ID: 7/15 -- Number of found candidates so far: 2, searched: 10


                                                  

ID: 7/15 -- Number of found candidates so far: 2, searched: 20


                                                  

ID: 7/15 -- Number of found candidates so far: 2, searched: 30


                                                  

ID: 7/15 -- Number of found candidates so far: 2, searched: 40


                                                 

ID: 7/15 -- Number of found candidates so far: 2, searched: 41


                                                  

ID: 8/15 -- Number of found candidates so far: 3, searched: 10


                                                  

ID: 9/15 -- Number of found candidates so far: 1, searched: 10


                                                  

ID: 9/15 -- Number of found candidates so far: 1, searched: 20


                                                  

ID: 9/15 -- Number of found candidates so far: 1, searched: 30


                                                  

ID: 9/15 -- Number of found candidates so far: 1, searched: 40


                                                 

ID: 9/15 -- Number of found candidates so far: 1, searched: 41


                                                  

ID: 10/15 -- Number of found candidates so far: 1, searched: 10


                                                  

ID: 10/15 -- Number of found candidates so far: 1, searched: 20


                                                  

ID: 10/15 -- Number of found candidates so far: 1, searched: 30


                                                  

ID: 10/15 -- Number of found candidates so far: 1, searched: 40


                                                 

ID: 10/15 -- Number of found candidates so far: 1, searched: 41


                                                  

ID: 11/15 -- Number of found candidates so far: 2, searched: 10


                                                  

ID: 11/15 -- Number of found candidates so far: 2, searched: 20


                                                  

ID: 11/15 -- Number of found candidates so far: 2, searched: 30


                                                  

ID: 11/15 -- Number of found candidates so far: 2, searched: 40


                                                 

ID: 11/15 -- Number of found candidates so far: 2, searched: 41


                                                  

ID: 12/15 -- Number of found candidates so far: 1, searched: 10


                                                  

ID: 12/15 -- Number of found candidates so far: 1, searched: 20


                                                  

ID: 12/15 -- Number of found candidates so far: 1, searched: 30


                                                  

ID: 12/15 -- Number of found candidates so far: 1, searched: 40


                                                 

ID: 12/15 -- Number of found candidates so far: 1, searched: 41


                                                  

ID: 13/15 -- Number of found candidates so far: 2, searched: 10


                                                  

ID: 13/15 -- Number of found candidates so far: 2, searched: 20


                                                  

ID: 13/15 -- Number of found candidates so far: 2, searched: 30


                                                  

ID: 13/15 -- Number of found candidates so far: 2, searched: 40


                                                 

ID: 13/15 -- Number of found candidates so far: 2, searched: 41


                                                  

ID: 14/15 -- Number of found candidates so far: 2, searched: 10


                                                  

ID: 14/15 -- Number of found candidates so far: 2, searched: 20


                                                  

ID: 14/15 -- Number of found candidates so far: 2, searched: 30


                                                  

ID: 14/15 -- Number of found candidates so far: 2, searched: 40


                                                 

ID: 14/15 -- Number of found candidates so far: 2, searched: 41


                                                  

ID: 15/15 -- Number of found candidates so far: 1, searched: 10


                                                  

ID: 15/15 -- Number of found candidates so far: 1, searched: 20


                                                  

ID: 15/15 -- Number of found candidates so far: 1, searched: 30


                                                  

ID: 15/15 -- Number of found candidates so far: 1, searched: 40


                                                 

ID: 15/15 -- Number of found candidates so far: 1, searched: 41
TOTAL TIME: 12.561673402786255


In [9]:
# Print every query next to its first-listed candidate in pred_score.
for i in range(len(candidates_pd)):
    row = candidates_pd.iloc[i]
    query = row["query"]
    pred_scores = row["pred_score"]
    if not pred_scores:
        # Guard: indexing [0] on an empty mapping would raise IndexError.
        # NOTE(review): presumably this happens when no candidate reaches the
        # ranker's selection threshold -- confirm against the ranker output.
        print(query, "  --->  ", "(no match)")
        continue
    # Iterating the mapping yields candidate names; the first one is taken.
    cand = next(iter(pred_scores))
    print(query, "  --->  ", cand)

Michael Jackson   --->   M. J. Jackson
Jo Young-jin   --->   Youngjin Cho
P. H. Polk   --->   Polk Prentice H.
W. Aveman   --->   Avemann Wolfgang
G. F. Watts   --->   George Fred Watts
Deming W. M.   --->   Deming Wilber Merton
Charlotte Höglund   --->   Eva Charlotte Höglund
Александър Николов   --->   Aleksandar Nikolov
Alfred Apaka   --->   Alfred Aholo Afat Jr.
Carol Celeste Carmichael Parks   --->   Carol Parks
卡羅爾·布魯斯   --->   Carol Bruce
Robert Phillips   --->   Bob Phillips
Adrian Targon   --->   Aart Targon


In [10]:
# For every query, list each retained candidate with its score, in the order
# stored in pred_score.
for row_idx in range(len(candidates_pd)):
    row = candidates_pd.iloc[row_idx]
    query = row["query"]
    print("___", query)
    # Iterate (candidate, score) pairs directly. The original indexed two
    # parallel lists with an inner `i` that shadowed the outer loop index.
    for cand, score in row["pred_score"].items():
        print(score, " ", cand)
    print()

___ Michael Jackson
0.9747   M. J. Jackson
0.8918   mary jackson

___ Jo Young-jin
0.971   Youngjin Cho
0.9174   Johan Young

___ P. H. Polk
0.8502   Polk Prentice H.

___ W. Aveman
0.8639   Avemann Wolfgang

___ G. F. Watts
0.9703   George Fred Watts

___ Deming W. M.
0.9683   Deming Wilber Merton

___ Charlotte Höglund
0.9537   Eva Charlotte Höglund
0.9146   Aart Targon

___ Александър Николов
0.9994   Aleksandar Nikolov
0.9112   Bill Jackson
0.8598   Stevland Judkins

___ Alfred Apaka
0.8839   Alfred Aholo Afat Jr.

___ Carol Celeste Carmichael Parks
0.9712   Carol Parks

___ 卡羅爾·布魯斯
0.9743   Carol Bruce
0.9151   Carol Parks

___ Robert Phillips
0.962   Bob Phillips

___ Adrian Targon
0.9761   Aart Targon
0.9251   Adriana

___ Hoang Minh
0.9311   H. Minh
0.8587   Avemann Wolfgang

___ Do Thi Thanh Ha
0.9479   Ha Do

