# Search Tester Analysis
*Follow these steps to compare legacy results to the new api results.*

### Set up the environment

In [None]:
%run ./local/setup.ipynb

### Find search batches (choose the data set)
*Find the batches you want to analyze.*

In [None]:
# Batch filters. All three start as None; presumably None means "do not filter on
# this field" -- confirm against TestSearchBatch.find_search_batches.
search_type = after_date = before_date = None
### OPTIONAL: refine your batch search with any of the following
# search_type = SearchRequest.SearchTypes.XXXX
# before_date = _datetime_in_utc_
# after_date = _datetime_in_utc_
batches = TestSearchBatch.find_search_batches(search_type, after_date, before_date)

# One line per batch: id, test date, search type and the three sim_val_* settings,
# so the batches of interest can be picked out in the next cell.
batch_summary_fields = (
    'id',
    'test_date',
    'search_type',
    'sim_val_business',
    'sim_val_first_name',
    'sim_val_last_name',
)
for batch in batches:
    print(*[getattr(batch, field_name) for field_name in batch_summary_fields])


In [None]:
# add the ids of the batches you want to analyze
batch_ids = []
for batch in batches:
    # fill in the clause (if you want all of them then set to True or remove)
    if _clause_:  # i.e. batch.id > _ , batch.sim_val_business > _ ...
        batch_ids.append(batch.id)

### Get the batch json

In [None]:
# Load every selected batch by id; the analysis cells below iterate over
# `batches_to_analyze` and read each batch's `searches`.
batches_to_analyze = [
    TestSearchBatch.find_by_id(selected_id) for selected_id in batch_ids
]

# Analysis

### Exact Matches

#### Pass rate

In [None]:
# Exact-match pass rate across all selected batches.
# A search fails when it has at least one missed exact match; failures are kept in
# `failed_searches` for the cells that follow.
searches_total = 0
searches_passed = 0
failed_searches = []
for batch in batches_to_analyze:
    for search in batch.searches:
        searches_total += 1
        if len(search.missed_matches(TestSearchResult.MatchType.EXACT.value)) > 0:
            failed_searches.append(search)
        else:
            searches_passed += 1

# Guard the division: with no batches selected (or batches without searches) the
# total is 0 and the rate is undefined, so report that instead of raising.
if searches_total:
    print('pass rate: ', searches_passed/searches_total)
else:
    print('pass rate:  n/a (no searches found in the selected batches)')
print('number of failed searches: ', len(failed_searches))

#### Failed Searches

*select failed searches to analyze*

In [None]:
# Summarize each failed search: its criteria, how many exact results legacy
# returned, and how many of those were missed.
for search in failed_searches:
    # Named `search_json` rather than `json` so the notebook-global name `json`
    # is not rebound (that would shadow the stdlib module for every later cell).
    search_json = search.json
    exact_section = search_json['matchesExact']
    print('###########################################################')
    print('criteria: ', search_json['criteria'])
    print('total expected: ', len(exact_section['resultsLegacy']), 'missed: ', len(exact_section['missedMatches']))

In [None]:
### print out json for specific search
# print(failed_searches[0].json)

In [None]:
# Pick the failed exact-match searches to look at in detail.
# NOTE: `_clause_` is a template placeholder that no cell shown in this notebook
# defines; replace it with a real condition before running this cell.
exact_searches_analysis = []
for search in failed_searches:
    # Fill in the clause (to keep every search, set it to True or remove the `if`).
    if _clause_:  # e.g. search.search_criteria == _, len(search.missed_matches(TestSearchResult.MatchType.EXACT.value)) > _
        exact_searches_analysis.append(search)

*missed matches*

In [None]:
import re

# Show the missed exact matches for one selected search at a time; change the
# index below to step through the rest of exact_searches_analysis.
search = exact_searches_analysis[0]
exact_missed = search.missed_matches(TestSearchResult.MatchType.EXACT.value)
for match in exact_missed:
    # Collapse runs of spaces (written back onto the match dict) so the details
    # print compactly.
    compact_details = re.sub(' +', ' ', match['details'])
    match['details'] = compact_details
    print('-------------------------------------------------------------------------------')
    print('result:', compact_details)
    print('reg num:', match['documentId'])
    print('index: ', match['index'])
### print out all of them
# for search in exact_searches_analysis:
#     print('##################################################################################')
#     print('criteria: ', search.search_criteria)
#     print('Missed Matches')
#     for match in search.missed_matches(TestSearchResult.MatchType.EXACT.value):
#         print('-------------------------------------------------------------------------------')
#         print('result:', match['details'])
#         print('reg num:', match['documentId'])
#         print('index: ', match['index'])

*results diff*

In [None]:
import re

# Side-by-side listing of legacy vs api exact results for one selected search;
# change the index below to step through the rest of exact_searches_analysis.
search = exact_searches_analysis[0]
print('criteria: ', search.search_criteria)
print('-------------------------------------------------------------')
print('  legacy                           api')
print('-------------------------------------------------------------')
legacy_results = search.get_results(TestSearchResult.MatchType.EXACT.value, TestSearchResult.Source.LEGACY.value)
api_results = search.get_results(TestSearchResult.MatchType.EXACT.value, TestSearchResult.Source.API.value)
legacy_count = len(legacy_results)
api_count = len(api_results)
length = max(legacy_count, api_count)
for i in range(length):
    in_legacy = i < legacy_count
    in_api = i < api_count
    if in_legacy:
        # Only the legacy details get their runs of spaces collapsed (in place).
        legacy = legacy_results[i]
        legacy['details'] = re.sub(' +', ' ', legacy['details'])
    if in_api:
        api = api_results[i]
    if in_legacy and in_api:
        print(f'{i}: {legacy["documentId"]} {legacy["details"]} | {api["documentId"]} {api["details"]}')
    elif in_legacy:
        # Legacy returned more rows than the api: right-hand column stays empty.
        print(f'{i}: {legacy["documentId"]} {legacy["details"]} |')
    elif in_api:
        # Api returned more rows than legacy: left-hand column stays empty.
        print(f'{i}:                    | {api["documentId"]} {api["details"]}')

### print out all of them
# for search in exact_searches_analysis:
#     print('criteria: ', search.search_criteria)
#     print('-------------------------------------------------------------')
#     print('  legacy                           api')
#     print('-------------------------------------------------------------')
#     legacy_results = search.get_results(TestSearchResult.MatchType.EXACT.value, TestSearchResult.Source.LEGACY.value)
#     api_results = search.get_results(TestSearchResult.MatchType.EXACT.value, TestSearchResult.Source.API.value)
#     length = max(len(legacy_results), len(api_results))
#     for i in range(length):
#         if i < len(legacy_results) and i < len(api_results):
#             legacy_results[i]['details'] = re.sub(' +', ' ', legacy_results[i]['details'])
#             print(f'{i}: {legacy_results[i]["documentId"]} {legacy_results[i]["details"]} | {api_results[i]["documentId"]} {api_results[i]["details"]}')
#         elif i < len(legacy_results):
#             legacy_results[i]['details'] = re.sub(' +', ' ', legacy_results[i]['details'])
#             print(f'{i}: {legacy_results[i]["documentId"]} {legacy_results[i]["details"]} |')
#         elif i < len(api_results):
#             print(f'{i}:                    | {api_results[i]["documentId"]} {api_results[i]["details"]}')


### Similar Matches

#### Pass rate

In [None]:
# Similar-match pass rate across all selected batches.
# A search fails when it has missed similar matches, or when nothing was missed
# but its average index difference is non-zero (results are out of order).
# Failures are kept in `failed_searches` for the cells that follow.
searches_total = 0
searches_passed = 0
failed_searches = []
for batch in batches_to_analyze:
    for search in batch.searches:
        searches_total += 1
        # fails if missed matches
        if len(search.missed_matches(TestSearchResult.MatchType.SIMILAR.value)) > 0:
            failed_searches.append(search)
        # fails if order is off
        elif search.avg_index_diff(TestSearchResult.MatchType.SIMILAR.value) != 0:
            failed_searches.append(search)
        else:
            searches_passed += 1

# Guard the division: with no batches selected (or batches without searches) the
# total is 0 and the rate is undefined, so report that instead of raising.
if searches_total:
    print('pass rate: ', searches_passed/searches_total)
else:
    print('pass rate:  n/a (no searches found in the selected batches)')
print('number of failed searches: ', len(failed_searches))

#### Failed searches

*select failed searches to analyze*

In [None]:
# Summarize each failed search: its criteria, how many similar results legacy
# returned, how many were missed, and the recorded ordering statistics.
for search in failed_searches:
    # Named `search_json` rather than `json` so the notebook-global name `json`
    # is not rebound (that would shadow the stdlib module for every later cell).
    search_json = search.json
    similar_section = search_json['matchesSimilar']
    print('###########################################################')
    print('criteria: ', search_json['criteria'])
    print('total expected: ', len(similar_section['resultsLegacy']), ' missed: ', len(similar_section['missedMatches']))
    print('first fail index: ', similar_section['firstFailIndex'])
    print('avg index diff: ', similar_section['avgIndexDiff'])

In [None]:
### print out json for specific search
# print(failed_searches[0].json)

In [None]:
# Pick the failed similar-match searches to look at in detail.
# NOTE: `_clause_` is a template placeholder that no cell shown in this notebook
# defines; replace it with a real condition before running this cell.
similar_searches_analysis = []
for search in failed_searches:
    # Fill in the clause (to keep every search, set it to True or remove the `if`).
    if _clause_:  # e.g. search.search_criteria == _, len(search.missed_matches(TestSearchResult.MatchType.SIMILAR.value)) > _
        similar_searches_analysis.append(search)

*first failed indexes*

In [None]:
### manually iterate through
# Look at one selected search at a time; change the index to step through the rest.
search = similar_searches_analysis[0]
first_fail_index = search.fail_index(TestSearchResult.MatchType.SIMILAR.value)
print(first_fail_index)

### print out all of them
# for search in similar_searches_analysis:
#     print(search.fail_index(TestSearchResult.MatchType.SIMILAR.value))

In [None]:
### avgs
# Average the fail index over the selected similar-match searches.
# A fail index of -1 is counted as "no fail index" and left out of the average.
total_no_fails = 0
total_fail_index = 0
for search in similar_searches_analysis:
    fail_index = search.fail_index(TestSearchResult.MatchType.SIMILAR.value)
    if fail_index == -1:
        total_no_fails += 1
    else:
        total_fail_index += fail_index

num_searches_failed = len(similar_searches_analysis) - total_no_fails
print('Number of searches with fail indexes: ', num_searches_failed)
# Guard the division: the count is 0 when nothing was selected or every selected
# search returned -1, and the average is undefined in that case.
if num_searches_failed:
    print('avg fail index: ', total_fail_index/num_searches_failed)
else:
    print('avg fail index:  n/a (no selected search has a fail index)')

*avg order difference between legacy and api results (does NOT include missed matches)*

In [None]:
### manually iterate through
# Look at one selected search at a time; change the index to step through the rest.
search = similar_searches_analysis[0]
similar_avg_index_diff = search.avg_index_diff(TestSearchResult.MatchType.SIMILAR.value)
print(similar_avg_index_diff)

### print out all of them
# for search in similar_searches_analysis:
#     print(search.avg_index_diff(TestSearchResult.MatchType.SIMILAR.value))

*missed matches*

In [None]:
import re

### manually iterate through
# Show the missed similar matches for one selected search at a time; change the
# index below to step through the rest of similar_searches_analysis.
search = similar_searches_analysis[0]
print('criteria: ', search.search_criteria)
print('Missed Matches')
similar_missed = search.missed_matches(TestSearchResult.MatchType.SIMILAR.value)
for match in similar_missed:
    # Collapse runs of spaces (written back onto the match dict) so the details
    # print compactly.
    compact_details = re.sub(' +', ' ', match['details'])
    match['details'] = compact_details
    print('-------------------------------------------------------------------------------')
    print('result:', compact_details)
    print('reg num:', match['documentId'])
    print('index: ', match['index'])

### print out all of them
# for search in similar_searches_analysis:
#     print('##################################################################################')
#     print('criteria: ', search.search_criteria)
#     print('Missed Matches')
#     for match in search.missed_matches(TestSearchResult.MatchType.SIMILAR.value):
#         print('-------------------------------------------------------------------------------')
#         print('result:', match['details'])
#         print('reg num:', match['documentId'])
#         print('index: ', match['index'])

*results diff*

In [None]:
import re

# Side-by-side listing of legacy vs api similar results for one selected search;
# change the index below to step through the rest of similar_searches_analysis.
search = similar_searches_analysis[0]
print('criteria: ', search.search_criteria)
print('-------------------------------------------------------------')
print('  legacy                           api')
print('-------------------------------------------------------------')
legacy_results = search.get_results(TestSearchResult.MatchType.SIMILAR.value, TestSearchResult.Source.LEGACY.value)
api_results = search.get_results(TestSearchResult.MatchType.SIMILAR.value, TestSearchResult.Source.API.value)
legacy_count = len(legacy_results)
api_count = len(api_results)
length = max(legacy_count, api_count)
for i in range(length):
    in_legacy = i < legacy_count
    in_api = i < api_count
    if in_legacy:
        # Only the legacy details get their runs of spaces collapsed (in place).
        legacy = legacy_results[i]
        legacy['details'] = re.sub(' +', ' ', legacy['details'])
    if in_api:
        api = api_results[i]
    if in_legacy and in_api:
        print(f'{i}: {legacy["documentId"]} {legacy["details"]} | {api["documentId"]} {api["details"]}')
    elif in_legacy:
        # Legacy returned more rows than the api: right-hand column stays empty.
        print(f'{i}: {legacy["documentId"]} {legacy["details"]} |')
    elif in_api:
        # Api returned more rows than legacy: left-hand column stays empty.
        print(f'{i}:                    | {api["documentId"]} {api["details"]}')

### print out all of them
# for search in similar_searches_analysis:
#     print('criteria: ', search.search_criteria)
#     print('-------------------------------------------------------------')
#     print('  legacy                           api')
#     print('-------------------------------------------------------------')
#     legacy_results = search.get_results(TestSearchResult.MatchType.SIMILAR.value, TestSearchResult.Source.LEGACY.value)
#     api_results = search.get_results(TestSearchResult.MatchType.SIMILAR.value, TestSearchResult.Source.API.value)
#     length = max(len(legacy_results), len(api_results))
#     for i in range(length):
#         if i < len(legacy_results) and i < len(api_results):
#             legacy_results[i]['details'] = re.sub(' +', ' ', legacy_results[i]['details'])
#             print(f'{i}: {legacy_results[i]["documentId"]} {legacy_results[i]["details"]} | {api_results[i]["documentId"]} {api_results[i]["details"]}')
#         elif i < len(legacy_results):
#             legacy_results[i]['details'] = re.sub(' +', ' ', legacy_results[i]['details'])
#             print(f'{i}: {legacy_results[i]["documentId"]} {legacy_results[i]["details"]} |')
#         elif i < len(api_results):
#             print(f'{i}:                    | {api_results[i]["documentId"]} {api_results[i]["details"]}')

### Create MD file of notebook run
**NOTE:** save the notebook now (e.g. _Cmd+S_) so that the results appear in the markdown file

In [None]:
%%javascript
IPython.notebook.kernel.execute('nb_name = "' + IPython.notebook.notebook_name + '"')

In [None]:
import os

# Derive the markdown file name by swapping the notebook's extension for '.md'.
# splitext drops whatever extension is actually present, instead of assuming the
# name ends in the six characters '.ipynb'.
md_name = os.path.splitext(nb_name)[0] + '.md'

In [None]:
%%bash -s "$nb_name" "$md_name"
# $1 is the notebook file name and $2 the markdown output name passed in above.
# Both are quoted so names containing spaces or glob characters reach nbconvert
# as single arguments.
jupyter nbconvert "$1" --to markdown --output "$2"