In [None]:
# Make IPython echo the value of every top-level expression statement in a
# cell, not only the last one (cells below rely on this to show bare names).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import pandas as pd
# Lift pandas' display limits so frames are rendered in full
# (None = no cap on the number of rows / columns shown).
pd.set_option("display.max_rows", None)
pd.set_option('display.max_columns', None)

# pickle is used by later cells to load saved search results from temp/.
import pickle

In [None]:
def extractTup(data, tup_i=0, is_frame=True):
    """Project every cell of ``data`` onto one component of the cell's tuple.

    Each cell of ``data`` is expected to be indexable (``cell[k]``); the
    component to keep is chosen by ``tup_i``.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Container whose cells are indexable records.
    tup_i : str or int, default 0
        Component to extract, either by name (``'segment'`` -> 0,
        ``'pos'`` -> 1, ``'cpos'`` -> 2) or directly by its integer
        position (0, 1 or 2).
    is_frame : bool, default True
        True when ``data`` is a DataFrame (element-wise map over all cells),
        False when it is a Series.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        Same shape/index as ``data`` with each cell replaced by the
        selected component.

    Raises
    ------
    ValueError
        If ``tup_i`` is neither a known component name nor a valid position.
    """
    types = {
        'segment': 0,
        'pos': 1,
        'cpos': 2
    }
    if isinstance(tup_i, str) and tup_i in types:
        index = types[tup_i]
    elif isinstance(tup_i, int) and not isinstance(tup_i, bool) and tup_i in types.values():
        # Integer positions are accepted too; previously the default
        # ``tup_i=0`` was rejected because only the string names were valid.
        index = tup_i
    else:
        raise ValueError(f'tup_i not in types: {types.keys()}')

    def pick(cell):
        return cell[index]

    if is_frame and not hasattr(data, 'map'):
        # Older pandas (< 2.1): DataFrame has no element-wise ``map`` yet.
        return data.applymap(pick)
    # Series.map, or DataFrame.map on pandas >= 2.1 (applymap is deprecated).
    return data.map(pick)

# list out search process

In [None]:
# Load ONE saved search trace; the alternative traces are kept commented out
# so they can be swapped in quickly.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
# with open('temp/search_temp_colcount-shiftonly-a.pkl', 'rb') as search_file:
with open('temp/search_temp_covtraceembed-shiftonly-a.pkl', 'rb') as search_file:
# with open('temp/search_temp_termcolcount-shiftonly-a.pkl', 'rb') as search_file:
    # The context manager closes the file handle once the trace is loaded.
    temp_search_x = pickle.load(search_file)

# The pickled object is an 8-tuple; the per-step sequences are indexed by
# search step in the loop below.
output_alignmentstate, output_alignmentop, output_alignmentopset, output_alignmentopmode, output_alignmentscore, \
        output_alignmentscorecomponents, output_alignmentscoreraw, output_hyperparams = temp_search_x

print('===   assert sanity   ===')
print()

print('all of these should be the same:')
print(len(output_alignmentstate), len(output_alignmentop), len(output_alignmentopset), len(output_alignmentopmode), len(output_alignmentscore))

print()
print('=== search algorithm! ===')
print()

# Bare expression: shown by IPython as a cell output.
output_hyperparams
print()
print('score components: [numcolumns  colptxtembed  coltoknvarcount  colttuivarcount  termcolcount]')
print()
for i in range(len(output_alignmentop)):
    print(f'step {i}: {output_alignmentopmode[i]}')
    print(f'    performed op: {output_alignmentop[i]}, out of {len(output_alignmentopset[i])} valid op(s)')
    print(f'    result score: {output_alignmentscore[i]}')
    print(f'    components: {output_alignmentscorecomponents[i]}')
    # NOTE(review): the value built here is neither assigned nor explicitly
    # displayed, and `.append` on a DataFrame was removed in pandas 2.0.
    # If the intent is to show the segment grid with the raw scores attached,
    # wrap a `pd.concat([...])` in `display(...)` - confirm the type of
    # output_alignmentscoreraw[i] first.
    extractTup(output_alignmentstate[i], tup_i='segment').append(output_alignmentscoreraw[i])
#     output_alignmentstate[i].append(output_alignmentscoreraw[i])

# print out an alignment in spreadsheet format

In [None]:
def spreadsheetFormat(alignment_df):
    """Flatten an alignment frame into a spreadsheet-style table.

    For every index label of ``alignment_df`` (taken in sorted order) two
    rows are produced:

    * an ``'s-pos'`` row holding, per alignment column, the list of token
      part-of-speech tags (the ``'cpos'`` component), each tag wrapped in
      single quotes, or ``''`` when the list is empty;
    * an ``'s-txt'`` row holding the text segment (the ``'segment'``
      component) of each alignment column.

    Both rows start with the same four leading cells: the index label, the
    full text (non-blank segments joined by single spaces), the row type
    and an empty spacer.

    Parameters
    ----------
    alignment_df : pandas.DataFrame
        Frame whose cells are records understood by ``extractTup``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'id', 'fulltext', 'datatype', 'empty'`` followed by
        ``'0' .. str(n_columns - 1)``.
    """
    alignment_df = alignment_df.sort_index()
    alignment_pos = extractTup(alignment_df, tup_i='cpos')  # the token part of speech
    alignment_segment = extractTup(alignment_df, tup_i='segment')
    output_columns = ['id', 'fulltext', 'datatype', 'empty'] + [
        str(col_i) for col_i in range(len(alignment_df.columns))
    ]

    def pad(row):
        # Right-pad with empty cells so every row matches the header width.
        return row + [''] * (len(output_columns) - len(row))

    output_data = []
    for row_id in alignment_df.index:
        segments = alignment_segment.loc[row_id].tolist()
        # Blank segments are skipped when rebuilding the full text.
        fulltext = ' '.join(seg for seg in segments if len(seg.strip()) > 0)
        pos_cells = [
            [f"'{tag}'" for tag in tags] if len(tags) > 0 else ''
            for tags in alignment_pos.loc[row_id].tolist()
        ]
        output_data.append(pad([row_id, fulltext, 's-pos', ''] + pos_cells))
        output_data.append(pad([row_id, fulltext, 's-txt', ''] + segments))
    return pd.DataFrame(output_data, columns=output_columns)

In [None]:
from IPython.display import HTML, display

# Render the last entry of output_alignmentstate as an HTML table,
# leaving out the DataFrame's own index column.
display(
    HTML(
        spreadsheetFormat(output_alignmentstate[-1]).to_html(index=False)
    )
)

# extractTup(output_alignmentstate[-1], tup_i='segment')
# spreadsheetFormat(output_alignmentstate[-1])

# print score for individual search conditions

In [None]:
import pickle

# Print one line per search step for each saved alignment run ('a'..'e'),
# marking with '*' the step(s) whose score equals the run's maximum.
# NOTE(review): this cell rebinds the output_alignment* names that were set
# when temp_search_x was loaded earlier; cells re-run afterwards will see the
# last run loaded here.
nameprefix = 'covtraceempty-1-2-'
for align_name in ['a', 'b', 'c', 'd', 'e']:
    # NOTE(review): this path pattern ('search_temp_search_' + 'covtraceempty')
    # differs from the one used when loading temp_search_x
    # ('search_temp_' + 'covtraceembed') - confirm it is intentional.
    # NOTE: pickle.load can execute arbitrary code - only load trusted files.
    with open(f'temp/search_temp_search_{nameprefix}{align_name}.pkl', 'rb') as search_file:
        temp_search = pickle.load(search_file)
    output_alignmentstate, output_alignmentop, output_alignmentopset, output_alignmentopmode, output_alignmentscore, \
            output_alignmentscorecomponents, output_alignmentscoreraw, output_hyperparams = temp_search
    # Hoisted out of the loop: the maximum is the same for every step.
    best_score = max(output_alignmentscore) if len(output_alignmentscore) > 0 else None
    for i in range(len(output_alignmentop)):
        print(
            f'{i:04d}',
            '*' if output_alignmentscore[i] == best_score else ' ',
            f'{output_alignmentscore[i]:.4f}',
            f'{output_alignmentopmode[i]:10}',
            f'performed: {output_alignmentop[i]}, total {len(output_alignmentopset[i])} valid op(s)'
        )
    print('===================================')
    print('===================================')
    print('===================================')