In [1]:
import pandas as pd
import os
import re
import numpy as np
from statistics import stdev
import math

In [2]:
def find_cord(image_name,idx):
    ''' Pick the idx-th decimal number embedded in image_name.

    Every "digits.digits" run in the name is collected from left to right
    and the one at position idx is returned as a string (no float
    conversion happens here). An idx outside the number of matches raises
    IndexError.
    '''
    decimal_tokens = re.compile(r'\d+\.\d+').findall(image_name)
    return decimal_tokens[idx]

def getVerDisStd(df,rm_outlier=True,min_prob=0.45,outlier_quantile=0.9):
    ''' Get standard deviation of consecutive vertical difference

    Rows whose 'prob' is below min_prob are dropped, 'pred' is stripped of
    surrounding whitespace and an 'h_level' column (mean of 'new_3y' and
    'new_4y') is added. A gap is the h_level of a row minus the h_level of
    the row before it, in the order the rows arrive.

    Parameters
    ----------
    df : DataFrame with the columns 'prob', 'pred', 'new_3y' and 'new_4y'.
        The caller's frame is left untouched.
    rm_outlier : when True, gaps above the outlier_quantile quantile of all
        gaps are left out of the standard deviation.
    min_prob : lowest 'prob' a row may have and still be kept.
    outlier_quantile : quantile in [0, 1] used as the outlier cut-off.

    Returns
    -------
    (std, df) : sample standard deviation of the gaps, and the filtered,
        re-indexed (0..n-1) frame including the new 'h_level' column.

    Raises
    ------
    statistics.StatisticsError when fewer than three rows pass the
    probability filter, i.e. fewer than two gaps exist.
    '''
    # Explicit copy: the column assignments below must not write into a
    # slice of the caller's frame (that is what triggers SettingWithCopy).
    df = df[df['prob']>=min_prob].copy().reset_index(drop=True)
    df['pred']=df['pred'].str.strip()
    df['h_level'] = (df['new_3y']+df['new_4y'])/2

    # Series of difference between the consecutive values
    levels = df['h_level'].tolist()
    gaps = [lower - upper for upper, lower in zip(levels, levels[1:])]
    std = stdev(gaps)
    if rm_outlier:
        # Cut at the quantile that is actually computed. The previous code
        # computed it and then compared against the literal 90, which made
        # the filter depend on the pixel scale of the image.
        cutoff = np.quantile(gaps,outlier_quantile)
        # '<=' keeps gaps equal to the cut-off, so evenly spaced rows are
        # not all discarded.
        trimmed_gaps = [gap for gap in gaps if gap <= cutoff]
        # stdev needs at least two points; otherwise keep the untrimmed value.
        if len(trimmed_gaps) >= 2:
            std = stdev(trimmed_gaps)

    return std,df

def write_menu(df,file_name,line_gap_ratio=0.6,output_dir='../data/output/'):
    ''' Write menu

    Groups the recognised words into text lines by their vertical position
    and writes one space-joined line per group to a text file.

    Parameters
    ----------
    df : prediction frame accepted by getVerDisStd(); it must also carry a
        'new_4x' column. Rows are grouped in the order they arrive, so the
        frame should already be sorted top to bottom (the notebook sorts by
        'new_4y' before calling).
    file_name : name of the prediction file. The output name is the part
        before the first '.', minus its last 12 characters, plus '_new.txt'.
    line_gap_ratio : a new line starts when the vertical gap to the previous
        word exceeds this fraction of the gap standard deviation.
    output_dir : directory the menu file is written to.

    Raises
    ------
    statistics.StatisticsError (from getVerDisStd) when too few rows remain
    after the probability filter to compute a standard deviation.
    '''
    std,df = getVerDisStd(df)

    # getVerDisStd re-indexes the frame to 0..n-1, so positions and labels
    # coincide; only positions are used below.
    levels = df['h_level'].tolist()
    line_starts = [0]
    for pos in range(1, len(levels)):
        # Same test as gap / std > ratio, written as a product so that a
        # zero std does not divide by zero.
        if levels[pos] - levels[pos-1] > line_gap_ratio * std:
            line_starts.append(pos)
    line_ends = line_starts[1:] + [len(levels)]

    # Prediction files end in 'pred_result.txt'; the slice drops the last 12
    # characters of the stem (that tag plus the character before it,
    # presumably a separator).
    menu_name=file_name.split('.')[0][:-12]+'_new.txt'

    # Context manager closes the file even if decoding a line fails.
    with open(os.path.join(output_dir, menu_name),"w") as menu:
        for start, end in zip(line_starts, line_ends):
            # Half-open positional slice: exactly the rows of this line.
            # Sorting the rows themselves by the x coordinate of vertex 4
            # (stable sort) keeps every word paired with its own prediction,
            # even when two words share the same x value.
            line_rows = df.iloc[start:end].sort_values('new_4x', kind='mergesort')
            menu.write(' '.join(line_rows['pred']))
            menu.write('\n')

In [44]:
output_dir = '../data/output/'
# os.listdir returns entries in arbitrary order; sorting makes the run (and
# the printed log) reproducible.
pred_list = sorted(
    os.path.join(output_dir, f)
    for f in os.listdir(output_dir)
    if os.path.isfile(os.path.join(output_dir, f)) and f.endswith('pred_result.txt')
)

# The image name carries eight decimal numbers: x, y of vertices 1 to 4.
vertex_cols = ['vertex1_x','vertex1_y','vertex2_x','vertex2_y',
               'vertex3_x','vertex3_y','vertex4_x','vertex4_y']

for pred_file in pred_list:
    # Column 1 is the predicted text: force str so that a file whose
    # predictions are all digits is not parsed as integers (which would
    # break the .str accessors below).
    raw_log = pd.read_csv(pred_file,sep="\t", header = None, dtype={1: str})
    result=raw_log.copy()
    result.columns=['image_name','pred','prob']
    result['pred']=result['pred'].str.strip()

    for pos, col in enumerate(vertex_cols):
        result[col]=result['image_name'].apply(lambda name, pos=pos: find_cord(name, pos)).astype(float)

    # Rotate about vertex 4 of the first box.
    rotation_x = result['vertex4_x'].iloc[0]
    rotation_y = result['vertex4_y'].iloc[0]
    result['rotation_x']=rotation_x
    result['rotation_y']=rotation_y

    # Slope of the edge from vertex 3 to vertex 4 gives the tilt of each box.
    result['ratio']=(result['vertex4_y']-result['vertex3_y'])/(result['vertex4_x']-result['vertex3_x'])
    # Boxes with a tilt of exactly 0 are excluded from the average
    # (assigned back rather than replaced in place on a column selection).
    result['theta']=result['ratio'].apply(math.atan).replace(0,np.nan)

    # Mean tilt with the sign flipped is the angle that straightens the page.
    correction_theta = (-result['theta']).mean()
    if np.isnan(correction_theta):
        # Every box was axis-aligned (or unusable): nothing to correct.
        # Without this the NaN would propagate into all rotated coordinates.
        correction_theta = 0.0
    cos_theta = math.cos(correction_theta)
    sin_theta = math.sin(correction_theta)
    result['correction_theta']=correction_theta
    result['cos_theta']=cos_theta
    result['sin_theta']=sin_theta

    # Standard 2D rotation of vertices 3 and 4 around the rotation origin.
    for vertex in ('3', '4'):
        dx = result['vertex'+vertex+'_x']-rotation_x
        dy = result['vertex'+vertex+'_y']-rotation_y
        result['new_'+vertex+'x']=dx*cos_theta-dy*sin_theta+rotation_x
        result['new_'+vertex+'y']=dx*sin_theta+dy*cos_theta+rotation_y

    # .copy() so the later assignments act on an independent frame.
    result=result[['image_name','vertex1_x','vertex1_y','vertex2_x','vertex2_y','vertex3_x','vertex3_y','vertex4_x','vertex4_y','pred','prob','rotation_x','rotation_y','ratio','theta','correction_theta','cos_theta','sin_theta','new_3x','new_3y','new_4x','new_4y']].copy()

    # Low-confidence predictions that contain a number (optionally with '$'):
    # their last character is dropped.
    low_conf_number = (result['prob']<0.3) & result['pred'].str.contains(r'\$?\d+', regex=True, na=False)
    print(result.loc[low_conf_number,'pred'])
    print('update!!!!!')
    # Single .loc assignment: result[mask]['pred'] = ... writes to a
    # temporary copy and leaves result unchanged.
    result.loc[low_conf_number,'pred']=result.loc[low_conf_number,'pred'].str[:-1]
    # Same mask as before the update, so every edited row is shown.
    print(result.loc[low_conf_number,'pred'])

    result=result.sort_values('new_4y')

    # prepare input for write_menu()
    file_name=os.path.basename(pred_file)
    df=result.copy()
#     write_menu(df,file_name)
    

41    10-1
42     100
Name: pred, dtype: object
update!!!!!
41    10-
42     10
Name: pred, dtype: object
52     $148H
53     $1680
56    $118--
57     $1380
58      $680
77     (E11)
Name: pred, dtype: object
update!!!!!
52     $148
53     $168
56    $118-
57     $138
58      $68
77     (E11
Name: pred, dtype: object
34    $238-
Name: pred, dtype: object
update!!!!!
34    $238
Name: pred, dtype: object
80       551
101     1850
102     1700
110    13001
113     1200
117    18001
Name: pred, dtype: object
update!!!!!
80       55
101     185
102     170
110    1300
113     120
117    1800
Name: pred, dtype: object
133    $178-
141    $2780
143    $2781
165    $2781
173    $218"
Name: pred, dtype: object
update!!!!!
133    $178
141    $278
143    $278
165    $278
173    $218
Name: pred, dtype: object
