In [None]:
       
from IPython.display import HTML

# Render an HTML snippet whose script hides every `div.input` element once the
# document is ready (code_show starts true, so the first code_toggle() call hides
# them) and whose form button calls code_toggle() again to flip the visibility.
# Relies on `$` being available in the page the notebook is rendered in.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
    

In [None]:
%matplotlib inline

import json, os, sys, re, math
import numpy as np
import pandas as pd
import cv2  
from shutil import copy
import plotly.graph_objects as go
from plotly.subplots import make_subplots



def plot_hist(data, xlab, ylab, title, width, density=True):
    """
    Draw a histogram of ``data`` with fixed-width bins starting at 0 and print
    the per-bin values.

    Parameters
    ----------
    data : sequence of numbers
        Values to bin. Must contain at least one element.
    xlab, ylab, title : str
        Axis labels and figure title.
    width : number
        Bin width; the same spacing is used for the x-axis ticks. Must be > 0.
    density : bool, optional
        Forwarded to ``plt.hist``.

    Raises
    ------
    ValueError
        If ``data`` is empty or ``width`` is not positive.
    """
    if len(data) == 0:
        raise ValueError("plot_hist() needs at least one data point")
    if width <= 0:
        raise ValueError("plot_hist() needs a positive bin width")

    # The bin edges must reach past max(data); stopping at max(data) (as
    # np.arange(0, max(data), width) does) leaves the largest values outside
    # every bin. Always keep at least one bin so the edge array has two entries.
    n_bins = max(1, math.ceil(max(data) / width))
    edges = np.arange(n_bins + 1) * width

    # NOTE(review): `plt` (matplotlib.pyplot) is not imported in the cells
    # visible here; it must be in scope before this function is called.
    plt.figure()
    n, bins, patches = plt.hist(data, edges, density=density, facecolor='g', alpha=0.75)
    plt.xlabel(xlab)
    plt.ylabel(ylab)
    plt.title(title)
    plt.grid(True)
    plt.xticks(edges)

    plt.show()

    # Bin heights truncated to int (these are densities when density=True).
    print([int(x) for x in n])
    print(patches, bins)
    
def get_iou(bb1, bb2):
    """
    Calculate the Intersection over Union (IoU) of two bounding boxes.

    Parameters
    ----------
    bb1 : dict
        Keys: {'x1', 'x2', 'y1', 'y2'}
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner
    bb2 : dict
        Keys: {'x1', 'x2', 'y1', 'y2'}
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner

    Returns
    -------
    float
        in [0, 1]. Boxes that do not overlap, and pairs whose union has
        zero area (two degenerate boxes), yield 0.0.

    Raises
    ------
    AssertionError
        If a box has x1 > x2 or y1 > y2.
    """
    assert bb1['x1'] <= bb1['x2']
    assert bb1['y1'] <= bb1['y2']
    assert bb2['x1'] <= bb2['x2']
    assert bb2['y1'] <= bb2['y2']

    # determine the coordinates of the intersection rectangle
    x_left = max(bb1['x1'], bb2['x1'])
    y_top = max(bb1['y1'], bb2['y1'])
    x_right = min(bb1['x2'], bb2['x2'])
    y_bottom = min(bb1['y2'], bb2['y2'])

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # compute the area of both AABBs
    bb1_area = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1'])
    bb2_area = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1'])

    # union = area(bb1) + area(bb2) - intersection
    union_area = bb1_area + bb2_area - intersection_area
    if union_area <= 0:
        # Both boxes are degenerate (zero width or height) and touch each
        # other; there is no area to overlap, so avoid dividing by zero.
        return 0.0

    iou = intersection_area / float(union_area)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

def findIOU(pred, truths):
    """
    Find the ground-truth box that overlaps ``pred`` the most.

    Returns a ``(iou, index)`` pair where ``index`` is the position in
    ``truths`` of the first box reaching the highest IoU. When ``truths`` is
    empty, or the best IoU is zero, the result is ``(0, None)``.
    """
    if not len(truths):
        return 0, None

    best_iou = None
    best_idx = None
    for idx, truth in enumerate(truths):
        candidate = get_iou(pred, truth)
        # Strict comparison keeps the earliest box on ties.
        if best_iou is None or candidate > best_iou:
            best_iou, best_idx = candidate, idx

    if best_iou == 0:
        return (0, None)
    return (best_iou, best_idx)

In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 1 when the denominator is zero."""
    if denominator == 0:
        return 1
    return numerator / denominator


def get_confusion(papers,iou_thresh=0.5,score_thresh=0.8):
    """
    Compute precision and recall of the predictions per page, per paper and overall.

    Counting rules (applied after dropping predictions with score <= score_thresh):
      * True Positive (TP): a prediction whose IOU is > iou_thresh
      * False Positive (FP): a prediction whose IOU is <= iou_thresh
      * False Negative (FN): a ground-truth index on the page that no TP
        prediction references through its 'eqn_idx'

    Parameters
    ----------
    papers : dict
        paper -> {'truth': {page: [truth boxes]},
                  'preds': {page: [dicts with 'iou', 'eqn_idx', 'score']}}
        Pages are taken from 'truth'; a page without a 'preds' entry is
        treated as having no predictions.
    iou_thresh : float
    score_thresh : float

    Returns
    -------
    tuple
        (p, r, p_papers, r_papers, p_pages, r_pages, papers_df) where p/r are
        the overall precision/recall, the four lists hold the per-paper and
        per-page values, and papers_df has one row per (paper, page) with the
        raw counts. A ratio whose denominator is zero is reported as 1.
    """
    columns = ['paper', 'page', 'total_truth', 'total_pred',
               'total_pred_thresh', 'tp', 'fp', 'fn']
    # Rows are collected in a list and turned into a frame once at the end;
    # DataFrame.append copied the whole frame per row and no longer exists
    # in pandas >= 2.0.
    records = []

    tp_total = fp_total = fn_total = 0
    p_papers, r_papers = [], []
    p_pages, r_pages = [], []

    for paper in papers:
        tp_paper = fp_paper = fn_paper = 0
        preds_by_page = papers[paper]['preds']

        for page in papers[paper]['truth']:
            n_truth = len(papers[paper]['truth'][page])
            preds_all = preds_by_page.get(page, [])

            # score_thresh
            kept = [e for e in preds_all if e['score'] > score_thresh]

            # TP, FP, FN
            # NOTE(review): several predictions matching the same truth box
            # each count as a TP, while FN is based on distinct matched truth
            # indices - confirm this is the intended metric.
            matched = [e for e in kept if e['iou'] > iou_thresh]
            matched_ids = {e['eqn_idx'] for e in matched}
            tp_temp = len(matched)
            fp_temp = sum(1 for e in kept if e['iou'] <= iou_thresh)
            fn_temp = sum(1 for idx in range(n_truth) if idx not in matched_ids)

            tp_total += tp_temp
            fp_total += fp_temp
            fn_total += fn_temp

            tp_paper += tp_temp
            fp_paper += fp_temp
            fn_paper += fn_temp

            p_pages.append(_safe_ratio(tp_temp, tp_temp + fp_temp))
            r_pages.append(_safe_ratio(tp_temp, tp_temp + fn_temp))

            records.append({'paper': paper, 'page': page, 'total_truth': n_truth,
                            'total_pred': len(preds_all), 'total_pred_thresh': len(kept),
                            'tp': tp_temp, 'fp': fp_temp, 'fn': fn_temp})

        p_papers.append(_safe_ratio(tp_paper, tp_paper + fp_paper))
        r_papers.append(_safe_ratio(tp_paper, tp_paper + fn_paper))

    p = _safe_ratio(tp_total, tp_total + fp_total)
    r = _safe_ratio(tp_total, tp_total + fn_total)
    papers_df = pd.DataFrame(records, columns=columns)

    return p,r,p_papers, r_papers, p_pages, r_pages, papers_df
            

In [None]:
def _truth_boxes(regions):
    """Turn the annotated regions of one page into a list of unique box dicts."""
    boxes = []
    for region in regions:
        shape = regions[region]['shape_attributes']
        # NOTE(review): reads x from vertices 0 and 2 and y from vertices 0 and 1,
        # which presumably matches the annotation tool's rectangle vertex
        # order - confirm against the data.
        x1 = shape['all_points_x'][0]
        x2 = shape['all_points_x'][2]
        y1 = shape['all_points_y'][0]
        y2 = shape['all_points_y'][1]
        boxes.append({'width' : x2-x1, 'height' : y2-y1, 'x1':x1, 'x2':x2, 'y1':y1, 'y2':y2})

    # Drop exact duplicates while keeping first-seen order, so the truth
    # indices stored in 'eqn_idx' are the same on every run.
    unique_items = dict.fromkeys(tuple(box.items()) for box in boxes)
    return [dict(items) for items in unique_items]


def _pred_boxes(prediction):
    """Turn one page's 'pred_boxes'/'scores' lists into box dicts with int coordinates."""
    boxes = []
    for i, raw_box in enumerate(prediction['pred_boxes']):
        x1, y1, x2, y2 = (int(v) for v in raw_box)
        boxes.append({'width' : x2-x1, 'height' : y2-y1, 'x1':x1, 'x2':x2,
                      'y1':y1, 'y2':y2, 'score' : prediction['scores'][i]})
    return boxes


def _draw_page(img, truths, preds):
    """Draw truth boxes, predicted boxes and their score/IOU labels onto img."""
    cv2.putText(img,'Truth : '+str(len(truths)) + 'Eqns',(10,20),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1)
    cv2.putText(img,'Preds : '+str(len(preds)) + 'Eqns',(10,40),cv2.FONT_HERSHEY_SIMPLEX,0.5,(255,0,0),1)

    for truth in truths:
        cv2.rectangle(img,(truth["x1"],truth["y1"]),(truth["x2"],truth["y2"]),(0,0,255),1)

    for pred in preds:
        cv2.rectangle(img,(pred["x1"],pred["y1"]),(pred["x2"],pred["y2"]),(255,0,0),1)
        cv2.putText(img,'S:'+ str(round(pred['score']*100,2))+'%, IOU:'+str(round(pred['iou'],2)),(pred['x2']+5,pred['y2']),cv2.FONT_HERSHEY_SIMPLEX,0.35,(255,0,0),1)


def paper_statistics(path, valid_bbs, predictions):
    """
    Match predicted boxes to ground-truth boxes per page and write annotated images.

    Parameters
    ----------
    path : str
        Data directory prefix (used by plain string concatenation, so it must
        end with a path separator). Images are read from ``path + 'val/'`` and
        annotated copies are written to ``path + 'eval_analysis/'``.
    valid_bbs : dict
        Page image name -> {'regions': {region id: {'shape_attributes':
        {'all_points_x': [...], 'all_points_y': [...]}}}}.
    predictions : dict
        Page image path -> {'pred_boxes': [[x1, y1, x2, y2], ...], 'scores': [...]}.

    Returns
    -------
    (papers, ious)
        ``papers`` maps paper -> {'pages_count', 'truth': {page: [boxes]},
        'preds': {page: [boxes]}}; every prediction on a truth page gains
        'iou' and 'eqn_idx' (index of the best-matching truth box, or None).
        ``ious`` is the flat list of those IOU values.
    """
    papers = {}
    for paper_page in valid_bbs:
        paper = paper_page.split('-')[0]
        page = f"page-{paper_page.replace('.png','').split('-')[-1]}"

        # Start the counter at 0 and bump it for every page, including the
        # first one, so pages_count is the real number of pages seen.
        entry = papers.setdefault(paper, {'pages_count': 0, 'truth': {}, 'preds': {}})
        entry['pages_count'] += 1
        entry['truth'][page] = _truth_boxes(valid_bbs[paper_page]['regions'])

    for paper_page in predictions:
        file_name = paper_page.split('/')[-1]
        paper = file_name.split('-')[0]
        page = f"page-{file_name.replace('.png','').split('-')[-1]}"

        if paper in papers:
            papers[paper]['preds'][page] = _pred_boxes(predictions[paper_page])
        else:
            print("Not found",paper)

    out_dir = path+'eval_analysis/'
    os.makedirs(os.path.dirname(out_dir), exist_ok=True)

    ious = []
    for paper in papers:
        for page in papers[paper]['truth']:
            truths = papers[paper]['truth'][page]
            # Truth pages without any prediction get an explicit empty list.
            preds = papers[paper]['preds'].setdefault(page, [])

            # The matching does not need the image, so do it first.
            for pred in preds:
                iou, idx = findIOU(pred, truths)
                pred['iou'] = iou
                pred['eqn_idx'] = idx
                ious.append(iou)

            img_path = path+'val/'+paper+'-'+page+'.png'
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None for a missing/unreadable file; skip
                # the drawing instead of crashing inside cv2.putText.
                print("Could not read image, skipping annotation:", img_path)
                continue

            _draw_page(img, truths, preds)
            cv2.imwrite(out_dir+paper+'-'+page+'.png', img)

    return papers, ious

    

In [None]:

def plot_iou_hist(ious):
    """Show a Plotly histogram of the given IOU values in the notebook."""
    iou_trace = go.Histogram(x=ious, bingroup=1, name="ious")

    fig = go.Figure()
    fig.add_trace(iou_trace)

    # Slightly transparent bars drawn in overlay mode.
    fig.update_traces(opacity=0.65)

    layout_options = dict(
        barmode='overlay',
        title_x=0.5,
        title_text='Histograms of IOUs',   # title of plot
        xaxis_title_text='IOUs',           # xaxis label
        yaxis_title_text='Counts',         # yaxis label
        bargap=0,                          # gap between bars of adjacent location coordinates
        bargroupgap=0.1,                   # gap between bars of the same location coordinates
    )
    fig.update_layout(**layout_options)

    fig.show("notebook")

In [None]:
def plot_heatmap(papers):
    """
    Show side-by-side contour plots of overall precision and recall over a
    grid of IOU thresholds (y axis) and score thresholds (x axis).

    Both thresholds are swept over np.arange(0, 1, 0.05); each grid point is
    one get_confusion() call.
    """
    thresholds = np.arange(0,1,0.05)

    ps = []
    rs = []
    for iou_t in thresholds:
        # One row per IOU threshold, one column per score threshold.
        row = [get_confusion(papers, iou_t, score_t)[:2] for score_t in thresholds]
        ps.append([precision for precision, _ in row])
        rs.append([recall for _, recall in row])

    fig = make_subplots(rows=1, cols=2,subplot_titles=('Precision', 'Recall'))

    for col, grid in enumerate((ps, rs), start=1):
        contour = go.Contour(
            z=grid,
            x=thresholds,
            y=thresholds,
            contours=dict(
                start=0,
                end=0.8,
                size=0.05,
                showlabels=True,  # show labels on contours
                labelfont=dict(size=12, color='white'),  # label font properties
            ),
        )
        fig.add_trace(contour, row=1, col=col)
        fig.update_xaxes(title_text="BB Score", row=1, col=col)
        fig.update_yaxes(title_text="IOU", row=1, col=col)

    fig.update_layout(height=600, width=1000, title_text="Precision/Recall vs IOU vs Scores", title_x=0.5)

    fig.show("notebook")


In [None]:
def plot_p_r(papers, iou_thresh, score_thresh):
    """
    Show overlaid precision/recall histograms per paper and per page, then
    print the overall precision and recall for the given thresholds.
    """
    p, r, p_papers, r_papers, p_pages, r_pages, _ = get_confusion(papers, iou_thresh, score_thresh)

    # --- paper-wise figure -------------------------------------------------
    paper_fig = go.Figure()
    for label, values in (("Precision-Papers", p_papers), ("Recall-Papers", r_papers)):
        paper_fig.add_trace(go.Histogram(x=values, name=label, histnorm='percent', nbinsx=10, autobinx=False))

    # Overlay both histograms and make them see-through.
    paper_fig.update_traces(opacity=0.5)
    paper_layout = dict(
        barmode='overlay',
        title_x=0.5,
        title_text='Histograms of Paper-wise recall and precision',  # title of plot
        xaxis_title_text='P/R',                                      # xaxis label
        yaxis_title_text='Percent Papers',                           # yaxis label
        bargap=0,          # gap between bars of adjacent location coordinates
        bargroupgap=0.1,   # gap between bars of the same location coordinates
    )
    paper_fig.update_layout(**paper_layout)
    paper_fig.show("notebook")

    # --- page-wise figure --------------------------------------------------
    page_fig = go.Figure()
    for label, values in (("Precision-Pages", p_pages), ("Recall-Pages", r_pages)):
        page_fig.add_trace(go.Histogram(x=values, bingroup=1, name=label, histnorm='percent'))

    page_fig.update_traces(opacity=0.5)
    page_layout = dict(
        barmode='overlay',
        title_text='Histograms of Page-wise recall and precision',   # title of plot
        title_x=0.5,
        xaxis_title_text='P/R',                                      # xaxis label
        yaxis_title_text='Percent Pages',                            # yaxis label
        bargap=0,          # gap between bars of adjacent location coordinates
        bargroupgap=0.1,   # gap between bars of the same location coordinates
        xaxis=dict(dtick=0.1),
    )
    page_fig.update_layout(**page_layout)
    page_fig.show("notebook")

    print(f"Over All Stats - Precision : {p}, Recall : {r}")

# Evaluation Notebook

In [None]:
# Load the validation annotations and the predictions stored in
# pred_results_lr00025_e1000_b128.json, then match them page by page.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/CNNTest/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_lr00025_e1000_b128.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)


In [None]:
# Visualise the current `papers` / `ious`: IOU histogram, precision/recall
# contour maps over both thresholds, and the per-paper / per-page histograms
# at IOU threshold 0.5 and score threshold 0.8.
plot_iou_hist(ious)
plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Evaluate the predictions stored in pred_results_lr00025_e1000_b256.json.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/CNNTest/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_lr00025_e1000_b256.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
plot_iou_hist(ious)
plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Evaluate the predictions stored in pred_results_lr00025_e1000_b512.json.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/CNNTest/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_lr00025_e1000_b512.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
plot_iou_hist(ious)
plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Evaluate the predictions stored in pred_results_lr0005_e1000_b512.json.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/CNNTest/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_lr0005_e1000_b512.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
plot_iou_hist(ious)
plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Evaluate the predictions stored in pred_results_lr001_e1000_b512.json.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/CNNTest/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_lr001_e1000_b512.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
plot_iou_hist(ious)
plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Evaluate the predictions stored in pred_results_lr002_e1000_b512.json.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/CNNTest/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_lr002_e1000_b512.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
plot_iou_hist(ious)
plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Evaluate the predictions stored in pred_results_lr005_e1000_b512.json.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/CNNTest/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_lr005_e1000_b512.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
plot_iou_hist(ious)
plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Evaluate the predictions stored in pred_results_lr001_e3000_b512.json.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/CNNTest/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_lr001_e3000_b512.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
plot_iou_hist(ious)
plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Evaluate the predictions stored in pred_results_lr001_e5000_b512.json from
# the CNNTest2 directory; only the precision/recall histograms are shown.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/CNNTest2/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_lr001_e5000_b512.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
# plot_iou_hist(ious)
# plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Recompute the confusion statistics for the current `papers` at IOU threshold
# 0.5 and score threshold 0.8, keeping the per-page counts table (papers_df).
p,r,p_papers, r_papers, p_pages, r_pages,papers_df = get_confusion(papers,0.5, 0.8)

In [None]:
# papers_df holds one row per (paper, page), so this share is over pages,
# not papers; the message says so.
f"{(papers_df[papers_df['total_truth']>0].shape[0]/papers_df.shape[0]) * 100} % of all pages have at least one eqn"

In [None]:
# Overall (precision, recall) from the per-page counts. Sum only the three
# count columns, once, instead of summing the whole frame four times.
totals = papers_df[['tp', 'fp', 'fn']].sum()
totals.tp/(totals.tp+totals.fp), totals.tp/(totals.tp+totals.fn)

In [None]:
# (precision, recall) restricted to pages that contain at least one truth box.
# Filter and sum the count columns once instead of six times.
totals_with_truth = papers_df.loc[papers_df['total_truth']>0, ['tp', 'fp', 'fn']].sum()
totals_with_truth.tp/(totals_with_truth.tp+totals_with_truth.fp), totals_with_truth.tp/(totals_with_truth.tp+totals_with_truth.fn)

In [None]:
# Pages with no truth boxes, ordered by how many predictions survived the
# score threshold (top 20).
papers_df[(papers_df['total_truth']==0)].sort_values('total_pred_thresh',ascending=False).head(20)

In [None]:
# Pages with at least one truth box, ordered by false-negative count (top 20).
papers_df[(papers_df['total_truth']>0)].sort_values('fn',ascending=False).head(20)

In [None]:
# Pages with at least one truth box, ordered by false-positive count (top 20).
papers_df[(papers_df['total_truth']>0)].sort_values('fp',ascending=False).head(20)

In [None]:
# Evaluate the predictions stored in pred_results.json from the `large`
# directory; only the precision/recall histograms are shown.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/large/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
# plot_iou_hist(ious)
# plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)

In [None]:
# Evaluate the predictions stored in pred_results_longer.json from the `large`
# directory; only the precision/recall histograms are shown.
# The files are opened with `with` so the handles are closed after parsing.
path = '../data/large/'
with open(path+'val/bounding_box_data.json','r') as truth_file:
    valid_bbs = json.load(truth_file)
with open(path+'pred_results_longer.json','r') as pred_file:
    predictions = json.load(pred_file)
papers, ious = paper_statistics(path, valid_bbs, predictions)
# plot_iou_hist(ious)
# plot_heatmap(papers)
plot_p_r(papers, iou_thresh=0.5, score_thresh=0.8)