In [16]:
import os
import pandas
import cv2
import numpy as np
import time

### Configuration

In [17]:
# color
color_of_edges = (0, 0, 0) # black
color_of_holes = (255, 255, 255) # white
color_of_nodes = (0, 0, 0)
color_red = (0, 0, 255)
color_blue = (255, 0, 0)
color_green = (0, 255, 0)
color_yellow = (255, 255, 0)
color_of_inner_node = color_green
color_of_outer_node = color_red

# path
data_path = "../../../Data/"
ground_truth_folder = "ground_truth"
sparse_folder = "Sparse"
uniform_folder = "Uniform"
graph_files_folder = "graph_files"
jpg_folder = "jpg"
jpg_fd_files_folder = "jpg_fd_files"
jpg_gt_files_folder = "jpg_gt_files"
jpg_gt_ct_files_folder = "jpg_gt_ct_files"
jpg_fd_files_ln_ct_folder = "jpg_fd_files_ln_ct"
jpg_fd_files_gn_ct_folder = "jpg_fd_files_gn_ct"

### Contour tracing functions for finding the boundary nodes

In [18]:
def fill_color_demo(image):
    '''
    Fill the outer contours using flood fill algorithm
    Args: cv2.img file
    Returns: cv2.img file
    '''
    copyIma = image.copy()
    h, w = image.shape[:2]
    print(h, w)
    mask = np.zeros([h+2, w+2], np.uint8)
    
    cv2.floodFill(copyIma, mask, (0, 0), (0, 0, 0), (100, 100, 100), (50, 50, 50), cv2.FLOODFILL_FIXED_RANGE)
    cv2.floodFill(copyIma, mask, (w-1, 0), (0, 0, 0), (100, 100, 100), (50, 50, 50), cv2.FLOODFILL_FIXED_RANGE)  
    cv2.floodFill(copyIma, mask, (0, h-1), (0, 0, 0), (100, 100, 100), (50, 50, 50), cv2.FLOODFILL_FIXED_RANGE)  
    cv2.floodFill(copyIma, mask, (w-1, h-1), (0, 0, 0), (100, 100, 100), (50, 50, 50), cv2.FLOODFILL_FIXED_RANGE)  

    return copyIma

In [19]:
def findHoleNodes(hole, gt_nodes_list, all_inner_list, d = -4.99):
    '''
    Find the nodes of hole
    Args: 
        hole: the jpg file generated by fd algorithm
        gt_nodes_list: ground truth nodes list
        img: the image to be painted
        color: the paint color of the nodes in contour
        d: The distance which between the nodes and hole
        thickness: thickness value of the nodes in contour
        label: True->paint the nodes in contour
    Returns: nodes_in_contour_list, nodes_not_in_contour_list
    '''
    inner_list = []  #The nodes of hole 
    # Check if any of the nodes in the given list are lying on the contour
    for node in gt_nodes_list:
        distance = cv2.pointPolygonTest(hole, node, measureDist=True)
        if distance >= d:
            if node not in all_inner_list:
                inner_list.append(node)
    return inner_list

In [20]:
def gt_nodes_ct_contours(fpath, gt_nodes_list, area_min, color_inner = color_of_inner_node, 
                  color_outer = color_of_outer_node, label=True):
    '''
    Calculate the number of the nodes in or not in the contours > area_min
    Args: 
        fpath: the jpg file generated by fd algorithm
        gt_nodes_list: ground truth nodes list
        area_min: based on the size of canvas
    Returns:
        nodes_inner_list: the list of inner nodes in fpath file
        nodes_outer_list: the list of outer nodes in fpath file
        img: painted image with blue holes, red outer nodes and green inner nodes
    '''
    img = cv2.imread(fpath)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find holes (inner contours) if hole's are > are_min
    holes = []
    for i in range(len(contours)):
        if hierarchy[0][i][3] == -1 and cv2.contourArea(contours[i]) > int(area_min):
            holes.append(contours[i])
            # cv2.fillPoly(img, pts=[contours[i]], color=(255,0,0))

    # get list of ground truth nodes in or not in the contours 
    nodes_inner_list = []
    nodes_outer_list = []
    for hole in holes:
        inner_list = findHoleNodes(hole, gt_nodes_list, nodes_inner_list)
        nodes_inner_list.extend(inner_list)
    for node in gt_nodes_list:
        if node in nodes_inner_list:
            continue
        else:
            nodes_outer_list.append(node)
    # img = cv2.drawContours(img, holes, -1, (255, 0, 0), 3)
    if label == True:
        for outer_node in nodes_outer_list:
            cv2.circle(img, outer_node, radius = 10, color = color_outer, thickness = 4)  #Add the node result
        for inner_node in nodes_inner_list:
            cv2.circle(img, inner_node, radius = 10, color = color_inner, thickness = 4)  #Add the node result
    # Print the results
    print("CT Results: Num_Contours=%d , Num_Holes=%d, Num_InnerNodes=%d, Num_OuterNodes=%d" % (len(contours),len(holes),len(nodes_inner_list),len(nodes_outer_list)))
    return nodes_inner_list, nodes_outer_list, img

In [21]:
def gt_nodes_ccl_contours(fpath, gt_nodes_list, area_min, neighbor, color_inner = color_of_inner_node, 
                  color_outer = color_of_outer_node, label=True):
    '''
    Calculate the number of the nodes in or not in the contours > area_min
    Args: 
        fpath: the jpg file generated by fd algorithm
        gt_nodes_list: ground truth nodes list
        area_min: based on the size of canvas
    Returns:
        nodes_inner_list: the list of inner nodes in fpath file
        nodes_outer_list: the list of outer nodes in fpath file
        img: painted image with blue holes, red outer nodes and green inner nodes
    '''
    # Load the image and convert it to grayscale
    img = cv2.imread(fpath)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Threshold the image to create a binary image
    ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # Use connected component labeling to find the contours
    if neighbor == 8:
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)
    elif neighbor == 4:
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=4)
    else:
        raise ValueError("Invalid neighbor vector. Choose '4' or '8'.")


    # Loop through each label (excluding the background label 0)
    # Find holes (inner contours) if hole's are > are_min
    holes = []
    contours = []
    for i in range(1, num_labels):
        # Create a mask for the current label
        mask = np.zeros(thresh.shape, dtype=np.uint8)
        mask[labels == i] = 255
        # Find the contour for the current label
        contour, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for c in contour:
            contours.append(c)
            if cv2.contourArea(c) > int(area_min):
                holes.append(c)
            # cv2.fillPoly(img, pts=[contours[i]], color=(255,0,0))

    # get list of ground truth nodes in or not in the contours 
    nodes_inner_list = []
    nodes_outer_list = []
    for hole in holes:
        inner_list = findHoleNodes(hole, gt_nodes_list, nodes_inner_list)
        nodes_inner_list.extend(inner_list)
    for node in gt_nodes_list:
        if node in nodes_inner_list:
            continue
        else:
            nodes_outer_list.append(node)
    # img = cv2.drawContours(img, holes, -1, (255, 0, 0), 3)
    # if label == True:
    #     for outer_node in nodes_outer_list:
    #         cv2.circle(img, outer_node, radius = 10, color = color_outer, thickness = 4)  #Add the node result
    #     for inner_node in nodes_inner_list:
    #         cv2.circle(img, inner_node, radius = 10, color = color_inner, thickness = 4)  #Add the node result
    # Print the results
    print("CCL N8 Results: Num_Contours=%d , Num_Holes=%d, Num_InnerNodes=%d, Num_OuterNodes=%d" % (len(contours),len(holes),len(nodes_inner_list),len(nodes_outer_list)))
    return nodes_inner_list, nodes_outer_list, img


### Drawing functions

In [22]:
#Draw all the nodes
def drawAllNodes(img,all_nodes,radiu,color = color_of_nodes,thickness=1,lineType=0,shift=0):
    for i in range (0,int(len(all_nodes))): 
        cv2.circle(img, all_nodes[i], radiu ,color,thickness,lineType,shift)  #Add the node result
        
    return img

#Draw all the edges
def drawAllEdges(img,all_nodes,all_edges,color = color_of_edges,thickness=3,lineType=8,shift=0):
    for i in range (0,int(len(all_edges))):
        cv2.line(img, all_nodes[all_edges[i][0]], all_nodes[all_edges[i][1]],color,thickness, lineType, shift)
        
    return img

### Load ground truth and generate jpg with nodes in or not in contours functions

In [23]:
def load_ground_truth_data(ground_truth_fpath):
    '''
    Load ground truth file and generate the jpg file for finding holes
    Args: ground_truth_fpath, target_fpath
    Returns: the nodes position list and jpg file name
    '''
    data_list = [] # The data list
    num_nodes = int
    all_nodes = []
    node_x = []
    all_edges = []
    node_y = []
    data_f = open(ground_truth_fpath, 'r')
    for line in data_f.readlines():
        data_list.append(line.strip())
    num_nodes = int (data_list.pop(0)) #Pop out the number of nodes 
    
    #Get all the number node coordinate
    for i in range (0,num_nodes):
        node = data_list.pop(0) #Pop out the node coordinate
        node = node.split()
        node.pop(0) #Pop out the node index  
        node_x.append(node[0])
        node_y.append(node[1])
    #Change it to list    
    node_x = list(map(float, node_x))
    node_y = list(map(float, node_y))
    
    #Find the minimun value in x and y list
    min_x = min(node_x)
    min_y = min(node_y)
    #If the coorodinate of nodes are nagative, change it to be positive
    if min_x or min_y < 0:
        for i in range (0,num_nodes):
            node_x[i] = node_x[i] + min_x * -1
            node_y[i] = node_y[i] + min_y * -1
    #Find the maximun value in x and y list        
    max_x = max(node_x)
    max_y = max(node_y)
    
    #Origin coordinates
    xc = 0
    yc = 0
    
    #Calulate the canvas size by the node numbers and radius of node
    canvas_size = (4 * num_nodes)
    #Calulate the ratio for enlarge the coorodinate of node that fitting the canvas size
    node_ratio = (min((canvas_size / max_x), (canvas_size / max_y)))
    area_min_radio = 5000 / 4000
    area_min = area_min_radio * canvas_size
    
    for i in range (0,num_nodes):
        x = int(xc + node_ratio * (node_x[i] - xc))
        y = int(yc + node_ratio * (node_y[i] - yc))
        all_nodes.append((x, y)) #(X, Y) 
    
    num_edges = int (data_list.pop(0)) #Pop out the number of edges 
    #Get all the edge
    for i in range (0,num_edges): 
        edge = data_list.pop(0) #Pop out the edge connection
        edge = edge.split()
        all_edges.append([int(edge[0]),int(edge[1])]) #(point 1, point 2)
    data_f.close

    
    
    return all_nodes, all_edges, num_nodes, canvas_size, area_min

In [24]:
def generate_jpg_for_gt(fname, ground_truth_folder, jpg_gt_files_folder):
    '''
    Generate jpg files for ground truth nodes
    Args:
        fname: ground truth file name
        ground_truth_folder: ground truth folder name
        jpg_gt_files_folder: the folder name of ground truth jpg file
    Returns: 
        jpg_gt_name: ground truth jpg path, 
        all_nodes: all nodes of ground truth  
        area_min: the minimum area to sort contours
    '''
    time_start = time.time()
    all_nodes = []
    all_edges = []
    #Read data 
    all_nodes, all_edges, num_nodes, canvas_size, area_min = load_ground_truth_data(fname) 
    
    results_img = np.full((canvas_size, canvas_size, 3), 255 ,np.uint8) #create a img

    #Draw all the edge
    drawAllEdges(results_img,all_nodes,all_edges,color_of_edges,lineType= cv2.LINE_AA)

    #Draw all the node
    drawAllNodes(results_img,all_nodes,1,color_of_nodes,lineType= cv2.LINE_AA)
    
    jpg_gt_name = fname.replace(ground_truth_folder, os.path.join(jpg_folder, jpg_gt_files_folder)) + ".jpg"
    # print("jpg_gt_name = ", jpg_gt_name)
    cv2.imwrite(jpg_gt_name, results_img)
    time_end = time.time()
    # print('It cost %f seconds' % (time_end - time_start))
    # print ("DONE.")
    return jpg_gt_name, all_nodes, area_min

### Batch process: read files

In [25]:
def add_gt_files_main(node_list, degree_list, data_path, ground_truth_folder, jpg_gt_files_folder, type_folder):
    '''
    1.Traverse the ground truth files and append the files path to the list
    2.Write the ground truth files 
    Args: 
        data_path: Data path
        ground_truth_folder: Ground truth folder name
        type_folder_name: Type folder name
        node_list: The number of nodes we need to process with
        degree_list: The degree we need to process with
    Returns: 
        gt_nodes_dict: key=jpg_gt_path, value=[all_nodes, area_min]
    '''
    # Add all required 
    gt_list = []
    for n in node_list:
        for d in degree_list:
            fname = "n=" + str(n) + "d=" + str(d)
            gt_list.append(os.path.join(data_path, ground_truth_folder, type_folder, fname))

    # Read the ground truth files 
    gt_nodes_dict = {}
    for gt_path in gt_list:
        # jpg_gt_path = gt_path.replace(ground_truth_folder, jpg_gt_files_folder) + ".jpg"
        # if os.path.exists(jpg_gt_path):
        #     print("File " + jpg_gt_path + " exist!")
        #     continue
        jpg_gt_name, all_nodes, area_min = generate_jpg_for_gt(gt_path, ground_truth_folder, jpg_gt_files_folder)    
        gt_nodes_dict[jpg_gt_name] = [all_nodes,area_min]
    return gt_nodes_dict
        

In [26]:
def gt_ct_batch_main(gt_nodes_dict):
    '''
    1. Get the nodes in or not in the contours whose area is smaller than area_min from ground truth nodes dictionary
    2. Write jpg format file of traced ground truth files
    Returns: dictionary key=gt jpg path, value=[gt inner, gt outer, area_min]
    '''
    inner_outer_dict = {}
    for fpath, nodes in gt_nodes_dict.items():
        area_min = nodes[1]
        inner,outer,img = gt_nodes_ct_contours(fpath, nodes[0], area_min)
        cv2.imwrite(fpath.replace("jpg_gt_files", "jpg_gt_ct_files"),img)
        inner_outer_dict[fpath] = [inner,outer,area_min]
    return inner_outer_dict

### Computation for the properties of ground truth nodes in fd graph

In [27]:
def gt_path2fd_jpg_path_batch(inner_outer_dict,fd_list):
    '''
    Convert the file name of ground truth jpg file to fd algorithm file
    Returns: dictionary key=fd path, value=[gt inner, gt outer, area_min]
    '''
    # ../../../Data/jpg/jpg_gt_files/Uniform/n=200d=10.jpg
    result_dict = {}
    for fpath, nodes in inner_outer_dict.items():
        for fd in fd_list:
            fnlist = fpath.split("/")
            file_dir = os.path.join(data_path, "jpg", "jpg_fd_files", fnlist[-2], fd)
            for root, dirs, files in os.walk(file_dir): 
                for file in files: 
                    if fnlist[-1].replace(".jpg","") in os.path.splitext(file)[0]: 
                        print(os.path.join(root,file))
                        result_dict[os.path.join(root,file)] = nodes
    return result_dict

In [28]:
def get_accuracy_batch(inner_outer_dict, fd_list):
    '''
    GT_GN：利用GroundTruth生成的Graph中在大于某个面积的洞的点
    GT_LN：利用GroundTruth生成的Graph中不在大于某个面积的洞的点
    TP（真阳性）= GT_GN in FD-CT/FD-CCL contours
    TN（真阴性）= GT_LN not in FD-CT/FD-CCL contours
    FP（假阳性）= GT_LN in FD-CT/FD-CCL contours
    FN（假阴性）= GT_GN not in FD-CT/FD-CCL contours
    Get TP TN FP FN based on the fd jpg path, inner and outer nodes of ground truth graph
    Args: gt_gn_ln_dict[jpg_fd_fpath] = [gt_gn, gt_ln]
    Returns: dict key=fd_path, value=[TP,TN,FP,FN]
    '''
    nodes_dict = {}
    TPR_dict = {}
    TNR_dict = {}
    PPV_dict = {} #
    NPV_dict = {} # Negative Precision Value
    # Create fd jpg path based on the ground truth path
    fd_files_dict = gt_path2fd_jpg_path_batch(inner_outer_dict, fd_list)
    for fd_path, inner_outer in fd_files_dict.items():
            print("================== Start processing: " + fd_path + " area_min: " + str(inner_outer[2]) + "=======================")
            # print("================== CT TP FP =================================================")
            # for inner nodes of GT if in or not in FD graph 
            gn_nodes_inner_fdct, gn_nodes_outer_fdct, painted_gn_img = gt_nodes_ct_contours(fd_path, inner_outer[0], inner_outer[2])
            # print("================== CT FN TN =================================================")
            # for outer nodes of GT if in or not in FD graph
            ln_nodes_inner_fdct, ln_nodes_outer_fdct, painted_ln_img = gt_nodes_ct_contours(fd_path, inner_outer[1], inner_outer[2])
        #     print("================== CCL N4 TP FP =================================================")
        #     # for inner nodes of GT if in or not in FD graph 
        #     gn_nodes_inner_fdccl, gn_nodes_outer_fdccl, painted_gn_img = gt_nodes_ccl_contours(fd_path, inner_outer[0], inner_outer[2],4)
        #     print("================== CCL N4 FN TN =================================================")
        #     # for outer nodes of GT if in or not in FD graph
        #     ln_nodes_inner_fdccl, ln_nodes_outer_fdccl, painted_ln_img = gt_nodes_ccl_contours(fd_path, inner_outer[1], inner_outer[2],4)
            print("================== Finish processing: "+ fd_path +" ======================")
            # Save image
            cv2.imwrite(fd_path.replace("jpg_fd_files","jpg_fd_files_gn_ct"), painted_gn_img)
            cv2.imwrite(fd_path.replace("jpg_fd_files","jpg_fd_files_ln_ct"), painted_ln_img)
            TP,FP,FN,TN = len(gn_nodes_inner_fdct),len(gn_nodes_outer_fdct),len(ln_nodes_inner_fdct),len(ln_nodes_outer_fdct)
            nodes_dict[fd_path] = [gn_nodes_inner_fdct, gn_nodes_outer_fdct, ln_nodes_inner_fdct, ln_nodes_outer_fdct]
            TPR_dict[fd_path] = TP/(TP+FN)
            TNR_dict[fd_path] = TN/(TN+FP)
            PPV_dict[fd_path] = TP/(TP+FP)
            NPV_dict[fd_path] = TN/(TN+FN)
    return nodes_dict, TPR_dict, TNR_dict, PPV_dict, NPV_dict


In [29]:
def main_process(node_list, degree_list, fd_list, data_path, ground_truth_folder, jpg_gt_files_folder, type_folder):
    '''
    Main process for getting properties and generating painted graph of ground truth data
    Args:
        node_list: the number of nodes you want to generate
        degree_list: the degree you want to generate
        fd_list: the force-directed algorithm you want to test with ground truth data
    Returns:

    '''
    accuracy_dict = {}
    # test get all the files' path with all nodes through nodes number and degrees
    # gt_nodes_dict[gt_jpg_path] = [all_nodes_in_gt, area_min]
    gt_nodes_dict = add_gt_files_main(node_list, degree_list, data_path, ground_truth_folder, jpg_gt_files_folder, type_folder)
    print(gt_nodes_dict)

    # 1. Get the nodes in or not in the contours whose area is smaller than area_min from ground truth nodes dictionary
    # 2. Write jpg format file of traced ground truth files
    # gt_inner_outer_dict[fd_jpg_path] = [gt_inner_nodes, gt_outer_nodes, area_min]
    gt_inner_outer_dict = gt_ct_batch_main(gt_nodes_dict)
    print(gt_inner_outer_dict)
    # test get properties
    # nodes_dict[fd_jpg_path] = [
    #           the list of gt inner nodes in fd contours,
    #           the list of gt inner nodes not in fd contours,
    #           the list of gt outer nodes in fd contours,
    #           the list of gt outer nodes not in fd contours
    #       ]
    # TPR_dict: Sensitivity, TNR_dict: Specificity, PPV_dict:Precision, NPV_dict:Negative Precision
    nodes_dict, TPR_dict, TNR_dict, PPV_dict, NPV_dict =  get_accuracy_batch(gt_inner_outer_dict, fd_list)
    fd_path_list = []
    TPR_list = []
    TNR_list = []
    PPV_list = []
    NPV_list = []
    for fpath,nodes in nodes_dict.items():
        fd_path_list.append(fpath)
        TPR_list.append(TPR_dict[fpath])
        TNR_list.append(TNR_dict[fpath])
        PPV_list.append(PPV_dict[fpath])
        NPV_list.append(NPV_dict[fpath])
        accuracy_dict['FD graph'] = fd_path_list
        accuracy_dict['TPR Sensitivity'] = TPR_list
        accuracy_dict['TNR Specificity'] = TNR_list
        accuracy_dict['PPV Precision'] = PPV_list
        accuracy_dict['NPV Negative Precision'] = NPV_list
    return accuracy_dict



In [30]:
# batch test
# node_list = [500]

node_list = [200, 500, 1000, 2000, 3000]

# degree_list = [6]

degree_list = [6, 8, 10, 12, 15]
# fd_list = ["FR","FA2","JIGGLE","KK"]
fd_list = ["FR","KK","FA2","JIGGLE"]
type_folder = uniform_folder
accuracy_dict = main_process(node_list,degree_list,fd_list,
                             data_path = data_path, ground_truth_folder = ground_truth_folder, 
                             jpg_gt_files_folder = jpg_gt_files_folder,
                             type_folder = type_folder)


{'../../../Data/jpg/jpg_gt_files/Uniform/n=200d=6.jpg': [[(425, 616), (455, 447), (387, 105), (128, 520), (671, 8), (607, 421), (374, 795), (231, 499), (68, 603), (791, 96), (96, 383), (77, 309), (389, 565), (62, 88), (536, 153), (209, 125), (36, 539), (235, 759), (79, 154), (329, 411), (392, 477), (337, 563), (476, 568), (64, 227), (131, 0), (69, 528), (148, 691), (396, 303), (721, 768), (649, 681), (273, 415), (570, 641), (531, 232), (279, 511), (116, 131), (650, 244), (349, 3), (694, 154), (255, 58), (486, 25), (603, 8), (493, 162), (638, 586), (609, 74), (595, 175), (720, 574), (265, 684), (782, 278), (765, 747), (15, 99), (367, 689), (145, 466), (42, 306), (48, 450), (585, 728), (192, 458), (791, 511), (674, 655), (449, 299), (405, 680), (77, 263), (592, 592), (757, 168), (675, 598), (317, 496), (0, 765), (132, 291), (65, 760), (749, 76), (171, 74), (14, 623), (113, 187), (769, 26), (768, 480), (295, 700), (103, 434), (656, 133), (297, 572), (262, 293), (392, 600), (361, 277), (21

In [None]:
import pandas as pd
result_df = pd.DataFrame.from_dict(accuracy_dict)
result_df.to_csv("csv/"+type_folder+"/result_uniform.csv",index=False)
