In [54]:
import numpy as np
import pandas as pd
import os
import pickle
import xlsxwriter
from os.path import join as PT

In [2]:
# Identifier of the program ("code") being analysed; it selects the
# per-code sub-directories used for input, output and the tree definition.
CODE = "5"
# Root directory holding one sub-directory per code with its "<CODE>.tree" file.
TREE_BASE_DIR = "Tree_Structure/"
# Directory of the per-subject Excel workbooks read by the counting loop.
SEGMENTS_BASE_DIR = "output_fixation/{}/freq_segment/data/".format(CODE)
# Directory where the pickled per-segment node counts are written.
OUTPUT_DIR = "output_fixation/{}/tree_segment/".format(CODE)
# Sub-directory (below TREE_BASE_DIR/CODE) that receives the rendered figures.
VISUAL_DIR = "visual_plots/"
# Minimum share of a segment's total that a row needs to count as "major".
THRESHOLD = 0.05
# Node fill colour per count value; the last entry is also used for any
# count that has no entry of its own.
color_map = {
    0: "#dfe7f9",
    1: "#dae5d5",
    2: "#fff2cc",
    3: "#f8cecc",
    4: "#e1d5e7",
    5: "#e1d537",
    6: "#60a917",
    7: "#0050ef",
    8: "#a20025",
}

In [3]:
# Create the output directories up front. os.makedirs also creates any missing
# parent directory, and exist_ok=True makes the call a no-op when the directory
# is already there, so no separate (race-prone) existence check is needed.
os.makedirs(os.path.join(OUTPUT_DIR), exist_ok=True)
os.makedirs(os.path.join(TREE_BASE_DIR,CODE), exist_ok=True)
os.makedirs(os.path.join(TREE_BASE_DIR,CODE,VISUAL_DIR), exist_ok=True)

# Tree Data Generation

In [4]:
class Node:
    """One node of the code-structure tree.

    Attributes:
        node_num: numeric identifier of the node.
        name: label of the node (may be None).
        children: child nodes, in insertion order.
        lines: integers attached to this node (filled in by the tree reader).
    """

    def __init__(self,name=None,node_num=0):
        self.node_num = node_num
        self.name = name
        self.children = []
        self.lines = []

    def insert(self,child):
        """Append `child` as the last child of this node."""
        self.children.append(child)

    def traverse(self):
        """Print this node and then, recursively, each of its children (pre-order)."""
        print(self)
        for child in self.children:
            child.traverse()

    def __str__(self):
        """Return "[node_num : name : lines : child node numbers]".

        Built with str.format so that a node without a name (name=None, the
        constructor default) can still be printed instead of raising TypeError.
        """
        child_nums = [child.node_num for child in self.children]
        return "[{} : {} : {} : {}]".format(self.node_num, self.name, self.lines, child_nums)

In [5]:
def read_and_create_tree(file):
    """Build a tree of Node objects from a tree-definition file.

    Each non-blank line is one whitespace-separated command:
        CREATE <name> <n> <l1> ... <ln>   create a node carrying n line numbers;
                                          its node_num is its creation index
        INSERT <parent> <child>           attach node <child> below node <parent>
                                          (both are creation indices)

    Blank lines are skipped and any run of whitespace separates tokens, so a
    trailing empty line or a doubled space no longer aborts the parse.
    On an unknown command an error is printed and reading stops.

    Args:
        file: path of the tree-definition file.

    Returns:
        The first node created in the file.

    Raises:
        IndexError: if no node was created, or if a CREATE line lists fewer
            line numbers than it declares.
    """
    nodes = []
    with open(file) as f:
        for line in f:
            statements = line.split()
            if not statements:
                # Blank (or whitespace-only) line: nothing to do.
                continue
            command = statements[0]
            if command == "CREATE":
                node = Node(statements[1],len(nodes))
                num_lines = int(statements[2])
                # Explicit indexing keeps the IndexError for truncated lines.
                node.lines.extend(int(statements[i+3]) for i in range(num_lines))
                nodes.append(node)
            elif command == "INSERT":
                parent = int(statements[1])
                child = int(statements[2])
                nodes[parent].insert(nodes[child])
            else:
                print("Error: Invalid command!!")
                break
    return nodes[0]

In [44]:
def get_line_to_node_mapping(root,mappings):
    """Record in `mappings` which node owns each line, for `root` and its subtree.

    Every entry of a node's `lines` becomes a key of `mappings` whose value is
    that node. If a line is already present, an error is printed and the
    processing of the current node stops there: its remaining lines and its
    children are not visited (nodes elsewhere in the tree still are).

    Args:
        root: node whose subtree is walked.
        mappings: dict updated in place (line -> node).
    """
    for line_no in root.lines:
        if line_no not in mappings:
            mappings[line_no] = root
            continue
        print("Error : Line "+str(line_no)+" mapped twice")
        return
    for sub_node in root.children:
        get_line_to_node_mapping(sub_node,mappings)

def init_node_counts(root,counts):
    """Set the count of `root` and of every node below it to 0 in `counts`.

    Nodes are visited parent first, children left to right, so a fresh dict is
    filled in pre-order.
    """
    pending = [root]
    while pending:
        current = pending.pop()
        counts[current] = 0
        # Reversed so that the first child is popped (and therefore visited) first.
        pending.extend(reversed(current.children))
        
def get_nodes(root,all_nodes):
    """Index `root` and all of its descendants by node number.

    `all_nodes` is updated in place with node_num -> node, visiting each parent
    before its children and the children left to right.
    """
    pending = [root]
    while pending:
        current = pending.pop()
        all_nodes[current.node_num] = current
        # Reversed so that the first child is popped (and therefore visited) first.
        pending.extend(reversed(current.children))
        
def get_major_components(data, threshold=None):
    """Return the first-column values of the rows that carry a major share.

    A row is "major" when its second-column value divided by the sum of the
    whole second column is at least `threshold`.

    Args:
        data: 2-D table (anything np.asarray accepts, e.g. a DataFrame or a
            list of rows). Column 0 is the value returned, column 1 the weight.
            # presumably column 0 is a source line number and column 1 a
            # fixation count - confirm against the workbooks
        threshold: minimum share in [0, 1]; defaults to the module-level
            THRESHOLD when omitted.

    Returns:
        A plain Python list with the column-0 values of the major rows, in
        input order. Empty input or an all-zero weight column yields [].
    """
    if threshold is None:
        threshold = THRESHOLD
    table = np.asarray(data)
    if table.size == 0:
        # Nothing to rank (the column slice below would fail on an empty list).
        return []
    weights = table[:, 1]
    total = np.sum(weights)
    if total == 0:
        # Avoid 0/0: no row can hold a share of an empty total.
        return []
    is_major = weights / total >= threshold
    return table[is_major, 0].tolist()

def update_node_counts(root,node_counts,major_lines,aggregate='min'):
    """Update the counts of `root`'s subtree for one segment.

    Children are processed before their parent. A node whose `lines` contain at
    least one entry of `major_lines` has its count incremented by one. A node
    without lines gets its count replaced by the aggregate of its children's
    (already updated) counts; a node with neither lines nor children keeps its
    current count.

    Args:
        root: node whose subtree is updated.
        node_counts: dict node -> count, modified in place; must already hold
            an entry for every node of the subtree.
        major_lines: container of the lines considered major in this segment.
        aggregate: 'min', 'max' or 'sum' - how a line-less node combines its
            children's counts. The same value is applied at every level of the
            tree.

    If `aggregate` is not one of the supported names an error is printed and
    nothing is modified.
    """
    reducers = {'min': min, 'max': max, 'sum': sum}
    if aggregate not in reducers:
        # Validate before touching any count so a bad call leaves no partial update.
        print("Error : Invalid aggregate!!")
        return
    for child in root.children:
        # Pass `aggregate` down; otherwise descendants silently fall back to 'min'.
        update_node_counts(child,node_counts,major_lines,aggregate)
    if any(line in major_lines for line in root.lines):
        node_counts[root] = node_counts[root] + 1
    if len(root.lines) == 0 and root.children:
        node_counts[root] = reducers[aggregate](node_counts[child] for child in root.children)

In [47]:
# Parse the tree definition "<CODE>.tree" of the configured code and index
# every node of the resulting tree by its node number.
root = read_and_create_tree(os.path.join(TREE_BASE_DIR,CODE,CODE+".tree"))
all_nodes = {}
get_nodes(root,all_nodes)
# Last expression of the cell: displays the node_num -> Node mapping.
all_nodes

{0: <__main__.Node at 0x7fe7b5b23160>,
 1: <__main__.Node at 0x7fe7b5b238d0>,
 2: <__main__.Node at 0x7fe7b5b239b0>,
 3: <__main__.Node at 0x7fe7b5b23a20>,
 4: <__main__.Node at 0x7fe7b5b23b00>,
 5: <__main__.Node at 0x7fe7b5b23b70>,
 6: <__main__.Node at 0x7fe7b5b23c50>,
 7: <__main__.Node at 0x7fe7b5b23ef0>}

In [8]:
# Print every node of the tree, each parent before its children.
root.traverse()

[0 : root : [] : [1, 2]]
[1 : statement : [1, 2, 3] : []]
[2 : func : [] : [3, 4, 5]]
[3 : statement : [4, 5, 6, 7] : []]
[4 : loop : [] : [6, 7]]
[6 : statement : [8, 9, 10] : []]
[7 : if : [11, 12, 13, 14] : []]
[5 : if : [16, 17, 18, 19] : []]


In [31]:
# For every subject workbook: walk its sheets (one per segment) in order,
# update the cumulative node counts and pickle one snapshot per segment.
for file in os.listdir(os.path.join(SEGMENTS_BASE_DIR)):
    # Only Excel workbooks are valid input; skip Office "~$" lock files and
    # anything else that happens to sit in the directory.
    if file.startswith("~$") or not file.lower().endswith((".xls", ".xlsx")):
        continue
    print(file)
    # sheet_name=None loads all sheets as {sheet name: DataFrame}. The keyword
    # was spelled `sheetname` in old pandas releases and has since been removed.
    xls = pd.read_excel(os.path.join(SEGMENTS_BASE_DIR,file),sheet_name=None)
    segment_status = []
    node_counts = {}
    init_node_counts(root,node_counts)
    for segment,data in xls.items():
        major_lines = get_major_components(data)
        print(major_lines)
        update_node_counts(root,node_counts,major_lines)
        # Copy the dict: node_counts keeps accumulating across segments.
        segment_status.append(dict(node_counts))
    # NOTE: file[:-4] removes exactly four characters, so a ".xlsx" input keeps
    # a trailing "." ("<name>._segment_status.pkl"). Left unchanged on purpose:
    # the cells that read these pickles rely on this exact file-name pattern.
    with open(os.path.join(OUTPUT_DIR,file[:-4]+"_segment_status.pkl"),'wb') as f:
        pickle.dump(segment_status,f)

final_matrix_newfix_Rakesh_2.xlsx
[1, 5, 7, 11, 12, 13, 14]
[20, 21, 22, 26, 28]
[23, 24, 27, 28, 33]
[21, 22, 25, 26, 27, 29]
[7, 15, 27, 30]
final_matrix_newfix_Vamsi2.xlsx
[2, 5, 6, 8, 9, 10]
[12, 18, 20, 22, 23, 25, 26]
[1, 7, 8]
[5, 6, 7, 15, 22]
[11, 14, 17, 18, 19, 20, 22]
[5, 9, 14, 15, 21, 22, 23, 24, 25, 27, 28]
[2, 10, 11, 14, 15]
[22, 23, 26, 28]
[6, 11, 20, 21, 23, 24, 26, 27, 29, 31, 33]
[11, 12]
final_matrix_newfix_Mayur2.xlsx
[2, 3, 4, 6, 13, 20, 21, 23]
[2, 5, 6, 7, 8, 9, 10, 11]
[11, 12, 13, 14, 16, 17, 20, 21, 26]
[13, 14, 15, 25, 27]
[10, 11, 27, 30, 32]
[27, 28, 32, 34, 36, 37]
[11, 35, 36, 37]
[27, 28, 29, 30, 34]
[28, 29, 31, 32, 33, 34]
[19, 20, 21, 22, 35, 36, 37]
[18, 19, 20, 21, 35, 36]
[15, 16, 17, 18, 19, 20, 21]
[23, 29, 31, 32, 33, 34, 35]
final_matrix_newfix_anuraag2.xlsx
[1, 2, 3, 4, 5]
[7, 11, 15, 17, 20]
[13, 14, 16, 19]
[17, 18, 19, 20]
[16, 18, 19, 20, 21, 22]
[21, 22, 27, 30]
[22, 25, 26, 27, 28, 31]
[13, 16, 22, 28, 36, 37]
[13, 14, 18, 19, 20, 22

In [32]:
# Show the node counts stored for the sixth segment (index 5).
# NOTE(review): `segment_status` is the variable left behind by the last
# iteration of the workbook loop, i.e. it belongs to whichever file was
# processed last, and indexing [5] fails if that file has fewer than six
# segments - confirm this is the subject meant to be inspected.
for node,ct in segment_status[5].items():
    print(node,ct)

[0 : root : [] : [1, 2]] 1
[1 : func-main : [] : [3, 4]] 3
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 4
[4 : if : [12, 13, 14, 15] : []] 3
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 1
[5 : statement : [18, 19, 20] : []] 3
[6 : loop : [21, 22, 23, 24, 25] : []] 2
[7 : loop-error : [27, 28, 29, 30, 31] : []] 2
[8 : loop-for : [32, 33, 34, 35, 36] : []] 1


In [33]:
# Print, for every file found in OUTPUT_DIR, the node counts of each segment.
for file in os.listdir(os.path.join(OUTPUT_DIR)):
    print("Subject - ",file)
    # NOTE(review): pickle.load can execute arbitrary code - only load files
    # written by this notebook. The pickled dict keys are Node objects, so the
    # Node class must be defined in the session before loading.
    with open(os.path.join(OUTPUT_DIR,file),'rb') as f:
        seg_stats = pickle.load(f)
    # seg_stats holds one {node: count} dict per segment; segments are
    # reported 1-based.
    for i,seg_stat in enumerate(seg_stats):
        print("Segment : ",str(i+1))
        for node,ct in seg_stat.items():
            print(node,ct)
    print("==============================================================================")

Subject -  final_matrix_newfix_rajdeep2._segment_status.pkl
Segment :  1
[0 : root : [] : [1, 2]] 0
[1 : func-main : [] : [3, 4]] 0
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 1
[4 : if : [12, 13, 14, 15] : []] 0
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 0
[5 : statement : [18, 19, 20] : []] 0
[6 : loop : [21, 22, 23, 24, 25] : []] 0
[7 : loop-error : [27, 28, 29, 30, 31] : []] 0
[8 : loop-for : [32, 33, 34, 35, 36] : []] 0
Segment :  2
[0 : root : [] : [1, 2]] 0
[1 : func-main : [] : [3, 4]] 1
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 2
[4 : if : [12, 13, 14, 15] : []] 1
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 0
[5 : statement : [18, 19, 20] : []] 1
[6 : loop : [21, 22, 23, 24, 25] : []] 1
[7 : loop-error : [27, 28, 29, 30, 31] : []] 0
[8 : loop-for : [32, 33, 34, 35, 36] : []] 0
Segment :  3
[0 : root : [] : [1, 2]] 1
[1 : func-main : [] : [3, 4]] 1
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 2
[4 : if : [12, 13, 14, 15] : []] 1
[2 : func-find_anagr

# Tree Graph Generation

In [34]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

In [35]:
import networkx as nx
from networkx.drawing.nx_agraph import write_dot, graphviz_layout
# Empty placeholder graph; `G` is rebound to the graph returned by
# read_and_create_graph in a later cell.
G = nx.Graph()

In [36]:
def read_and_create_graph(file):
    """Build a directed graph and its node labels from a tree-definition file.

    Each non-blank line is one whitespace-separated command:
        CREATE <name> <n> <l1> ... <ln>   add a graph node, numbered by
                                          creation order starting at 0
        INSERT <parent> <child>           add the edge parent -> child

    Blank lines are skipped and any run of whitespace separates tokens, so a
    trailing empty line or trailing space no longer corrupts the parse.
    On an unknown command an error is printed and reading stops.

    Args:
        file: path of the tree-definition file.

    Returns:
        (G, terminal_nodes, ct, labels) where G is the nx.DiGraph,
        terminal_nodes lists the nodes created with a non-zero line count,
        ct is the number of nodes created and labels maps node number to its
        label: the name, followed for terminal nodes by a second line
        "<first line> - <last line>".
    """
    G = nx.DiGraph()
    terminal_nodes = []
    labels = {}
    ct = 0
    with open(file) as f:
        for line in f:
            statements = line.split()
            if not statements:
                # Blank (or whitespace-only) line: nothing to do.
                continue
            command = statements[0]
            if command == "CREATE":
                label = statements[1]
                G.add_node(ct)
                num_lines = int(statements[2])
                if num_lines != 0:
                    terminal_nodes.append(ct)
                    # First and last listed line number form the displayed range.
                    label += "\n" + statements[3] + " - " + statements[-1]
                labels[ct] = label
                ct += 1
            elif command == "INSERT":
                parent = int(statements[1])
                child = int(statements[2])
                G.add_edge(parent,child)
            else:
                print("Error: Invalid command!!")
                break
    return G, terminal_nodes, ct, labels

In [37]:
# Build the graph of the configured code from its "<CODE>.tree" file.
G, terminal_nodes, total_nodes, labels = read_and_create_graph(os.path.join(TREE_BASE_DIR,CODE,CODE+".tree"))
# Base colours, indexed by node number: one colour for terminal nodes
# (those created with line numbers), another for all remaining nodes.
node_color = [('#dae5d5' if node in terminal_nodes else '#dfe7f9') for node in range(total_nodes)]

In [38]:
# Draw the whole tree, positioned by Graphviz "dot", and save it as
# <CODE>.png in the code's visual-plots directory.
plt.figure(figsize=(20,15))
pos=graphviz_layout(G, prog='dot')
nx.draw(G,pos,arrows=False,node_size=20000,node_color=node_color,labels=labels,font_size=22)
plt.suptitle("Code %s Tree" % str(CODE),fontsize = 36)
plt.savefig(os.path.join(TREE_BASE_DIR,CODE,VISUAL_DIR,CODE+".png"))



In [39]:
# Legend handles: one patch per colour of color_map, labelled with the count
# it stands for. The last colour gets the label "<n>+".
patches = []
num_colors = len(color_map)
for i in range(num_colors-1):
    pt = mpatches.Patch(color=color_map[i],label=str(i))
    patches.append(pt)
patches.append(mpatches.Patch(color=color_map[num_colors-1],label=str(num_colors-1)+"+"))

In [40]:
# Render, for every subject and every segment, the tree coloured by the
# cumulative node counts, and save it as <subject>/<segment number>.png.
status_suffix = "_segment_status.pkl"
# The layout depends only on the graph, so compute it once for all figures.
pos=graphviz_layout(G, prog='dot')
for file in os.listdir(os.path.join(OUTPUT_DIR)):
    if not file.endswith(status_suffix):
        # Not a segment-status pickle; ignore it.
        continue
    # Subject name = file name without the suffix; rstrip(".") also drops the
    # stray "." that precedes the suffix in the stored file names.
    sub_name = file[:-len(status_suffix)].rstrip(".")
    os.makedirs(os.path.join(TREE_BASE_DIR,CODE,VISUAL_DIR,sub_name), exist_ok=True)
    print("Subject - ",sub_name)
    # NOTE(review): pickle.load can execute arbitrary code - only load files
    # written by this notebook.
    with open(os.path.join(OUTPUT_DIR,file),'rb') as f:
        seg_stats = pickle.load(f)
    for i,seg_stat in enumerate(seg_stats):
        # Per-segment copies, so the base labels/colours are never modified.
        seg_labels = {node_num: label + "\n" for node_num, label in labels.items()}
        seg_colors = list(node_color)
        segment_num = str(i+1)
        print("Segment : ",segment_num)
        for node,ct in seg_stat.items():
            node_num = node.node_num
            seg_labels[node_num] += "Count : " + str(ct)
            # Counts without their own colour share the last entry of color_map.
            seg_colors[node_num] = color_map.get(ct, color_map[len(color_map)-1])
            print(node,ct)
        fig = plt.figure(figsize=(20,15))
        nx.draw(G,pos,arrows=False,node_size=20000,node_color=seg_colors,labels=seg_labels,font_size=22)
        plt.suptitle("Segment %d" % (i+1),fontsize = 36)
        plt.legend(handles=patches,fontsize=24)
        plt.savefig(os.path.join(TREE_BASE_DIR,CODE,VISUAL_DIR,sub_name,segment_num+".png"))
        # Display the figure in the notebook, then release it: one open figure
        # per segment and subject would otherwise pile up in memory.
        plt.show()
        plt.close(fig)
    print("==============================================================================")

Subject -  final_matrix_newfix_rajdeep2
Segment :  1
[0 : root : [] : [1, 2]] 0
[1 : func-main : [] : [3, 4]] 0
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 1
[4 : if : [12, 13, 14, 15] : []] 0
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 0
[5 : statement : [18, 19, 20] : []] 0
[6 : loop : [21, 22, 23, 24, 25] : []] 0
[7 : loop-error : [27, 28, 29, 30, 31] : []] 0
[8 : loop-for : [32, 33, 34, 35, 36] : []] 0




Segment :  2
[0 : root : [] : [1, 2]] 0
[1 : func-main : [] : [3, 4]] 1
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 2
[4 : if : [12, 13, 14, 15] : []] 1
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 0
[5 : statement : [18, 19, 20] : []] 1
[6 : loop : [21, 22, 23, 24, 25] : []] 1
[7 : loop-error : [27, 28, 29, 30, 31] : []] 0
[8 : loop-for : [32, 33, 34, 35, 36] : []] 0
Segment :  3
[0 : root : [] : [1, 2]] 1
[1 : func-main : [] : [3, 4]] 1
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 2
[4 : if : [12, 13, 14, 15] : []] 1
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 1
[5 : statement : [18, 19, 20] : []] 1
[6 : loop : [21, 22, 23, 24, 25] : []] 1
[7 : loop-error : [27, 28, 29, 30, 31] : []] 1
[8 : loop-for : [32, 33, 34, 35, 36] : []] 1
Segment :  4
[0 : root : [] : [1, 2]] 1
[1 : func-main : [] : [3, 4]] 2
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 3
[4 : if : [12, 13, 14, 15] : []] 2
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 1
[5 : statement : [18, 19, 20] : []

Segment :  10
[0 : root : [] : [1, 2]] 2
[1 : func-main : [] : [3, 4]] 5
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 7
[4 : if : [12, 13, 14, 15] : []] 5
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 2
[5 : statement : [18, 19, 20] : []] 3
[6 : loop : [21, 22, 23, 24, 25] : []] 5
[7 : loop-error : [27, 28, 29, 30, 31] : []] 4
[8 : loop-for : [32, 33, 34, 35, 36] : []] 2
Segment :  11
[0 : root : [] : [1, 2]] 2
[1 : func-main : [] : [3, 4]] 6
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 8
[4 : if : [12, 13, 14, 15] : []] 6
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 2
[5 : statement : [18, 19, 20] : []] 4
[6 : loop : [21, 22, 23, 24, 25] : []] 5
[7 : loop-error : [27, 28, 29, 30, 31] : []] 4
[8 : loop-for : [32, 33, 34, 35, 36] : []] 2
Segment :  12
[0 : root : [] : [1, 2]] 2
[1 : func-main : [] : [3, 4]] 7
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 9
[4 : if : [12, 13, 14, 15] : []] 7
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 2
[5 : statement : [18, 19, 20] :

Subject -  final_matrix_newfix_asish2
Segment :  1
[0 : root : [] : [1, 2]] 0
[1 : func-main : [] : [3, 4]] 1
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 1
[4 : if : [12, 13, 14, 15] : []] 1
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 0
[5 : statement : [18, 19, 20] : []] 0
[6 : loop : [21, 22, 23, 24, 25] : []] 0
[7 : loop-error : [27, 28, 29, 30, 31] : []] 0
[8 : loop-for : [32, 33, 34, 35, 36] : []] 0
Segment :  2
[0 : root : [] : [1, 2]] 0
[1 : func-main : [] : [3, 4]] 1
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 1
[4 : if : [12, 13, 14, 15] : []] 1
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 0
[5 : statement : [18, 19, 20] : []] 1
[6 : loop : [21, 22, 23, 24, 25] : []] 1
[7 : loop-error : [27, 28, 29, 30, 31] : []] 0
[8 : loop-for : [32, 33, 34, 35, 36] : []] 0
Segment :  3
[0 : root : [] : [1, 2]] 1
[1 : func-main : [] : [3, 4]] 1
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 1
[4 : if : [12, 13, 14, 15] : []] 1
[2 : func-find_anagram : [] : [5, 6, 7, 8]

Segment :  8
[0 : root : [] : [1, 2]] 1
[1 : func-main : [] : [3, 4]] 2
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 2
[4 : if : [12, 13, 14, 15] : []] 3
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 1
[5 : statement : [18, 19, 20] : []] 4
[6 : loop : [21, 22, 23, 24, 25] : []] 4
[7 : loop-error : [27, 28, 29, 30, 31] : []] 3
[8 : loop-for : [32, 33, 34, 35, 36] : []] 1
Segment :  9
[0 : root : [] : [1, 2]] 1
[1 : func-main : [] : [3, 4]] 2
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 2
[4 : if : [12, 13, 14, 15] : []] 4
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 1
[5 : statement : [18, 19, 20] : []] 5
[6 : loop : [21, 22, 23, 24, 25] : []] 5
[7 : loop-error : [27, 28, 29, 30, 31] : []] 3
[8 : loop-for : [32, 33, 34, 35, 36] : []] 1
Segment :  10
[0 : root : [] : [1, 2]] 1
[1 : func-main : [] : [3, 4]] 2
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 2
[4 : if : [12, 13, 14, 15] : []] 4
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 1
[5 : statement : [18, 19, 20] : [

Segment :  3
[0 : root : [] : [1, 2]] 1
[1 : func-main : [] : [3, 4]] 1
[3 : statement : [3, 4, 5, 6, 7, 8, 9, 10, 11] : []] 1
[4 : if : [12, 13, 14, 15] : []] 1
[2 : func-find_anagram : [] : [5, 6, 7, 8]] 1
[5 : statement : [18, 19, 20] : []] 2
[6 : loop : [21, 22, 23, 24, 25] : []] 2
[7 : loop-error : [27, 28, 29, 30, 31] : []] 1
[8 : loop-for : [32, 33, 34, 35, 36] : []] 1
