In [2]:
import numpy as np
import pandas as pd
import os
import pickle
import xlsxwriter
from os.path import join as PT
import math

In [45]:
# --- Run configuration -------------------------------------------------------
# Identifier of the code sample being analysed; it is embedded in every path below.
CODE = "37"
# Directory holding "<CODE>/<CODE>.tree" and "<CODE>/<CODE>_complexity.txt".
TREE_BASE_DIR = "Tree_Structure/"
# Input: one Excel workbook per subject.
SEGMENTS_BASE_DIR = "output_fixation/{}/freq_segment/data/".format(CODE)
# Output: one pickle of per-segment node counts per subject.
OUTPUT_DIR = "output_fixation/{}/tree_segment/weighted/".format(CODE)
# Sub-directory (under TREE_BASE_DIR/CODE) receiving the rendered tree plots.
VISUAL_DIR = "weighted_visual_plots/"
# Minimum share of a segment's total that a row needs to count as a "major" line.
THRESHOLD = 0.05
# Fill colour per truncated node count; the last entry is also used for any larger count.
color_map = dict(enumerate([
    "#dfe7f9", "#dae5d5", "#fff2cc", "#f8cecc", "#e1d5e7",
    "#e1d537", "#60a917", "#0050ef", "#a20025",
]))
# Alternative palette; not referenced by the cells visible in this notebook.
color_map_code = dict(enumerate([
    "#d80073", "#aa00ff", "#647687", "#6d8764", "#fa6800",
    "#e1d537", "#60a917", "#0050ef", "#a20025",
]))

In [46]:
# Make sure every directory written to later exists.
# os.makedirs creates missing parent directories as well (OUTPUT_DIR is several
# levels deep, which plain os.mkdir cannot create on a fresh checkout), and
# exist_ok=True makes the cell safe to re-run.
for _required_dir in (
    OUTPUT_DIR,
    os.path.join(TREE_BASE_DIR, CODE),
    os.path.join(TREE_BASE_DIR, CODE, VISUAL_DIR),
):
    os.makedirs(_required_dir, exist_ok=True)

In [47]:
class Node:
    """A node of the code-structure tree.

    Attributes are plain public fields:
      node_num -- integer identifier of the node
      name     -- label of the node (may be None)
      children -- list of child Node objects, in insertion order
      lines    -- list of source line numbers attached to this node
    """

    def __init__(self,name=None,node_num=0):
        self.node_num = node_num
        self.name = name
        self.children = []
        self.lines = []

    def insert(self,child):
        """Append `child` to this node's list of children."""
        self.children.append(child)

    def traverse(self):
        """Print this node and then every descendant in pre-order."""
        print(self)
        for child in self.children:
            child.traverse()

    def __str__(self):
        """Return "[num : name : lines : child node numbers]".

        Uses formatting instead of string concatenation so that a node created
        with the default name (None) can still be printed.
        """
        child_nums = [child.node_num for child in self.children]
        return "[{} : {} : {} : {}]".format(self.node_num, self.name, self.lines, child_nums)

In [48]:
def read_and_create_tree(file):
    """Build a Node tree from a text description and return its root.

    Each non-blank line of `file` is one whitespace-separated command:
      CREATE <name> <num_lines> <line_1> ... <line_num_lines>
          creates a node whose number is its creation index
      INSERT <parent_num> <child_num>
          attaches an already created node as child of another

    Blank or whitespace-only lines are skipped. Any other command prints an
    error and stops reading; the tree built so far is still returned.

    Returns the first node created (node 0).
    Raises ValueError if the file contains no CREATE command at all.
    """
    nodes = []
    with open(file) as f:
        for line in f:
            # split() without arguments also discards the trailing newline and
            # tolerates repeated spaces / tabs between tokens.
            statements = line.split()
            if not statements:
                continue
            command = statements[0]
            if command == "CREATE":
                node = Node(statements[1],len(nodes))
                num_lines = int(statements[2])
                # Index explicitly so a line with fewer numbers than declared
                # still fails loudly instead of being silently accepted.
                for i in range(num_lines):
                    node.lines.append(int(statements[i+3]))
                nodes.append(node)
            elif command == "INSERT":
                parent = int(statements[1])
                child = int(statements[2])
                nodes[parent].insert(nodes[child])
            else:
                print("Error: Invalid command!!")
                break
    if not nodes:
        raise ValueError("No CREATE command found in tree file: " + str(file))
    return nodes[0]

In [49]:
def get_line_to_node_mapping(root,mappings):
    """Fill `mappings` with {line number: owning node} for the tree under `root`.

    Nodes are visited in pre-order. When a node carries a line that is already
    present in `mappings`, an error is printed and the rest of that node (its
    remaining lines and its whole subtree) is skipped; siblings are still
    processed. Returns None.
    """
    pending = [root]
    while pending:
        current = pending.pop()
        for line_no in current.lines:
            if line_no in mappings:
                print("Error : Line "+str(line_no)+" mapped twice")
                break
            mappings[line_no] = current
        else:
            # Only descend when no clash was found; reversed() keeps the
            # left-to-right visiting order when popping from the end.
            pending.extend(reversed(current.children))

def init_node_counts(root,counts):
    """Set counts[node] = 0 for `root` and every descendant (pre-order)."""
    stack = [root]
    while stack:
        current = stack.pop()
        counts[current] = 0
        # reversed() so the leftmost child is popped (and inserted) first.
        stack.extend(reversed(current.children))
        
def get_nodes(root,all_nodes):
    """Record every node of the tree under `root` in `all_nodes`, keyed by node_num."""
    stack = [root]
    while stack:
        current = stack.pop()
        all_nodes[current.node_num] = current
        # reversed() preserves the original left-to-right pre-order.
        stack.extend(reversed(current.children))
        
def get_major_components(data, threshold=None):
    """Return the first-column values of rows whose share of column 1 is large enough.

    data      -- 2-D table (anything np.array accepts, e.g. a DataFrame or a
                 list of rows); column 0 is the identifier returned, column 1
                 the amount being compared.
    threshold -- minimum fraction of the column-1 total a row needs in order
                 to be kept; defaults to the module-level THRESHOLD.

    Returns a list of the column-0 values of the qualifying rows, in row
    order. An empty table, or a table whose column 1 sums to zero, yields an
    empty list instead of an IndexError / divide-by-zero warning.
    """
    if threshold is None:
        threshold = THRESHOLD
    table = np.array(data)
    if table.size == 0:
        return []
    total = np.sum(table[:,1])
    if total == 0:
        # No mass at all: no row can hold a meaningful share.
        return []
    return [row[0] for row in table if row[1]/total >= threshold]

def update_node_counts(root,node_counts,major_lines,node_weights):
    """Update `node_counts` in place for one segment, bottom-up.

    Children are processed before their parent. A node owning at least one
    line listed in `major_lines` has its count incremented by one. A node
    that owns no lines has its count replaced by the sum over its children of
    (child count * child weight), where the weight is node_weights[child][0].
    """
    for kid in root.children:
        update_node_counts(kid,node_counts,major_lines,node_weights)

    if any(line_no in major_lines for line_no in root.lines):
        node_counts[root] += 1

    if not root.lines:
        node_counts[root] = sum(
            node_counts[kid] * node_weights[kid][0] for kid in root.children
        )
        
def find_parents(root,prevNode,parents):
    """Fill `parents` with {node: its parent} for the tree under `root`.

    `prevNode` is stored as the parent of `root` itself.
    """
    stack = [(root, prevNode)]
    while stack:
        current, above = stack.pop()
        parents[current] = above
        # reversed() keeps the left-to-right pre-order of the recursive form.
        stack.extend((kid, current) for kid in reversed(current.children))

def form_node_weights(root,line_weights,node_weights):
    """Compute (mean line weight, number of lines) for every node under `root`.

    Results are stored in `node_weights[node]`, children before their parent.

    * Leaf (no children): mean of line_weights[line - 1] over the node's own
      lines (line numbers are 1-based), and the number of those lines.
    * Internal node: line-count-weighted mean of its children's means, and the
      sum of its children's line counts. Lines attached directly to an
      internal node are not taken into account.

    A node whose line count is zero gets weight 0.0 rather than raising
    ZeroDivisionError.
    """
    if len(root.children) == 0:
        n_lines = len(root.lines)
        tot = 0
        for line in root.lines:
            tot += line_weights[line-1]
        mean = tot/n_lines if n_lines else 0.0
        node_weights[root] = (mean,n_lines)
    else:
        tot = 0
        tot_lines = 0
        for child in root.children:
            form_node_weights(child,line_weights,node_weights)
            child_mean, child_lines = node_weights[child]
            tot += child_mean*child_lines
            tot_lines += child_lines
        mean = tot/tot_lines if tot_lines else 0.0
        node_weights[root] = (mean,tot_lines)

In [50]:
# Parse the tree description of this code sample, then index its nodes by number.
root = read_and_create_tree(PT(TREE_BASE_DIR, CODE, CODE + ".tree"))
all_nodes = dict()
get_nodes(root, all_nodes)
all_nodes

{0: <__main__.Node at 0x7f0ba07dee80>,
 1: <__main__.Node at 0x7f0ba07567f0>,
 2: <__main__.Node at 0x7f0bac155470>,
 3: <__main__.Node at 0x7f0bac155240>,
 4: <__main__.Node at 0x7f0bac1553c8>,
 5: <__main__.Node at 0x7f0bac155198>,
 6: <__main__.Node at 0x7f0bac155278>,
 7: <__main__.Node at 0x7f0bac155a90>,
 8: <__main__.Node at 0x7f0bac1554a8>,
 9: <__main__.Node at 0x7f0bac155208>,
 10: <__main__.Node at 0x7f0bac1558d0>,
 11: <__main__.Node at 0x7f0bac155160>,
 12: <__main__.Node at 0x7f0bac1554e0>,
 13: <__main__.Node at 0x7f0bac1552b0>,
 14: <__main__.Node at 0x7f0bac1556d8>,
 15: <__main__.Node at 0x7f0bac155128>}

In [51]:
# Print every node in pre-order as "[num : name : lines : child numbers]".
root.traverse()

[0 : root : [] : [1, 2, 3, 4, 5, 6]]
[1 : statement : [1, 2, 3] : []]
[2 : struct : [4, 5, 6, 7, 8, 9] : []]
[3 : func-dateUpdate : [] : [7, 8, 9]]
[7 : statement : [10, 11, 12, 13] : []]
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []]
[9 : statement : [26, 27] : []]
[4 : func-noOfDays : [] : [10, 11, 12]]
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []]
[11 : if : [37, 38, 39, 40] : []]
[12 : statement : [41, 42] : []]
[5 : func-isLeapYear : [] : [13, 14, 15]]
[13 : statement : [43, 44, 45] : []]
[14 : if : [46, 47, 48, 49, 50] : []]
[15 : statement : [50, 51] : []]
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []]


In [52]:
# Read one weight per row of the complexity file; entry i is later looked up
# as the weight of source line i + 1 (see form_node_weights).
with open(PT(TREE_BASE_DIR, CODE, CODE + "_complexity.txt")) as f:
    line_weights = [
        float(raw[:-1] if raw.endswith('\n') else raw)
        for raw in f
    ]
line_weights

[0.5,
 0.5,
 0.5,
 0.5,
 0.5,
 1.0,
 1.0,
 1.0,
 0.5,
 0.5,
 0.5,
 1.0,
 1.0,
 2.0,
 0.5,
 1.0,
 0.5,
 2.0,
 0.5,
 1.0,
 0.5,
 0.5,
 0.5,
 1.0,
 0.5,
 1.0,
 0.5,
 0.5,
 0.5,
 1.0,
 1.0,
 2.0,
 0.5,
 0.5,
 0.5,
 0.5,
 2.0,
 1.0,
 0.5,
 1.0,
 1.0,
 0.5,
 0.5,
 0.5,
 1.0,
 2.0,
 1.0,
 0.5,
 1.0,
 1.0,
 0.5,
 0.5,
 0.5,
 1.0,
 1.0,
 0.5,
 0.5,
 2.0,
 0.5,
 1.0,
 0.5]

In [53]:
# Compute (mean line weight, line count) for every node and list the results.
node_weights = dict()
form_node_weights(root, line_weights, node_weights)
for tree_node, weight_and_size in node_weights.items():
    print(tree_node, weight_and_size)

[1 : statement : [1, 2, 3] : []] (0.5, 3)
[2 : struct : [4, 5, 6, 7, 8, 9] : []] (0.75, 6)
[7 : statement : [10, 11, 12, 13] : []] (0.75, 4)
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] (0.875, 12)
[9 : statement : [26, 27] : []] (0.75, 2)
[3 : func-dateUpdate : [] : [7, 8, 9]] (0.8333333333333334, 18)
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] (0.7777777777777778, 9)
[11 : if : [37, 38, 39, 40] : []] (1.125, 4)
[12 : statement : [41, 42] : []] (0.75, 2)
[4 : func-noOfDays : [] : [10, 11, 12]] (0.8666666666666667, 15)
[13 : statement : [43, 44, 45] : []] (0.6666666666666666, 3)
[14 : if : [46, 47, 48, 49, 50] : []] (1.1, 5)
[15 : statement : [50, 51] : []] (0.75, 2)
[5 : func-isLeapYear : [] : [13, 14, 15]] (0.9, 10)
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] (0.8, 10)
[0 : root : [] : [1, 2, 3, 4, 5, 6]] (0.8225806451612904, 62)


In [54]:
# For every subject workbook: walk its sheets (one per segment) in order,
# update the node counts with that segment's major lines, and store a snapshot
# of the counts after each segment. Counts are initialised once per workbook,
# so each snapshot is cumulative over the segments seen so far.
for file in os.listdir(os.path.join(SEGMENTS_BASE_DIR)):
    print(file)
    # sheet_name=None loads every sheet into a dict {sheet name: DataFrame}.
    # The former spelling `sheetname` is rejected by current pandas releases.
    xls = pd.read_excel(os.path.join(SEGMENTS_BASE_DIR,file),sheet_name=None)
    segment_status = []
    node_counts = {}
    init_node_counts(root,node_counts)
    for segment,data in xls.items():
        major_lines = get_major_components(data)
        print(major_lines)
        update_node_counts(root,node_counts,major_lines,node_weights)
        # Copy the dict: node_counts keeps being mutated for later segments.
        segment_status.append(dict(node_counts))
    # NOTE: file[:-4] removes only "xlsx" and keeps the dot, giving names such
    # as "<subject>._segment_status.pkl". Later cells strip that 20-character
    # suffix with file[:-20], so the naming is kept as is.
    with open(os.path.join(OUTPUT_DIR,file[:-4]+"_segment_status.pkl"),'wb') as f:
        pickle.dump(segment_status,f)

final_matrix_newfix_Gargi3.xlsx
[1, 2, 3, 4, 5, 10, 11, 14]
[9, 11, 17, 18, 19, 20]
[10, 11, 12, 13, 18]
[10, 11, 12, 13]
[12, 13, 14, 20]
[12, 13, 14, 16, 17]
[12, 13, 20, 28, 32, 37]
[18, 19, 22, 25, 27, 28, 30]
[22, 28, 29, 30, 31, 32, 34]
[28, 30, 31, 32, 35, 36, 37, 39]
[3, 9, 23, 25, 27, 28, 29, 30, 31]
[26, 29, 31, 34, 35, 36, 37]
[29, 32, 33, 34, 35, 38, 40]
[45, 46, 54, 55, 56, 57, 58]
[13, 14, 15, 16, 17, 18, 19, 20, 23]
[9, 10, 13, 14, 16, 24]
[30, 31, 33]
[30]
final_matrix_newfix_subham3.xlsx
[12, 13, 17, 23, 37]
[39, 40, 46, 47]
[45, 46, 55, 56, 57, 58, 59]
[55, 56, 57, 58, 59]
[40, 43]
final_matrix_newfix_atrayee3.xlsx
[24, 26, 27, 28]
[14, 24, 31, 33, 34, 37, 40]
[10, 11, 12, 13, 15, 16, 45]
[14, 17, 23, 26, 31, 32, 34, 35, 36, 37, 40]
final_matrix_newfix_rajdeep3.xlsx
[2, 3, 5, 6, 8, 12, 34, 36]
[13, 14, 16]
[14, 15, 18]
[30, 31, 32, 33, 35]
[13, 15, 16, 31, 36, 37, 45]
[30, 31, 53, 54, 55]
[25, 41, 43, 45, 54, 55, 56]
[46, 54, 56, 57]
[14, 28, 52, 55, 56, 57]
[12, 15, 



In [55]:
# Dump every saved per-subject pickle: one block per segment, one line per node.
# pickle.load executes code embedded in the file, so only run this on pickles
# produced by this notebook.
for pkl_name in os.listdir(OUTPUT_DIR):
    print("Subject - ",pkl_name)
    with open(PT(OUTPUT_DIR, pkl_name),'rb') as fh:
        seg_stats = pickle.load(fh)
    for seg_no, seg_stat in enumerate(seg_stats, start=1):
        print("Segment : ",str(seg_no))
        for node, ct in seg_stat.items():
            print(node,ct)
    print("==============================================================================")

Subject -  final_matrix_newfix_atrayee3._segment_status.pkl
Segment :  1
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 2.028240740740741
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 1.625
[7 : statement : [10, 11, 12, 13] : []] 0
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 1
[9 : statement : [26, 27] : []] 1
[4 : func-noOfDays : [] : [10, 11, 12]] 0.7777777777777778
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 1
[11 : if : [37, 38, 39, 40] : []] 0
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.0
[13 : statement : [43, 44, 45] : []] 0
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0
Segment :  2
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 4.406481481481482
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 2.5
[7

[7 : statement : [10, 11, 12, 13] : []] 5
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 4
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 1.5555555555555556
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 2
[11 : if : [37, 38, 39, 40] : []] 0
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.6666666666666666
[13 : statement : [43, 44, 45] : []] 1
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 1
Segment :  6
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 13.42888888888889
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 2
[3 : func-dateUpdate : [] : [7, 8, 9]] 7.25
[7 : statement : [10, 11, 12, 13] : []] 5
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 4
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 3.4583333333333335
[10 : statement : [28, 29, 30, 31

# Tree Graph Generation

In [56]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

In [57]:
import networkx as nx
from networkx.drawing.nx_agraph import write_dot, graphviz_layout
# Empty undirected graph; `G` is rebound to the DiGraph returned by
# read_and_create_graph in a later cell before it is drawn.
G = nx.Graph()

In [58]:
def read_and_create_graph(file,node_weights,all_nodes):
    """Build a networkx DiGraph plus drawing labels from a tree description file.

    file         -- path of the tree file; each non-blank line is either
                    "CREATE <name> <num_lines> <line numbers...>" or
                    "INSERT <parent_num> <child_num>"
    node_weights -- mapping node object -> (weight, line count); only the
                    weight is used, for the label
    all_nodes    -- mapping node number -> node object, used to look up the
                    weight of the node being created

    Graph nodes are numbered in creation order. Blank or whitespace-only lines
    are skipped; any other unknown command prints an error and stops reading.

    Returns (G, terminal_nodes, ct, labels) where terminal_nodes lists the
    numbers of nodes declared with at least one line, ct is the number of
    nodes created, and labels maps node number -> multi-line label text
    ("name", optional "first - last" line range, "Weight: x.xx").
    """
    G = nx.DiGraph()
    terminal_nodes = []
    labels = {}
    ct = 0
    with open(file) as f:
        for line in f:
            # split() drops the newline and tolerates repeated / trailing
            # whitespace, so statements[-1] really is the last line number.
            statements = line.split()
            if not statements:
                continue
            if statements[0] == "CREATE":
                label = statements[1]
                G.add_node(ct)
                num_lines = int(statements[2])

                if num_lines != 0:
                    terminal_nodes.append(ct)
                    # Show the node's line range as "first - last".
                    label += '\n'
                    label += statements[3]
                    label += " - "
                    label += statements[-1]
                label += '\nWeight: '
                label += '{0:.2f}'.format(node_weights[all_nodes[ct]][0])
                labels[ct] = label
                ct += 1
            elif statements[0] == "INSERT":
                parent = int(statements[1])
                child = int(statements[2])
                G.add_edge(parent,child)
            else:
                print("Error: Invalid command!!")
                break
    return G, terminal_nodes, ct, labels

In [59]:
# Build the drawable graph, then give nodes that own lines one colour and
# purely structural nodes another.
G, terminal_nodes, total_nodes, labels = read_and_create_graph(
    PT(TREE_BASE_DIR, CODE, CODE + ".tree"), node_weights, all_nodes
)
node_color = [
    '#dfe7f9' if idx not in terminal_nodes else '#dae5d5'
    for idx in range(total_nodes)
]

In [60]:
# Draw the bare tree (names, line ranges and weights) and save it as <CODE>.png.
plt.figure(figsize=(20, 15))
pos = graphviz_layout(G, prog='dot')
nx.draw(
    G,
    pos,
    arrows=False,
    node_size=20000,
    node_color=node_color,
    labels=labels,
    font_size=20,
)
plt.suptitle("Code {} Tree".format(CODE), fontsize=36)
plt.savefig(PT(TREE_BASE_DIR, CODE, VISUAL_DIR, CODE + ".png"))



In [61]:
# One legend entry per colour; the last colour also stands for every larger
# count, hence the trailing "+" on its label.
num_colors = len(color_map)
patches = [
    mpatches.Patch(color=color_map[idx], label=str(idx))
    for idx in range(num_colors - 1)
]
patches.append(
    mpatches.Patch(color=color_map[num_colors - 1], label=str(num_colors - 1) + "+")
)

In [62]:
# Render one coloured tree per (subject, segment) and save it as
# <VISUAL_DIR>/<subject>/<segment number>.png.

# The layout depends only on the tree, so compute it once instead of once per figure.
pos=graphviz_layout(G, prog='dot')

for file in os.listdir(os.path.join(OUTPUT_DIR)):
    # Strip the 20-character "._segment_status.pkl" suffix to get the subject name.
    sub_name = file[:-20]
    os.makedirs(os.path.join(TREE_BASE_DIR,CODE,VISUAL_DIR,sub_name), exist_ok=True)
    print("Subject - ",sub_name)
    # pickle.load executes code embedded in the file; only load pickles
    # written by this notebook.
    with open(os.path.join(OUTPUT_DIR,file),'rb') as f:
        seg_stats = pickle.load(f)
    for i,seg_stat in enumerate(seg_stats):
        # Fresh copy of the base labels, each extended with a new line for the count.
        seg_labels = {node_num: label + "\n" for node_num, label in labels.items()}
        segment_num = str(i+1)
        print("Segment : ",segment_num)
        for node,ct in seg_stat.items():
            node_num = node.node_num
            seg_labels[node_num] += "Count : " + '{0:.2f}'.format(ct)
            # Colour by the integer part of the count; anything beyond the
            # palette falls back to its last colour.
            node_color[node_num] = color_map.get(int(ct), color_map[len(color_map)-1])
            print(node,ct)
        fig = plt.figure(figsize=(20,15))
        nx.draw(G,pos,arrows=False,node_size=20000,node_color=node_color,labels=seg_labels,font_size=22)
        plt.suptitle("Segment %d" % (i+1),fontsize = 36)
        plt.legend(handles=patches,fontsize=24)
        plt.savefig(os.path.join(TREE_BASE_DIR,CODE,VISUAL_DIR,sub_name,segment_num+".png"))
        # Show the figure now and release it; otherwise every figure of every
        # subject stays open until the cell finishes and memory keeps growing.
        plt.show()
        plt.close(fig)
    print("==============================================================================")

Subject -  final_matrix_newfix_atrayee3
Segment :  1
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 2.028240740740741
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 1.625
[7 : statement : [10, 11, 12, 13] : []] 0
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 1
[9 : statement : [26, 27] : []] 1
[4 : func-noOfDays : [] : [10, 11, 12]] 0.7777777777777778
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 1
[11 : if : [37, 38, 39, 40] : []] 0
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.0
[13 : statement : [43, 44, 45] : []] 0
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0




Segment :  2
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 4.406481481481482
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 2.5
[7 : statement : [10, 11, 12, 13] : []] 0
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 2
[9 : statement : [26, 27] : []] 1
[4 : func-noOfDays : [] : [10, 11, 12]] 2.6805555555555554
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 2
[11 : if : [37, 38, 39, 40] : []] 1
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.0
[13 : statement : [43, 44, 45] : []] 0
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0
Segment :  3
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 6.360648148148147
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 4.125
[7 : statement : [10, 11, 12, 13] : []] 1
[8 : if : [14, 15, 1

Segment :  9
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 17.156203703703707
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 1
[3 : func-dateUpdate : [] : [7, 8, 9]] 6.625
[7 : statement : [10, 11, 12, 13] : []] 3
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 5
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 5.763888888888889
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 5
[11 : if : [37, 38, 39, 40] : []] 1
[12 : statement : [41, 42] : []] 1
[5 : func-isLeapYear : [] : [13, 14, 15]] 2.4333333333333336
[13 : statement : [43, 44, 45] : []] 2
[14 : if : [46, 47, 48, 49, 50] : []] 1
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 4
Segment :  10
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 19.184444444444445
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 1
[3 : func-dateUpdate : [] : [7, 8, 9]] 8.25
[7 : statement : [10, 11, 12, 13] : []] 4
[8

Segment :  4
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 7.2374074074074075
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 3.25
[7 : statement : [10, 11, 12, 13] : []] 1
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 2
[9 : statement : [26, 27] : []] 1
[4 : func-noOfDays : [] : [10, 11, 12]] 0.7777777777777778
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 1
[11 : if : [37, 38, 39, 40] : []] 0
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 4.283333333333333
[13 : statement : [43, 44, 45] : []] 2
[14 : if : [46, 47, 48, 49, 50] : []] 2
[15 : statement : [50, 51] : []] 1
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0
Segment :  5
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 9.802407407407408
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 3.25
[7 : statement : [10, 11, 12, 13] : []] 1
[8 : 

Segment :  6
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 9.395833333333334
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 2
[3 : func-dateUpdate : [] : [7, 8, 9]] 8.875
[7 : statement : [10, 11, 12, 13] : []] 6
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 5
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 0.0
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 0
[11 : if : [37, 38, 39, 40] : []] 0
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.0
[13 : statement : [43, 44, 45] : []] 0
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0
Segment :  7
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 12.399074074074074
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 2
[3 : func-dateUpdate : [] : [7, 8, 9]] 10.5
[7 : statement : [10, 11, 12, 13] : []] 7
[8 : if : [14, 15, 16, 17, 18, 19

Segment :  17
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 31.203425925925927
[1 : statement : [1, 2, 3] : []] 2
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 4
[3 : func-dateUpdate : [] : [7, 8, 9]] 18.625
[7 : statement : [10, 11, 12, 13] : []] 9
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 11
[9 : statement : [26, 27] : []] 3
[4 : func-noOfDays : [] : [10, 11, 12]] 10.722222222222221
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 8
[11 : if : [37, 38, 39, 40] : []] 4
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 1.7666666666666666
[13 : statement : [43, 44, 45] : []] 1
[14 : if : [46, 47, 48, 49, 50] : []] 1
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 1
Segment :  18
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 31.877500000000005
[1 : statement : [1, 2, 3] : []] 2
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 4
[3 : func-dateUpdate : [] : [7, 8, 9]] 18.625
[7 : statement : [10, 11, 12, 13] : []

Segment :  3
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 4.957407407407407
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 3.25
[7 : statement : [10, 11, 12, 13] : []] 2
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 2
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 1.9027777777777777
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 1
[11 : if : [37, 38, 39, 40] : []] 1
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.6666666666666666
[13 : statement : [43, 44, 45] : []] 1
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0
Segment :  4
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 6.311574074074073
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 4.875
[7 : statement : [10, 11, 12, 13] : []] 3
[8 :

Segment :  2
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 2.0833333333333335
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 2.5
[7 : statement : [10, 11, 12, 13] : []] 1
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 2
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 0.0
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 0
[11 : if : [37, 38, 39, 40] : []] 0
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.0
[13 : statement : [43, 44, 45] : []] 0
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0
Segment :  3
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 3.6583333333333337
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 2.5
[7 : statement : [10, 11, 12, 13] : []] 1
[8 : if : [14, 15, 16, 17, 18, 19, 

Segment :  7
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 16.75712962962963
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 2
[3 : func-dateUpdate : [] : [7, 8, 9]] 8.125
[7 : statement : [10, 11, 12, 13] : []] 5
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 5
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 4.986111111111111
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 4
[11 : if : [37, 38, 39, 40] : []] 1
[12 : statement : [41, 42] : []] 1
[5 : func-isLeapYear : [] : [13, 14, 15]] 3.1833333333333336
[13 : statement : [43, 44, 45] : []] 2
[14 : if : [46, 47, 48, 49, 50] : []] 1
[15 : statement : [50, 51] : []] 1
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 1
Segment :  8
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 20.64620370370371
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 2
[3 : func-dateUpdate : [] : [7, 8, 9]] 8.125
[7 : statement : [10, 11, 12, 13] : []] 5
[8 :

Segment :  18
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 44.88898148148149
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 2
[3 : func-dateUpdate : [] : [7, 8, 9]] 11.375
[7 : statement : [10, 11, 12, 13] : []] 6
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 7
[9 : statement : [26, 27] : []] 1
[4 : func-noOfDays : [] : [10, 11, 12]] 17.930555555555557
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 11
[11 : if : [37, 38, 39, 40] : []] 5
[12 : statement : [41, 42] : []] 5
[5 : func-isLeapYear : [] : [13, 14, 15]] 16.3
[13 : statement : [43, 44, 45] : []] 9
[14 : if : [46, 47, 48, 49, 50] : []] 8
[15 : statement : [50, 51] : []] 2
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 4
Segment :  19
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 46.91722222222223
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 2
[3 : func-dateUpdate : [] : [7, 8, 9]] 13.0
[7 : statement : [10, 11, 12, 13] : []] 7
[8 : if : [14,

Subject -  final_matrix_newfix_subham3
Segment :  1
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 2.3291666666666666
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 1.625
[7 : statement : [10, 11, 12, 13] : []] 1
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 1
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 1.125
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 0
[11 : if : [37, 38, 39, 40] : []] 1
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.0
[13 : statement : [43, 44, 45] : []] 0
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0
Segment :  2
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 4.294166666666667
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 1.625
[7 : statement : [10, 11, 12, 13]

Segment :  2
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 3.024074074074074
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 1
[3 : func-dateUpdate : [] : [7, 8, 9]] 0.75
[7 : statement : [10, 11, 12, 13] : []] 1
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 0
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 1.9027777777777777
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 1
[11 : if : [37, 38, 39, 40] : []] 1
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.0
[13 : statement : [43, 44, 45] : []] 0
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0
Segment :  3
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 5.224074074074074
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 1
[3 : func-dateUpdate : [] : [7, 8, 9]] 1.5
[7 : statement : [10, 11, 12, 13] : []] 2
[8 : if : [14, 15, 16

Segment :  6
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 10.064814814814817
[1 : statement : [1, 2, 3] : []] 2
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 2
[3 : func-dateUpdate : [] : [7, 8, 9]] 6.5
[7 : statement : [10, 11, 12, 13] : []] 4
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 4
[9 : statement : [26, 27] : []] 0
[4 : func-noOfDays : [] : [10, 11, 12]] 1.5555555555555556
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 2
[11 : if : [37, 38, 39, 40] : []] 0
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.0
[13 : statement : [43, 44, 45] : []] 0
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 1
Segment :  7
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 12.32888888888889
[1 : statement : [1, 2, 3] : []] 2
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 2
[3 : func-dateUpdate : [] : [7, 8, 9]] 6.5
[7 : statement : [10, 11, 12, 13] : []] 4
[8 : if : [14, 15, 16

# Excel Generator

In [63]:
# Summary workbook: one row per subject holding the number of segments and,
# for every node that has children, its count after the last segment.
workbook = xlsxwriter.Workbook(os.path.join(TREE_BASE_DIR,CODE,CODE+"_node_data_wegihted.xlsx"))
worksheet = workbook.add_worksheet()
# XlsxWriter's vertical-centre value is 'vcenter'; 'centre' is a horizontal
# alignment and would leave the header cells vertically unaligned.
cell_format = workbook.add_format({'align': 'center','valign' : 'vcenter'})
worksheet.write('A1',"S.No.",cell_format)
worksheet.write('B1',"Subjects",cell_format)
worksheet.write('C1',"No. of \nSegments",cell_format)
worksheet.set_row(0,30)
worksheet.set_column(2,2,30)

# Header cells for the nodes that have children, starting at the fourth
# column, and a lookup from node number to the column it was given.
j = 0
nodenum_to_col_mapping = {}
for node_num, node in all_nodes.items():
    if len(node.children) == 0:
        continue
    worksheet.write(0,3+j,node.name,cell_format)
    nodenum_to_col_mapping[node_num] = 3+j
    j += 1

for i, file in enumerate(os.listdir(os.path.join(OUTPUT_DIR))):
    # Drop the first 20 characters and the 20-character "._segment_status.pkl"
    # suffix of the file name, keeping only the subject name.
    sub_name = file[20:-20]
    print("Subject - ",sub_name)
    print("Last Segment Details")
    # pickle.load executes code embedded in the file; only load pickles
    # written by this notebook.
    with open(os.path.join(OUTPUT_DIR,file),'rb') as f:
        seg_stats = pickle.load(f)
    # Only the final snapshot of each subject is reported.
    seg_stat = seg_stats[-1]
    num_segs = len(seg_stats)
    row_data = [str(i+1)+".",sub_name,str(num_segs)]
    worksheet.write_row(1+i,0,row_data)
    for node,ct in seg_stat.items():
        print(node, ct)
        if len(node.children) == 0:
            continue
        # Counts are written as text formatted to two decimals.
        worksheet.write(1+i,nodenum_to_col_mapping[node.node_num],'{0:.2f}'.format(ct))
workbook.close()
        

    
    
    

Subject -  atrayee3
Last Segment Details
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 9.363888888888889
[1 : statement : [1, 2, 3] : []] 0
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 0
[3 : func-dateUpdate : [] : [7, 8, 9]] 5.75
[7 : statement : [10, 11, 12, 13] : []] 1
[8 : if : [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] : []] 4
[9 : statement : [26, 27] : []] 2
[4 : func-noOfDays : [] : [10, 11, 12]] 4.583333333333334
[10 : statement : [28, 29, 30, 31, 32, 33, 34, 35, 36] : []] 3
[11 : if : [37, 38, 39, 40] : []] 2
[12 : statement : [41, 42] : []] 0
[5 : func-isLeapYear : [] : [13, 14, 15]] 0.6666666666666666
[13 : statement : [43, 44, 45] : []] 1
[14 : if : [46, 47, 48, 49, 50] : []] 0
[15 : statement : [50, 51] : []] 0
[6 : func-main : [52, 53, 54, 55, 56, 57, 58, 59, 60, 61] : []] 0
Subject -  rajdeep3
Last Segment Details
[0 : root : [] : [1, 2, 3, 4, 5, 6]] 38.175
[1 : statement : [1, 2, 3] : []] 1
[2 : struct : [4, 5, 6, 7, 8, 9] : []] 1
[3 : func-dateUpdate : [] : [7, 8, 9]] 10.75
[7