In [1]:
import numpy as np
import pandas as pd
import os
import pickle

In [72]:
# Identifier of the program being analysed; used both as a sub-folder name
# under TREE_BASE_DIR and as the stem of the "<CODE>.tree" / "<CODE>.png" files.
CODE = "5"
# Base folder that holds the tree description files and per-program sub-folders.
TREE_BASE_DIR = "Tree_Structure/"
# Folder that is listed for the per-subject Excel workbooks (each sheet is
# iterated as one segment).
SEGMENTS_BASE_DIR = "output_fixation/freq_segment/data/"
# Folder where the pickled per-segment node counts are written and re-read.
OUTPUT_DIR = "output_fixation/tree_segment/"
# Sub-folder (under TREE_BASE_DIR/CODE) that receives the rendered PNG plots.
VISUAL_DIR = "visual_plots/"
# Minimum share of a segment's column-1 total a row needs in order to be
# treated as a "major" line by get_major_components.
THRESHOLD = 0.1

In [73]:
# Make sure every folder the notebook writes into exists.
# os.makedirs creates missing parent folders as well (os.mkdir raised when,
# for example, "output_fixation/" itself was absent) and exist_ok=True makes
# the cell safe to re-run. Creating TREE_BASE_DIR/CODE/VISUAL_DIR implicitly
# creates TREE_BASE_DIR/CODE too.
for required_dir in (
    os.path.join(OUTPUT_DIR),
    os.path.join(TREE_BASE_DIR,CODE),
    os.path.join(TREE_BASE_DIR,CODE,VISUAL_DIR),
):
    os.makedirs(required_dir, exist_ok=True)

# Tree Data Generation

In [68]:
class Node:
    """One node of the program-structure tree.

    Attributes:
        node_num: Integer id of the node (its creation index in the tree file).
        name:     Label of the node (e.g. "root", "loop", "statement"); may be None.
        children: Child ``Node`` objects, in insertion order.
        lines:    Source line numbers attached directly to this node.

    Instances keep Python's default identity-based hashing, which the rest of
    the notebook relies on when it uses nodes as dictionary keys.
    """

    def __init__(self,name=None,node_num=0):
        self.node_num = node_num
        self.name = name
        self.children = []
        self.lines = []

    def insert(self,child):
        """Append ``child`` as the last child of this node."""
        self.children.append(child)

    def traverse(self):
        """Print this node and then, recursively, every descendant (pre-order)."""
        print(self)
        for child in self.children:
            child.traverse()

    def __str__(self):
        """Return ``[num : name : lines : child node numbers]``.

        Uses ``str.format`` instead of ``+`` concatenation so a node whose
        name is ``None`` (the constructor default) can still be printed.
        """
        child_nums = [child.node_num for child in self.children]
        return "[{} : {} : {} : {}]".format(
            self.node_num, self.name, self.lines, child_nums
        )

In [69]:
def read_and_create_tree(file):
    """Build a ``Node`` tree from a text file of CREATE / INSERT commands.

    Recognised commands, one per line, whitespace separated:

    * ``CREATE <name> <k> <line_1> ... <line_k>`` creates a node whose number
      is the count of nodes created so far and attaches ``k`` line numbers.
    * ``INSERT <parent> <child>`` makes node ``child`` a child of ``parent``
      (both are node numbers).

    Blank lines are ignored. Tokens are split on any run of whitespace, so
    Windows line endings and repeated or trailing spaces do not corrupt the
    parse. Any other command prints an error and stops reading; whatever was
    built up to that point is still returned.

    Args:
        file: Path of the tree description file.

    Returns:
        The first node created (node number 0).

    Raises:
        IndexError: if no CREATE command was read, or a command refers to a
            token / node number that does not exist.
        ValueError: if a numeric field is not an integer.
    """
    nodes = []
    with open(file) as f:
        for line in f:
            statements = line.split()
            if not statements:
                # Blank (or whitespace-only) line: nothing to do.
                continue
            command = statements[0]
            if command == "CREATE":
                node = Node(statements[1],len(nodes))
                num_lines = int(statements[2])
                # Index explicitly so a too-short line still raises instead
                # of being silently truncated.
                node.lines.extend(int(statements[i+3]) for i in range(num_lines))
                nodes.append(node)
            elif command == "INSERT":
                parent = int(statements[1])
                child = int(statements[2])
                nodes[parent].insert(nodes[child])
            else:
                print("Error: Invalid command!!")
                break
    return nodes[0]

In [70]:
def get_line_to_node_mapping(root,mappings):
    """Fill ``mappings`` with ``line number -> node`` for the tree under ``root``.

    Nodes are visited in pre-order. When a node carries a line that is already
    present in ``mappings``, an error is printed and the rest of that node
    (its remaining lines and its whole subtree) is skipped; siblings that
    follow are still processed. ``mappings`` is modified in place and nothing
    is returned.
    """
    pending = [root]
    while pending:
        node = pending.pop()
        for line_no in node.lines:
            if line_no in mappings:
                print("Error : Line "+str(line_no)+" mapped twice")
                break
            mappings[line_no] = node
        else:
            # No clash on this node: descend. Children are pushed reversed so
            # they are popped (and therefore visited) left to right.
            pending.extend(reversed(node.children))

def init_node_counts(root,counts):
    """Set ``counts[node] = 0`` for ``root`` and every node below it.

    Nodes are entered into ``counts`` in pre-order (parent first, children
    left to right). ``counts`` is modified in place.
    """
    to_visit = [root]
    while to_visit:
        node = to_visit.pop()
        counts[node] = 0
        # Reversed push keeps the left-to-right visiting order on pop.
        to_visit.extend(reversed(node.children))
        
def get_major_components(data, threshold=None):
    """Return the column-0 values of the rows that dominate column 1.

    A row is "major" when its column-1 value divided by the column-1 total is
    at least ``threshold``.

    Args:
        data: 2-D table (anything ``np.array`` accepts, e.g. a DataFrame or a
            list of rows) whose column 0 is the identifier and column 1 the
            weight.
        threshold: Minimum share of the total a row needs. Defaults to the
            notebook-level ``THRESHOLD`` constant when omitted.

    Returns:
        A list with the column-0 value of every major row, in row order.
        An empty table, or one whose column-1 total is zero, yields ``[]``.
    """
    if threshold is None:
        # Looked up at call time so edits to the config cell take effect.
        threshold = THRESHOLD
    data = np.array(data)
    if data.size == 0:
        # Nothing to rank (also avoids indexing a 1-D empty array).
        return []
    total = np.sum(data[:,1])
    if total == 0:
        # Every share would be 0/0; no row can be considered major.
        return []
    return [row[0] for row in data if row[1]/total >= threshold]

def update_node_counts(root,node_counts,major_lines):
    """Update the per-node counters for one segment, bottom-up.

    Children are processed before their parent. Then:

    * a node that owns at least one line contained in ``major_lines`` has its
      counter increased by one;
    * a node that owns no lines but has children takes the minimum of its
      children's counters.

    A node with neither lines nor children keeps its current counter (taking
    the minimum of an empty set used to raise ``ValueError``).

    Args:
        root: Node whose subtree is updated; needs ``lines`` and ``children``.
        node_counts: Dict ``node -> count`` containing every node of the
            subtree; modified in place.
        major_lines: Collection of line numbers considered major for the
            current segment.
    """
    for child in root.children:
        update_node_counts(child,node_counts,major_lines)
    if any(line in major_lines for line in root.lines):
        node_counts[root] = node_counts[root] + 1
    if len(root.lines) == 0 and root.children:
        node_counts[root] = min(node_counts[child] for child in root.children)

In [71]:
# Parse "<TREE_BASE_DIR>/<CODE>/<CODE>.tree" into the Node tree used by the
# counting cells below.
tree_path = os.path.join(TREE_BASE_DIR, CODE, CODE + ".tree")
root = read_and_create_tree(tree_path)

In [96]:
# For every workbook in SEGMENTS_BASE_DIR: walk its sheets (one per segment),
# accumulate the node counters across the segments, and pickle one snapshot
# of the counters per segment.
for file in os.listdir(os.path.join(SEGMENTS_BASE_DIR)):
    print(file)
    # sheet_name=None loads every sheet into a dict {sheet name: DataFrame}.
    # The older `sheetname` spelling is no longer accepted by pandas.
    xls = pd.read_excel(os.path.join(SEGMENTS_BASE_DIR,file),sheet_name=None)
    segment_status = []
    # Counters start at zero for each workbook and are carried over from one
    # segment to the next within it.
    node_counts = {}
    init_node_counts(root,node_counts)
    for segment,data in xls.items():
        major_lines = get_major_components(data)
        print(major_lines)
        update_node_counts(root,node_counts,major_lines)
        # Copy the dict so later segments do not overwrite this snapshot.
        segment_status.append(dict(node_counts))
    # NOTE: file[:-4] drops four characters, so a ".xlsx" name keeps a
    # trailing "." in front of "_segment_status.pkl". It is left unchanged
    # because the plotting cell recovers the subject name by slicing off a
    # fixed number of characters from these file names.
    with open(os.path.join(OUTPUT_DIR,file[:-4]+"_segment_status.pkl"),'wb') as f:
        pickle.dump(segment_status,f)

final_matrix_newfix_anshul.xlsx
[6, 7, 8]
[8, 9, 10, 11, 16]
[7, 8, 9, 10]
[7, 8, 16, 17, 18]
[7, 8, 9, 10]
[2, 16, 19]
final_matrix_newfix_harshit.xlsx
[1, 4, 12, 13, 14]
[7, 8, 9, 10]
final_matrix_newfix_koushik.xlsx
[]
[9, 14, 16, 17, 18]
[8]
final_matrix_newfix_shashi.xlsx
[1, 2]
[5, 6, 9, 10]
[6, 7, 16, 17, 18]
[8, 9, 10, 12]
[16]
[7, 8]
final_matrix_newfix_archit.xlsx
[7, 10, 11]
[6, 7, 8]
[10, 11, 12, 16, 17]
[7, 8, 10]
[8, 10, 16]
final_matrix_newfix_farazul.xlsx
[6, 7, 8, 10]
[6, 16]
[8, 9, 10]
[6, 7]
[7, 9, 16]
[7, 9, 13, 16, 17]
final_matrix_newfix_meet.xlsx
[1, 3, 6, 9, 11]
[6, 7]
[4, 9, 11, 15]
[16, 17]
final_matrix_newfix_nikhil.xlsx
[1, 2]
[5, 6, 8]
[17, 18]
[8, 9, 10]
[6, 10, 12]
[11]




In [87]:
# Show the sixth snapshot (index 5) of `segment_status`, i.e. of whichever
# workbook the aggregation loop handled last.
sixth_snapshot = segment_status[5]
for node in sixth_snapshot:
    print(node,sixth_snapshot[node])

[0 : root : [] : [1, 2]] 1
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 1
[3 : statement : [4, 5, 6, 7] : []] 4
[4 : loop : [] : [6, 7]] 1
[6 : statement : [8, 9, 10] : []] 5
[7 : if : [11, 12, 13, 14] : []] 1
[5 : if : [16, 17, 18, 19] : []] 3


In [100]:
# Dump every pickled snapshot list in OUTPUT_DIR to stdout, one subject
# (file) at a time. Unpickling needs the Node class to be defined already,
# because the snapshot dicts are keyed by Node instances.
for file in os.listdir(os.path.join(OUTPUT_DIR)):
    print("Subject - ",file)
    status_path = os.path.join(OUTPUT_DIR,file)
    with open(status_path,'rb') as f:
        seg_stats = pickle.load(f)
    for seg_no,seg_stat in enumerate(seg_stats, start=1):
        print("Segment : ",str(seg_no))
        for node in seg_stat:
            print(node,seg_stat[node])
    print("==============================================================================")

Subject -  final_matrix_newfix_nikhil._segment_status.pkl
Segment :  1
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 0
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 0
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 0
Segment :  2
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 1
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 1
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 0
Segment :  3
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 1
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 1
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 1
Segment :  4
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 0
[3 : stat

# Tree Graph Generation

In [None]:
import matplotlib.pyplot as plt

In [7]:
import networkx as nx
from networkx.drawing.nx_agraph import write_dot, graphviz_layout
# Placeholder empty graph; `G` is rebound further down to the DiGraph
# returned by read_and_create_graph.
G = nx.Graph()

In [58]:
def read_and_create_graph(file):
    """Build a networkx ``DiGraph`` from a file of CREATE / INSERT commands.

    Recognised commands, one per line, whitespace separated:

    * ``CREATE <name> <k> <line_1> ... <line_k>`` adds a node numbered with
      the count of nodes created so far. When ``k`` is non-zero the node is
      recorded as terminal and its label becomes
      ``"<name>\\n<first token after k> - <last token>"``.
    * ``INSERT <parent> <child>`` adds the edge ``parent -> child``.

    Blank lines are ignored and tokens are split on any run of whitespace, so
    trailing spaces or Windows line endings cannot leak into the labels. Any
    other command prints an error and stops reading; what was built so far is
    still returned.

    Args:
        file: Path of the tree description file.

    Returns:
        Tuple ``(G, terminal_nodes, ct, labels)``: the graph, the list of
        node numbers that carry lines, the number of nodes created, and a
        dict ``node number -> label``.
    """
    G = nx.DiGraph()
    terminal_nodes = []
    labels = {}
    ct = 0
    with open(file) as f:
        for line in f:
            statements = line.split()
            if not statements:
                # Blank (or whitespace-only) line: nothing to do.
                continue
            command = statements[0]
            if command == "CREATE":
                label = statements[1]
                G.add_node(ct)
                num_lines = int(statements[2])

                if num_lines != 0:
                    terminal_nodes.append(ct)
                    # Second label row: "<first line> - <last line>".
                    label += '\n' + statements[3] + " - " + statements[-1]

                labels[ct] = label
                ct += 1
            elif command == "INSERT":
                parent = int(statements[1])
                child = int(statements[2])
                G.add_edge(parent,child)
            else:
                print("Error: Invalid command!!")
                break
    return G, terminal_nodes, ct, labels

In [59]:
# The Node tree is loaded from TREE_BASE_DIR/CODE/CODE.tree, whereas this
# cell used TREE_BASE_DIR/CODE.tree. Prefer the per-program folder so both
# cells read the same file, and fall back to the flat location so an existing
# layout keeps working.
graph_tree_path = os.path.join(TREE_BASE_DIR,CODE,CODE+".tree")
if not os.path.exists(graph_tree_path):
    graph_tree_path = os.path.join(TREE_BASE_DIR,CODE+".tree")
G, terminal_nodes, total_nodes, labels = read_and_create_graph(graph_tree_path)
# One colour per node, in the graph's own node order (the order nx.draw uses),
# so the list always lines up with the nodes actually present in G.
terminal_set = set(terminal_nodes)
node_color = [('#dae5d5' if node in terminal_set else '#dfe7f9') for node in G.nodes()]

In [74]:
# Draw the bare structure tree (labels without counts) and save it as
# "<CODE>.png" inside the program's visual-plots folder.
plt.figure(figsize=(20,15))
pos = graphviz_layout(G, prog='dot')
draw_options = dict(
    arrows=False,
    node_size=20000,
    node_color=node_color,
    labels=labels,
    font_size=22,
)
nx.draw(G, pos, **draw_options)
tree_png = os.path.join(TREE_BASE_DIR,CODE,VISUAL_DIR,CODE+".png")
plt.savefig(tree_png)

In [86]:
# For every pickled subject in OUTPUT_DIR, draw one annotated tree per segment
# (node label + "Count : <n>") and save it as
# TREE_BASE_DIR/CODE/VISUAL_DIR/<subject>/<segment number>.png.
STATUS_SUFFIX = "_segment_status.pkl"
# The layout depends only on the graph, so compute it once for all figures.
pos=graphviz_layout(G, prog='dot')
for file in os.listdir(os.path.join(OUTPUT_DIR)):
    if not file.endswith(STATUS_SUFFIX):
        # Not one of the snapshot pickles written by the aggregation loop.
        continue
    # Strip the suffix by name instead of a fixed character count; rstrip
    # also removes the "." that is left in front of the suffix when the
    # source workbook was a ".xlsx" file.
    sub_name = file[:-len(STATUS_SUFFIX)].rstrip(".")
    subject_dir = os.path.join(TREE_BASE_DIR,CODE,VISUAL_DIR,sub_name)
    os.makedirs(subject_dir, exist_ok=True)
    print("Subject - ",sub_name)
    with open(os.path.join(OUTPUT_DIR,file),'rb') as f:
        seg_stats = pickle.load(f)
    for i,seg_stat in enumerate(seg_stats):
        # Fresh copy of the base labels, each with a new line for the count.
        seg_labels = {node_id: label + "\n" for node_id, label in labels.items()}
        segment_num = str(i+1)
        print("Segment : ",segment_num)
        for node,ct in seg_stat.items():
            seg_labels[node.node_num] += "Count : " + str(ct)
            print(node,ct)
        fig = plt.figure(figsize=(20,15))
        nx.draw(G,pos,arrows=False,node_size=20000,node_color=node_color,labels=seg_labels,font_size=22)
        plt.savefig(os.path.join(subject_dir,segment_num+".png"))
        # The PNG on disk is the product; close the figure so dozens of
        # large figures do not stay open in memory.
        plt.close(fig)
    print("==============================================================================")

Subject -  final_matrix_newfix_nikhil
Segment :  1
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 0
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 0
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 0
Segment :  2
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 1
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 1
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 0




Segment :  3
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 1
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 1
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 1
Segment :  4
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 1
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 2
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 1
Segment :  5
[0 : root : [] : [1, 2]] 1
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 1
[3 : statement : [4, 5, 6, 7] : []] 2
[4 : loop : [] : [6, 7]] 1
[6 : statement : [8, 9, 10] : []] 3
[7 : if : [11, 12, 13, 14] : []] 1
[5 : if : [16, 17, 18, 19] : []] 1
Segment :  6
[0 : root : [] : [1, 2]] 1
[1 : statement : [1, 2, 3] : []] 1
[2 : func : [] : [3, 4, 5]] 1
[3 : statement : [4, 5, 6, 7] : []] 2
[4 : loop : [] : [6, 7]] 2
[6

Segment :  2
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 0
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 2
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 1
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 1
Segment :  3
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 0
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 2
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 2
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 1
Segment :  4
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 0
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 3
[4 : loop : [] : [6, 7]] 0
[6 : statement : [8, 9, 10] : []] 2
[7 : if : [11, 12, 13, 14] : []] 0
[5 : if : [16, 17, 18, 19] : []] 1
Segment :  5
[0 : root : [] : [1, 2]] 0
[1 : statement : [1, 2, 3] : []] 0
[2 : func : [] : [3, 4, 5]] 0
[3 : statement : [4, 5, 6, 7] : []] 4
[4 : loop : [] : [6, 7]] 0
[6