In [1]:
import numpy as np
import pandas as pd
import os
import pickle
import xlsxwriter
from os.path import join as PT

In [24]:
# Identifier of the program being analysed; it names the sub-directories and
# files used throughout this notebook.
CODE = "24"
# Directory holding "<CODE>/<CODE>.tree" and receiving the generated workbook.
TREE_BASE_DIR = "Tree_Structure/"
# Directory containing the per-subject segment workbooks for this program.
SEGMENTS_BASE_DIR = "output_fixation/{}/freq_segment/data/".format(CODE)
# Minimum share of a segment's column-1 total for a row to count as "major".
THRESHOLD = 0.05

In [25]:
# Make sure the per-program directory exists. makedirs also creates a missing
# TREE_BASE_DIR parent and, with exist_ok=True, is a no-op when the directory
# is already there (no separate exists() check, so no check-then-create race).
os.makedirs(os.path.join(TREE_BASE_DIR,CODE), exist_ok=True)

In [26]:
class Node:
    """One node of the program-structure tree.

    Attributes (all set in __init__):
        node_num: integer identifier of the node.
        name:     label of the node (may be None when not supplied).
        children: list of child Node objects, in insertion order.
        lines:    list of source line numbers attached to this node.
    """

    def __init__(self,name=None,node_num=0):
        self.node_num = node_num
        self.name = name
        self.children = []
        self.lines = []

    def insert(self,child):
        """Append `child` as the last child of this node."""
        self.children.append(child)

    def traverse(self):
        """Print this node and then every descendant, in pre-order."""
        print(self)
        for child in self.children:
            child.traverse()

    def __str__(self):
        """Return "[node_num : name : lines : child node_nums]".

        Uses str.format so that a node whose name is None (the constructor
        default) can still be printed instead of raising TypeError.
        """
        child_nums = [child.node_num for child in self.children]
        return "[{} : {} : {} : {}]".format(
            self.node_num, self.name, self.lines, child_nums)

In [27]:
def read_and_create_tree(file):
    """Build a tree of Node objects from a text description and return its root.

    Each non-blank line of `file` is one whitespace-separated command:

        CREATE <name> <num_lines> <line_1> ... <line_num_lines>
            creates a node numbered by creation order (0, 1, 2, ...) that
            owns the listed line numbers;
        INSERT <parent_num> <child_num>
            attaches an already created node as a child of another.

    Blank lines are skipped, and any run of whitespace separates tokens, so
    trailing spaces or doubled spaces no longer abort parsing. An unknown
    command prints an error and stops reading; the tree built so far is
    still returned.

    Returns:
        The first node created (node number 0).

    Raises:
        IndexError: if no node was created, or a command has too few tokens.
        ValueError: if a numeric token is not an integer.
    """
    nodes = []
    with open(file) as f:
        for raw_line in f:
            statements = raw_line.split()
            if not statements:
                # Blank or whitespace-only line: nothing to do.
                continue
            command = statements[0]
            if command == "CREATE":
                node = Node(statements[1],len(nodes))
                num_lines = int(statements[2])
                # Index explicitly so a short line raises IndexError rather
                # than silently yielding fewer line numbers than declared.
                node.lines.extend(int(statements[3+i]) for i in range(num_lines))
                nodes.append(node)
            elif command == "INSERT":
                parent = int(statements[1])
                child = int(statements[2])
                nodes[parent].insert(nodes[child])
            else:
                print("Error: Invalid command!!")
                break
    return nodes[0]

In [28]:
def get_line_to_node_mapping(root,mappings):
    """Fill `mappings` with {line number: owning node} for the tree at `root`.

    Nodes are visited in pre-order. If a node owns a line that is already in
    `mappings`, an error is printed and the rest of that node (its remaining
    lines and its whole subtree) is skipped; other branches are still
    processed. Returns None; the result is written into `mappings`.
    """
    pending = [root]
    while pending:
        current = pending.pop()
        for line_no in current.lines:
            if line_no in mappings:
                print("Error : Line "+str(line_no)+" mapped twice")
                break
            mappings[line_no] = current
        else:
            # No clash in this node: descend. Children are pushed reversed so
            # that they are popped in their original left-to-right order.
            pending.extend(reversed(current.children))

def init_node_counts(root,counts):
    """Set counts[node] = 0 for `root` and each of its descendants (pre-order)."""
    stack = [root]
    while stack:
        node = stack.pop()
        counts[node] = 0
        # Reversed push keeps the left-to-right visiting order of children.
        stack.extend(reversed(node.children))
        
def get_nodes(root,all_nodes):
    """Index the tree at `root` into `all_nodes` as {node_num: node} (pre-order)."""
    to_visit = [root]
    while to_visit:
        node = to_visit.pop()
        all_nodes[node.node_num] = node
        # Reversed push keeps the left-to-right visiting order of children.
        to_visit.extend(reversed(node.children))
        
def get_major_components(data, threshold=None):
    """Return the column-0 values of rows whose column-1 share is large enough.

    Args:
        data: 2-D table (anything np.array accepts, e.g. a DataFrame or a
            list of rows). Column 1 is summed and each row's column-1 value
            is compared against that total.
        threshold: minimum fraction (row value / column total) for a row to
            be kept. Defaults to the module-level THRESHOLD when omitted.

    Returns:
        List of column-0 values of the qualifying rows, in input order. An
        empty table or a zero column total yields an empty list.
    """
    if threshold is None:
        threshold = THRESHOLD
    data = np.array(data)
    if data.size == 0:
        # No rows (or no columns): nothing can be major.
        return []
    total = np.sum(data[:,1])
    if total == 0:
        # Avoid 0/0: with no weight at all, no row reaches the threshold.
        return []
    return [row[0] for row in data if row[1]/total >= threshold]

def update_node_counts(root,node_counts,major_lines,aggregate='min'):
    """Update `node_counts` for one segment, bottom-up over the tree at `root`.

    A node that owns lines gets its count incremented by one when any of its
    lines is in `major_lines`. A node that owns no lines has its count
    replaced by an aggregate of its children's current counts.

    Args:
        root: tree node exposing `.children` and `.lines`.
        node_counts: dict {node: count}, updated in place; must already hold
            an entry for every node in the tree.
        major_lines: container of line numbers considered major.
        aggregate: 'min', 'max' or 'sum' - how a line-less node combines its
            children's counts. The same mode is applied at every level of
            the tree.

    Returns:
        None. An unknown `aggregate` prints an error and leaves the line-less
        node's count unchanged.
    """
    # Children first, so their counts are current when aggregated below.
    # The aggregate mode must be forwarded; otherwise nested line-less nodes
    # would always fall back to the default 'min'.
    for child in root.children:
        update_node_counts(child,node_counts,major_lines,aggregate)

    if any(line in major_lines for line in root.lines):
        node_counts[root] = node_counts[root] + 1

    if len(root.lines) == 0:
        if not root.children:
            # Nothing to aggregate; np.min/np.max would raise on an empty array.
            return
        aggregators = {'min': np.min, 'max': np.max, 'sum': np.sum}
        func = aggregators.get(aggregate)
        if func is None:
            print("Error : Invalid aggregate!!")
            return
        cts_child = np.array([node_counts[child] for child in root.children])
        node_counts[root] = func(cts_child)

In [29]:
# Parse "<TREE_BASE_DIR>/<CODE>/<CODE>.tree" and index every node by its number.
root = read_and_create_tree(os.path.join(TREE_BASE_DIR, CODE, "{}.tree".format(CODE)))
all_nodes = dict()
get_nodes(root, all_nodes)
all_nodes

{0: <__main__.Node at 0x7faf4c2d5630>,
 1: <__main__.Node at 0x7faf4c2b83c8>,
 2: <__main__.Node at 0x7faf4c2d5780>,
 3: <__main__.Node at 0x7faf4c2d5470>,
 4: <__main__.Node at 0x7faf4c2d56a0>,
 5: <__main__.Node at 0x7faf4c2d5a20>,
 6: <__main__.Node at 0x7faf4c2d5860>,
 7: <__main__.Node at 0x7faf4c2b6f98>,
 8: <__main__.Node at 0x7faf4c2b6dd8>}

In [30]:
# Print the tree in pre-order, one node per line, formatted as
# "[node_num : name : lines : child node_nums]".
root.traverse()

[0 : root : [] : [1, 2, 3]]
[1 : statement : [1, 2] : []]
[2 : func-binary_to_gray : [] : [4, 5, 6, 7]]
[4 : if : [5, 6] : []]
[5 : statement : [7, 8] : []]
[6 : if : [9, 10] : []]
[7 : statement : [11, 12] : []]
[3 : func-main : [] : [8]]
[8 : statement : [14, 15, 16, 17, 18] : []]


In [31]:
def get_imp_nodes(file):
    """Read whitespace-separated integer node numbers from a text file.

    Numbers may be spread over any number of lines. Blank lines, trailing
    spaces and repeated separators are ignored (splitting on a single space
    would produce empty tokens that int() rejects).

    Returns:
        List of ints in file order.

    Raises:
        ValueError: if a token is not an integer.
    """
    imp_nodes = []
    with open(file) as f:
        for line in f:
            imp_nodes.extend(int(token) for token in line.split())
    return imp_nodes
# Node numbers to report on, read from "<TREE_BASE_DIR>/<CODE>/<CODE>_imp_nodes.txt".
# NOTE(review): the recorded run of this cell failed with FileNotFoundError, so
# that file has to be created before the notebook can run top to bottom.
IMP_NODES = get_imp_nodes(os.path.join(TREE_BASE_DIR, CODE, "{}_imp_nodes.txt".format(CODE)))
print(IMP_NODES)

FileNotFoundError: [Errno 2] No such file or directory: 'Tree_Structure/24/24_imp_nodes.txt'

In [23]:
# Write "<TREE_BASE_DIR>/<CODE>/<CODE>_node_data.xlsx": one row per subject file
# with serial number, subject name, number of segments, and the min/max/sum
# aggregated counts of every node listed in IMP_NODES.
# NOTE(review): relies on root, all_nodes and IMP_NODES from earlier cells; the
# recorded execution counts are out of order, so confirm with Restart & Run All.
workbook = xlsxwriter.Workbook(os.path.join(TREE_BASE_DIR,CODE,CODE+"_node_data.xlsx"))
worksheet = workbook.add_worksheet()
# XlsxWriter's keyword for vertical centring is 'vcenter'; 'centre' is a
# horizontal alignment value, so it did not centre the merged headers vertically.
cell_format = workbook.add_format({'align': 'center','valign' : 'vcenter'})
worksheet.merge_range("A1:A2","S.No.",cell_format)
worksheet.merge_range("B1:B2","Subjects",cell_format)
worksheet.merge_range("C1:C2","No. of \nSegments",cell_format)

# Header: each important node gets a merged three-column title (row 0) above
# its "min" / "max" / "sum" sub-columns (row 1), starting at column 3.
for j, node in enumerate(IMP_NODES):
    cell_data = all_nodes[node].name
    if len(all_nodes[node].lines) != 0:
        # Append the node's first and last line numbers, e.g. "if (5-6)".
        cell_data += ' ('+ str(all_nodes[node].lines[0])+"-" +str(all_nodes[node].lines[-1])+')'
    worksheet.merge_range(0,3+j*3,0,5+j*3,cell_data,cell_format)
    worksheet.write_row(1,3+j*3,["min","max","sum"])

# NOTE(review): os.listdir returns entries in arbitrary order, so the S.No.
# assigned to each subject can differ between runs/machines.
for i,file in enumerate(os.listdir(SEGMENTS_BASE_DIR)):
    # presumably file names have a fixed 20-character prefix and a 5-character
    # extension such as ".xlsx" - TODO confirm against the segment files.
    sub_name = file[20:-5]
    print("Subject - ",sub_name)
    # sheet_name=None loads every sheet into a dict {sheet name: DataFrame};
    # `sheetname` was the old spelling of this keyword and is no longer accepted.
    xls = pd.read_excel(os.path.join(SEGMENTS_BASE_DIR,file),sheet_name=None)

    # One independent count table per aggregation mode.
    nc_min = {}
    nc_max = {}
    nc_sum = {}
    init_node_counts(root,nc_min)
    init_node_counts(root,nc_max)
    init_node_counts(root,nc_sum)
    num_segs = 0
    for segment,data in xls.items():
        major_lines = get_major_components(data)
        print(major_lines)
        update_node_counts(root,nc_min,major_lines,"min")
        update_node_counts(root,nc_max,major_lines,"max")
        update_node_counts(root,nc_sum,major_lines,"sum")
        num_segs += 1
    row_data = [str(i+1)+".",sub_name,num_segs]
    worksheet.write_row(2+i,0,row_data)
    for j, node in enumerate(IMP_NODES):
        ptr = all_nodes[node]
        vals = [nc_min[ptr],nc_max[ptr],nc_sum[ptr]]
        # Counts are written as text, exactly as before.
        vals = [str(x) for x in vals]
        worksheet.write_row(2+i,3+j*3,vals)
workbook.close()

Subject -  anshul
[5, 6, 7, 8]
[8, 9, 10, 11, 14, 16]
[6, 7, 8, 9, 10, 15, 16]
[7, 8, 10, 12, 13, 16, 17, 18]
[7, 8, 9, 10, 13]
[2, 3, 4, 6, 7, 13, 15, 16, 17, 19]
Subject -  harshit
[1, 4, 12, 13, 14]
[6, 7, 8, 9, 10]
Subject -  koushik
[]
[8, 9, 11, 14, 16, 17, 18]
[8]
Subject -  shashi
[1, 2]
[5, 6, 7, 9, 10]
[6, 7, 16, 17, 18]
[7, 8, 9, 10, 11, 12]
[10, 16]
[7, 8]
Subject -  archit
[5, 6, 7, 9, 10, 11, 12]
[6, 7, 8, 10]
[8, 10, 11, 12, 15, 16, 17]
[6, 7, 8, 10, 13]
[8, 10, 16]
Subject -  farazul
[4, 5, 6, 7, 8, 10, 11]
[4, 5, 6, 7, 8, 14, 16]
[6, 8, 9, 10, 11, 12, 13]
[3, 6, 7]
[6, 7, 8, 9, 12, 13, 14, 16]
[7, 9, 13, 16, 17]
Subject -  meet
[1, 3, 6, 9, 11]
[3, 5, 6, 7]
[4, 9, 11, 15]
[16, 17]
Subject -  nikhil
[1, 2]
[5, 6, 7, 8, 9, 10]
[12, 13, 16, 17, 18, 19]
[5, 6, 7, 8, 9, 10, 12]
[6, 10, 11, 12, 17]
[11]


