In [2]:
# parameters
# File names consumed and produced by the cells below.

# input
# Edge-list file read by network_builder(); each usable line has three
# tab-separated fields: "<name>:<type>", "<name>:<type>", integer weight.
network = 'dados-brutos-tomcat.csv.network'
# Labels file read by get_dimension() and network_labeling(); each usable
# line has two tab-separated fields: node key, comma-separated floats.
reg_data = 'dados-brutos-tomcat.csv.labels'

# output
# Path handed to SimpleRegularizer.saveModel(); any existing file is replaced.
reg_model = 'dados-brutos-tomcat.csv.reg.model'

In [12]:
import numpy as np
import os
import random
import sys
from os import listdir
from os.path import isfile, join
 

def network_labeling(graph, labels):
    """Flag the nodes listed in a labels file and load their vectors.

    Args:
        graph: dict mapping node keys to node objects that provide
            ``setLabeled`` and ``setF``.
        labels: path of a text file. A usable line has exactly two
            tab-separated fields: a node key and comma-separated floats.
            Lines with any other field count are skipped.

    Returns:
        None. Matching nodes in ``graph`` are updated in place; keys that
        are not in ``graph`` are ignored.

    Raises:
        ValueError: if the vector field of a two-field line cannot be
            parsed as floats (raised even when the key is not in ``graph``).
    """
    print('Reading '+labels)
    with open(labels) as handle:
        for row in handle:
            fields = row.split('\t')
            if len(fields) != 2:
                continue
            key = fields[0].strip()
            # Parse before the membership test so malformed vectors always surface.
            values = [float(token) for token in fields[1].split(',')]
            if key in graph:
                target = graph[key]
                target.setLabeled(True)
                target.setF(np.array(values))
    print('Reading '+labels+': OK')
    
def get_dimension(labels):
    """Return the vector length found on the first usable line of a labels file.

    A usable line has exactly two tab-separated fields; the second field is
    split on commas and parsed as floats, and the number of values is
    returned. Only the first such line is inspected.

    Args:
        labels: path of the labels file.

    Returns:
        The number of comma-separated values on the first usable line, or
        ``-1`` when the file has no line with exactly two fields.

    Raises:
        ValueError: if the first usable line's values are not valid floats.
    """
    with open(labels) as handle:
        for row in handle:
            fields = row.split('\t')
            if len(fields) != 2:
                continue
            # Still convert to float so malformed input fails here.
            return len([float(token) for token in fields[1].split(',')])
    return -1

def network_builder(network, labels, directed=False):
    """Build a graph of ``Node`` objects from an edge-list file and label it.

    Args:
        network: path of the edge-list file. A usable line has exactly three
            tab-separated fields: ``name:type`` of the source,
            ``name:type`` of the target, and an integer weight. Lines with a
            different field count are skipped.
        labels: path of the labels file, used to size the initial zero
            vectors (via ``get_dimension``) and to label nodes afterwards
            (via ``network_labeling``).
        directed: when falsy, every edge is also registered in the reverse
            direction.

    Returns:
        dict mapping ``"name:type"`` keys to ``Node`` instances.

    Raises:
        IndexError: if an endpoint field contains no ``:``.
        ValueError: if the weight field is not an integer literal.
    """
    graph = {}
    node_total = 0
    edge_total = 0
    dimensions = get_dimension(labels)
    print('Reading '+network)
    with open(network) as handle:
        for row in handle:
            fields = row.split('\t')
            if len(fields) != 3:
                continue

            # Parse both endpoints and then the weight before touching the graph.
            endpoints = []
            for raw in fields[:2]:
                parts = raw.split(':')
                endpoints.append((parts[0].strip(), parts[1].strip()))
            weight = int(fields[2].strip())

            # Create any endpoint not seen before, starting from a zero vector.
            keys = []
            for name, kind in endpoints:
                key = name+":"+kind
                if key not in graph:
                    fresh = Node(name, kind)
                    fresh.setF([0]*dimensions)
                    graph[key] = fresh
                    node_total += 1
                keys.append(key)
            source_key, target_key = keys
            source = graph[source_key]
            target = graph[target_key]

            source.addOut(target_key, weight)
            target.addIn(source_key, weight)
            if not directed:
                # Undirected: mirror the edge.
                source.addIn(target_key, weight)
                target.addOut(source_key, weight)

            # Counts accepted lines, not distinct edges.
            edge_total += 1

    network_labeling(graph, labels)

    print('Reading '+network+': OK')
    print('#Nodes: '+str(node_total))
    print('#Edges: '+str(edge_total))

    return graph



    
    
import numpy as np
class Node:
    """A graph vertex with weighted in/out neighbor maps, a label flag and a vector.

    Degree statistics are computed lazily and cached in ``self.stats``; the
    cache entries for a direction are discarded whenever a neighbor is added
    in that direction, so the getters never return stale values.
    """

    def __init__(self, nodeName, nodeType):
        """Create an unlabeled node with no neighbors and no vector."""
        self.nodeName = nodeName
        self.nodeType = nodeType
        self.inNeighbors = {}   # neighbor key -> weight of the incoming edge
        self.outNeighbors = {}  # neighbor key -> weight of the outgoing edge
        self.labeled = False
        self.f = None
        self.stats = {}         # lazily filled cache of degree statistics

    def addIn(self, nodeKey, weight):
        """Register (or overwrite) an incoming edge from ``nodeKey``."""
        self.inNeighbors[nodeKey] = weight
        # The neighbor map changed, so cached in-degree values are no longer valid.
        self.stats.pop('inDegree', None)
        self.stats.pop('weightedInDegree', None)

    def addOut(self, nodeKey, weight):
        """Register (or overwrite) an outgoing edge to ``nodeKey``."""
        self.outNeighbors[nodeKey] = weight
        # The neighbor map changed, so cached out-degree values are no longer valid.
        self.stats.pop('outDegree', None)
        self.stats.pop('weightedOutDegree', None)

    def getIn(self):
        """Return the dict of incoming neighbors (key -> weight)."""
        return self.inNeighbors

    def getOut(self):
        """Return the dict of outgoing neighbors (key -> weight)."""
        return self.outNeighbors

    def getInDegree(self):
        """Return the number of incoming neighbors as a float."""
        if 'inDegree' not in self.stats:
            self.stats['inDegree'] = float(len(self.getIn()))
        return self.stats['inDegree']

    def getOutDegree(self):
        """Return the number of outgoing neighbors as a float."""
        if 'outDegree' not in self.stats:
            self.stats['outDegree'] = float(len(self.getOut()))
        return self.stats['outDegree']

    def getWeightedInDegree(self):
        """Return the sum of incoming edge weights as a float."""
        if 'weightedInDegree' not in self.stats:
            self.stats['weightedInDegree'] = sum(self.getIn().values(), float(0))
        return self.stats['weightedInDegree']

    def getWeightedOutDegree(self):
        """Return the sum of outgoing edge weights as a float."""
        if 'weightedOutDegree' not in self.stats:
            self.stats['weightedOutDegree'] = sum(self.getOut().values(), float(0))
        return self.stats['weightedOutDegree']

    def setLabeled(self, b):
        """Set the label flag."""
        self.labeled = b

    def isLabeled(self):
        """Return the label flag."""
        return self.labeled

    def setF(self, f):
        """Store ``f`` as a NumPy array."""
        self.f = np.array(f)

    def getF(self):
        """Return the stored vector (``None`` until ``setF`` is called)."""
        return self.f


class SimpleRegularizer:
    """Iteratively averages neighbor vectors over a graph, holding labeled nodes fixed.

    ``graph`` is a dict mapping node keys to node objects that provide
    ``isLabeled``, ``getIn``, ``getOutDegree``, ``getF`` and ``setF``.
    """

    def __init__(self, graph, min_iterations, max_iterations, convergence):
        """Store the graph and the stopping parameters used by ``walking``.

        Args:
            graph: dict of node key -> node object.
            min_iterations: the loop never stops before the iteration count
                exceeds this value.
            max_iterations: once past ``min_iterations``, stop when the
                iteration count reaches this value.
            convergence: once past ``min_iterations``, stop when the summed
                loss of a sweep falls below this value.
        """
        self.graph = graph
        self.min_iterations = min_iterations
        self.max_iterations = max_iterations
        self.convergence = convergence

    def walking(self):
        """Run propagation sweeps until a stopping rule fires, then update labeled nodes.

        Each sweep visits every node in a freshly shuffled order (using the
        global ``random`` state) and prints the summed loss. After the loop,
        labeled nodes are propagated once with ``lastIteration=True``.
        """
        nodes = list(self.graph.keys())
        iteration = 1
        while True:
            loss = 0
            random.shuffle(nodes)
            for nodeKey in nodes:
                loss += self.propagate(nodeKey)

            print('Iteration '+str(iteration)+" | Loss="+str(loss))
            # Both stopping rules only apply after min_iterations has been exceeded.
            if iteration > self.min_iterations and (
                    iteration >= self.max_iterations or loss < self.convergence):
                break
            iteration += 1

        # last iteration for labeled data
        for nodeKey in nodes:
            if self.graph[nodeKey].isLabeled():
                self.propagate(nodeKey, lastIteration=True)

    def propagate(self, nodeKey, lastIteration=False):
        """Replace a node's vector with the mean of its scaled in-neighbor vectors.

        Each in-neighbor contributes its vector divided by a penalty: 1.0 for
        labeled neighbors, otherwise the neighbor's out-degree (at least 1.0).

        Args:
            nodeKey: key of the node to update.
            lastIteration: when false, labeled nodes are left untouched.

        Returns:
            The sum of absolute differences between the old and new vector,
            or 0 when the node is skipped (labeled and not the last
            iteration, or no in-neighbors).
        """
        graph = self.graph
        node = graph[nodeKey]

        # regularizer: labeled nodes stay fixed during the regular sweeps
        if node.isLabeled() and not lastIteration:
            return 0

        in_neighbors = node.getIn()
        # Nothing to average: keep the current vector instead of dividing by zero.
        if not in_neighbors:
            return 0

        f = node.getF()
        f_new = np.zeros(len(f))
        for nodeIn in in_neighbors:
            neighbor = graph[nodeIn]
            if neighbor.isLabeled():
                penalty = 1.0
            else:
                penalty = max(neighbor.getOutDegree(), 1.0)
            f_new = f_new + (neighbor.getF() / penalty)

        f_new = f_new / len(in_neighbors)
        loss = np.sum(np.abs(f - f_new))
        node.setF(f_new)

        return loss

    def saveModel(self, output):
        """Write every node's vector to ``output`` as text, replacing any existing file.

        The first line is ``"<number of nodes> <dimension>"`` (dimension is
        taken from the first node, or -1 for an empty graph). Each following
        line is the node key, with spaces replaced by ``_``, followed by the
        space-separated vector values.
        """
        print("Writing model "+output)
        graph = self.graph
        nodes = list(graph.keys())

        num_items = len(nodes)
        dimension = len(graph[nodes[0]].getF()) if nodes else -1

        # 'w' truncates an existing file; the context manager closes it on error too.
        with open(output, 'w') as file:
            file.write(str(num_items)+" "+str(dimension)+"\n")

            for nodeKey in nodes:
                # tolist() yields plain Python numbers, so the text does not
                # depend on how NumPy scalars are repr'd.
                values = np.asarray(graph[nodeKey].getF()).tolist()
                s = ' '.join(str(value) for value in values)
                file.write(nodeKey.replace(' ', '_')+" "+s+"\n")

        print("Writing model "+output+": OK")
        

In [13]:
# Seed the global RNG so the shuffled visiting order inside
# SimpleRegularizer.walking() is the same on every run.
SEED = 42
random.seed(SEED)

# Build the undirected graph from the edge list and label it from reg_data.
graph = network_builder(network, reg_data, directed=False)

# The loop stops only once the iteration count exceeds min_iterations, and
# then as soon as it reaches max_iterations or the sweep loss drops below
# convergence.
regularizer = SimpleRegularizer(
    graph,
    min_iterations=10,
    max_iterations=30,
    convergence=0.0001,
)
regularizer.walking()
regularizer.saveModel(reg_model)

Reading dados-brutos-tomcat.csv.network
Reading dados-brutos-tomcat.csv.labels
Reading dados-brutos-tomcat.csv.labels: OK
Reading dados-brutos-tomcat.csv.network: OK
#Nodes: 9804
#Edges: 135671
Iteration 1 | Loss=81235.51490616487
Iteration 2 | Loss=1862.2250610408112
Iteration 3 | Loss=51.385717052507324
Iteration 4 | Loss=2.6259099638168926
Iteration 5 | Loss=0.19900384025927495
Iteration 6 | Loss=0.015595830114272275
Iteration 7 | Loss=0.0007813593283762033
Iteration 8 | Loss=3.8221035008144616e-05
Iteration 9 | Loss=2.8801129771268035e-06
Iteration 10 | Loss=2.864127489884507e-07
Iteration 11 | Loss=1.1929061465632959e-08
Writing model dados-brutos-tomcat.csv.reg.model
Writing model dados-brutos-tomcat.csv.reg.model: OK
