In [1]:
# Utils
import json
import os
from os import path
import subprocess
import time
from datetime import datetime

# Dependency Tree
from xml.etree import ElementTree
from xml.dom import minidom
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, Comment, tostring, ElementTree

# BBDD
from elasticsearch import Elasticsearch
from py2neo import Graph, Node, Relationship
from py2neo.matching import *

ramas = ["develop", "master"]

In [None]:
# Read XML - MAVEN

In [16]:
# Find if the edge is between repository or component
def findType(tree, elements):
    
    types = {"sourceType":"", "targetType":""}
    types = json.loads(json.dumps(types))
    
    for elem in tree.iter():
        if(elem.attrib.get('id') == elements['source']):
            for subelem in elem.iter():
                if(subelem.tag.split('}')[1] == 'NodeLabel'):
                    info = subelem.text.split(':')
                    if(len(info) == 4):
                        types['sourceType'] = "REPOSITORY"
                    else:
                        types['sourceType'] = "LIBRARY"
                        
        if(elem.attrib.get('id') == elements['target']):
            for subelem in elem.iter():
                if(subelem.tag.split('}')[1] == 'NodeLabel'):
                    info = subelem.text.split(':')
                    if(len(info) == 4):
                        types['targetType'] = "REPOSITORY"
                    else:
                        types['targetType'] = "LIBRARY"

    return types


def find_ids(tree, elements):
    types = {"sourceID":"", "targetID":""}
    types = json.loads(json.dumps(types))
    
    for elem in tree.iter():
        if(elem.attrib.get('id') == elements['source']):
            for subelem in elem.iter():
                if(subelem.tag.split('}')[1] == 'NodeLabel'):
                    info = subelem.text.split(':')
                    if(len(info) == 4):
                        types['sourceID'] = info[1]
                    else:
                        new_id = info[1]+'@'+info[3]
                        types['sourceID'] = new_id
                        
        if(elem.attrib.get('id') == elements['target']):
            for subelem in elem.iter():
                if(subelem.tag.split('}')[1] == 'NodeLabel'):
                    info = subelem.text.split(':')
                    if(len(info) == 4):
                        types['targetID'] = info[1]
                    else:
                        new_id = info[1]+'@'+info[3]
                        types['targetID'] = new_id

    return types

# Parse XML
def parse_xml(path_to_file, branch):
    # Parse the dependency tree
    tree = ET.parse(path_to_file)

    # For each element in the XML
    for elem in tree.iter():
        
        # If element is a Node
        if(elem.tag.split('}')[1] == 'node'):
            id = elem.attrib.get('id')
            #print('{}  {}'.format(elem.tag.split('}')[1], elem.attrib))
            for subelem in elem.iter():
                if(subelem.tag.split('}')[1] == 'NodeLabel'):
                    info = subelem.text.split(':')
                    
                    # If it is the repo info
                    if(len(info) == 4):
                        # Create repo document
                        docu = {"id":info[1], "origin":info[0], "packing_type":info[2], "technology":"java"}
                        docu = json.loads(json.dumps(docu))
                        create_node(docu, "REPOSITORY")
                    elif(len(info) == 5):
                        # Create dependency Document
                        dep_id = info[1]+'@'+info[3]
                        docu = {"id":dep_id, "origin":info[0], "name":info[1], "packing_type":info[2], 
                                "version":info[3], "validated":"true", "technology":"java"}
                        docu = json.loads(json.dumps(docu))
                        create_node(docu, "LIBRARY")       

    for elem in tree.iter():
        if(elem.tag.split('}')[1] == 'edge'):
            atributos = json.loads(json.dumps(elem.attrib))
            types = findType(tree, atributos)
            elems_ids = find_ids(tree, atributos)
            create_edge(elems_ids, types, branch)

In [17]:
def read_GML(path_to_file, branch):

    # Parse the dependency tree
    parse_xml(path_to_file, branch)

# Read JSON - NODEJS

In [18]:
# Create document of repository in database
def create_node_repo_document(repo):
    exists = NodeMatcher(connection).match('LIBRARY', id=repo['name']).first()
    # Check if exists
    if (exists is None):

        # Create Node in Neo4J
        new_node = Node('REPOSITORY', id=repo['name'], technology= 'javascript')
        connection.create(new_node)
        
# Create document of dependency in database
def create_node_depend_document(dep_data, dep_name):
    
    #dep_id = dep_name+'@'+dep_data['version']
    dep_id = dep_data['from']
    exists = NodeMatcher(connection).match('LIBRARY', id=dep_id).first()
    
    # Check if exists
    if (exists is None):
        
        # Create Node in Neo4J
        new_node = Node('LIBRARY', name=dep_name, 
                               origin=dep_data['from'], 
                               version=dep_data['version'], 
                               id=dep_id, 
                               validated= 'true', 
                               technology= 'javascript')
        
        connection.create(new_node)
    
# Create document of relationship in database
def create_node_edge_document(parent, origin, parent_type, branch):
    
    source = NodeMatcher(connection).match(parent_type, id=parent).first()
    destiny = NodeMatcher(connection).match("LIBRARY", id=origin['from']).first()
    
    # Check if exists
    if (source is not None and destiny is not None):
        depends = Relationship(source, branch , destiny)
        connection.create(depends)
        
# Check if key has '/'
def check_key_format(key):
    clave = key
    if("/" in key):
        clave = clave.replace("/", ":")
    return clave
    
def get_depend_depth(source, parent, from_type, branch):
        
    # Check if it has dependencies at this level
    if ('dependencies' in source):
        deps = source['dependencies']

        # For each dependency
        for dep in deps:
            create_node_depend_document(deps[dep], dep)
            create_node_edge_document(parent, deps[dep], from_type, branch)
            get_depend_depth(deps[dep], deps[dep]['from'], 'LIBRARY', branch)
            
# Read JSON with npm dependencies
def read_JSON(json_path, branch):

    with open(json_path) as f:
        data = json.load(f)

        create_node_repo_document(data)
        get_depend_depth(data, data['name'], 'REPOSITORY', branch)

# Neo4J Utils

In [19]:
connection = Graph("bolt://localhost:7687", password="admin")

# Delete all DB
def clean_db():
    delete_query = "MATCH p=()-->() DELETE p"
    connection.run(delete_query)
    delete_query = "MATCH (p) DELETE p"
    connection.run(delete_query)
    print("Deleted DB data")
    
# Create node
def create_node(data, tipo):
    exists = NodeMatcher(connection).match(tipo, id=data['id']).first()
    
    # Check if exists
    if (exists is None):
        
        # Create Node in Neo4J
        new_node = Node(tipo, **data)
        connection.create(new_node)
                
# Create relationship
def create_edge(vertices, tipos, branch):
    source = NodeMatcher(connection).match(tipos['sourceType'], id=vertices['sourceID']).first()
    destiny = NodeMatcher(connection).match(tipos['targetType'], id=vertices['targetID']).first()
    
    # Check if exists
    if (source is not None and destiny is not None):
        depends = Relationship(source, branch , destiny)
        connection.create(depends)
        
    #else:
    #    print("{} - {}".format(vertices['sourceID'], vertices['targetID']))
        
# Update node 
def update_node(dep_name, dep_version, dep_field, dep_value):
    query = 'MATCH (p:LIBRARY) WHERE p.id="{name}@{version}" SET p.{field} = "{new_value}" RETURN p'.format(name=dep_name,
    version=dep_version,
    field=dep_field,
    new_value=dep_value)
    
    result = connection.run(query)
    
    print(result.data()[0]['p'])
    
# Search node
def search_node(dep_name, dep_version):
    query = 'MATCH (p:LIBRARY) WHERE p.id = "{name}@{version}" RETURN p.id'.format(name=dep_name,
                                                                            version=dep_version)
    
    cursor = connection.run(query).data()
    return cursor[0]['p.id']

# Get shortest path between node and not validated one
def get_path(dep_name, branch):
    query_check = 'MATCH (a), (b) WHERE (a)-[*]-(b) AND a.validated="false" AND b.id="{}" RETURN a.id, b.id'.format(dep_name)
    result = connection.run(query_check).data()

    if(len(result)>=1):
        
        unvalid_dep = result[0]['a.id']
        query_search = 'MATCH (from:REPOSITORY {{ id:"{}" }}) , (to:LIBRARY {{ id: "{}" }}) , path = (from)-[:{}*]->(to) RETURN path AS shortestPath, Nodes(path) LIMIT 1'.format(dep_name, unvalid_dep, branch)
        path = connection.run(query_search).data()
        
        if(len(path)>=1):
            result_path = path[0]
            print("{} - {} - KO".format(dep_name, branch))
            print('###############################################')
            tab = '\t'
            print(dep_name)
            for idx, nodo in enumerate(result_path['Nodes(path)']):
                if idx != 0:
                    print(tab*idx + '|')
                    print(tab*idx + nodo['id'])
                    
            return 'KO'
            
    else:
        print("{} - {} - OK".format(dep_name, branch))
        return 'OK'

# Check if one node is affected by not validated deps
def check_node(dep_type, dep_name):
    
    for rama in ramas:
        
        query_check = 'MATCH (a), (b) WHERE (a)-[*]-(b) AND a.validated="false" AND b.id="{}" RETURN a.id, b.id'.format(dep_name)
        result = connection.run(query_check).data()[0]

        if(len(result)>=1):

            unvalid_dep = result['a.id']
            query_search = 'MATCH (from:{} {{ id:"{}" }}) , (to:LIBRARY {{ id: "{}" }}) , path = (from)-[:{}*]->(to) RETURN path AS shortestPath, Nodes(path) LIMIT 1'.format(dep_type, dep_name, unvalid_dep, rama)

            if len(connection.run(query_search).data()) > 0:
                path = connection.run(query_search).data()[0]

                '''
                print(path['Nodes(path)'][0]['id'])
                print(path['Nodes(path)'][1]['id'])
                print(path['Nodes(path)'][len(path['Nodes(path)'])-1]['id'])
                '''

                return rama, path['Nodes(path)'][1]['id'], path['Nodes(path)'][len(path['Nodes(path)'])-1]['id']

            else:
                print("NODE {} in branch {} - OK".format(dep_name, branch))
    
    return 0

#update_node("hamcrest-core", "1.3", "validated", "false")
#search_node("hamcrest-core", "1.3")
check_node("LIBRARY", "crypto@3.6.0")

('develop', 'rlp@3.3.0', 'rlp@3.3.0')

# Elastic

In [20]:
# Get the top branch in wich the dependency is used
def get_top_branch(dep_id):
    query_check = "MATCH (:LIBRARY {{id: '{}' }})-[r]-() RETURN TYPE(r)".format(dep_id)
    result = connection.run(query_check).data()
    
    for resultado in result:
        branch_name = resultado['TYPE(r)']
        
        if branch_name == "master":
            return "master"
        else:
            return "develop"
        
def write_catalog():
    DATE = datetime.now()
    
    query_check = 'MATCH (n) RETURN n'
    result = connection.run(query_check).data()
    
    for nodo in result:
        write_elastic(str(nodo['n'].labels), nodo['n'], DATE)

def write_elastic(dep_type, data, fecha_actual):
    doc = {}
    index_sufix = ""
    
    if dep_type == ':LIBRARY':
        
        if data['id'].startswith('@'):
            nombre, version = data['id'].split('@')[1:]
        else:
            nombre, version = data['id'].split('@')
            
        index_sufix = "library"
        
        doc = {
            'timestamp': fecha_actual,
            'library.id': data['id'],
            'library.name': nombre,
            'library.type': index_sufix,
            'library.version': version,
            'library.validated': data['validated'],
            'library.top-branch': get_top_branch(data['id']),
            'library.technology': data['technology']
        }
        
    elif dep_type == ':REPOSITORY':
        index_sufix = "repository"
        
        rama = check_node("REPOSITORY", data['id'])
                    
        doc = {
            'timestamp': fecha_actual,
            'type': index_sufix,
            'repository.name': data['id'],
            'repository.technology': data['technology']
        }            
        
    elif dep_type == ':APPLICATION':
        index_sufix = "application"
        doc = {
            'timestamp': fecha_actual,
            'type': index_sufix,
            'application.name': data['id'],
            'application.technology': data['technology']
        }
        
    res = es.index(index="test-inventory-{}".format(index_sufix), doc_type='_doc', body=doc)
    #print(res['result'])
    #es.indices.refresh(index="test-index")
    
# Clear Elastic Index
def clean_elastic():
    
    idx_list = [x for x in es.indices.get_alias("test-*").keys() ]

    for index in idx_list:
        es.indices.delete(index=index, ignore=[400, 404])
    
    print('Deleted Elasticsearch Data')

In [21]:
def write_pipeline(pipe_status, branch):
    DATE = datetime.now()

    doc = {}
    
    if pipe_status == 'OK':
                    
        doc = {
            'timestamp': DATE,
            'pipeline.id': 'sample-project-maven',
            'pipeline.status': 'OK',
            'pipeline.technology': 'java'
        }
        
        
    elif pipe_status == 'KO':

        results = check_node("sample-project-maven", branch)

        doc = {
            'timestamp': DATE,
            'pipeline.id': 'sample-project-maven',
            'pipeline.status': 'KO',
            'pipeline.origin': results[1],
            'pipeline.library_compromised': results[0],
            'pipeline.technology': 'java'
        }
                
    res = es.index(index="test-pipeline-{}".format(pipe_status.lower()), doc_type='_doc', body=doc)

# Extend

In [22]:
# Crear un contenedor para ampliar la jerarquia
def create_container():
    
    application_name = "my-application"
    # Create document of dependency in database    
    exists = NodeMatcher(connection).match('APPLICATION', id=application_name).first()
    
    # Check if exists
    if (exists is None):
        
        # Create Node in Neo4J
        new_node = Node('APPLICATION', id=application_name)
        aux_repo = Node('REPOSITORY', id="auxiliar-repo")
        
        connection.create(new_node)
        connection.create(aux_repo)
        
        source = NodeMatcher(connection).match("APPLICATION", id=application_name).first()
        destiny = NodeMatcher(connection).match("REPOSITORY", id="sample-project-maven").first()
    
        # Check if exists
        if (source is not None and destiny is not None):
            depends = Relationship(source, "contiene" , destiny)
            connection.create(depends)
            
        source = NodeMatcher(connection).match("APPLICATION", id=application_name).first()
        destiny = NodeMatcher(connection).match("REPOSITORY", id="auxiliar-repo").first()
    
        # Check if exists
        if (source is not None and destiny is not None):
            depends = Relationship(source, "contiene" , destiny)
            connection.create(depends)

# Step 0: Reset BBDD

In [23]:
clean_db()
es = Elasticsearch(hosts="http://elastic:changeme@localhost:9200/")

clean_elastic()

Deleted DB data
Deleted Elasticsearch Data


# Step 1: Linea Base

In [24]:
maven_path = 'C:\\Users\\d.garcia.sousa\\Documents\\Vodafone\\repositories\\sample-project-maven\\out-{}.gml'
npm_path = 'C:\\Users\\d.garcia.sousa\\Documents\\Vodafone\\repositories\\sample-project-npm\\outfile-{}.json'


for rama in ramas:
    read_GML(maven_path.format(rama), rama)
    create_container()
    write_catalog()
    #read_JSON(npm_path.format(rama), rama)
    
    # Check deps
    status = get_path("sample-project-maven", rama)
    write_pipeline(status, rama)

IndexError: list index out of range

# Step 2: Marcar Dependencia

In [13]:
update_node("rlp", "3.3.0", "validated", "false")

(_10:LIBRARY {id: 'rlp@3.3.0', name: 'rlp', origin: 'org.web3j', packing_type: 'jar', technology: 'java', validated: 'false', version: '3.3.0'})


# Step 3: Comprobar Repositorio

In [25]:
maven_path = 'C:\\Users\\d.garcia.sousa\\Documents\\Vodafone\\repositories\\sample-project-maven\\out-{}.gml'
npm_path = 'C:\\Users\\d.garcia.sousa\\Documents\\Vodafone\\repositories\\sample-project-npm\\outfile-{}.json'

for rama in ramas:
    print(rama)
    read_GML(maven_path.format(rama), rama)
    create_container()
    write_catalog()
    #read_JSON(npm_path.format(rama), rama)
    
    # Check deps
    status = get_path("sample-project-maven", rama)
    
    if(rama == "master"):
        status='OK'

    write_pipeline(status, rama)

develop


IndexError: list index out of range