In [16]:
from neo4j import GraphDatabase

# Connection setup
uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "16zomole"))  # Replace with your credentials

def add_ast(tx, node_id, node_type, name=None):
    """ Function to add an AST node to the Neo4j database """
    if name:
        query = (
            "CREATE (n:ASTNode {id: $node_id, type: $node_type, name: $name}) "
            "RETURN n"
        )
        result = tx.run(query, node_id=node_id, node_type=node_type, name=name)
    else:
        query = (
            "CREATE (n:ASTNode {id: $node_id, type: $node_type}) "
            "RETURN n"
        )
        result = tx.run(query, node_id=node_id, node_type=node_type)
    return result.single()[0]

def add_relationship(tx, parent_id, child_id):
    """ Function to add a relationship between two AST nodes """
    query = (
        "MATCH (a:ASTNode), (b:ASTNode) "
        "WHERE a.id = $parent_id AND b.id = $child_id "
        "CREATE (a)-[:PARENT_OF]->(b)"
    )
    tx.run(query, parent_id=parent_id, child_id=child_id)


In [14]:
from pathlib import Path
import hashlib
import subprocess
import json
from functools import wraps
import inspect

text_cache = Path('cache')

def sha1(input_string):
    """Helper to hash input strings"""
    try:

        # Step 5: Create a new SHA-1 hash object
        hash_object = hashlib.sha1()

        # Step 6: Update the hash object with the bytes-like object
        hash_object.update(input_string.encode('utf-8'))

        # Step 7: Get the hexadecimal representation of the hash
        return hash_object.hexdigest()
    except Exception as e:
        raise ValueError(input_string) from e


def stored(func):
    """
    implements nix-like durable memoisation of function results.

    Lazy way to avoid recomputing expensive calls. Expects results to be JSON-serializable
    """
    @wraps(func)
    def CACHE(*args, **kwargs):
        name = func.__name__
        meta = {}

        meta["name"] = name
        meta["func"] = inspect.getsource(func)
        meta["args"] = args
        meta["kwargs"] = kwargs

        js = json.dumps(meta)
        sha = hashlib.sha1(js.encode('utf-8'))

        digest = sha.hexdigest()

        path = text_cache / f"{digest}-{name}.json"

        if path.exists():
            with path.open('r') as r:
                cached = json.load(r)
            return cached["result"]
        result = func(*args, **kwargs)
        meta["result"] = result
        with path.open('w') as w:
            json.dump(meta, w)
        return result

    return CACHE

In [17]:
def main():
    # Hardcoded AST nodes (simplified version of your structure)
    ast_nodes = {
        "file": ("File", "main"),
        "import": ("GenDecl", "import"),
        "func": ("FuncDecl", "main"),
    }

    # Relationships based on AST structure
    relationships = [
        ("file", "import"),
        ("file", "func"),
    ]

    with driver.session() as session:
        # Adding nodes
        for node_id, (node_type, name) in ast_nodes.items():
            session.write_transaction(add_ast, node_id, node_type, name)

        # Adding relationships
        for parent_id, child_id in relationships:
            session.write_transaction(add_relationship, parent_id, child_id)

        print("AST loaded into Neo4j successfully!")


In [18]:
main()

AST loaded into Neo4j successfully!


  session.write_transaction(add_ast, node_id, node_type, name)
  session.write_transaction(add_relationship, parent_id, child_id)


In [24]:
def get_ast_node(tx, node_id):
    """ Function to retrieve an AST node from the Neo4j database """
    query = "MATCH (n:ASTNode {id: $node_id}) RETURN n"
    result = tx.run(query, node_id=node_id)
    record = result.single()  # Get the single record if available
    return record["n"] if record else None


def get_children(tx, parent_id):
    """ Function to retrieve child nodes of a specified parent node """
    query = (
        "MATCH (p:ASTNode {id: $parent_id})-[:PARENT_OF]->(c:ASTNode) "
        "RETURN c"
    )
    result = tx.run(query, parent_id=parent_id)
    return [record["c"] for record in result]


In [25]:
session = driver.session()
node = session.execute_read(get_ast_node, "file")
if node:
    print("Retrieved Node:", node)
else:
    print("Node not found.")

        # Get children of a specific node
children = session.read_transaction(get_children, "file")
print("Children of the node:", [child for child in children])


Retrieved Node: <Node element_id='4:5e1d3220-7b75-49f7-9fdd-98681d96641a:0' labels=frozenset({'ASTNode'}) properties={'name': 'main', 'id': 'file', 'type': 'File'}>
Children of the node: [<Node element_id='4:5e1d3220-7b75-49f7-9fdd-98681d96641a:5' labels=frozenset({'ASTNode'}) properties={'name': 'main', 'id': 'func', 'type': 'FuncDecl'}>, <Node element_id='4:5e1d3220-7b75-49f7-9fdd-98681d96641a:2' labels=frozenset({'ASTNode'}) properties={'name': 'main', 'id': 'func', 'type': 'FuncDecl'}>, <Node element_id='4:5e1d3220-7b75-49f7-9fdd-98681d96641a:4' labels=frozenset({'ASTNode'}) properties={'name': 'import', 'id': 'import', 'type': 'GenDecl'}>, <Node element_id='4:5e1d3220-7b75-49f7-9fdd-98681d96641a:1' labels=frozenset({'ASTNode'}) properties={'name': 'import', 'id': 'import', 'type': 'GenDecl'}>, <Node element_id='4:5e1d3220-7b75-49f7-9fdd-98681d96641a:2' labels=frozenset({'ASTNode'}) properties={'name': 'main', 'id': 'func', 'type': 'FuncDecl'}>, <Node element_id='4:5e1d3220-7b75-49

  children = session.read_transaction(get_children, "file")


In [27]:
node = session.execute_read(get_ast_node, "file")


In [28]:
node

<Node element_id='4:5e1d3220-7b75-49f7-9fdd-98681d96641a:0' labels=frozenset({'ASTNode'}) properties={'name': 'main', 'id': 'file', 'type': 'File'}>

In [42]:
import yaml
from neo4j import GraphDatabase

# Function to parse the YAML files
def parse_yaml(file_path):
    with open(file_path, 'r') as file:
        return yaml.safe_load(file)

# Extract environment and vault blocks from values.yml
def extract_values_info(data):
    env_block = data.get('env', None)
    vault_block = data.get('vault', None)
    return env_block, vault_block

# Extract project name and dependencies from .gitlab-ci.yml
def extract_gitlab_info(data):
    project_name = data.get('variables', {}).get('CI_TMPL_HELM_RELEASE_NAMES', None)
    registry_project = data.get('variables', {}).get('REGISTRY_PROJECT', None)
    return project_name, registry_project

# Function to create graph database
def create_graph(env_block, vault_block, project_name, registry_project):
    # Connect to Neo4j
    
    uri = "bolt://localhost:7687"
    driver = GraphDatabase.driver(uri, auth=("neo4j", "16zomole"))  # Replace with your credentials
    
    with driver.session(database='alljobswb') as session:
        # Create nodes and relationships in Neo4j
        if project_name:
            session.run("MERGE (p:Project {name: $name})", name=project_name)
        
        if registry_project:
            session.run("MERGE (r:Registry {name: $name})", name=registry_project)
        
        if env_block:
            for key, value in env_block.items():
                session.run("MERGE (e:Env {key: $key, value: $value})", key=key, value=value)
                if project_name:
                    session.run("""
                        MATCH (p:Project {name: $project_name}), (e:Env {key: $key, value: $value})
                        MERGE (p)-[:USES_ENV]->(e)
                    """, project_name=project_name, key=key, value=value)
        
        if vault_block:
            session.run("MERGE (v:Vault {enabled: $enabled})", enabled=vault_block.get('enabled', False))
            if project_name:
                session.run("""
                    MATCH (p:Project {name: $project_name}), (v:Vault {enabled: $enabled})
                    MERGE (p)-[:USES_VAULT]->(v)
                """, project_name=project_name, enabled=vault_block.get('enabled', False))
    
    driver.close()



In [38]:
values_data = parse_yaml('/Users/valuamba/wb/requisites-api/deploy/k8s.prod-dp/values.yml')


env_block, vault_block = extract_values_info(values_data)

env_block, vault_block

({'DEBUG': 'false',
  'HTTP_PORT': 8080,
  'HR_EMPLOYEES_URL': 'http://hr-employees.hr.svc.k8s.prod-dl',
  'HR_WBUSER_URL': 'http://hr-wbusers.hr.svc.k8s.prod-dl',
  'AUTH_URL': 'http://auth.alljobswb.svc.k8s.prod-dp',
  'NATS_CLUSTER_URL': 'nats://hr-rabbitmq.dl.wb.ru:4222',
  'HR_PERSONAL_ACCOUNT_URL': 'http://hr-personal-account.hr.svc.k8s.prod-dl',
  'RBAC_URL': 'http://rbac.alljobswb.svc.k8s.prod-dl',
  'WBJOB_URL': 'http://admin.wbjobs.svc.k8s.prod-dp',
  'IDENTIFICATION_URL': 'http://identification.wbx-authorization.svc.k8s.auth-dl/',
  'USER_INFO_URL': 'https://api.user-info.svc.k8s.prod-dp',
  'PUB_KEYS_URL': 'https://auth.wb.ru/public/pubs.json'},
 None)

In [43]:
path = '/Users/valuamba/wb/requisites-api/deploy/k8s.prod-dp/values.yml'
ci_path = '/Users/valuamba/wb/requisites-api/.golangci.yml'

# Parse the YAML files
values_data = parse_yaml(path)
gitlab_data = parse_yaml(ci_path)

# Extract necessary information
env_block, vault_block = extract_values_info(values_data)
project_name, registry_project = extract_gitlab_info(gitlab_data)

# Create graph database
create_graph(env_block, vault_block, project_name, registry_project)

In [66]:
import os
import re
import yaml
from neo4j import GraphDatabase

def load_yaml_with_error_handling(file_path):
    with open(file_path, 'r') as file:
        try:
            data = yaml.safe_load(file)
        except yaml.YAMLError as exc:
            print(f"Error parsing YAML file: {exc}")
            data = None
    return data

def load_yaml_ignore_errors(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    valid_lines = []
    for line in lines:
        try:
            yaml.safe_load(line)
            valid_lines.append(line)
        except yaml.YAMLError:
            continue

    valid_content = ''.join(valid_lines)
    try:
        return yaml.safe_load(valid_content)
    except yaml.YAMLError as exc:
        print(f"Error parsing combined valid YAML content: {exc}\nPath: {file_path}")
        return None

# Extract environment and vault blocks from values.yml
def extract_values_info(data):
    env_block = {k: v for k, v in data.get('env', {}).items() if is_url(v)}
    vault_block = {k: v for k, v in data.get('vault', {}).items()} if data.get('vault') else None
    return env_block, vault_block

def is_url(value):
    return isinstance(value, str) and re.match(r'http[s]?://', value) is not None

def extract_project_and_namespace(url):
    match = re.match(r'http[s]?://([^\.]+)\.([^\.]+)\.svc\.(k8s\..+)', url)
    if match:
        project_name, namespace = match.group(1), match.group(2)
        return project_name, namespace
    return None, None

# Extract project name and dependencies from .gitlab-ci.yml
def extract_gitlab_info(data):
    project_name = data.get('variables', {}).get('CI_TMPL_HELM_RELEASE_NAMES', None)
    registry_project = data.get('variables', {}).get('REGISTRY_PROJECT', None)
    return project_name, registry_project

# Function to create or merge nodes and relationships in the graph database
def create_graph(session, project_name, env_block, vault_block, environment):
    if project_name:
        session.run("MERGE (p:Project {name: $name})", name=project_name)
    
    if env_block:
        for key, value in env_block.items():
            ref_project, namespace = extract_project_and_namespace(value)
            if ref_project and namespace:
                session.run("MERGE (e:Env {key: $key, value: $value})", key=key, value=value)
                session.run("""
                    MATCH (p:Project {name: $project_name}), (e:Env {key: $key, value: $value})
                    MERGE (p)-[:USES_ENV]->(e)
                """, project_name=project_name, key=key, value=value)
                session.run("MERGE (rp:Project {name: $ref_project})", ref_project=ref_project)
                session.run("MERGE (n:Namespace {name: $namespace})", namespace=namespace)
                session.run("""
                    MATCH (p:Project {name: $project_name}), (rp:Project {name: $ref_project})
                    MERGE (p)-[:INTERACTS_WITH {namespace: $namespace}]->(rp)
                """, project_name=project_name, ref_project=ref_project, namespace=namespace)
    
    if vault_block:
        for secret in vault_block.get('contents', []):
            for path, secrets in secret.items():
                for secret_key, secret_value in secrets.items():
                    ref_project, namespace = extract_project_and_namespace(secret_value)
                    if ref_project and namespace:
                        session.run("MERGE (v:Vault {path: $path, key: $secret_key, value: $secret_value})", 
                                    path=path, secret_key=secret_key, secret_value=secret_value)
                        session.run("""
                            MATCH (p:Project {name: $project_name}), (v:Vault {path: $path, key: $secret_key, value: $secret_value})
                            MERGE (p)-[:USES_VAULT]->(v)
                        """, project_name=project_name, path=path, secret_key=secret_key, secret_value=secret_value)
                        session.run("MERGE (rp:Project {name: $ref_project})", ref_project=ref_project)
                        session.run("MERGE (n:Namespace {name: $namespace})", namespace=namespace)
                        session.run("""
                            MATCH (p:Project {name: $project_name}), (rp:Project {name: $ref_project})
                            MERGE (p)-[:INTERACTS_WITH {namespace: $namespace}]->(rp)
                        """, project_name=project_name, ref_project=ref_project, namespace=namespace)

# Function to process each project directory
def process_project(project_dir, session):
    gitlab_ci_path = os.path.join(project_dir, '.gitlab-ci.yml')
    deploy_dir = os.path.join(project_dir, 'deploy')
    
    if not os.path.isfile(gitlab_ci_path) or not os.path.isdir(deploy_dir):
        return
    
    gitlab_data = load_yaml_with_error_handling(gitlab_ci_path)
    project_name, registry_project = extract_gitlab_info(gitlab_data)
    
    for root, _, files in os.walk(deploy_dir):
        for file in files:
            try:
                if file.endswith('values.yml'):
                    values_path = os.path.join(root, file)
                    values_data = load_yaml_ignore_errors(values_path)
                    if values_data:
                        env_block, vault_block = extract_values_info(values_data)
                        
                        environment = os.path.basename(os.path.dirname(values_path))
                        create_graph(session, project_name, env_block, vault_block, environment)
                        print(f'Processed: {file}')
            except Exception as exc:
                print(f'Error processing {file}: {exc}')

# Main function to iterate over all projects and create the graph
def main(base_dir):
    uri = "bolt://localhost:7687"
    driver = GraphDatabase.driver(uri, auth=("neo4j", "16zomole"))  # Use your credentials
    
    with driver.session(database="alljobswb") as session:
        for project in os.listdir(base_dir):
            project_dir = os.path.join(base_dir, project)
            if os.path.isdir(project_dir):
                process_project(project_dir, session)
    
    driver.close()

# Replace 'path/to/projects' with the actual path to the directory containing your projects
# main('path/to/projects')


In [None]:
group_dicts = [
    '/Users/valuamba/wb/alljobswb/alljobswb',
    '/Users/valuamba/wb/hr',
    '/Users/valuamba/wb/hr/hr'
]

for path in group_dicts[1:]:
    main(path)


In [None]:
file_path = '/Users/valuamba/wb/executors-api/deploy/common/base-values.yml'
with open(file_path, 'r') as file:
    yaml.safe_load(file)

In [47]:
import yaml

def load_yaml_with_error_handling(file_path):
    with open(file_path, 'r') as file:
        try:
            data = yaml.safe_load(file)
        except yaml.YAMLError as exc:
            print(f"Error parsing YAML file: {exc}")
            data = None
    return data

def load_yaml_ignore_errors(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    valid_lines = []
    for line in lines:
        try:
            yaml.safe_load(line)
            valid_lines.append(line)
        except yaml.YAMLError:
            continue

    valid_content = ''.join(valid_lines)
    try:
        return yaml.safe_load(valid_content)
    except yaml.YAMLError as exc:
        print(f"Error parsing combined valid YAML content: {exc}")
        return None

data = load_yaml_ignore_errors(file_path)
if data:
    print("Parsed YAML data:")
    print(data)
else:
    print("Failed to parse YAML data.")


Parsed YAML data:
{'replicaCount': 1, 'image': {'pullPolicy': 'IfNotPresent'}, 'ports': [{'name': 'http', 'containerPort': 8080, 'protocol': 'TCP'}], 'annotations': {'prometheus.io/scrape': 'true', 'prometheus.io/path': '/metrics', 'prometheus.io/port': '8080'}, 'readinessProbe': {'tcpSocket': {'port': 8080}, 'initialDelaySeconds': 5, 'periodSeconds': 10}, 'livenessProbe': {'tcpSocket': {'port': 8080}, 'initialDelaySeconds': 15, 'periodSeconds': 20}, 'imagePullSecrets': [{'name': 'gitlab-registry-secret'}, {'name': 'harbor-registry-secret'}], 'resources': {'requests': {'memory': '100Mi', 'cpu': '50m'}, 'limits': {'memory': '200Mi', 'cpu': '100m'}}, 'service': {'type': 'NodePort', 'externalTrafficPolicy': 'Local', 'ports': [{'name': 'http', 'port': 80, 'protocol': 'TCP', 'targetPort': 8080}, {'protocol': 'TCP', 'name': 'https', 'port': 443, 'targetPort': 443}]}, 'env': {'DEBUG': 'true', 'DEFAULT_AGREEMENT_ID': 1, 'DEFAULT_EXECUTOR_STATUS_ID': 1, 'ADMINKA_API_URL': 'http://admin-api.allj

In [61]:
file_path = '/Users/valuamba/wb/executors-api/deploy/common/base-values.yml'


import os
import re
import yaml
from neo4j import GraphDatabase

def load_yaml_with_error_handling(file_path):
    with open(file_path, 'r') as file:
        try:
            data = yaml.safe_load(file)
        except yaml.YAMLError as exc:
            print(f"Error parsing YAML file: {exc}")
            data = None
    return data

def load_yaml_ignore_errors(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    valid_lines = []
    for line in lines:
        try:
            yaml.safe_load(line)
            valid_lines.append(line)
        except yaml.YAMLError:
            continue

    valid_content = ''.join(valid_lines)
    try:
        return yaml.safe_load(valid_content)
    except yaml.YAMLError as exc:
        print(f"Error parsing combined valid YAML content: {exc}\nPath: {file_path}")
        return None

# Extract environment and vault blocks from values.yml
def extract_values_info(data):
    env_block = {k: v for k, v in data.get('env', {}).items() if is_url(v)}
    vault_block = {k: v for k, v in data.get('vault', {}).items()} if data.get('vault') else None
    return env_block, vault_block

def is_url(value):
    return re.match(r'http[s]?://', value) is not None

def extract_project_and_namespace(url):
    match = re.match(r'http[s]?://([^\.]+)\.([^\.]+)\.svc\.(k8s\..+)', url)
    if match:
        project_name, namespace = match.group(1), match.group(2)
        return project_name, namespace
    return None, None

# Extract project name and dependencies from .gitlab-ci.yml
def extract_gitlab_info(data):
    project_name = data.get('variables', {}).get('CI_TMPL_HELM_RELEASE_NAMES', None)
    registry_project = data.get('variables', {}).get('REGISTRY_PROJECT', None)
    return project_name, registry_project

In [62]:
data = load_yaml_ignore_errors(file_path)

In [None]:
env_block, vault_block = extract_values_info(data)

In [144]:
import os
import re
import yaml
import hvac
from neo4j import GraphDatabase

def preprocess_yaml(file_path, environment, cluster):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    
    filtered_lines = []
    for line in lines:
        # Ignore commented lines
        if line.strip().startswith('#'):
            continue
        # Ignore lines with problematic variables
        if any(var in line for var in ['{{ .Helm.Release.Namespace }}', '{{.Helm.Release.Image}}', '{{.Helm.Release.Tag}}']):
            continue
        # Replace environment and cluster placeholders
        line = line.replace('{{.Helm.Release.Environment}}', environment).replace('{{.Helm.Release.Cluster}}', cluster)
        filtered_lines.append(line)
    
    return ''.join(filtered_lines)

def load_yaml(file_path, environment, cluster):
    try:
        preprocessed_content = preprocess_yaml(file_path, environment, cluster)
        return yaml.safe_load(preprocessed_content)
    except yaml.YAMLError as exc:
        print(f"Error parsing YAML file at {file_path}: {exc}")
        return None

def load_yaml_ignore_errors(file_path, environment, cluster):
    try:
        preprocessed_content = preprocess_yaml(file_path, environment, cluster)
        return yaml.safe_load(preprocessed_content)
    except yaml.YAMLError as exc:
        print(f"Error parsing combined valid YAML content at {file_path}: {exc}")
        return None

def extract_values_info(data):
    env_block = data.get('env', {})
    vault_block = data.get('vault', None)
    return env_block, vault_block

def extract_project_and_namespace(url):
    match = re.match(r'http[s]?://([^\.]+)\.([^\.]+)\.svc\.(k8s\..+)', url)
    if match:
        project_name, namespace = match.group(1), match.group(2)
        return project_name, namespace
    return None, None

def extract_gitlab_info(data):
    project_name = data.get('variables', {}).get('CI_TMPL_HELM_RELEASE_NAMES', None)
    registry_project = data.get('variables', {}).get('REGISTRY_PROJECT', None)
    return project_name, registry_project

def determine_environment(folder_name):
    if "prod" in folder_name:
        return "prd"
    elif "stage" in folder_name:
        return "stage"
    elif "dev" in folder_name:
        return "dev"
    return None

def fetch_vault_values(vault_block, environment, cluster, vault_url, vault_token):
    # client = hvac.Client(url=vault_url, token=vault_token)
    secrets = {}
    if vault_block and vault_block.get('env'):
        for secret in vault_block.get('env', []):
            for path_template, secret_keys in secret.items():
                path = path_template.replace('{{.Helm.Release.Environment}}', environment).replace('{{.Helm.Release.Cluster}}', cluster)
                # response = client.read(path)
                print(path)
                if response and 'data' in response:
                    for secret_key in secret_keys:
                        secrets[secret_keys[secret_key]] = response['data'].get(secret_keys[secret_key])
    return secrets

def create_graph(session, project_name, env_block, vault_values, environment):
    if project_name:
        session.run("MERGE (p:Project {name: $name})", name=project_name)
    
    if env_block:
        for key, value in env_block.items():
            if isinstance(value, str) and re.match(r'http[s]?://', value):
                ref_project, namespace = extract_project_and_namespace(value)
                if ref_project and namespace:
                    session.run("MERGE (e:Env {key: $key, value: $value})", key=key, value=value)
                    session.run("""
                        MATCH (p:Project {name: $project_name}), (e:Env {key: $key, value: $value})
                        MERGE (p)-[:USES_ENV]->(e)
                    """, project_name=project_name, key=key, value=value)
                    session.run("MERGE (rp:Project {name: $ref_project})", ref_project=ref_project)
                    session.run("MERGE (n:Namespace {name: $namespace})", namespace=namespace)
                    session.run("""
                        MATCH (p:Project {name: $project_name}), (rp:Project {name: $ref_project})
                        MERGE (p)-[:INTERACTS_WITH {namespace: $namespace}]->(rp)
                    """, project_name=project_name, ref_project=ref_project, namespace=namespace)
    
    if vault_values:
        for secret_key, secret_value in vault_values.items():
            if isinstance(secret_value, str) and re.match(r'http[s]?://', secret_value):
                ref_project, namespace = extract_project_and_namespace(secret_value)
                if ref_project and namespace:
                    session.run("MERGE (v:Vault {key: $secret_key, value: $secret_value})", 
                                secret_key=secret_key, secret_value=secret_value)
                    session.run("""
                        MATCH (p:Project {name: $project_name}), (v:Vault {key: $secret_key, value: $secret_value})
                        MERGE (p)-[:USES_VAULT]->(v)
                    """, project_name=project_name, secret_key=secret_key, secret_value=secret_value)
                    session.run("MERGE (rp:Project {name: $ref_project})", ref_project=ref_project)
                    session.run("MERGE (n:Namespace {name: $namespace})", namespace=namespace)
                    session.run("""
                        MATCH (p:Project {name: $project_name}), (rp:Project {name: $ref_project})
                        MERGE (p)-[:INTERACTS_WITH {namespace: $namespace}]->(rp)
                    """, project_name=project_name, ref_project=ref_project, namespace=namespace)

def merge_dicts(dict1, dict2):
    if dict1 is None:
        return dict2
    if dict2 is None:
        return dict1
    result = dict1.copy()
    for key, value in dict2.items():
        if isinstance(value, dict) and key in result:
            result[key] = merge_dicts(result[key], value)
        else:
            result[key] = value
    return result

def process_project(project_dir, session, vault_url, vault_token):
    gitlab_ci_path = os.path.join(project_dir, '.gitlab-ci.yml')
    deploy_dir = os.path.join(project_dir, 'deploy')
    common_values_path = os.path.join(deploy_dir, 'common', 'base-values.yml')
    
    if not os.path.isfile(gitlab_ci_path) or not os.path.isdir(deploy_dir):
        return
    
    environment = determine_environment(deploy_dir)
    cluster = os.path.basename(deploy_dir)

    gitlab_data = load_yaml(gitlab_ci_path, environment, cluster)
    if gitlab_data is None:
        print(f"Skipping project {project_dir} due to invalid .gitlab-ci.yml")
        return

    project_name, registry_project = extract_gitlab_info(gitlab_data)
    
    common_values_data = load_yaml(common_values_path, environment, cluster) if os.path.isfile(common_values_path) else {}

    if common_values_data is None:
        print(f"Error processing {common_values_path}: invalid YAML content")
    
    for root, _, files in os.walk(deploy_dir):
        for file in files:
            try:
                if file.endswith('values.yml'):
                    values_path = os.path.join(root, file)
                    values_data = load_yaml_ignore_errors(values_path, environment, cluster)
                    if values_data:
                        merged_values_data = merge_dicts(common_values_data, values_data)
                        env_block, vault_block = extract_values_info(merged_values_data)
                        
                        vault_values = fetch_vault_values(vault_block, environment, cluster, vault_url, vault_token)
                        create_graph(session, project_name, env_block, vault_values, environment)
                        print(f'Processed: {file}')
            except Exception as exc:
                print(f'Error processing {file}: {exc}')

def main(base_dir, vault_url, vault_token):
    uri = "bolt://localhost:7687"
    driver = GraphDatabase.driver(uri, auth=("alljobswb", "16zomole"))  # Use your credentials
    
    with driver.session(database="alljobswb") as session:
        for project in os.listdir(base_dir):
            project_dir = os.path.join(base_dir, project)
            if os.path.isdir(project_dir):
                process_project(project_dir, session, vault_url, vault_token)
    
    driver.close()


In [None]:
vault_token = 'hvs.CAESIMz_vD1qEobGAkG0lON-YmKsqgQp9y07RHmc3CY2JcKPGh4KHGh2cy5oN0ljT3FQS3NOMThYNEJRUm5acDNQYkk'
vault_url = 'https://vault.wildberries.ru:8200'

main('/Users/valuamba/wb/alljobswb/alljobswb/', vault_url, vault_token)

In [None]:
file_path = '/Users/valuamba/wb/alljobswb/alljobswb/customfields-api/deploy/k8s.stage-xc/values.yml'
file_path = '/Users/valuamba/wb/alljobswb/alljobswb/certificates-api/deploy/common/base-values.yml'


def preprocess_yaml(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    
    filtered_lines = []
    for line in lines:
        # Ignore commented lines
        if line.strip().startswith('#'):
            continue
        # Ignore lines with problematic variables
        if any(var in line for var in ['{{ .Helm.Release.Namespace }}', '{{.Helm.Release.Image}}', '{{.Helm.Release.Tag}}', '{{.Helm.Release.Environment}}', '{{.Helm.Release.Cluster}}']):
            continue
        filtered_lines.append(line)
    
    return ''.join(filtered_lines)

def load_yaml(file_path):
    try:
        preprocessed_content = preprocess_yaml(file_path)
        return yaml.safe_load(preprocessed_content)
    except yaml.YAMLError as exc:
        print(f"Error parsing YAML file at {file_path}: {exc}")
        return None

print(preprocess_yaml(file_path))

In [74]:
file_path = '/Users/valuamba/wb/executors-api/deploy/common/base-values.yml'
file_path = '/Users/valuamba/wb/alljobswb/alljobswb/certificates-api/deploy/common/base-values.yml'


data = load_yaml_ignore_errors(file_path)
env, vault = extract_values_info(data)

In [90]:
token = 'hvs.CAESIMz_vD1qEobGAkG0lON-YmKsqgQp9y07RHmc3CY2JcKPGh4KHGh2cy5oN0ljT3FQS3NOMThYNEJRUm5acDNQYkk'
client = hvac.Client(url='https://vault.wildberries.ru:8200', token=token)


res = client.is_authenticated()
print("res:", res)


res: True


In [76]:
fetch_vault_values(vault)

{}

In [None]:
https://vault.wildberries.ru:8200/ui/vault/secrets/services/list/alljobswb/stage/secrets/k8s.stage-xc/

In [75]:
vault

{'enabled': True,
 'env': [{'services/alljobswb/{{.Helm.Release.Environment}}/secrets/{{.Helm.Release.Cluster}}/executors-api': {'FNS_SMNPD_TOKEN': 'smnpd_token',
    'WBBALANCE_KEY': 'wbbalance_key',
    'USER_INFO_CLIENT': 'user_info_client_id',
    'USER_INFO_TOKEN': 'user_info_client_token',
    'ADMINKA_API_TOKEN': 'adminka_key',
    'BILLING_TOKEN': 'billing_token'}},
  {'services/alljobswb/{{.Helm.Release.Environment}}/postgres/user': {'PG_USER': 'login',
    'PG_PASSWORD': 'password'}},
  {'services/alljobswb/{{.Helm.Release.Environment}}/astra': {'ASTRA_EDI_DOC_URL': 'edi_doc_url',
    'ASTRA_EDI_LOGIN': 'edi_login',
    'ASTRA_EDI_PASSWORD': 'edi_pass',
    'HR_EMPL_SUPPLIER_URL': 'hr_supplier_url',
    'HR_USER_SUPPLIER_URL': 'hr_user_supplier_url',
    'HR_SUPPLIER_TOKEN': 'hr_supplier_token'}},
  {'services/alljobswb/{{.Helm.Release.Environment}}/nats': {'NATS_URL': 'alljobswb_nats'}},
  {'services/alljobswb/{{.Helm.Release.Environment}}/secrets/{{.Helm.Release.Cluster}}/s

In [None]:
services/alljobswb/{{.Helm.Release.Environment}}/secrets/{{.Helm.Release.Cluster}}/executors-api

In [None]:
@stored
def get_client(path):
    return client.read(path)


get_client('services/alljobswb/stage/secrets/k8s.stage-xc/crm-api')

In [None]:
services/list/alljobswb/stage/secrets/k8s.stage-xc/executors-api

In [52]:
import os
import yaml
import re
import hvac
from neo4j import GraphDatabase


neo4j_uri = "bolt://localhost:7687"
neo4j_user = "neo4j"
neo4j_password = "16zomole"
neo4j_db = 'alljobswbv2'

# Define Vault connection details
vault_url = "https://vault.wildberries.ru:8200"
vault_token = "hvs.CAESIM6thdfIasMeAKS-xQF2TtbZhG1vXAewgAzFGOxnllVnGh4KHGh2cy5qWlVvQ0dmb0N0MVVtazh5TGtmZUVOOXk"

# Initialize Vault client
vault_client = hvac.Client(url=vault_url, token=vault_token)


def determine_environment(folder_name):
    if "prod" in folder_name:
        return "prd"
    elif "stage" in folder_name:
        return "stage"
    elif "dev" in folder_name:
        return "dev"
    return None

def preprocess_yaml(file_path, environment, cluster):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    
    filtered_lines = []
    for line in lines:
        # Ignore commented lines
        if line.strip().startswith('#'):
            continue
        # Replace environment and cluster variables
        if re.search(r'\{\{\s*\.Helm\.Release\.Environment\s*\}\}', line):
            line = re.sub(r'\{\{\s*\.Helm\.Release\.Environment\s*\}\}', environment, line)
        if re.search(r'\{\{\s*\.Helm\.Release\.Cluster\s*\}\}', line):
            line = re.sub(r'\{\{\s*\.Helm\.Release\.Cluster\s*\}\}', cluster, line)
        # Ignore other variables
        pattern = re.compile(r'\{\{\s*\.Helm\.Release\.(Namespace|Image|Tag)\s*\}\}')
        if pattern.search(line):
            continue
        filtered_lines.append(line)
    
    return ''.join(filtered_lines)

def parse_yaml_file(filepath, environment, cluster):
    preprocessed_content = preprocess_yaml(filepath, environment, cluster)
    return yaml.safe_load(preprocessed_content)

def merge_dicts(dict1, dict2):
    for key, value in dict2.items():
        if key in dict1:
            if isinstance(dict1[key], dict) and isinstance(value, dict):
                merge_dicts(dict1[key], value)
            else:
                dict1[key] = value
        else:
            dict1[key] = value

def extract_values_info(data):
    env_block = data.get('env', {})
    vault_block = data.get('vault', None)
    return env_block, vault_block

def extract_values(folder_path):
    env_cluster_values = {}
    common_base_values_path = os.path.join(folder_path, 'common', 'base-values.yml')

    for subdir, _, files in os.walk(folder_path):
        for file in files:
            if file == 'values.yml':
                environment = determine_environment(subdir)
                cluster = os.path.basename(subdir)
                cluster_values_path = os.path.join(subdir, file)

                # Parse common values if the file exists
                common_values = parse_yaml_file(common_base_values_path, environment, cluster) if os.path.exists(common_base_values_path) else {}
                # Parse cluster-specific values
                cluster_values = parse_yaml_file(cluster_values_path, environment, cluster)
                
                # Extract env and vault blocks from both files
                common_env_block, common_vault_block = extract_values_info(common_values)
                cluster_env_block, cluster_vault_block = extract_values_info(cluster_values)
                
                # Merge env and vault blocks, with cluster-specific values taking precedence
                merged_env_block = common_env_block.copy()
                merge_dicts(merged_env_block, cluster_env_block)
                
                merged_vault_block = common_vault_block if common_vault_block else {}
                if cluster_vault_block:
                    merge_dicts(merged_vault_block, cluster_vault_block)
                
                env_cluster_values[f'{environment}_{cluster}'] = {
                    'vault': merged_vault_block,
                    'env': merged_env_block
                }

    return env_cluster_values

@stored
def get_vault_value(path: str):
    return vault_client.read(path=path)


def fetch_vault_secrets(vault_data):
    secrets = {}
    if vault_data and vault_data['enabled']:
        for vault_entry in vault_data['env']:
            for path, keys in vault_entry.items():
                vault_response = get_vault_value(path=path)
                # print(vault_response)
                vault_secrets = vault_response['data']
                for key, alias in keys.items():
                    if alias in vault_secrets:
                        secrets[key] = vault_secrets[alias]
    return secrets

def extract_project_and_namespace(url):
    # Regex to capture from start of the domain to the second dot
    match = re.match(r'https?://([^\.]+)\.([^\.]+)', url)
    if match:
        return match.group(1), match.group(2)
    return None, None

def create_neo4j_graph(configs):
    driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))
    print(neo4j_db)
    with driver.session(database=neo4j_db) as session:
        for project, values in configs.items():
            environment, cluster = project.split('_')
            vault = values['vault']
            vault_secrets = fetch_vault_secrets(vault)
            if vault and vault['enabled']:
                for secret_key, secret_value in vault_secrets.items():
                            if isinstance(secret_value, str) and re.match(r'http[s]?://', secret_value):
                                # print('pp')
                                ref_project, namespace = extract_project_and_namespace(secret_value)
                                print(secret_value)
                                if ref_project and namespace:
                                    session.run("MERGE (v:Vault {key: $secret_key, value: $secret_value})", 
                                                secret_key=secret_key, secret_value=secret_value)
                                    session.run("""
                                        MATCH (p:Project {name: $project_name}), (v:Vault {key: $secret_key, value: $secret_value})
                                        MERGE (p)-[:USES_VAULT]->(v)
                                    """, project_name=project, secret_key=secret_key, secret_value=secret_value)
                                    session.run("MERGE (rp:Project {name: $ref_project})", ref_project=ref_project)
                                    session.run("MERGE (n:Namespace {name: $namespace})", namespace=namespace)
                                    session.run("""
                                        MATCH (p:Project {name: $project_name}), (rp:Project {name: $ref_project})
                                        MERGE (p)-[:INTERACTS_WITH {namespace: $namespace}]->(rp)
                                    """, project_name=project, ref_project=ref_project, namespace=namespace)


In [53]:
folder_path = '/Users/valuamba/wb/alljobswb/alljobswb/executors-api/deploy'
values = extract_values(folder_path)
create_neo4j_graph(values)
# for key, value in values.items():
#     print(f"{key}: {value}")

alljobswbv2
https://edi-wb-documents.finance.svc.k8s.prod-dl
http://hr-employees.hr.svc.k8s.prod-dl
http://hr-wbusers.hr.svc.k8s.prod-dl
http://user-balance.user-balance-el.wb.ru
https://edi-wb-documents.finance.svc.k8s.prod-dl
http://hr-employees.hr.svc.k8s.prod-dl
http://hr-wbusers.hr.svc.k8s.prod-dl
http://user-balance.user-balance-el.wb.ru
http://edi-wb-documents.finance.svc.k8s.dataline1
http://user-balance.user-balance.svc.k8s.stage-dp
https://edi-wb-documents.finance.svc.k8s.dataline
http://hr-employees.hr.svc.k8s.prod-dl
http://hr-wbusers.hr.svc.k8s.prod-dl
http://user-balance.user-balance-dev.wb.ru


In [None]:
folder_path = 'deploy'
values = extract_values('/Users/valuamba/wb/alljobswb/alljobswb/executors-api/deploy')
# for key, value in values.items():
#     print(f"{key}: {value}")

values

In [None]:
values

In [None]:
parse_yaml_file('/Users/valuamba/wb/alljobswb/alljobswb/certificates-api/deploy/common/base-values.yml', 'prd', 'k8s.stage-xc')

In [None]:
values

## V2

In [86]:
import os
import yaml
import re
import hvac
from neo4j import GraphDatabase
import traceback

neo4j_uri = "bolt://localhost:7687"
neo4j_user = "neo4j"
neo4j_password = "16zomole"
neo4j_db = 'alljobswbv2'

# Define Vault connection details
vault_url = "https://vault.wildberries.ru:8200"
vault_token = "hvs.CAESIM6thdfIasMeAKS-xQF2TtbZhG1vXAewgAzFGOxnllVnGh4KHGh2cy5qWlVvQ0dmb0N0MVVtazh5TGtmZUVOOXk"

# Initialize Vault client
vault_client = hvac.Client(url=vault_url, token=vault_token)

def determine_environment(folder_name):
    if "prod" in folder_name:
        return "prd"
    elif "stage" in folder_name:
        return "stage"
    elif "dev" in folder_name:
        return "dev"
    return None

def preprocess_yaml(file_path, environment, cluster):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    
    filtered_lines = []
    for line in lines:
        # Ignore commented lines
        if line.strip().startswith('#'):
            continue
        # Replace environment and cluster variables
        if re.search(r'\{\{\s*\.Helm\.Release\.Environment\s*\}\}', line):
            line = re.sub(r'\{\{\s*\.Helm\.Release\.Environment\s*\}\}', environment, line)
        if re.search(r'\{\{\s*\.Helm\.Release\.Cluster\s*\}\}', line):
            line = re.sub(r'\{\{\s*\.Helm\.Release\.Cluster\s*\}\}', cluster, line)
        # Ignore other variables
        pattern = re.compile(r'\{\{\s*\.Helm\.Release\.(Namespace|Image|Tag)\s*\}\}')
        if pattern.search(line):
            continue
        filtered_lines.append(line)
    
    return ''.join(filtered_lines)

def parse_yaml_file(filepath, environment, cluster):
    preprocessed_content = preprocess_yaml(filepath, environment, cluster)
    return yaml.safe_load(preprocessed_content)

def merge_dicts(dict1, dict2):
    for key, value in dict2.items():
        if key in dict1:
            if isinstance(dict1[key], dict) and isinstance(value, dict):
                merge_dicts(dict1[key], value)
            else:
                dict1[key] = value
        else:
            dict1[key] = value

def extract_values_info(data):
    env_block = data.get('env', {})
    vault_block = data.get('vault', None)
    return env_block, vault_block

def extract_values(folder_path):
    env_cluster_values = {}
    common_base_values_path = os.path.join(folder_path, 'common', 'base-values.yml')

    for subdir, _, files in os.walk(folder_path):
        for file in files:
            if file == 'values.yml':
                environment = determine_environment(subdir)
                cluster = os.path.basename(subdir)
                cluster_values_path = os.path.join(subdir, file)

                # Parse common values if the file exists
                common_values = parse_yaml_file(common_base_values_path, environment, cluster) if os.path.exists(common_base_values_path) else {}
                # Parse cluster-specific values
                cluster_values = parse_yaml_file(cluster_values_path, environment, cluster)
                
                # Extract env and vault blocks from both files
                common_env_block, common_vault_block = extract_values_info(common_values)
                cluster_env_block, cluster_vault_block = extract_values_info(cluster_values)
                
                # Merge env and vault blocks, with cluster-specific values taking precedence
                merged_env_block = common_env_block.copy()
                merge_dicts(merged_env_block, cluster_env_block)
                
                merged_vault_block = common_vault_block if common_vault_block else {}
                if cluster_vault_block:
                    merge_dicts(merged_vault_block, cluster_vault_block)
                
                env_cluster_values[f'{environment}_{cluster}'] = {
                    'vault': merged_vault_block,
                    'env': merged_env_block
                }

    return env_cluster_values

@stored
def get_vault_value(path: str):
    return vault_client.read(path=path)

def fetch_vault_secrets(vault_data):
    secrets = {}
    if vault_data and vault_data['enabled']:
        for vault_entry in vault_data['env']:
            for path, keys in vault_entry.items():
                vault_response = get_vault_value(path=path)
                # print(path)
                if vault_response:
                    vault_secrets = vault_response['data']
                    for key, alias in keys.items():
                        if alias in vault_secrets:
                            secrets[key] = vault_secrets[alias]
    return secrets

def extract_project_and_namespace(url):
    match = re.match(r'https?://([^\.]+)\.([^\.]+)', url)
    if match:
        return match.group(1), match.group(2)
    return None, None

def create_neo4j_graph(configs):
    driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))
    with driver.session(database=neo4j_db) as session:
        for project, values in configs.items():
            environment, cluster = project.split('_')
            vault = values['vault']
            vault_secrets = fetch_vault_secrets(vault)
            if vault and vault['enabled']:
                for secret_key, secret_value in vault_secrets.items():
                    if isinstance(secret_value, str) and re.match(r'http[s]?://', secret_value):
                        # print('pp')
                        ref_project, namespace = extract_project_and_namespace(secret_value)
                        if ref_project and namespace:
                            session.run("MERGE (v:Vault {key: $secret_key, value: $secret_value})", 
                                        secret_key=secret_key, secret_value=secret_value)
                            session.run("""
                                MATCH (p:Project {name: $project_name}), (v:Vault {key: $secret_key, value: $secret_value})
                                MERGE (p)-[:USES_VAULT]->(v)
                            """, project_name=project, secret_key=secret_key, secret_value=secret_value)
                            session.run("MERGE (rp:Project {name: $ref_project})", ref_project=ref_project)
                            session.run("MERGE (n:Namespace {name: $namespace})", namespace=namespace)
                            session.run("""
                                MATCH (p:Project {name: $project_name}), (rp:Project {name: $ref_project})
                                MERGE (p)-[:INTERACTS_WITH {namespace: $namespace}]->(rp)
                            """, project_name=project, ref_project=ref_project, namespace=namespace)

def process_project(project_dir, session):
    deploy_dir = os.path.join(project_dir, 'deploy')
    
    if not os.path.isdir(deploy_dir):
        return
    
    values = extract_values(deploy_dir)
    create_neo4j_graph(values)

def main(base_dir):
    driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))
    
    with driver.session(database=neo4j_db) as session:
        for project in os.listdir(base_dir):
            try:
                project_dir = os.path.join(base_dir, project)
                print(project_dir)
                if os.path.isdir(project_dir):
                    process_project(project_dir, session)
            except Exception as exc:
                print(f'Error .\nStack trace:\n{traceback.format_exc()}')
                # print('Error')
    
    driver.close()

# Replace 'path/to/projects' with the actual path to the directory containing your projects
# main('path/to/projects')



In [None]:
group_dicts = [
    '/Users/valuamba/wb/alljobswb/alljobswb',
    # '/Users/valuamba/wb/hr',
    # '/Users/valuamba/wb/hr/hr'
]

for path in group_dicts:
    main(path)

In [None]:
service, environment, cluster, env, value
crm-api, prod, k8s.stage-xc, DB_URL, postgres://sdgdsfgs