In [57]:
import os
import yaml
from collections import defaultdict

In [59]:
# Directory that is walked recursively for Kubernetes YAML files; it is passed
# to main() at the end of the notebook. The path is relative, so it is resolved
# against the current working directory.
root_path = "../k8s_files"

In [2]:
import os
import yaml
import re
from collections import defaultdict


def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_list(value):
    """Return *value* if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _collect_resource_attributes(resource, parsed_data):
    """Append the attributes of one Service/Deployment resource to *parsed_data*."""
    if not isinstance(resource, dict):
        return

    # `kind:` and `spec:` may be present but null in hand-written manifests;
    # treat those the same as a missing key instead of crashing.
    kind = str(resource.get('kind') or '').lower()
    spec = _as_dict(resource.get('spec'))

    if kind == 'service':
        port_entries = [p for p in _as_list(spec.get('ports')) if isinstance(p, dict)]
        selector = _as_dict(spec.get('selector'))
        # extend() is called even for an empty list so that the key always
        # exists for a Service, exactly as before.
        parsed_data['Exposed Ports'].extend(p.get('port', 'UNKNOWN') for p in port_entries)
        parsed_data['Protocol'].extend(p.get('protocol', 'UNKNOWN') for p in port_entries)
        parsed_data['Service Type'].append(spec.get('type', 'UNKNOWN'))
        parsed_data['Selector'].append(selector.get('app', 'UNKNOWN'))
    elif kind == 'deployment':
        pod_spec = _as_dict(_as_dict(spec.get('template')).get('spec'))
        containers = [c for c in _as_list(pod_spec.get('containers')) if isinstance(c, dict)]
        parsed_data['Replicas'].append(spec.get('replicas', 'UNKNOWN'))
        parsed_data['Container Images'].extend(c.get('image', 'UNKNOWN') for c in containers)


def parse_kubernetes_yaml(yaml_content):
    """
    Parse Kubernetes YAML text and extract service and deployment attributes.

    Every YAML document in *yaml_content* is inspected (manifests commonly hold
    several documents separated by ``---``). A document may be a single
    resource mapping or a list of resource mappings. Resources whose ``kind``
    is neither ``Service`` nor ``Deployment`` (case-insensitive) are ignored.

    Returns a ``defaultdict(list)`` that may contain the keys
    ``'Exposed Ports'``, ``'Protocol'``, ``'Service Type'``, ``'Selector'``
    (from Services) and ``'Replicas'``, ``'Container Images'`` (from
    Deployments). Missing fields are recorded as ``'UNKNOWN'``.
    Returns a plain empty dict when the text is not valid YAML or contains no
    non-empty document.
    """
    try:
        # safe_load_all is lazy: parse errors surface while iterating, so the
        # documents must be materialised inside the try block.
        documents = [doc for doc in yaml.safe_load_all(yaml_content) if doc]
    except yaml.YAMLError:
        return {}
    if not documents:
        return {}

    parsed_data = defaultdict(list)
    for document in documents:
        resources = document if isinstance(document, list) else [document]
        for resource in resources:
            _collect_resource_attributes(resource, parsed_data)

    return parsed_data


def calculate_precision_recall(generated, manual):
    """
    Score the generated attributes against the manual ones, component by component.

    Both arguments map a component name to a list of values. For every
    component present in either mapping the value lists are compared as sets
    (duplicates collapse), yielding true positives (in both), false positives
    (generated only) and false negatives (manual only).

    Returns a dict keyed by component; each entry holds 'precision', 'recall',
    the 'tp'/'fp'/'fn' counts and the matching 'tp_items'/'fp_items'/'fn_items'
    lists. A ratio whose denominator is zero is reported as 0.
    """
    def _ratio(hits, others):
        # Guard against division by zero when there is nothing to score.
        denominator = hits + others
        return hits / denominator if denominator > 0 else 0

    results = {}
    for component in set(generated.keys()).union(set(manual.keys())):
        produced = set(generated.get(component, []))
        expected = set(manual.get(component, []))

        matched = produced & expected
        spurious = produced - expected
        missed = expected - produced

        hit_count = len(matched)
        spurious_count = len(spurious)
        missed_count = len(missed)

        entry = {}
        entry['precision'] = _ratio(hit_count, spurious_count)
        entry['recall'] = _ratio(hit_count, missed_count)
        entry['tp'] = hit_count
        entry['fp'] = spurious_count
        entry['fn'] = missed_count
        entry['tp_items'] = list(matched)
        entry['fp_items'] = list(spurious)
        entry['fn_items'] = list(missed)
        results[component] = entry

    return results

def find_kubernetes_files(root_dir):
    """
    Recursively find all service and deployment YAML files along with their MiDKo versions.

    Walks *root_dir* and, for every ``<name>.yaml`` file that has a sibling
    ``<name>_MiDKo.yaml`` in the same directory, records the pair.

    Returns a list of ``(original_path, midko_path)`` tuples. Directories and
    file names are visited in sorted order so the result is deterministic
    across platforms and filesystems. A missing *root_dir* yields an empty list.
    """
    suffix = '.yaml'
    kubernetes_tuples = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Sorting in place makes os.walk descend into subdirectories in a
        # stable order instead of whatever order the filesystem reports.
        dirnames.sort()
        for filename in sorted(filenames):
            if not filename.endswith(suffix):
                continue
            # Swap only the trailing extension; str.replace would also rewrite
            # any earlier '.yaml' inside the name (e.g. 'x.yaml.yaml').
            midko_name = filename[:-len(suffix)] + '_MiDKo' + suffix
            midko_path = os.path.join(dirpath, midko_name)
            if os.path.exists(midko_path):
                kubernetes_tuples.append((os.path.join(dirpath, filename), midko_path))
    return kubernetes_tuples

def process_kubernetes_directory(root_dir):
    """
    Process all Kubernetes YAML files in the root directory and compare them.

    For every (original, MiDKo) file pair found under *root_dir*, both files
    are read, echoed to stdout, parsed, and scored with the MiDKo file as the
    "generated" side and the original as the "manual" reference.

    Returns a 5-tuple:
        file_results      -- list of dicts with 'original_path', 'midko_path'
                             and the per-component 'results' for that pair
        overall_precision -- dict: component -> mean of the per-file precision
        overall_recall    -- dict: component -> mean of the per-file recall
        total_precision   -- precision computed from the tp/fp counts summed
                             over all components and files (1.0 if no counts)
        total_recall      -- recall computed from the tp/fn counts summed over
                             all components and files (1.0 if no counts)
    """
    file_results = []
    precision_dict = defaultdict(list)
    recall_dict = defaultdict(list)
    total_tp = 0
    total_fp = 0
    total_fn = 0

    kubernetes_tuples = find_kubernetes_files(root_dir)

    for original_path, midko_path in kubernetes_tuples:
        # Read as UTF-8 explicitly instead of relying on the platform's locale
        # encoding, which can mis-decode non-ASCII manifests (e.g. on Windows).
        with open(original_path, 'r', encoding='utf-8') as f:
            original_content = f.read()
        with open(midko_path, 'r', encoding='utf-8') as f:
            midko_content = f.read()
        print("-------------- Original")
        print(original_content)
        print("-------------- MIDKO")
        print(midko_content)
        original_parsed = parse_kubernetes_yaml(original_content)
        midko_parsed = parse_kubernetes_yaml(midko_content)

        results = calculate_precision_recall(midko_parsed, original_parsed)

        file_results.append({
            'original_path': original_path,
            'midko_path': midko_path,
            'results': results
        })

        for component, metrics in results.items():
            precision_dict[component].append(metrics['precision'])
            recall_dict[component].append(metrics['recall'])
            total_tp += metrics['tp']
            total_fp += metrics['fp']
            total_fn += metrics['fn']

    # Per-component averages: each list has at least one entry because a key
    # is only created when a value is appended, so the division is safe.
    overall_precision = {
        component: sum(values) / len(values)
        for component, values in precision_dict.items()
    }
    overall_recall = {
        component: sum(values) / len(values)
        for component, values in recall_dict.items()
    }
    # Pooled metrics over the summed counts; defined as 1.0 when there is
    # nothing to score.
    total_precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 1.0
    total_recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 1.0

    return (
        file_results,
        overall_precision,
        overall_recall,
        total_precision,
        total_recall
    )

def main(root_dir):
    """
    Run the comparison over *root_dir* and print a report to stdout.

    The report lists, for each compared file, the TP/FP/FN counts and the
    precision/recall of every component, followed by the aggregate metrics
    returned by process_kubernetes_directory: the per-component averages over
    files and the precision/recall pooled over all components and files.
    """
    (
        file_results,
        overall_precision,
        overall_recall,
        total_precision,
        total_recall
    ) = process_kubernetes_directory(root_dir)

    print("Comparison Results for Each File:")
    for result in file_results:
        print(f"File: {result['original_path']}")
        for component in sorted(result['results'].keys()):
            metrics = result['results'][component]
            print(f"  Component: {component}")
            print(f"    True Positives (TP): {metrics['tp']}")
            print(f"    False Positives (FP): {metrics['fp']}")
            print(f"    False Negatives (FN): {metrics['fn']}")
            print(f"    Precision: {metrics['precision']:.2f}")
            print(f"    Recall: {metrics['recall']:.2f}")
        print()

    # The aggregates were previously computed and then discarded; report them
    # so the run yields a summary as well as the per-file detail.
    print("Average Precision and Recall per Component (mean over files):")
    for component in sorted(overall_precision):
        print(f"  Component: {component}")
        print(f"    Precision: {overall_precision[component]:.2f}")
        print(f"    Recall: {overall_recall[component]:.2f}")
    print()

    print("Pooled Results (all components and files):")
    print(f"  Precision: {total_precision:.2f}")
    print(f"  Recall: {total_recall:.2f}")


In [None]:
# Run the comparison over the directory configured in root_path and print the report.
main(root_path)