In [2]:
import os
import json

DEBUG = False

In [3]:
def check_k8sgpt(k8sgpt_file):
    with open(k8sgpt_file, "r") as f:
        try:
            k8sgpt_data = json.load(f)
            if k8sgpt_data["results"]:
                for result in k8sgpt_data["results"]:
                    if result.get("kind") == "Service":
                        continue # Skip Services
                    if result.get("kind") == "PersistentVolumeClaim":
                        for error in result.get("error", []):
                            print(f"PVC error: {error['Text']} in {k8sgpt_file}")
                            return "PVC_failed"
                    if result.get("kind") == "Pod":
                        if "mysql" in result.get("parentObject"):
                            for error in result.get("error", []):
                                if "Back-off pulling image" in error["Text"]:
                                    # raise Exception
                                    raise Exception(f"Back-off pulling image in {k8sgpt_file}")
                                elif "PersistentVolumeClaims" in error["Text"]:
                                    print(f"MySQL pod has unbound immediate PersistentVolumeClaims in {k8sgpt_file}")
                                    return "MySQL_PVC_unbound"
                                elif "Readiness probe failed" in error["Text"]:
                                    print(f"MySQL Readiness probe failed in {k8sgpt_file}")
                                    return "MySQL_not_ready"
                                else:
                                    print(f"MySQL unknown error in {k8sgpt_file}")
                                    return "Unknown_error"
                        else:
                            for error in result.get("error", []):
                                if "Back-off pulling image" in error["Text"]:
                                    # raise Exception
                                    raise Exception(f"Back-off pulling image in {k8sgpt_file}")
                                elif "PersistentVolumeClaims" in error["Text"]:
                                    print(f"Wordpress pod has unbound immediate PersistentVolumeClaims in {k8sgpt_file}")
                                    return "Wordpress_PVC_unbound"
                                elif "Readiness probe failed" in error["Text"]:
                                    print(f"Wordpress Readiness probe failed in {k8sgpt_file}")
                                    return "Wordpress_not_ready"
                                else:
                                    print(f"Wordpress unknown error in {k8sgpt_file}")
                                    return "Unknown_error"
                    else:
                        kind = result.get("kind")
                        for error in result.get("error", []):
                            #raise Exception(f"{kind} error: {error['Text']} in {k8sgpt_file}")
                            print(f"{kind} error: {error['Text']} in {k8sgpt_file}")
                            return "Unknown_error"
        except json.JSONDecodeError:
            raise Exception(f"Error: Failed to parse JSON in {k8sgpt_file}")

In [4]:
import os  
import json
from collections import defaultdict

def aggregate_test_results(base_directory):

    aggregated_results = {
        "total_responses": 0,
        "secrets_used": 0,
        "base64_encoding_needed": 0,
        "invalid_yaml": 0,
        "kubeconform_failed": 0,
        "no_mysql_or_wordpress": 0,
        "deployment_failed": 0,
        "duplicate_resources": 0,
        "healthy": 0,
        "unhealthy": 0,
        "PVC_failed": 0,
        "MySQL_PVC_unbound": 0,
        "MySQL_not_ready": 0,
        "Wordpress_PVC_unbound": 0,
        "Wordpress_not_ready": 0,
        "Unknown_error": 0
    }
    aggregate_test_results
    scores_healthy = []
    
    # Iterate over all directories in the base directory
    for response_dir in os.listdir(base_directory):
        response_path = os.path.join(base_directory, response_dir)
        
        # Check if it's a directory and matches the response-* pattern
        if os.path.isdir(response_path) and response_dir.startswith("response-"):
            testing_file = os.path.join(response_path, "testing.json")
            
            # Check if the testing.json file exists
            if os.path.exists(testing_file):
                with open(testing_file, "r") as f:
                    try:
                        testing_data = json.load(f)

                        # Aggregate results  
                        aggregated_results["total_responses"] += 1

                        # Get extra information
                        if testing_data.get("secrets_found"):
                            aggregated_results["secrets_used"] += 1
                            print(f"secrets used in {response_path}")

                        if testing_data.get("base64_needed"):
                            aggregated_results["base64_encoding_needed"] += 1

                        if not testing_data.get("valid_yaml"):
                            aggregated_results["invalid_yaml"] += 1
                        
                        elif not testing_data.get("kubeconform"):
                            aggregated_results["kubeconform_failed"] += 1
                            kubeconform_file = os.path.join(response_path, "conform.json")
                            if not os.path.exists(kubeconform_file):
                                raise Exception(f"conform.json not found in {response_path}")

                        elif not testing_data.get("mysql_found") or not testing_data.get("wordpress_found"):
                            aggregated_results["no_mysql_or_wordpress"] += 1

                        elif not testing_data.get("deployed_successful"):
                            aggregated_results["deployment_failed"] += 1

                            for deployment_error in testing_data.get("deploy_errors"):
                                print(f"Deployment failed for {response_path}, error: {deployment_error}")
                                if "AlreadyExists" in deployment_error:
                                    aggregated_results["duplicate_resources"] += 1
                                    break

                        elif testing_data.get("healthy"):
                            aggregated_results["healthy"] += 1
                            scores_healthy.append(int(testing_data.get("polaris_score")))

                        else:
                            # why did it fail if it was not healthy
                            aggregated_results["unhealthy"] += 1

                            # Check why it failed in the k8sgpt.json file
                            k8sgpt_file = os.path.join(response_path, "k8sgpt.json")
                            if os.path.exists(k8sgpt_file):
                                cause = check_k8sgpt(k8sgpt_file)
                                if cause == "PVC_failed":
                                    aggregated_results["PVC_failed"] += 1
                                elif cause == "MySQL_PVC_unbound":
                                    aggregated_results["MySQL_PVC_unbound"] += 1
                                elif cause == "MySQL_not_ready":
                                    aggregated_results["MySQL_not_ready"] += 1
                                elif cause == "Wordpress_PVC_unbound":
                                    aggregated_results["Wordpress_PVC_unbound"] += 1
                                elif cause == "Wordpress_not_ready":
                                    aggregated_results["Wordpress_not_ready"] += 1
                                elif cause == "Unknown_error":
                                    aggregated_results["Unknown_error"] += 1
                                
                            else:
                                raise Exception(f"k8sgpt.json not found in {response_path}")
                        
                    except json.JSONDecodeError:
                        raise Exception(f"Error: Failed to parse JSON in {testing_file}")
            else:
                raise Exception(f"testing.json not found in {response_path}")
    return aggregated_results, scores_healthy

# GPT-4o: Zero-Shot

In [27]:
base_directory = "./gpt4o/zero_shot/baseline_system_prompt"
gpt4o_zs_baseline_system_prompt_results, gpt4o_zs_baseline_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")  
print(json.dumps(gpt4o_zs_baseline_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4o_zs_baseline_system_prompt_scores}")

Wordpress Readiness probe failed in ./gpt4o/zero_shot/baseline_system_prompt/response-24/k8sgpt.json
secrets used in ./gpt4o/zero_shot/baseline_system_prompt/response-18
secrets used in ./gpt4o/zero_shot/baseline_system_prompt/response-20
secrets used in ./gpt4o/zero_shot/baseline_system_prompt/response-5
Wordpress Readiness probe failed in ./gpt4o/zero_shot/baseline_system_prompt/response-5/k8sgpt.json
secrets used in ./gpt4o/zero_shot/baseline_system_prompt/response-21
secrets used in ./gpt4o/zero_shot/baseline_system_prompt/response-46
secrets used in ./gpt4o/zero_shot/baseline_system_prompt/response-29
Wordpress Readiness probe failed in ./gpt4o/zero_shot/baseline_system_prompt/response-29/k8sgpt.json
secrets used in ./gpt4o/zero_shot/baseline_system_prompt/response-32
Wordpress Readiness probe failed in ./gpt4o/zero_shot/baseline_system_prompt/response-32/k8sgpt.json
secrets used in ./gpt4o/zero_shot/baseline_system_prompt/response-38
Wordpress Readiness probe failed in ./gpt4o/ze

In [28]:
base_directory = "./gpt4o/zero_shot/baseline_system_prompt_detailed"
gpt4o_zs_baseline_system_prompt_detailed_results, gpt4o_zs_baseline_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")  
print(json.dumps(gpt4o_zs_baseline_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4o_zs_baseline_system_prompt_detailed_scores}")

secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-24
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-18
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-20
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-26
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-22
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-42
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-5
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-36
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-21
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-46
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-29
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed/response-32
secrets used in ./gpt4o/zero_shot/baseline_system_prompt_detailed

In [29]:
base_directory = "./gpt4o/zero_shot/role_system_prompt"
gpt4o_zs_role_system_prompt_results, gpt4o_zs_role_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_zs_role_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4o_zs_role_system_prompt_scores}")

secrets used in ./gpt4o/zero_shot/role_system_prompt/response-24
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_system_prompt/response-24/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_system_prompt/response-18/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_system_prompt/response-20
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_system_prompt/response-20/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_system_prompt/response-26
secrets used in ./gpt4o/zero_shot/role_system_prompt/response-42
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_system_prompt/response-42/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_system_prompt/response-36
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_system_prompt/response-21/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_system_prompt/response-46
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_system_prompt/response-46/k8sgpt.json
Wordpress Readiness probe fa

In [30]:
base_directory = "./gpt4o/zero_shot/role_system_prompt_detailed"
gpt4o_zs_role_system_prompt_detailed_results, gpt4o_zs_role_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_zs_role_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4o_zs_role_system_prompt_detailed_scores}")

secrets used in ./gpt4o/zero_shot/role_system_prompt_detailed/response-24
MySQL unknown error in ./gpt4o/zero_shot/role_system_prompt_detailed/response-24/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_system_prompt_detailed/response-18
secrets used in ./gpt4o/zero_shot/role_system_prompt_detailed/response-20
secrets used in ./gpt4o/zero_shot/role_system_prompt_detailed/response-26
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_system_prompt_detailed/response-26/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_system_prompt_detailed/response-22
secrets used in ./gpt4o/zero_shot/role_system_prompt_detailed/response-42
secrets used in ./gpt4o/zero_shot/role_system_prompt_detailed/response-5
MySQL pod has unbound immediate PersistentVolumeClaims in ./gpt4o/zero_shot/role_system_prompt_detailed/response-5/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_system_prompt_detailed/response-36
secrets used in ./gpt4o/zero_shot/role_system_prompt_detailed/response-21
secrets u

In [31]:
base_directory = "./gpt4o/zero_shot/role_best_system_prompt"
gpt4o_zs_role_best_system_prompt_results, gpt4o_zs_role_best_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_zs_role_best_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4o_zs_role_best_system_prompt_scores}")

secrets used in ./gpt4o/zero_shot/role_best_system_prompt/response-24
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_best_system_prompt/response-24/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_best_system_prompt/response-18
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_best_system_prompt/response-18/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_best_system_prompt/response-26
secrets used in ./gpt4o/zero_shot/role_best_system_prompt/response-22
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_best_system_prompt/response-22/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_best_system_prompt/response-42
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_best_system_prompt/response-42/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_best_system_prompt/response-21
secrets used in ./gpt4o/zero_shot/role_best_system_prompt/response-46
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_best_system_prompt/response-46/k8sgpt.json


In [32]:
base_directory = "./gpt4o/zero_shot/role_best_system_prompt_detailed"
gpt4o_zs_role_best_system_prompt_detailed_results, gpt4o_zs_role_best_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_zs_role_best_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4o_zs_role_best_system_prompt_detailed_scores}")

secrets used in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-24
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-24/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-18
secrets used in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-20
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-20/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-26
secrets used in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-22
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-22/k8sgpt.json
secrets used in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-5
secrets used in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-21
Wordpress Readiness probe failed in ./gpt4o/zero_shot/role_best_system_prompt_detailed/response-21/k8sgpt.json
secr

# GPT-3.5: Zero-Shot

In [33]:
base_directory = "./gpt3_5/zero_shot/baseline_system_prompt"
gpt3_5_zs_baseline_system_prompt_results, gpt3_5_zs_baseline_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_zs_baseline_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt3_5_zs_baseline_system_prompt_scores}")

Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/response-24/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/response-18/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/response-20/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/response-26/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/response-42/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/response-5/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/response-36/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/response-21/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/response-46/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt/respo

In [34]:
base_directory = "./gpt3_5/zero_shot/baseline_system_prompt_detailed"
gpt3_5_zs_baseline_system_prompt_detailed_results, gpt3_5_zs_baseline_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_zs_baseline_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt3_5_zs_baseline_system_prompt_detailed_scores}")

secrets used in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-24
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-24/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-18
secrets used in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-20
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-20/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-26
secrets used in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-22
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-22/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-42
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-42/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/baseline_system_prompt_detailed/response-5
Word

In [35]:
base_directory = "./gpt3_5/zero_shot/role_system_prompt"
gpt3_5_zs_role_system_prompt_results, gpt3_5_zs_role_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_zs_role_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt3_5_zs_role_system_prompt_scores}")

Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt/response-24/k8sgpt.json
PVC error: storageclass.storage.k8s.io "manual" not found in ./gpt3_5/zero_shot/role_system_prompt/response-18/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt/response-20/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt/response-26/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt/response-42/k8sgpt.json
PVC error: storageclass.storage.k8s.io "manual" not found in ./gpt3_5/zero_shot/role_system_prompt/response-5/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt/response-36/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt/response-46/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt/response-32/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_pr

In [36]:
base_directory = "./gpt3_5/zero_shot/role_system_prompt_detailed"
gpt3_5_zs_role_system_prompt_detailed_results, gpt3_5_zs_role_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_zs_role_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt3_5_zs_role_system_prompt_detailed_scores}")

secrets used in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-24
MySQL unknown error in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-24/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-18
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-18/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-20
Wordpress unknown error in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-20/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-26
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-26/k8sgpt.json
MySQL unknown error in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-22/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-42
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_system_prompt_detailed/response-42/k8sgpt.json
secrets used in ./

In [37]:
base_directory = "./gpt3_5/zero_shot/role_best_system_prompt"
gpt3_5_zs_role_best_system_prompt_results, gpt3_5_zs_role_best_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_zs_role_best_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt3_5_zs_role_best_system_prompt_scores}")

Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt/response-24/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt/response-18/k8sgpt.json
Wordpress unknown error in ./gpt3_5/zero_shot/role_best_system_prompt/response-20/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt/response-26/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt/response-22/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt/response-42/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt/response-36/k8sgpt.json
PVC error: storageclass.storage.k8s.io "manual" not found in ./gpt3_5/zero_shot/role_best_system_prompt/response-21/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt/response-46/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/ro

In [39]:
base_directory = "./gpt3_5/zero_shot/role_best_system_prompt_detailed"
gpt3_5_zs_role_best_system_prompt_detailed_results, gpt3_5_zs_role_best_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_zs_role_best_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt3_5_zs_role_best_system_prompt_detailed_scores}")

secrets used in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-24
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-24/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-18
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-18/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-20
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-20/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-26
Wordpress Readiness probe failed in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-26/k8sgpt.json
secrets used in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-22
secrets used in ./gpt3_5/zero_shot/role_best_system_prompt_detailed/response-42
MySQL unknown error in ./gpt3_5/zero_shot/role_best_system_prompt_detail

# GPT-4: Zero-Shot

In [40]:
base_directory = "./gpt4/zero_shot/baseline_system_prompt"
gpt4_zs_baseline_system_prompt_results, gpt4_zs_baseline_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_zs_baseline_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4_zs_baseline_system_prompt_scores}")

MySQL unknown error in ./gpt4/zero_shot/baseline_system_prompt/response-24/k8sgpt.json
MySQL unknown error in ./gpt4/zero_shot/baseline_system_prompt/response-18/k8sgpt.json
secrets used in ./gpt4/zero_shot/baseline_system_prompt/response-20
Wordpress Readiness probe failed in ./gpt4/zero_shot/baseline_system_prompt/response-20/k8sgpt.json
secrets used in ./gpt4/zero_shot/baseline_system_prompt/response-26
Wordpress Readiness probe failed in ./gpt4/zero_shot/baseline_system_prompt/response-26/k8sgpt.json
secrets used in ./gpt4/zero_shot/baseline_system_prompt/response-22
secrets used in ./gpt4/zero_shot/baseline_system_prompt/response-42
Wordpress Readiness probe failed in ./gpt4/zero_shot/baseline_system_prompt/response-42/k8sgpt.json
secrets used in ./gpt4/zero_shot/baseline_system_prompt/response-5
Wordpress Readiness probe failed in ./gpt4/zero_shot/baseline_system_prompt/response-5/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/zero_shot/baseline_system_prompt/response-36/

In [47]:
base_directory = "./gpt4/zero_shot/baseline_system_prompt_detailed"
gpt4_zs_baseline_system_prompt_detailed_results, gpt4_zs_baseline_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_zs_baseline_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4_zs_baseline_system_prompt_detailed_scores}")

secrets used in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-24
Wordpress Readiness probe failed in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-24/k8sgpt.json
secrets used in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-18
Wordpress Readiness probe failed in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-18/k8sgpt.json
secrets used in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-20
secrets used in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-26
secrets used in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-22
secrets used in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-42
Wordpress Readiness probe failed in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-42/k8sgpt.json
secrets used in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-5
secrets used in ./gpt4/zero_shot/baseline_system_prompt_detailed/response-36
Wordpress unknown error in ./gpt4/zero_shot/baseline_syste

In [56]:
base_directory = "./gpt4/zero_shot/role_system_prompt"
gpt4_zs_role_system_prompt_results, gpt4_zs_role_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_zs_role_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4_zs_role_system_prompt_scores}")

MySQL unknown error in ./gpt4/zero_shot/role_system_prompt/response-24/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_system_prompt/response-18
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_system_prompt/response-18/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_system_prompt/response-26
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_system_prompt/response-26/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_system_prompt/response-22
secrets used in ./gpt4/zero_shot/role_system_prompt/response-42
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_system_prompt/response-42/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_system_prompt/response-5/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_system_prompt/response-36
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_system_prompt/response-36/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_system_prompt/response-21
MySQL unknown error in ./gpt4/zero_shot/role_system_pr

In [51]:
base_directory = "./gpt4/zero_shot/role_system_prompt_detailed"
gpt4_zs_role_system_prompt_detailed_results, gpt4_zs_role_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_zs_role_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4_zs_role_system_prompt_detailed_scores}")

secrets used in ./gpt4/zero_shot/role_system_prompt_detailed/response-24
secrets used in ./gpt4/zero_shot/role_system_prompt_detailed/response-18
secrets used in ./gpt4/zero_shot/role_system_prompt_detailed/response-20
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_system_prompt_detailed/response-20/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_system_prompt_detailed/response-26
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_system_prompt_detailed/response-26/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_system_prompt_detailed/response-22
secrets used in ./gpt4/zero_shot/role_system_prompt_detailed/response-42
secrets used in ./gpt4/zero_shot/role_system_prompt_detailed/response-5
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_system_prompt_detailed/response-5/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_system_prompt_detailed/response-36
secrets used in ./gpt4/zero_shot/role_system_prompt_detailed/response-21
secrets used in ./gpt4/zero_sh

In [57]:
base_directory = "./gpt4/zero_shot/role_best_system_prompt"
gpt4_zs_role_best_system_prompt_results, gpt4_zs_role_best_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_zs_role_best_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4_zs_role_best_system_prompt_scores}")

secrets used in ./gpt4/zero_shot/role_best_system_prompt/response-24
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_best_system_prompt/response-24/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_best_system_prompt/response-18
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_best_system_prompt/response-18/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_best_system_prompt/response-20
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_best_system_prompt/response-20/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_best_system_prompt/response-26
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_best_system_prompt/response-26/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_best_system_prompt/response-42
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_best_system_prompt/response-42/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_best_system_prompt/response-5
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_best_system_prompt/respo

In [58]:
base_directory = "./gpt4/zero_shot/role_best_system_prompt_detailed"
gpt4_zs_role_best_system_prompt_detailed_results, gpt4_zs_role_best_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_zs_role_best_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4_zs_role_best_system_prompt_detailed_scores}")

secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-24
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-18
Wordpress Readiness probe failed in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-18/k8sgpt.json
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-20
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-26
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-22
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-42
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-5
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-36
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-21
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-46
secrets used in ./gpt4/zero_shot/role_best_system_prompt_detailed/response-29
secrets used in ./gpt4/zero_shot/

# GPT-4o: CoT

In [59]:
base_directory = "./gpt4o/cot/human_prompt"
gpt4o_cot_human_prompt_results, gpt4o_cot_human_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_cot_human_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4o_cot_human_prompt_scores}")

Wordpress Readiness probe failed in ./gpt4o/cot/human_prompt/response-26/k8sgpt.json
secrets used in ./gpt4o/cot/human_prompt/response-5
secrets used in ./gpt4o/cot/human_prompt/response-29
secrets used in ./gpt4o/cot/human_prompt/response-28
secrets used in ./gpt4o/cot/human_prompt/response-23
Wordpress Readiness probe failed in ./gpt4o/cot/human_prompt/response-23/k8sgpt.json
secrets used in ./gpt4o/cot/human_prompt/response-43
secrets used in ./gpt4o/cot/human_prompt/response-33
secrets used in ./gpt4o/cot/human_prompt/response-4
Wordpress Readiness probe failed in ./gpt4o/cot/human_prompt/response-4/k8sgpt.json
secrets used in ./gpt4o/cot/human_prompt/response-15
secrets used in ./gpt4o/cot/human_prompt/response-40
secrets used in ./gpt4o/cot/human_prompt/response-41
secrets used in ./gpt4o/cot/human_prompt/response-2
secrets used in ./gpt4o/cot/human_prompt/response-39
Wordpress Readiness probe failed in ./gpt4o/cot/human_prompt/response-39/k8sgpt.json
secrets used in ./gpt4o/cot/

In [64]:
base_directory = "./gpt4o/cot/human_prompt_detailed"
gpt4o_cot_human_prompt_detailed_results, gpt4o_cot_human_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_cot_human_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4o_cot_human_prompt_detailed_scores}")

secrets used in ./gpt4o/cot/human_prompt_detailed/response-42
Wordpress Readiness probe failed in ./gpt4o/cot/human_prompt_detailed/response-42/k8sgpt.json
secrets used in ./gpt4o/cot/human_prompt_detailed/response-46
Wordpress Readiness probe failed in ./gpt4o/cot/human_prompt_detailed/response-46/k8sgpt.json
secrets used in ./gpt4o/cot/human_prompt_detailed/response-32
secrets used in ./gpt4o/cot/human_prompt_detailed/response-38
PVC error: storageclass.storage.k8s.io "manual" not found in ./gpt4o/cot/human_prompt_detailed/response-38/k8sgpt.json
secrets used in ./gpt4o/cot/human_prompt_detailed/response-6
secrets used in ./gpt4o/cot/human_prompt_detailed/response-11
Wordpress Readiness probe failed in ./gpt4o/cot/human_prompt_detailed/response-11/k8sgpt.json
secrets used in ./gpt4o/cot/human_prompt_detailed/response-0
secrets used in ./gpt4o/cot/human_prompt_detailed/response-45
Wordpress Readiness probe failed in ./gpt4o/cot/human_prompt_detailed/response-45/k8sgpt.json
secrets use

In [62]:
base_directory = "./gpt4o/cot/ape_prompt"
gpt4o_cot_ape_prompt_results, gpt4o_cot_ape_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_cot_ape_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4o_cot_ape_prompt_scores}")

Wordpress Readiness probe failed in ./gpt4o/cot/ape_prompt/response-24/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4o/cot/ape_prompt/response-26/k8sgpt.json
secrets used in ./gpt4o/cot/ape_prompt/response-22
secrets used in ./gpt4o/cot/ape_prompt/response-36
secrets used in ./gpt4o/cot/ape_prompt/response-21
secrets used in ./gpt4o/cot/ape_prompt/response-46
secrets used in ./gpt4o/cot/ape_prompt/response-32
Wordpress Readiness probe failed in ./gpt4o/cot/ape_prompt/response-32/k8sgpt.json
secrets used in ./gpt4o/cot/ape_prompt/response-38
secrets used in ./gpt4o/cot/ape_prompt/response-11
Wordpress Readiness probe failed in ./gpt4o/cot/ape_prompt/response-11/k8sgpt.json
secrets used in ./gpt4o/cot/ape_prompt/response-0
secrets used in ./gpt4o/cot/ape_prompt/response-45
Wordpress Readiness probe failed in ./gpt4o/cot/ape_prompt/response-45/k8sgpt.json
secrets used in ./gpt4o/cot/ape_prompt/response-27
secrets used in ./gpt4o/cot/ape_prompt/response-19
secrets used in ./gpt4o/c

In [63]:
base_directory = "./gpt4o/cot/ape_prompt_detailed"
gpt4o_cot_ape_prompt_detailed_results, gpt4o_cot_ape_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_cot_ape_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4o_cot_ape_prompt_detailed_scores}")

secrets used in ./gpt4o/cot/ape_prompt_detailed/response-24
Wordpress Readiness probe failed in ./gpt4o/cot/ape_prompt_detailed/response-24/k8sgpt.json
secrets used in ./gpt4o/cot/ape_prompt_detailed/response-18
Wordpress Readiness probe failed in ./gpt4o/cot/ape_prompt_detailed/response-18/k8sgpt.json
secrets used in ./gpt4o/cot/ape_prompt_detailed/response-20
secrets used in ./gpt4o/cot/ape_prompt_detailed/response-26
secrets used in ./gpt4o/cot/ape_prompt_detailed/response-22
secrets used in ./gpt4o/cot/ape_prompt_detailed/response-5
secrets used in ./gpt4o/cot/ape_prompt_detailed/response-36
MySQL pod has unbound immediate PersistentVolumeClaims in ./gpt4o/cot/ape_prompt_detailed/response-36/k8sgpt.json
secrets used in ./gpt4o/cot/ape_prompt_detailed/response-46
secrets used in ./gpt4o/cot/ape_prompt_detailed/response-29
Wordpress Readiness probe failed in ./gpt4o/cot/ape_prompt_detailed/response-29/k8sgpt.json
secrets used in ./gpt4o/cot/ape_prompt_detailed/response-38
secrets use

# GPT-4: CoT

In [72]:
base_directory = "./gpt4/cot/human_prompt"
gpt4_cot_human_prompt_results, gpt4_cot_human_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_cot_human_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4_cot_human_prompt_scores}")

secrets used in ./gpt4/cot/human_prompt/response-18
Wordpress Readiness probe failed in ./gpt4/cot/human_prompt/response-18/k8sgpt.json
secrets used in ./gpt4/cot/human_prompt/response-20
Wordpress Readiness probe failed in ./gpt4/cot/human_prompt/response-20/k8sgpt.json
secrets used in ./gpt4/cot/human_prompt/response-26
secrets used in ./gpt4/cot/human_prompt/response-22
secrets used in ./gpt4/cot/human_prompt/response-42
Wordpress Readiness probe failed in ./gpt4/cot/human_prompt/response-42/k8sgpt.json
secrets used in ./gpt4/cot/human_prompt/response-5
Wordpress Readiness probe failed in ./gpt4/cot/human_prompt/response-5/k8sgpt.json
secrets used in ./gpt4/cot/human_prompt/response-36
Wordpress Readiness probe failed in ./gpt4/cot/human_prompt/response-36/k8sgpt.json
secrets used in ./gpt4/cot/human_prompt/response-46
Wordpress Readiness probe failed in ./gpt4/cot/human_prompt/response-46/k8sgpt.json
secrets used in ./gpt4/cot/human_prompt/response-29
secrets used in ./gpt4/cot/hum

In [73]:
base_directory = "./gpt4/cot/human_prompt_detailed"
gpt4_cot_human_prompt_detailed_results, gpt4_cot_human_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_cot_human_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4_cot_human_prompt_detailed_scores}")

secrets used in ./gpt4/cot/human_prompt_detailed/response-24
Wordpress Readiness probe failed in ./gpt4/cot/human_prompt_detailed/response-24/k8sgpt.json
secrets used in ./gpt4/cot/human_prompt_detailed/response-18
Wordpress Readiness probe failed in ./gpt4/cot/human_prompt_detailed/response-18/k8sgpt.json
secrets used in ./gpt4/cot/human_prompt_detailed/response-26
secrets used in ./gpt4/cot/human_prompt_detailed/response-22
Wordpress Readiness probe failed in ./gpt4/cot/human_prompt_detailed/response-22/k8sgpt.json
secrets used in ./gpt4/cot/human_prompt_detailed/response-42
secrets used in ./gpt4/cot/human_prompt_detailed/response-5
secrets used in ./gpt4/cot/human_prompt_detailed/response-36
secrets used in ./gpt4/cot/human_prompt_detailed/response-21
secrets used in ./gpt4/cot/human_prompt_detailed/response-46
secrets used in ./gpt4/cot/human_prompt_detailed/response-29
secrets used in ./gpt4/cot/human_prompt_detailed/response-32
Wordpress Readiness probe failed in ./gpt4/cot/huma

In [76]:
base_directory = "./gpt4/cot/ape_prompt"
gpt4_cot_ape_prompt_results, gpt4_cot_ape_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_cot_ape_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4_cot_ape_prompt_scores}")

secrets used in ./gpt4/cot/ape_prompt/response-24
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt/response-24/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt/response-18
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt/response-18/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt/response-20
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt/response-20/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt/response-26
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt/response-26/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt/response-42
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt/response-42/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt/response-46
MySQL unknown error in ./gpt4/cot/ape_prompt/response-46/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt/response-32
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt/response-32/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt/response-11
Wordpress Readiness probe failed in ./g

In [77]:
base_directory = "./gpt4/cot/ape_prompt_detailed"
gpt4_cot_ape_prompt_detailed_results, gpt4_cot_ape_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_cot_ape_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4_cot_ape_prompt_detailed_scores}")

secrets used in ./gpt4/cot/ape_prompt_detailed/response-24
secrets used in ./gpt4/cot/ape_prompt_detailed/response-18
secrets used in ./gpt4/cot/ape_prompt_detailed/response-20
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt_detailed/response-20/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt_detailed/response-26
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt_detailed/response-26/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt_detailed/response-22
secrets used in ./gpt4/cot/ape_prompt_detailed/response-42
MySQL unknown error in ./gpt4/cot/ape_prompt_detailed/response-42/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt_detailed/response-5
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt_detailed/response-5/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt_detailed/response-36
Wordpress Readiness probe failed in ./gpt4/cot/ape_prompt_detailed/response-36/k8sgpt.json
secrets used in ./gpt4/cot/ape_prompt_detailed/response-21
secrets used in ./gpt4/cot/ap

# GPT-3.5: CoT TODO run tests

In [None]:
base_directory = "./gpt3_5/cot/human_prompt"
gpt3_5_cot_human_prompt_results, gpt3_5_cot_human_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_cot_human_prompt_results, indent=4))
print(f"Polaris Scores: {gpt3_5_cot_human_prompt_scores}")

In [None]:
base_directory = "./gpt3_5/cot/human_prompt_detailed"
gpt3_5_cot_human_prompt_detailed_results, gpt3_5_cot_human_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_cot_human_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt3_5_cot_human_prompt_detailed_scores}")

In [None]:
base_directory = "./gpt3_5/cot/ape_prompt"
gpt3_5_cot_ape_prompt_results, gpt3_5_cot_ape_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_cot_ape_prompt_results, indent=4))
print(f"Polaris Scores: {gpt3_5_cot_ape_prompt_scores}")

In [None]:
base_directory = "./gpt3_5/cot/ape_prompt_detailed"
gpt3_5_cot_ape_prompt_detailed_results, gpt3_5_cot_ape_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_cot_ape_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt3_5_cot_ape_prompt_detailed_scores}")

# GPT-4o: ToT

In [130]:
base_directory = "./gpt4o/tot/tot_prompt_1"
gpt4o_tot_prompt_1_results, gpt4o_tot_prompt_1_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_tot_prompt_1_results, indent=4))
print(f"Polaris Scores: {gpt4o_tot_prompt_1_scores}")


secrets used in ./gpt4o/tot/tot_prompt_1/response-24
secrets used in ./gpt4o/tot/tot_prompt_1/response-18
secrets used in ./gpt4o/tot/tot_prompt_1/response-20
secrets used in ./gpt4o/tot/tot_prompt_1/response-22
secrets used in ./gpt4o/tot/tot_prompt_1/response-5
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_1/response-5/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_1/response-36/k8sgpt.json
secrets used in ./gpt4o/tot/tot_prompt_1/response-32
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_1/response-32/k8sgpt.json
secrets used in ./gpt4o/tot/tot_prompt_1/response-11
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_1/response-11/k8sgpt.json
secrets used in ./gpt4o/tot/tot_prompt_1/response-0
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_1/response-0/k8sgpt.json
PVC error: storageclass.storage.k8s.io "manual" not found in ./gpt4o/tot/tot_prompt_1/response-45/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4o/t

In [131]:
base_directory = "./gpt4o/tot/tot_prompt_2"
gpt4o_tot_prompt_2_results, gpt4o_tot_prompt_2_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_tot_prompt_2_results, indent=4))
print(f"Polaris Scores: {gpt4o_tot_prompt_2_scores}")

secrets used in ./gpt4o/tot/tot_prompt_2/response-24
secrets used in ./gpt4o/tot/tot_prompt_2/response-18
secrets used in ./gpt4o/tot/tot_prompt_2/response-20
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_2/response-20/k8sgpt.json
secrets used in ./gpt4o/tot/tot_prompt_2/response-26
Deployment failed for ./gpt4o/tot/tot_prompt_2/response-22, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"deployments.apps \"wordpress\" already exists","reason":"AlreadyExists","details":{"name":"wordpress","group":"apps","kind":"deployments"},"code":409}

secrets used in ./gpt4o/tot/tot_prompt_2/response-42
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_2/response-42/k8sgpt.json
secrets used in ./gpt4o/tot/tot_prompt_2/response-5
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_2/response-5/k8sgpt.json
secrets used in ./gpt4o/tot/tot_prompt_2/response-36
MySQL unknown error in ./gpt4o/tot/tot_prompt_2/response-36/k8sgpt.json
secret

In [132]:
base_directory = "./gpt4o/tot/tot_prompt_3"
gpt4o_tot_prompt_3_results, gpt4o_tot_prompt_3_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_tot_prompt_3_results, indent=4))
print(f"Polaris Scores: {gpt4o_tot_prompt_3_scores}")

Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_3/response-24/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_3/response-18/k8sgpt.json
secrets used in ./gpt4o/tot/tot_prompt_3/response-20
secrets used in ./gpt4o/tot/tot_prompt_3/response-22
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_3/response-22/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_3/response-5/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_3/response-29/k8sgpt.json
secrets used in ./gpt4o/tot/tot_prompt_3/response-32
secrets used in ./gpt4o/tot/tot_prompt_3/response-38
secrets used in ./gpt4o/tot/tot_prompt_3/response-11
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_3/response-0/k8sgpt.json
secrets used in ./gpt4o/tot/tot_prompt_3/response-45
secrets used in ./gpt4o/tot/tot_prompt_3/response-49
secrets used in ./gpt4o/tot/tot_prompt_3/response-7
Wordpress Readiness probe failed in ./gpt4o/tot/tot_prompt_3/response

In [133]:
base_directory = "./gpt4o/tot/tot_detailed_prompt_1"
gpt4o_tot_detailed_prompt_1_results, gpt4o_tot_detailed_prompt_1_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_tot_detailed_prompt_1_results, indent=4))
print(f"Polaris Scores: {gpt4o_tot_detailed_prompt_1_scores}")

secrets used in ./gpt4o/tot/tot_detailed_prompt_1/response-24
secrets used in ./gpt4o/tot/tot_detailed_prompt_1/response-18
secrets used in ./gpt4o/tot/tot_detailed_prompt_1/response-26
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_1/response-26/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_1/response-42
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_1/response-42/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_1/response-5
MySQL unknown error in ./gpt4o/tot/tot_detailed_prompt_1/response-5/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_1/response-36
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_1/response-36/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_1/response-21
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_1/response-21/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_1/response-46
Wordpress Readiness probe failed in ./gpt4o/tot/t

In [134]:
base_directory = "./gpt4o/tot/tot_detailed_prompt_2"
gpt4o_tot_detailed_prompt_2_results, gpt4o_tot_detailed_prompt_2_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_tot_detailed_prompt_2_results, indent=4))
print(f"Polaris Scores: {gpt4o_tot_detailed_prompt_2_scores}")

secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-24
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-18
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_2/response-18/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-20
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-26
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-22
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-42
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_2/response-42/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-5
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-36
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-21
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_2/response-21/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-46
secrets used in ./gpt4o/tot/tot_detailed_prompt_2/response-29
secrets used in ./gpt4o/tot/tot_detai

In [135]:
base_directory = "./gpt4o/tot/tot_detailed_prompt_3"
gpt4o_tot_detailed_prompt_3_results, gpt4o_tot_detailed_prompt_3_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_tot_detailed_prompt_3_results, indent=4))
print(f"Polaris Scores: {gpt4o_tot_detailed_prompt_3_scores}")

secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-24
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-18
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-20
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-26
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-22
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-42
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_3/response-42/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-5
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-36
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-21
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_3/response-21/k8sgpt.json
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-46
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-29
secrets used in ./gpt4o/tot/tot_detailed_prompt_3/response-32
Wordpress Readiness probe failed in ./gpt4o/tot/tot_detailed_prompt_3

# GPT-4: ToT

In [136]:
base_directory = "./gpt4/tot/tot_prompt_1"
gpt4_tot_prompt_1_results, gpt4_tot_prompt_1_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_tot_prompt_1_results, indent=4))
print(f"Polaris Scores: {gpt4_tot_prompt_1_scores}")

Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_1/response-24/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_1/response-18/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_1/response-26/k8sgpt.json
secrets used in ./gpt4/tot/tot_prompt_1/response-22
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_1/response-22/k8sgpt.json
MySQL unknown error in ./gpt4/tot/tot_prompt_1/response-5/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_1/response-32/k8sgpt.json
MySQL unknown error in ./gpt4/tot/tot_prompt_1/response-38/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_1/response-6/k8sgpt.json
secrets used in ./gpt4/tot/tot_prompt_1/response-0
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_1/response-0/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_1/response-45/k8sgpt.json
secrets used in ./gpt4/tot/tot_prompt_1/response-28
Wordpress Readiness probe failed i

In [139]:
base_directory = "./gpt4/tot/tot_prompt_2"
gpt4_tot_prompt_2_results, gpt4_tot_prompt_2_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_tot_prompt_2_results, indent=4))
print(f"Polaris Scores: {gpt4_tot_prompt_2_scores}")

Wordpress unknown error in ./gpt4/tot/tot_prompt_2/response-24/k8sgpt.json
Deployment failed for ./gpt4/tot/tot_prompt_2/response-18, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"deployments.apps \"mysql\" already exists","reason":"AlreadyExists","details":{"name":"mysql","group":"apps","kind":"deployments"},"code":409}

secrets used in ./gpt4/tot/tot_prompt_2/response-20
Deployment failed for ./gpt4/tot/tot_prompt_2/response-20, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"deployments.apps \"mysql\" already exists","reason":"AlreadyExists","details":{"name":"mysql","group":"apps","kind":"deployments"},"code":409}

secrets used in ./gpt4/tot/tot_prompt_2/response-26
Deployment failed for ./gpt4/tot/tot_prompt_2/response-26, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"Deployment.apps \"mysql\" is invalid: spec.template.spec.containers[0].volumeMounts[0].name: Not f

In [140]:
base_directory = "./gpt4/tot/tot_prompt_3"
gpt4_tot_prompt_3_results, gpt4_tot_prompt_3_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_tot_prompt_3_results, indent=4))
print(f"Polaris Scores: {gpt4_tot_prompt_3_scores}")

MySQL unknown error in ./gpt4/tot/tot_prompt_3/response-24/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_3/response-20/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_3/response-42/k8sgpt.json
secrets used in ./gpt4/tot/tot_prompt_3/response-5
StatefulSet error: create Pod mysql-0 in StatefulSet mysql failed error: Pod "mysql-0" is invalid: spec.containers[0].volumeMounts[0].name: Not found: "mysql-persistent-storage" in ./gpt4/tot/tot_prompt_3/response-5/k8sgpt.json
Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_3/response-21/k8sgpt.json
secrets used in ./gpt4/tot/tot_prompt_3/response-38
Deployment failed for ./gpt4/tot/tot_prompt_3/response-38, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"services \"wordpress\" already exists","reason":"AlreadyExists","details":{"name":"wordpress","kind":"services"},"code":409}

Wordpress Readiness probe failed in ./gpt4/tot/tot_prompt_3/response-6/k8sg

In [141]:
base_directory = "./gpt4/tot/tot_detailed_prompt_1"
gpt4_tot_detailed_prompt_1_results, gpt4_tot_detailed_prompt_1_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_tot_detailed_prompt_1_results, indent=4))
print(f"Polaris Scores: {gpt4_tot_detailed_prompt_1_scores}")

secrets used in ./gpt4/tot/tot_detailed_prompt_1/response-24
Wordpress Readiness probe failed in ./gpt4/tot/tot_detailed_prompt_1/response-24/k8sgpt.json
secrets used in ./gpt4/tot/tot_detailed_prompt_1/response-18
secrets used in ./gpt4/tot/tot_detailed_prompt_1/response-26
secrets used in ./gpt4/tot/tot_detailed_prompt_1/response-22
Wordpress Readiness probe failed in ./gpt4/tot/tot_detailed_prompt_1/response-22/k8sgpt.json
secrets used in ./gpt4/tot/tot_detailed_prompt_1/response-42
Wordpress Readiness probe failed in ./gpt4/tot/tot_detailed_prompt_1/response-42/k8sgpt.json
secrets used in ./gpt4/tot/tot_detailed_prompt_1/response-5
secrets used in ./gpt4/tot/tot_detailed_prompt_1/response-21
secrets used in ./gpt4/tot/tot_detailed_prompt_1/response-46
Wordpress Readiness probe failed in ./gpt4/tot/tot_detailed_prompt_1/response-46/k8sgpt.json
secrets used in ./gpt4/tot/tot_detailed_prompt_1/response-38
Wordpress Readiness probe failed in ./gpt4/tot/tot_detailed_prompt_1/response-38

In [144]:
base_directory = "./gpt4/tot/tot_detailed_prompt_2"
gpt4_tot_detailed_prompt_2_results, gpt4_tot_detailed_prompt_2_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_tot_detailed_prompt_2_results, indent=4))
print(f"Polaris Scores: {gpt4_tot_detailed_prompt_2_scores}")

secrets used in ./gpt4/tot/tot_detailed_prompt_2/response-24
Deployment failed for ./gpt4/tot/tot_detailed_prompt_2/response-24, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"deployments.apps \"wordpress\" already exists","reason":"AlreadyExists","details":{"name":"wordpress","group":"apps","kind":"deployments"},"code":409}

secrets used in ./gpt4/tot/tot_detailed_prompt_2/response-18
Deployment failed for ./gpt4/tot/tot_detailed_prompt_2/response-18, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"secrets \"mysql-secret\" already exists","reason":"AlreadyExists","details":{"name":"mysql-secret","kind":"secrets"},"code":409}

secrets used in ./gpt4/tot/tot_detailed_prompt_2/response-20
MySQL unknown error in ./gpt4/tot/tot_detailed_prompt_2/response-20/k8sgpt.json
secrets used in ./gpt4/tot/tot_detailed_prompt_2/response-26
Deployment failed for ./gpt4/tot/tot_detailed_prompt_2/response-26, error: {"kind":"St

In [145]:
base_directory = "./gpt4/tot/tot_detailed_prompt_3"
gpt4_tot_detailed_prompt_3_results, gpt4_tot_detailed_prompt_3_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_tot_detailed_prompt_3_results, indent=4))
print(f"Polaris Scores: {gpt4_tot_detailed_prompt_3_scores}")

secrets used in ./gpt4/tot/tot_detailed_prompt_3/response-24
MySQL unknown error in ./gpt4/tot/tot_detailed_prompt_3/response-20/k8sgpt.json
secrets used in ./gpt4/tot/tot_detailed_prompt_3/response-21
MySQL unknown error in ./gpt4/tot/tot_detailed_prompt_3/response-21/k8sgpt.json
secrets used in ./gpt4/tot/tot_detailed_prompt_3/response-32
Wordpress Readiness probe failed in ./gpt4/tot/tot_detailed_prompt_3/response-32/k8sgpt.json
MySQL unknown error in ./gpt4/tot/tot_detailed_prompt_3/response-38/k8sgpt.json
MySQL unknown error in ./gpt4/tot/tot_detailed_prompt_3/response-6/k8sgpt.json
secrets used in ./gpt4/tot/tot_detailed_prompt_3/response-11
Wordpress Readiness probe failed in ./gpt4/tot/tot_detailed_prompt_3/response-11/k8sgpt.json
secrets used in ./gpt4/tot/tot_detailed_prompt_3/response-0
secrets used in ./gpt4/tot/tot_detailed_prompt_3/response-28
Wordpress Readiness probe failed in ./gpt4/tot/tot_detailed_prompt_3/response-28/k8sgpt.json
secrets used in ./gpt4/tot/tot_detail

# GPT-3.5: ToT

In [5]:
base_directory = "./gpt3_5/tot/tot_prompt_1"
gpt3_5_tot_prompt_1_results, gpt3_5_tot_prompt_1_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_tot_prompt_1_results, indent=4))
print(f"Polaris Scores: {gpt3_5_tot_prompt_1_scores}")

Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_1/response-20/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_1/response-29/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_1/response-38/k8sgpt.json
MySQL unknown error in ./gpt3_5/tot/tot_prompt_1/response-6/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_1/response-0/k8sgpt.json
Deployment failed for ./gpt3_5/tot/tot_prompt_1/response-45, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"pods \"wordpress-mysql-pod\" already exists","reason":"AlreadyExists","details":{"name":"wordpress-mysql-pod","kind":"pods"},"code":409}

Wordpress unknown error in ./gpt3_5/tot/tot_prompt_1/response-28/k8sgpt.json
Deployment failed for ./gpt3_5/tot/tot_prompt_1/response-49, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"Deployment.apps \"mysql\" is invalid: spec.template.spec.containers[0].volum

In [6]:
base_directory = "./gpt3_5/tot/tot_prompt_2"
gpt3_5_tot_prompt_2_results, gpt3_5_tot_prompt_2_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_tot_prompt_2_results, indent=4))
print(f"Polaris Scores: {gpt3_5_tot_prompt_2_scores}")

Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_2/response-24/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_2/response-18/k8sgpt.json
Wordpress unknown error in ./gpt3_5/tot/tot_prompt_2/response-20/k8sgpt.json
Wordpress unknown error in ./gpt3_5/tot/tot_prompt_2/response-26/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_2/response-5/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_2/response-21/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_2/response-46/k8sgpt.json
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_prompt_2/response-29/k8sgpt.json
Deployment failed for ./gpt3_5/tot/tot_prompt_2/response-38, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"services \"wordpress-service\" already exists","reason":"AlreadyExists","details":{"name":"wordpress-service","kind":"services"},"code":409}

Wordpress Readiness probe failed in ./gpt3

In [7]:
base_directory = "./gpt3_5/tot/tot_prompt_3"
gpt3_5_tot_prompt_3_results, gpt3_5_tot_prompt_3_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_tot_prompt_3_results, indent=4))
print(f"Polaris Scores: {gpt3_5_tot_prompt_3_scores}")

Deployment failed for ./gpt3_5/tot/tot_prompt_3/response-20, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"namespaces \"wordpress\" already exists","reason":"AlreadyExists","details":{"name":"wordpress","kind":"namespaces"},"code":409}

Deployment failed for ./gpt3_5/tot/tot_prompt_3/response-26, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"deployments.apps \"wordpress-mysql\" already exists","reason":"AlreadyExists","details":{"name":"wordpress-mysql","group":"apps","kind":"deployments"},"code":409}

Deployment failed for ./gpt3_5/tot/tot_prompt_3/response-5, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"services \"wordpress\" already exists","reason":"AlreadyExists","details":{"name":"wordpress","kind":"services"},"code":409}

Deployment failed for ./gpt3_5/tot/tot_prompt_3/response-46, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","mes

In [8]:
base_directory = "./gpt3_5/tot/tot_detailed_prompt_1"
gpt3_5_tot_detailed_prompt_1_results, gpt3_5_tot_detailed_prompt_1_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_tot_detailed_prompt_1_results, indent=4))
print(f"Polaris Scores: {gpt3_5_tot_detailed_prompt_1_scores}")

secrets used in ./gpt3_5/tot/tot_detailed_prompt_1/response-24
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_detailed_prompt_1/response-24/k8sgpt.json
secrets used in ./gpt3_5/tot/tot_detailed_prompt_1/response-18
secrets used in ./gpt3_5/tot/tot_detailed_prompt_1/response-20
MySQL unknown error in ./gpt3_5/tot/tot_detailed_prompt_1/response-20/k8sgpt.json
secrets used in ./gpt3_5/tot/tot_detailed_prompt_1/response-26
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_detailed_prompt_1/response-26/k8sgpt.json
secrets used in ./gpt3_5/tot/tot_detailed_prompt_1/response-22
secrets used in ./gpt3_5/tot/tot_detailed_prompt_1/response-42
secrets used in ./gpt3_5/tot/tot_detailed_prompt_1/response-5
secrets used in ./gpt3_5/tot/tot_detailed_prompt_1/response-36
Deployment failed for ./gpt3_5/tot/tot_detailed_prompt_1/response-36, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"secrets \"mysql-secret\" already exists","reason":"AlreadyExists","de

In [9]:
base_directory = "./gpt3_5/tot/tot_detailed_prompt_2"
gpt3_5_tot_detailed_prompt_2_results, gpt3_5_tot_detailed_prompt_2_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_tot_detailed_prompt_2_results, indent=4))
print(f"Polaris Scores: {gpt3_5_tot_detailed_prompt_2_scores}")

secrets used in ./gpt3_5/tot/tot_detailed_prompt_2/response-24
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_detailed_prompt_2/response-24/k8sgpt.json
secrets used in ./gpt3_5/tot/tot_detailed_prompt_2/response-18
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_detailed_prompt_2/response-18/k8sgpt.json
secrets used in ./gpt3_5/tot/tot_detailed_prompt_2/response-20
Deployment failed for ./gpt3_5/tot/tot_detailed_prompt_2/response-20, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"configmaps \"wordpress-config\" already exists","reason":"AlreadyExists","details":{"name":"wordpress-config","kind":"configmaps"},"code":409}

secrets used in ./gpt3_5/tot/tot_detailed_prompt_2/response-26
Deployment failed for ./gpt3_5/tot/tot_detailed_prompt_2/response-26, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"PersistentVolume \"mysql-pv\" is invalid: spec: Required value: must specify a volume type","reason":"

In [10]:
base_directory = "./gpt3_5/tot/tot_detailed_prompt_3"
gpt3_5_tot_detailed_prompt_3_results, gpt3_5_tot_detailed_prompt_3_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_tot_detailed_prompt_3_results, indent=4))
print(f"Polaris Scores: {gpt3_5_tot_detailed_prompt_3_scores}")

secrets used in ./gpt3_5/tot/tot_detailed_prompt_3/response-24
Wordpress Readiness probe failed in ./gpt3_5/tot/tot_detailed_prompt_3/response-24/k8sgpt.json
MySQL unknown error in ./gpt3_5/tot/tot_detailed_prompt_3/response-22/k8sgpt.json
MySQL unknown error in ./gpt3_5/tot/tot_detailed_prompt_3/response-42/k8sgpt.json
secrets used in ./gpt3_5/tot/tot_detailed_prompt_3/response-5
Deployment failed for ./gpt3_5/tot/tot_detailed_prompt_3/response-5, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"configmaps \"wordpress-config\" already exists","reason":"AlreadyExists","details":{"name":"wordpress-config","kind":"configmaps"},"code":409}

secrets used in ./gpt3_5/tot/tot_detailed_prompt_3/response-36
Deployment failed for ./gpt3_5/tot/tot_detailed_prompt_3/response-36, error: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"services \"wordpress\" already exists","reason":"AlreadyExists","details":{"name":"wordpress","kin

# GPT-4o: Meta TODO

In [None]:
base_directory = "./gpt4o/meta/meta_system_prompt"
gpt4o_meta_system_prompt_results, gpt4o_meta_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_meta_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4o_meta_system_prompt_scores}")

In [None]:
base_directory = "./gpt4o/meta/meta_system_prompt_detailed"
gpt4o_meta_system_prompt_detailed_results, gpt4o_meta_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_meta_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4o_meta_system_prompt_detailed_scores}")

In [None]:
base_directory = "./gpt4o/meta/meta_meta_prompt"
gpt4o_meta_meta_prompt_results, gpt4o_meta_meta_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_meta_meta_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4o_meta_meta_prompt_scores}")

In [None]:
base_directory = "./gpt4o/meta/meta_meta_prompt_detailed"
gpt4o_meta_meta_prompt_detailed_results, gpt4o_meta_meta_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4o_meta_meta_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4o_meta_meta_prompt_detailed_scores}")

# GPT-4: Meta TODO

In [None]:
base_directory = "./gpt4/meta/meta_system_prompt"
gpt4_meta_system_prompt_results, gpt4_meta_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_meta_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4_meta_system_prompt_scores}")

In [None]:
base_directory = "./gpt4/meta/meta_system_prompt_detailed"
gpt4_meta_system_prompt_detailed_results, gpt4_meta_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_meta_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4_meta_system_prompt_detailed_scores}")

In [None]:
base_directory = "./gpt4/meta/meta_meta_prompt"
gpt4_meta_meta_prompt_results, gpt4_meta_meta_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_meta_meta_prompt_results, indent=4))
print(f"Polaris Scores: {gpt4_meta_meta_prompt_scores}")

In [None]:
base_directory = "./gpt4/meta/meta_meta_prompt_detailed"
gpt4_meta_meta_prompt_detailed_results, gpt4_meta_meta_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt4_meta_meta_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt4_meta_meta_prompt_detailed_scores}")

# GPT-3.5: Meta TODO

In [None]:
base_directory = "./gpt3_5/meta/meta_system_prompt"
gpt3_5_meta_system_prompt_results, gpt3_5_meta_system_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_meta_system_prompt_results, indent=4))
print(f"Polaris Scores: {gpt3_5_meta_system_prompt_scores}")

In [None]:
base_directory = "./gpt3_5/meta/meta_system_prompt_detailed"
gpt3_5_meta_system_prompt_detailed_results, gpt3_5_meta_system_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_meta_system_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt3_5_meta_system_prompt_detailed_scores}")

In [None]:
base_directory = "./gpt3_5/meta/meta_meta_prompt"
gpt3_5_meta_meta_prompt_results, gpt3_5_meta_meta_prompt_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_meta_meta_prompt_results, indent=4))
print(f"Polaris Scores: {gpt3_5_meta_meta_prompt_scores}")

In [None]:
base_directory = "./gpt3_5/meta/meta_meta_prompt_detailed"
gpt3_5_meta_meta_prompt_detailed_results, gpt3_5_meta_meta_prompt_detailed_scores = aggregate_test_results(base_directory)

print(f"Aggregated Results: {base_directory}")
print(json.dumps(gpt3_5_meta_meta_prompt_detailed_results, indent=4))
print(f"Polaris Scores: {gpt3_5_meta_meta_prompt_detailed_scores}")