In [1]:
import os
import shutil
import re

from harness_pipeline import HarnessPipeline
from datalogger import DataLogger
from seed_data_mappers import lookup

# Helper functions

In [2]:
def count_files(directory):
    """Return how many regular files sit directly inside ``directory``.

    Sub-directories are not counted and are not descended into.

    Args:
        directory: Path of the directory to inspect.

    Returns:
        The number of regular files, or ``-1`` if the directory could not be
        read (the error is printed rather than raised).
    """
    try:
        # Every name in the directory is checked; only plain files contribute.
        total = sum(
            1
            for name in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, name))
        )
    except Exception as e:
        # Best-effort helper: report the problem and hand back the sentinel.
        print(f"Error accessing directory: {e}")
        return -1

    return total

In [3]:
def get_files(directory):
    """Collect the driver files in ``directory`` together with their target names.

    A driver is a regular file whose *file name* contains
    ``atheris_<target>_Initial``; ``<target>`` is the extracted name.
    Sub-directories and files that do not follow that naming scheme are
    ignored.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A list of ``(entry_path, target_name)`` tuples ordered by file name.

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    ret = []
    # os.listdir returns entries in arbitrary order; sort so every run visits
    # the drivers in the same sequence.
    for entry in sorted(os.listdir(directory)):
        entry_path = os.path.join(directory, entry)
        # Only regular files can be drivers.
        if not os.path.isfile(entry_path):
            continue
        # Search the file name, not the full path, so a directory component
        # containing "atheris_" cannot leak into the extracted name.
        match = re.search(r"atheris_(.*?)_Initial", entry)
        if match is None:
            # Unrelated file (README, .DS_Store, ...): skip it rather than
            # failing with an IndexError.
            continue
        ret.append((entry_path, match.group(1)))
    return ret

# Experiment Setup

In [4]:
# Set to True to get results for only the seeds without mutation
corpus_only = True

# Locations used by the experiment (each ends with a path separator because
# later cells build paths by plain string concatenation).
save_path = './results/'
corpora_path = './corpora/'
directory_path = './drivers/'

# Corpora to test with, must be present in the 'corpora_path'.
# Every model is paired with every corpus kind at a single temperature; the
# resulting list is ordered model-major, i.e. all kinds for the first model,
# then all kinds for the second, and so on.
_model_versions = (
    'gpt-3.5-turbo',
    'gpt-4-turbo-preview',
    'claude-3-opus-20240229',
    'claude-instant-1.2',
    'gemini-1.0-pro',
)
_corpus_kinds = ('simple', 'complex', 'merge')
_temperature = 1.0

tests = [
    (model, _temperature, kind)
    for model in _model_versions
    for kind in _corpus_kinds
]

# Run Experiment

Step through each driver, load the appropriate corpora and capture coverage measures.

In [None]:
# Make sure the output directory exists before any (potentially long) run, so
# a finished measurement cannot be lost just because the folder is missing.
os.makedirs(save_path, exist_ok=True)

# Suffix that marks results gathered from the seed corpus alone; it does not
# change between iterations, so compute it once.
post_fix = "_only" if corpus_only else ""

for filepath, name in get_files(directory_path):
    print(f"\nTesting: {name}\n")
    for model_version, temperature, corpus_name in tests:
        results_file = f"{save_path}{name}_{model_version}_{temperature}_{corpus_name}{post_fix}"

        # Skip if results already exist to support restarting
        if os.path.exists(results_file + ".parquet"):
            print("Skipping\n")
            continue

        # Defaults for a full run; both are overridden in corpus-only mode.
        num_profiling_runs = 100_000
        harness_runs = 5

        # The special corpus name "none" means "run without a seed corpus".
        if corpus_name == "none":
            corpus_location = None
        else:
            corpus_location = f"{corpora_path}{name}/{model_version}/{temperature}/{corpus_name}_corpus"

        if corpus_only:
            # Nothing to measure when there is no corpus to replay.
            if corpus_location is None:
                continue
            # One pass, one step per seed file.
            harness_runs = 1
            num_profiling_runs = count_files(corpus_location)
            if num_profiling_runs < 0:
                # count_files signals an unreadable/missing directory with a
                # negative value; never forward that as a run count.
                print(f"  Corpus unavailable, skipping: {corpus_location}\n")
                continue
            print(f"({num_profiling_runs}) {corpus_location}\n")

        # Just using the testing functionality directly
        harness_maker = HarnessPipeline(lookup[name], num_profiling_runs=num_profiling_runs, harness_runs=harness_runs,
                                        model_version=model_version, temperature=temperature, use_docs=False)

        # NOTE(review): the two return values are only printed below as
        # "Avgs" and "Diff"; their exact definition lives in HarnessPipeline.
        avg, diff = harness_maker.test_harness(filepath, corpus_location=corpus_location, finish_df=False)
        print(f"  Runs: {harness_runs}, Steps: {num_profiling_runs}\n")
        print(f"  Model: {model_version}, Temp: {temperature}, Corpus: {corpus_name}\n")
        print(f"  Avgs: {avg:0.2f}, Diff: {diff}\n\n")

        # Presumably persists the collected measurements as
        # `results_file + ".parquet"`, which is what the restart check above
        # looks for — TODO confirm against DataLogger.
        DataLogger.create_dataframe(results_file)
print("\n>> Done <<\n")


Testing: email.utils.formataddr

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping


Testing: email.utils.parseaddr

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

Skipping

(16) ./corpora/email.utils.parseaddr/gemini-1.0-pro/1.0/complex_corpus

Corpus Name: Corpora/email.utils.parseaddr/gemini-1.0-pro/1.0/complex
Command: conda run -n amira coverage run --source=email ./drivers/atheris_email.utils.parseaddr_Initial_1.py ./corpora/email.utils.parseaddr/gemini-1.0-pro/1.0/complex_corpus -max_len=500 -atheris_runs=16
Coverage: 6.91%
  Runs: 1, Steps: 16

  Model: gemini-1.0-pro, Temp: 1.0, Corpus: complex

  Avgs: 6.91, Diff: 0.0


(136) ./corpora/email.utils.parseaddr/gemini-1.0-pro/1.0/merge_corpus

Corpus Name: Corpora/email.utils.parseaddr/gemini-1.0-pro/1.0/merge
Command: conda run -n amira coverage run --source=