In [None]:
from helpers import *
import time

# The function below expects a JSON file formatted like `testing_data.json`

In [None]:
from transformers import  DebertaV2Tokenizer, AutoModelForSequenceClassification
def process_commits_and_generate_feedback_with_sciper(input_json_path, output_json_path, model_name_or_path, spacy_nlp_name="en_core_web_sm", start=None, end=None):
    """
    Grades every commit message of the selected SCIPERs, generates feedback for each,
    computes the average grade per SCIPER and saves everything to a JSON file.
    Also measures and prints the total execution time, the average time per SCIPER
    and the average time per commit.

    Args:
        input_json_path (str): Path to the input JSON file.
        output_json_path (str): Path to save the output JSON file.
        model_name_or_path (str): Hugging Face repo name or local path to model.
        spacy_nlp_name (str, optional): The name of the Spacy model to be used.
        start (int, optional): Index of the first SCIPER to process (inclusive).
            May be given without `end`; None means "from the first SCIPER".
        end (int, optional): Index at which SCIPER processing stops (exclusive).
            May be given without `start`; None means "up to the last SCIPER".

    Returns:
        dict: Maps each processed SCIPER to a dictionary with two keys:
            "commits" (list of dicts with "hash", "commit_message", "grade" and
            "feedback") and "average_grade" (mean final grade rounded to two
            decimals, or 0 when the SCIPER has no commits).
    """

    # Keys of the error report handed to generate_feedback, listed in the order
    # in which their values are read from grade_results (positions 2 to 12).
    error_flag_names = (
        "is_desc_too_long",
        "is_uppercase",
        "is_not_imp_verb",
        "is_perfect_prefix",
        "is_uppercase_prefix",
        "is_typo_prefix",
        "is_uppercase_and_typo_prefix",
        "is_invalid_prefix",
        "is_body_meaningful",
        "is_body_too_long",
        "is_body_evaluated",
    )

    commit_data = extract_commit_messages_per_sciper(input_json_path)

    # A slice accepts None on either side, so `start` and `end` can be supplied
    # independently; leaving both at None keeps every SCIPER.
    sciper_ids = list(commit_data.keys())[start:end]

    output_data = {}

    total_commits = 0
    total_sciper_time = 0

    # Load Spacy model
    nlp = spacy.load(spacy_nlp_name)

    # Load tokenizer and model from HF or local
    tokenizer = DebertaV2Tokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path)

    # # Use this piece of code instead of the previous one to use the classic BERT
    # tokenizer = BertTokenizer.from_pretrained(model_name_or_path)
    # model = BertForSequenceClassification.from_pretrained(model_name_or_path)

    # Start timing (model loading above is deliberately excluded from the statistics)
    total_start_time = time.time()

    for sciper in tqdm(sciper_ids, desc="Processing SCIPERs"):
        sciper_start_time = time.time()

        commits = commit_data[sciper]
        total_grade = 0
        processed_commits = []

        for idx, commit in enumerate(tqdm(commits, desc=f"Processing commits for SCIPER {sciper}", leave=False), start=1):
            commit_message = commit["commit_message"]
            commit_hash = commit["hash"]

            print(f"Grading commit {idx} of {len(commits)} for SCIPER {sciper}")

            grade_results = grade_commit_message(commit_message, nlp, tokenizer, model, no_openai=False)
            description_grade = grade_results[0]
            final_grade = grade_results[1]
            # Positions 2..12 of grade_results hold the individual error flags.
            errors = {
                name: grade_results[position]
                for position, name in enumerate(error_flag_names, start=2)
            }

            feedback = generate_feedback(commit_message, final_grade, description_grade, errors)

            processed_commits.append({
                "hash": commit_hash,
                "commit_message": commit_message,
                "grade": final_grade,
                "feedback": feedback
            })

            total_grade += final_grade
            total_commits += 1

        # Guard against a SCIPER that has no commits at all.
        average_grade = round(total_grade / len(commits), 2) if commits else 0

        output_data[sciper] = {
            "commits": processed_commits,
            "average_grade": average_grade
        }

        total_sciper_time += time.time() - sciper_start_time

    total_duration = time.time() - total_start_time

    average_time_per_sciper = total_sciper_time / len(sciper_ids) if sciper_ids else 0
    average_time_per_commit = total_duration / total_commits if total_commits else 0

    with open(output_json_path, "w") as file:
        json.dump(output_data, file, indent=4)

    print("\n--- Time Statistics ---")
    print(f"Total time: {total_duration:.2f} seconds")
    print(f"Average time per SCIPER: {average_time_per_sciper:.2f} seconds")
    print(f"Average time per commit: {average_time_per_commit:.2f} seconds")
    print("-----------------------")

    print("Processing complete. Results saved to:", output_json_path)
    return output_data

# Example usage: run the cell below to grade the `testing_data` (B3 grades)

## **!!! Please keep in mind that this is linked to a personal OpenAI API key and that each run costs money to generate feedback, so use it responsibly when testing !!!**

### Use the *start* and *end* arguments to process only a slice of the SCIPERs, which limits the number of commit messages graded

In [None]:
# Example usage: grade a slice of the SCIPERs found in the test data
input_json_path = "testing_data.json"
output_json_path = "output_grading_compare_deberta_5_7.json"

# Hugging Face repo name of the model (a local folder path is accepted as well)
model_name_or_path = "albertfares/CommitGraderModelDeBERTa"

# Set your API key here
# openai.api_key = ""

# Only the SCIPERs at positions 5 and 6 are graded (start is inclusive, end is exclusive)
results = process_commits_and_generate_feedback_with_sciper(
    input_json_path=input_json_path,
    output_json_path=output_json_path,
    model_name_or_path=model_name_or_path,
    start=5,
    end=7,
)

# The function below is used to grade a single commit message

In [None]:
from transformers import  DebertaV2Tokenizer, AutoModelForSequenceClassification
def process_single_commit(commit_message, model_path, spacy_nlp_name="en_core_web_sm"):
    """
    Grade one commit message, build its feedback and print the outcome.

    The Spacy pipeline, the DeBERTa-v2 tokenizer and the sequence classification
    model are loaded on every call, then handed to `grade_commit_message`.

    Args:
        commit_message (str): The commit message to grade.
        model_path (str): Identifier passed to `from_pretrained` for both the
            tokenizer and the model.
        spacy_nlp_name (str, optional): Name of the Spacy model to load.

    Returns:
        dict: {"commit_message", "grade", "feedback"} on success. If anything
        raises along the way, the error is printed and an empty dict is returned.
    """
    # Error-report keys, in the order their values appear in the grading
    # result (positions 2 through 12).
    flag_names = (
        "is_desc_too_long",
        "is_uppercase",
        "is_not_imp_verb",
        "is_perfect_prefix",
        "is_uppercase_prefix",
        "is_typo_prefix",
        "is_uppercase_and_typo_prefix",
        "is_invalid_prefix",
        "is_body_meaningful",
        "is_body_too_long",
        "is_body_evaluated",
    )

    try:
        # Everything the grader needs: Spacy pipeline, tokenizer and classifier
        nlp = spacy.load(spacy_nlp_name)
        tokenizer = DebertaV2Tokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)

        # Positions 0 and 1 are the two grades; the rest are the error flags
        graded = grade_commit_message(commit_message, nlp, tokenizer, model, no_openai=False)
        description_grade = graded[0]
        final_grade = graded[1]
        errors = {key: graded[slot] for slot, key in enumerate(flag_names, start=2)}

        feedback = generate_feedback(commit_message, final_grade, description_grade, errors)

        # Show the outcome to the user
        print(f"Commit Message: {commit_message}")
        print(f"Grade: {final_grade}")
        print(f"Feedback: {feedback}")

        return {
            "commit_message": commit_message,
            "grade": final_grade,
            "feedback": feedback
        }

    except Exception as e:
        # Best effort: report the problem and hand back an empty result
        print(f"Error: {e}")
        return {}

### This is an example usage

In [None]:
# Hugging Face repo name of the DeBERTa grading model
model_name_or_path = "albertfares/CommitGraderModelDeBERTa"

# openai.api_key = ""

# Commit message to grade
commit_message = "format: runnning ktfmt"

result = process_single_commit(
    commit_message=commit_message,
    model_path=model_name_or_path,
)

### Old BERT version below

In [None]:
def process_single_commit2(commit_message, model_path, spacy_nlp_name="en_core_web_sm"):
    """
    Grade one commit message with the BERT model, build its feedback and print the outcome.

    Args:
        commit_message (str): The commit message to grade.
        model_path (str): Identifier passed to `from_pretrained` for both the
            BERT tokenizer and the BERT classification model.
        spacy_nlp_name (str, optional): Name of the Spacy model to load.

    Returns:
        dict: {"commit_message", "grade", "feedback"} on success. If anything
        raises along the way, the error is printed and an empty dict is returned.
    """
    # (position in the grading result, key in the error report)
    flag_slots = (
        (2, "is_desc_too_long"),
        (3, "is_uppercase"),
        (4, "is_not_imp_verb"),
        (5, "is_perfect_prefix"),
        (6, "is_uppercase_prefix"),
        (7, "is_typo_prefix"),
        (8, "is_uppercase_and_typo_prefix"),
        (9, "is_invalid_prefix"),
        (10, "is_body_meaningful"),
        (11, "is_body_too_long"),
        (12, "is_body_evaluated"),
    )

    try:
        # Spacy pipeline first, then the BERT tokenizer and classifier
        nlp = spacy.load(spacy_nlp_name)
        tokenizer = BertTokenizer.from_pretrained(model_path)
        model = BertForSequenceClassification.from_pretrained(model_path)

        # Grade the message; the first two entries are the grades
        outcome = grade_commit_message(commit_message, nlp, tokenizer, model, no_openai=False)
        description_grade = outcome[0]
        final_grade = outcome[1]
        errors = {}
        for slot, key in flag_slots:
            errors[key] = outcome[slot]

        feedback = generate_feedback(commit_message, final_grade, description_grade, errors)

        # Show the outcome to the user
        print(f"Commit Message: {commit_message}")
        print(f"Grade: {final_grade}")
        print(f"Feedback: {feedback}")

        return {
            "commit_message": commit_message,
            "grade": final_grade,
            "feedback": feedback
        }

    except Exception as e:
        # Best effort: report the problem and hand back an empty result
        print(f"Error: {e}")
        return {}

In [None]:
# Hugging Face repo name of the older BERT grading model
model_name_or_path = "albertfares/CommitGraderModel"

# openai.api_key = ""

# Commit message to grade
commit_message = "test(map): add ui tests for map"

result = process_single_commit2(
    commit_message=commit_message,
    model_path=model_name_or_path,
)