# Classify Robot Framework Failures into Fix Categories

## Step 1: Load the structured failures JSON file

In [None]:
import json
import pandas as pd

with open("structured_failures.json", "r") as f:
    data = json.load(f)

Add example fix categories manually based on error messages

You can later update or expand these categories

In [None]:
for item in data:
    error = item["error"].lower()
    if "missing" in error and "argument" in error:
        item["fix_category"] = "missing_argument"
    elif "not found" in error or "selector" in error:
        item["fix_category"] = "invalid_selector"
    elif "assert" in error or "should be equal" in error:
        item["fix_category"] = "assertion_failed"
    elif "timeout" in error:
        item["fix_category"] = "timeout"
    elif "connection" in error:
        item["fix_category"] = "connection_error"
    else:
        item["fix_category"] = "other"

print(data)

[{'test_name': 'Access All Users With Admin Rights', 'error': "Parent suite setup failed:\nTypeError: TestObject.__init__() missing 1 required positional argument: 'api_key'", 'doc': 'Tests if all users can be accessed with Admin User.', 'steps': [], 'fix_category': 'missing_argument'}, {'test_name': 'Create User With Admin Rights', 'error': "Parent suite setup failed:\nTypeError: TestObject.__init__() missing 1 required positional argument: 'api_key'", 'doc': 'Tests if a new users can be created with Admin User.', 'steps': [], 'fix_category': 'missing_argument'}, {'test_name': 'Update User with Admin Rights', 'error': "Parent suite setup failed:\nTypeError: TestObject.__init__() missing 1 required positional argument: 'api_key'", 'doc': 'Changes Password of an existing user.', 'steps': [], 'fix_category': 'missing_argument'}, {'test_name': 'Update Own Password With User Rights', 'error': "Parent suite setup failed:\nTypeError: TestObject.__init__() missing 1 required positional argume

Convert to DataFrame

In [45]:
records = []
for item in data:
    msg = f"Error: {item['error']}\n"
    for step in item.get("steps", []):
        msg += f"Step: {step['keyword']}\n"
        msg += f"Args: {' '.join(step['args'])}\n"
        if step.get("doc"):
            msg += f"Doc: {step['doc']}\n"
        if step.get("messages"):
            msg += f"Messages: {' | '.join(step['messages'])}\n"
    records.append({
        "test_name": item["test_name"],
        "log_text": msg,
        "fix_category": item["fix_category"]
    })

#print(records)
print('\033[1m' + "Example log text :" + '\033[0m')
print(records[len(records) - 1])

df = pd.DataFrame(records)
df.head(len(df))

[1mExample log text :[0m
{'test_name': 'Test Case that fails', 'log_text': 'Error: Sorry. But that was the wrong answer... Bye Bye...\nStep: Check Correct Greeting\nArgs: Hail Our Robot Overlords!\nStep: Check Correct Greeting\nArgs: Hello World!\n', 'fix_category': 'other'}


Unnamed: 0,test_name,log_text,fix_category
0,Access All Users With Admin Rights,Error: Parent suite setup failed:\nTypeError: ...,missing_argument
1,Create User With Admin Rights,Error: Parent suite setup failed:\nTypeError: ...,missing_argument
2,Update User with Admin Rights,Error: Parent suite setup failed:\nTypeError: ...,missing_argument
3,Update Own Password With User Rights,Error: Parent suite setup failed:\nTypeError: ...,missing_argument
4,Access Own Details With User Rights,Error: Parent suite setup failed:\nTypeError: ...,missing_argument
5,Access Other Users Details With User Rights,Error: Parent suite setup failed:\nTypeError: ...,missing_argument
6,List Variables unpacked,Error: Keyword 'BuiltIn.Log' expected 1 to 6 a...,other
7,Test for the year 2022,Error: 2025 != 2022\nStep: Get Current Date\nA...,other
8,Test Case that fails,Error: Sorry. But that was the wrong answer......,other


## Step 2: Embed the logs using TF-IDF

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(max_features=500, stop_words="english")
X = vectorizer.fit_transform(df["log_text"])
y = df["fix_category"]

## Step 3: Train/test split + classifier training

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = RandomForestClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

## Step 4: Evaluate

In [None]:
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))


Classification Report:

                  precision    recall  f1-score   support

missing_argument       1.00      1.00      1.00         1
           other       1.00      1.00      1.00         1

        accuracy                           1.00         2
       macro avg       1.00      1.00      1.00         2
    weighted avg       1.00      1.00      1.00         2


Confusion Matrix:

[[1 0]
 [0 1]]


## Step 5: Predict new failure

In [None]:
new_data = {
    "test_name": "Connect without API key",
    "error": "TypeError: TestObject.__init__() missing 1 required positional argument: 'api_key'",
    "doc": "Attempts to connect to the server without providing API key.",
    "steps": [
        {
            "keyword": "Connect",
            "args": ["http://localhost"],
            "status": "FAIL",
            "depth": 0,
            "doc": "Connects to backend server using TestObject",
            "messages": ["Connecting to http://localhost", "Exception raised: missing 'api_key'"]
        }
    ]
}

log_text = f"Test name: {new_data['test_name']}\n"
log_text += f"Doc: {new_data['doc']}\n"
log_text += f"Error: {new_data['error']}\n"
for step in new_data["steps"]:
    log_text += f"Step: {step['keyword']}\n"
    log_text += f"Args: {' '.join(step['args'])}\n"
    log_text += f"Status: {step['status']}\n"
    if step.get("doc"):
        log_text += f"Doc: {step['doc']}\n"
    if step.get("messages"):
        log_text += f"Messages: {' | '.join(step['messages'])}\n"

print('\033[1m' + "New log text :\n" + '\033[0m' + log_text)

new_vec = vectorizer.transform([log_text])
pred = clf.predict(new_vec)
print("Prediction:", pred[0])

[1mNew log text :
[0mTest name: Connect without API key
Doc: Attempts to connect to the server without providing API key.
Error: TypeError: TestObject.__init__() missing 1 required positional argument: 'api_key'
Step: Connect
Args: http://localhost
Status: FAIL
Doc: Connects to backend server using TestObject
Messages: Connecting to http://localhost | Exception raised: missing 'api_key'

Prediction: missing_argument
