In [None]:
import re
import os
import nbformat
from nbformat.v4 import new_code_cell
from nbconvert.preprocessors import ExecutePreprocessor, CellExecutionError
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, BitsAndBytesConfig

### Helper functions

In [None]:
def add_to_log(text, log_path):
    """Append ``text`` followed by a newline to the log file at ``log_path``.

    Parameters:
    - text: The string to append.
    - log_path: Path of the log file. Missing parent directories are created.
      A bare filename (no directory component) is written to the current
      working directory.
    """
    log_dir = os.path.dirname(log_path)
    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when the path actually has a directory component.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Explicit encoding so logged model output does not depend on the locale.
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(text + "\n")
        
def extract_python_code(text):
    """Return the contents of the first fenced code block found in ``text``.

    The fence may optionally be tagged ``python`` (case-insensitive). The
    captured code is stripped of surrounding whitespace. Returns ``None``
    when ``text`` contains no complete fenced block.
    """
    fence = re.compile(r'(?i)```(?:python)?\s*([\s\S]*?)```')
    found = fence.search(text)
    return found.group(1).strip() if found else None
    
def extract_section(file_path, title):
    """Return the body of the markdown section headed by ``title``.

    Parameters:
    - file_path: Path to the markdown file to search.
    - title: Section title without the leading ``#`` marker. It is matched
      literally, so characters such as ``(``, ``+`` or ``.`` are safe.

    Returns:
    - The text between the ``# {title}`` header and the next line starting
      with ``#`` (or the end of the file), stripped of surrounding whitespace.
      ``None`` if the header is not found.
    """
    # Add the markdown header symbol to the title
    title_header = f"# {title}"

    # Read the file contents
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Escape the header so regex metacharacters in the title are matched
    # literally and cannot introduce extra groups that would shift group(1).
    pattern = rf'{re.escape(title_header)}\s*(.*?)(?=\n#|\Z)'  # Match content between titles
    match = re.search(pattern, content, re.S)

    if match:
        return match.group(1).strip()  # Return the matched content
    return None  # The title was not found
    
def open_notebook(notebook_path):
    """Return the source of every code cell in a notebook as one script string.

    Parameters:
    - notebook_path: Path to the ``.ipynb`` file.

    Returns:
    - The sources of all code cells, in notebook order, joined by newlines.
      Markdown and other non-code cells are skipped.
    """
    # Read with an explicit encoding, like the other notebook helpers in this
    # file, so the result does not depend on the platform's default locale.
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)

    # Combine the code cells into a single script
    return "\n".join(
        cell['source'] for cell in notebook.cells if cell['cell_type'] == 'code'
    )

def add_code_cell_to_notebook(notebook_path, generated_code):
    """Append ``generated_code`` as a new code cell at the end of a notebook file.

    The notebook is read from ``notebook_path``, the cell is appended after
    all existing cells, and the file is overwritten in place.

    Parameters:
    - notebook_path: Path to the ``.ipynb`` file to modify.
    - generated_code: Source text for the new code cell.

    Raises:
    - ValueError: If ``generated_code`` is not a non-blank string (for example
      ``None`` when no code block could be extracted from a model response).
      The notebook file is not touched in that case.
    """
    # Reject missing code before opening the file so a failed extraction
    # upstream cannot modify the notebook.
    if not isinstance(generated_code, str) or not generated_code.strip():
        raise ValueError(
            "generated_code must be a non-empty string; "
            f"got {generated_code!r}"
        )

    # Load the existing notebook
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)

    # Append the new code cell at the end of the notebook
    notebook.cells.append(new_code_cell(source=generated_code))

    # Save the modified notebook back to the file
    with open(notebook_path, 'w', encoding='utf-8') as f:
        nbformat.write(notebook, f)
        
def setup_environment():
    """Return the Hugging Face API key used to download the model.

    The key is read from the ``LLAMA3_KEY`` environment variable. When the
    variable is not set, the user is prompted for it; the prompt uses
    ``getpass`` so the typed key is not echoed into the notebook output.

    Returns:
    - The API key as a string.
    """
    import getpass  # local import: only needed for the interactive fallback

    hf_key = os.environ.get("LLAMA3_KEY")
    if hf_key is None:
        print("Please set the environment variable LLAMA3_KEY")
        hf_key = getpass.getpass("Enter your HuggingFace API key: ")
    return hf_key

def load_model(hf_key, model_name="meta-llama/Meta-Llama-3-8B-Instruct"):
    """Load the LLM, its tokenizer, and its config from Hugging Face.

    Parameters:
    - hf_key: Hugging Face access token, passed as ``token`` to every
      ``from_pretrained`` call.
    - model_name: Hub identifier of the checkpoint to load. Defaults to the
      Llama 3 8B Instruct checkpoint this notebook was written for.

    Returns:
    - A tuple ``(model, tokenizer, config)``.
    """
    # Load the weights in 8-bit through bitsandbytes
    quant_config = BitsAndBytesConfig(load_in_8bit=True)

    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_key)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        token=hf_key,
        quantization_config=quant_config,
        device_map="auto",
        max_length=8192,
    )
    config = AutoConfig.from_pretrained(model_name, token=hf_key)

    return model, tokenizer, config

def generate_response(model, tokenizer, messages, config):
    """Generate a response from the model for a chat-style message list.

    Parameters:
    - model: The causal LM to generate with.
    - tokenizer: The tokenizer matching ``model``; must provide a chat template.
    - messages: List of ``{"role": ..., "content": ...}`` dicts.
    - config: Model config; only ``max_position_embeddings`` is read, for the
      token-usage message.

    Returns:
    - The decoded text of the newly generated tokens only (the prompt is
      sliced off), with special tokens removed.
    """
    # Move the prompt to wherever the model was placed (it is loaded with
    # device_map="auto") instead of assuming a device named 'cuda'.
    model_inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Stop on either the regular EOS token or the end-of-turn token
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]

    generated_ids = model.generate(
        model_inputs,
        do_sample=True,
        temperature=0.1,
        eos_token_id=terminators,
        pad_token_id=tokenizer.eos_token_id,
        max_length=4500
    )

    # Reports the prompt length only, not the generated tokens
    print(f"Tokens used: {len(model_inputs[0])} out of {config.max_position_embeddings}")

    # Drop the prompt tokens so that only the completion is decoded
    response = generated_ids[0][model_inputs.shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)

In [None]:
def execute_notebook(notebook_path, return_last_two=False):
    """
    Run every cell of a Jupyter notebook and report the trailing cell outputs.

    Parameters:
    - notebook_path: Path to the notebook file.
    - return_last_two: When True, the output of the second last code cell is
      printed and returned as well. When False, only the last code cell's
      output is reported.

    Returns:
    - A tuple ``(second_last_output, last_output)``. The first element is
      None when return_last_two is False or when the notebook has fewer than
      two code cells; the second is None when the notebook has no code cells.
    """
    def _cell_text(cell):
        # Collect the text of each output that carries a 'text' or 'data' entry
        pieces = []
        for item in cell.get('outputs', []):
            if 'text' in item:
                pieces.append(item['text'])
                continue
            if 'data' not in item:
                continue
            bundle = item['data']
            pieces.append(bundle['text/plain'] if 'text/plain' in bundle else str(bundle))
        return '\n'.join(pieces)

    # Read the notebook from disk
    with open(notebook_path, 'r', encoding='utf-8') as handle:
        notebook = nbformat.read(handle, as_version=4)

    # Run all cells in place
    executor = ExecutePreprocessor(timeout=600, kernel_name='python3')
    executor.preprocess(notebook, {'metadata': notebook.metadata})

    # Pick out the trailing code cells
    executed = [cell for cell in notebook.cells if cell.cell_type == 'code']
    last_text = _cell_text(executed[-1]) if executed else None
    previous_text = _cell_text(executed[-2]) if len(executed) >= 2 else None

    if not return_last_two:
        # Caller did not ask for the second last cell's output
        previous_text = None
    elif previous_text:
        print("Output of the second last cell:")
        print(previous_text)
        print("\n")

    print("Output of the last cell:")
    print(last_text)
    print("\n")

    return previous_text, last_text

## 1. Set up the model

This step includes downloading the LLM from Hugging Face and setting up the API key.

You can temporarily set your API key for the current session from Python with os.environ["LLAMA3_KEY"] = "YOUR_API_KEY". If the variable is not set, you will be prompted to enter the key.

WARNING: NEVER SHARE/PUSH YOUR API KEY.

In [None]:
# Read the Hugging Face key, then load the model, tokenizer, and config with it
hf_key = setup_environment()
model, tokenizer, config = load_model(hf_key)

## 2.  Set file directories

In [None]:
# Set the CWD to the directory of your repository
os.chdir('/home/gunes/fair/responsible-ai-model-cards/Steps-v3-thesis')

### START - UPDATE THE FOLLOWING VARIABLES ###
case_name = "norauto"
sensitive_attribute = "Male"
fairness_metric = "Equalized Odds Difference"

mitigation_technique = "Exponentiated Gradient"
constraint = "Equalized Odds"
mitigation_method = f"{mitigation_technique} with {constraint} constraint"

### END - UPDATE THE FOLLOWING VARIABLES ###

# Reference snippets for the chosen metric and mitigation technique
fairlearn_doc_path = "files/fairness-tools/fairlearn-lib.md"
measurement_code = extract_section(fairlearn_doc_path, fairness_metric)
mitigation_code = extract_section(fairlearn_doc_path, mitigation_technique)

# extract_section returns None for an unknown title; stop here rather than
# letting the literal text "None" be interpolated into the prompts below.
missing_sections = [
    title
    for title, section in (
        (fairness_metric, measurement_code),
        (mitigation_technique, mitigation_code),
    )
    if section is None
]
if missing_sections:
    raise ValueError(f"Section(s) not found in {fairlearn_doc_path}: {missing_sections}")

case_path = "files/cases/" + case_name
model_notebook_path = "files/model-generator/" + case_name + "/model.ipynb"
log_path = "files/logs/" + case_name + ".txt"

# All code cells of the model notebook, joined into one script for the prompts
notebook_code = open_notebook(model_notebook_path)

## 3. Fairness measurement

In [None]:
# Prompt for generating the fairness-measurement code: the system message holds
# the task and guidelines, the user message holds the fairlearn reference
# section and the model notebook's code.
messages = [
    {"role": "system", "content": f"""You are an expert in Python and the fairlearn library. Your task is to generate code that measures the fairness metric: {fairness_metric} for the model and the sensitive attribute {sensitive_attribute} using the fairlearn library. The generated code will be an addition to the existing model code and will run together seamlessly.
    Follow these guidelines strictly and then generate the code:
    1. Refer to the provided notebook model code for variable names.
    2. Pay attention to function parameters, DataFrame shapes, and test variables.
    3. Pay attention to case sensitivity in the variable names.
    4. Extract the sensitive attribute from the variable that contains the features of the test dataset by using the name of the sensitive attribute.
    5. Make sure to import the necessary methods from the fairlearn library.
    6. Include comments in the generated code to explain each step clearly.
    7. Avoid duplicating completed steps from the model code.
    8. Avoid assuming the order of the features in the DataFrame."""},
    {"role": "user", "content": f"Fairlearn library: {measurement_code}\n\nModel code: {notebook_code}"},
]

In [None]:
# Ask the model for measurement code and keep only the first fenced code block
# (extract_python_code returns None when the response contains none).
result_measurement = generate_response(model, tokenizer, messages, config)
generated_code_measurement = extract_python_code(result_measurement)
print(generated_code_measurement)

## Validate generated code

In [None]:
# Prompt asking the model to review the generated measurement code against the
# model notebook's code. The two code listings in the user message are
# separated by a blank line, matching the mitigation validation prompt.
messages = [
    {"role": "system", "content": f"""You are an intelligent assistant tasked with verifying the correctness of Python code. Your objective is to evaluate the provided generated code that is intended to integrate with the existing model code. Ensure that the generated code aligns perfectly with the model code and correctly utilizes the fairlearn library. 
        Follow these steps for the verification process and then start evaluating the code:
        1. Make sure that sensitive attribute is extracted from the variable that contains the features of the test dataset by using the name of the sensitive attribute. Only setting the sensitive attribute as a variable is not enough.
        2. Pay attention to case sensitivity in the variable names.
        3. Compare variable names and structures in the generated code with those in the existing model code to ensure variable consistency.
        4. Identify and report any hallucinations and assumptions (eg. index value of a feature) in the generated code that do not fit the context of the model code.
        5. Make sure that necessary methods are imported from the fairlearn library.
        6. List any necessary changes to align the generated code with the existing model code. If no update is needed, skip this step.
        
        If any update is needed, provide an updated and corrected version of the generated code. If no update is needed, print the generated code as is."""},
    {"role": "user", "content": f"Here is the existing model code:\n\n{notebook_code}\n\nHere is the generated code to be evaluated:\n\n{generated_code_measurement}"},
]

In [None]:
# Run the validation prompt and keep the first fenced code block of the reply
response_validation_measurement = generate_response(model, tokenizer, messages, config)
validated_code_measurement = extract_python_code(response_validation_measurement)
print(validated_code_measurement)

## 3.1 Update the code

In [None]:
# Appends a new cell on every run: re-running this cell adds the code again
add_code_cell_to_notebook(model_notebook_path, validated_code_measurement)

## 3.2 Execute the updated code

In [None]:
# execute_notebook returns (second_last_output, last_output); index 1 is the
# output of the last code cell, i.e. the measurement cell appended above.
measurement_last_output = execute_notebook(model_notebook_path)
print(measurement_last_output[1])

## 4. Mitigation

In [None]:
# Re-read the notebook so the prompt includes the appended measurement cell
notebook_code = open_notebook(model_notebook_path)

In [None]:
# Prompt for generating the mitigation code. measurement_last_output is the
# (second_last_output, last_output) tuple returned by execute_notebook, so the
# measured value shown to the model is element [1], not the whole tuple.
messages = [{
    "role": "system",
    "content": f"""You are an expert in Python and the fairlearn library. You successfully executed the model code below and obtained the value for {measurement_last_output[1]}. You now aim to apply the mitigation method: {mitigation_method} from the fairlearn library. The generated code will be an addition to the existing code and will run together seamlessly.
    Follow these guidelines strictly and then generate the code:
    1. **Comment on the Fairness State:**
        - Provide a short description of the {fairness_metric} metric. Explain what this metric measures in general terms and what different values indicate about model fairness.
        - Comment specifically on the measured value {measurement_last_output[1]} that you obtained. Explain what this specific measured value indicates about the model's fairness in this context.
        
    2. **Generate Mitigation Code:**
    - Generate code for the specified mitigation method by referring to the relevant mitigation method definition.
    - Make sure to import the necessary methods from the fairlearn library. Constraint values do not take parameters.
    - Ensure the code is compatible with the given model code and the requested mitigation method (and its constraints, if applicable).
    - Extract the sensitive attribute from the appropriate variable in the model code.
    - Use the appropriate variable names for the sensitive attribute, dataset, and test data as defined in the provided model code.
    - Include code to measure the performance metrics (accuracy, precision, etc.) and the fairness metric measurement that were used previously used.
    - Include comments in the code to explain the steps.

    Organize your output into two sections:
    1. **Comment on the fairness metric value obtained and code explanation.**
    2. **Mitigation Code:**
    ```python
    # Write the mitigation code and the model performance metrics and fairness metric measurement code here
    ```
    """
},
{
    "role": "user",
    "content": f"Model code:\n{notebook_code}\n\nMitigation method definition:\n{mitigation_code}"
}]

In [None]:
# Full model reply: the fairness commentary plus the mitigation code block
result_mitigation = generate_response(model, tokenizer, messages, config)
print(result_mitigation)

In [None]:
# Keep only the first fenced code block of the reply (None if there is none)
generated_code_mitigation = extract_python_code(result_mitigation)
print(generated_code_mitigation)

## Validate generated code

In [None]:
# Prompt asking the model to review the generated mitigation code against the
# model notebook's code.
messages = [
    {"role": "system", "content": f"""You are an intelligent assistant tasked with verifying the correctness of Python code. Your objective is to evaluate the provided generated code that is intended to integrate with the existing model code. Ensure that the generated code aligns perfectly with the model code and correctly utilizes the fairlearn library. 
        Follow these steps for the verification process and then start evaluating the code:
        1. Make sure that sensitive attribute is extracted from the variable that contains the features of the test dataset by using the name of the sensitive attribute. Only setting the sensitive attribute as a variable is not enough.
        2. Pay attention to case sensitivity in the variable names.
        3. Compare variable names and structures in the generated code with those in the existing model code to ensure variable consistency.
        4. Identify and report any hallucinations and assumptions (eg. index value of a feature) in the generated code that do not fit the context of the model code.
        5. Make sure that necessary methods are imported from the fairlearn library.
        6. List any necessary changes to align the generated code with the existing model code. If no update is needed, skip this step.
        
        If any update is needed, provide an updated and corrected version of the generated code. If no update is needed, print the generated code as is."""},
    {"role": "user", "content": f"Here is the existing model code:\n\n{notebook_code}\n\nHere is the generated code to be evaluated:\n\n{generated_code_mitigation}"},
]

In [None]:
# Full validation reply for the mitigation code
response_validation_mitigation = generate_response(model, tokenizer, messages, config)
print(response_validation_mitigation)

In [None]:
# Keep only the first fenced code block of the validation reply (None if there is none)
validated_code_mitigation = extract_python_code(response_validation_mitigation)
print(validated_code_mitigation)

## 4.1 Update the code

In [None]:
# Appends a new cell on every run: re-running this cell adds the code again
add_code_cell_to_notebook(model_notebook_path, validated_code_mitigation)

## 4.2 Execute the updated code

In [None]:
# Request both trailing outputs: element [0] is the second last code cell (the
# fairness measurement) and element [1] is the last one (the mitigation).
# Without return_last_two=True, element [0] is always None.
mitigation_last_output = execute_notebook(model_notebook_path, return_last_two=True)
print(mitigation_last_output)

In [None]:
# mitigation_last_output[0] is the second last code cell's output as returned
# by execute_notebook (None unless it was called with return_last_two=True).
text = f"{fairness_metric} is measured as {mitigation_last_output[0]}"
print(text)

In [None]:
# Add the post-mitigation result and append the summary to this case's log.
# add_to_log requires the destination path as its second argument.
text += f"\n{mitigation_method} is applied and the post-mitigation performance is measured as {mitigation_last_output[1]}"
add_to_log(text, log_path)