In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch.nn.functional as F
from transformers.tokenization_utils_base import BatchEncoding
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from datasets import load_dataset, load_metric
from tqdm.auto import tqdm
import json
from peft import AutoPeftModelForCausalLM

In [2]:
# Load the dataset
whole_dataset = load_dataset("tatsu-lab/alpaca")

# Split the dataset into training and testing sets
split_datasets = whole_dataset['train'].train_test_split(test_size=0.0001, seed=42)

# Access the training and testing sets
train_dataset = split_datasets['train']
test_dataset = split_datasets['test']

In [None]:
# Set the model path and the layers you want to evaluate
model_name = 'Llama-2-7b-hf-fine-tuned'
layers = [8, 16, 24, 32]

# Load the model and tokenizer
ft_model = AutoModelForCausalLM.from_pretrained(model_name, output_hidden_states=True)
ft_tokenizer = AutoTokenizer.from_pretrained(model_name)

In [4]:
def generate_text(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    prompt: str,
    max_num_tokens: int = 25,
    top_k: int = 5,
    layer: int = 8,
    temperature: float = 1.0,
    stop_token_ids: list = [],
    stop_words: list = [],
    eos_weight: float = 2.0,
    enable_logging: bool = False
) -> str:
    """
    Generate text using a language model.

    Args:
        model (model): The language model.
        tokenizer (model): The tokenizer corresponding to the model.
        prompt (str): The initial text to start generation from.
        num_tokens (int, optional): The number of tokens to generate. Defaults to 5.
        top_k (int, optional): The number of top tokens to consider for sampling. Defaults to 5.
        layer (int, optional): The layer of the model to use for generation. Defaults to 8.
        temperature (float, optional): The temperature for softmax. Defaults to 1.0.
        stop_token_ids (list, optional): List of token ids that will end generation if sampled. Defaults to [].
        stop_words (list, optional): List of words that will end generation if sampled. Defaults to [].
        eos_weight (float, optional): The weight to assign to the EOS token. Defaults to 2.0.
        enable_logging (bool, optional): Enable logging for debugging. Defaults to False.

    Returns:
        str: The generated text.
    """
    # Move model to GPU if available
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = torch.device('cpu')
    model = model.to(device)
    
    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs.to(device)
    
    # Get the EOS token ID
    eos_token_id = tokenizer.eos_token_id
    stop_token_ids.append(eos_token_id)
    output_tokens = []
    
    # Generate num_tokens tokens
    for _ in range(max_num_tokens):
        # Forward pass through the model
        outputs = model(**inputs)

        # Get the output of the specified layer
        layer_output = outputs.hidden_states[layer]

        # Pass the output through the final linear layer
        logits = model.lm_head(layer_output)

        # Apply softmax to get probabilities
        probabilities = F.softmax(logits / temperature, dim=-1)

        # Increase the weight of the EOS token
        probabilities[0, -1, eos_token_id] *= eos_weight

        # Get the probabilities of the top k tokens
        top_k_probabilities, top_k_indices = torch.topk(probabilities[0, -1], top_k)

        # Normalize the top k probabilities
        top_k_probabilities /= torch.sum(top_k_probabilities)

        # Sample from the top k probability distribution
        sampled_token_id = top_k_indices[torch.multinomial(top_k_probabilities, 1)].item()
        output_tokens.append(sampled_token_id)
        
        # Decode the token id back into text
        sampled_token_text = tokenizer.decode([sampled_token_id])
        sampled_token = {'input_ids': torch.tensor([[1, sampled_token_id]]), 'attention_mask': torch.tensor([[1, 1]])}

        # If the sampled token is a stop token or stop word, return the generated text
        if sampled_token_id in stop_token_ids or sampled_token_text in stop_words:
            generated_text = tokenizer.decode(inputs['input_ids'][0])
            pure_output = tokenizer.decode(output_tokens)
            return {'text':generated_text,'output':pure_output,'prompt':prompt}

        combined_input_ids = torch.cat((inputs['input_ids'].detach().cpu(), sampled_token['input_ids'][:,1:]), dim=-1)
        combined_attention_mask = torch.cat((inputs['attention_mask'].detach().cpu(), sampled_token['attention_mask'][:,1:]), dim=-1)
        
        inputs = BatchEncoding({'input_ids': combined_input_ids, 'attention_mask': combined_attention_mask}).to(device)

    generated_text = tokenizer.decode(inputs['input_ids'][0])
    pure_output = tokenizer.decode(output_tokens)
    # return {'text':generated_text,'output':pure_output,'prompt':prompt}
    return pure_output

In [5]:
import json

# Example function to print outputs for different layers using test dataset input and instructions as prompts
# and incorporating a specific prompt structure
def print_layer_outputs(test_dataset, model, tokenizer, layers, max_num_tokens=10, temperature=0.1, stop_words=['\n']):
    results = []  # List to store results for future evaluation

    # Defining the introductory part of the prompt
    directive_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request."

    for idx, example in enumerate(test_dataset):
        # Creating a detailed prompt with clearly defined sections
        if 'input' in example and example['input'].strip():  # Check if 'input' column exists and is not empty
            detailed_prompt = f"{directive_prompt}\n\n### Instruction:\n{example['instruction']}\n\n### Input:\n{example['input']}\n\n### Response:\n"
        else:
            detailed_prompt = f"{directive_prompt}\n\n### Instruction:\n{example['instruction']}\n\n### Input:\nNo additional input provided.\n\n### Response:\n"

        # Printing the formatted prompt
        print(f"Prompt {idx+1}: {detailed_prompt}")
        
        reference_answer = example.get('output', 'No reference answer provided.')  # Get reference answer
        layer_outputs = {}

        # Generating responses from each layer
        for layer in layers:
            output = generate_text(model, tokenizer, detailed_prompt, max_num_tokens=max_num_tokens, layer=layer, temperature=temperature, stop_words=stop_words)
            print(f'Layer {layer}: {output}')
            layer_outputs[f'Layer {layer}'] = output

        # Save prompt, reference, and generated outputs for evaluation
        results.append({
            'prompt': detailed_prompt,
            'reference_answer': reference_answer,
            'generated_outputs': layer_outputs
        })
        print(f"Reference Answer: {reference_answer}")
        print("\n" + "-"*50 + "\n")
    
    # Save results to a JSON file for future use
    with open('results.json', 'w') as f:
        json.dump(results, f, indent=4, ensure_ascii=False)

# Use this function to display the outputs
print_layer_outputs(test_dataset, ft_model, ft_tokenizer, layers)


Prompt 1: Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
What would be the best type of exercise for a person who has arthritis?

### Input:
No additional input provided.

### Response:

Layer 8: in order ofBeanFactorynaríoasaheb Joh (
Layer 16: аmp href externasenior_rolaugustine
Layer 24: gentle aer aer aer aer aer aer aer aer aer
Layer 32: The best type of exercise for a person with ar
Reference Answer: For someone with arthritis, the best type of exercise would be low-impact activities like yoga, swimming, or walking. These exercises provide the benefits of exercise without exacerbating the symptoms of arthritis.

--------------------------------------------------

Prompt 2: Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Calculate the atomic mass for lithium.

### Input:
No additional input provided.

### Response:

Layer 8: in.ed

In [3]:
import json
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from bert_score import score
from tqdm import tqdm

def evaluate_text_quality(reference, candidate):
    def calculate_bleu(reference, candidate):
        reference_tokens = [reference.split()]
        candidate_tokens = candidate.split()
        smoothie = SmoothingFunction().method1  # Experiment with different methods
        return sentence_bleu(reference_tokens, candidate_tokens, smoothing_function=smoothie)

    def calculate_rouge_l(reference, candidate):
        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        return scorer.score(reference, candidate)['rougeL'].fmeasure

    def calculate_bert_score(reference, candidate):
        *_, bert_scores = score([candidate], [reference], lang='en', return_hash=False)
        return bert_scores.mean().item()

    # Check that both reference and candidate are strings
    if not isinstance(reference, str) or not isinstance(candidate, str):
        print("Error: Non-string input to evaluate_text_quality. Reference or candidate is not a string.")
        return {'BLEU': 0, 'ROUGE-L': 0, 'BERTScore': 0}

    return {
        'BLEU': calculate_bleu(reference, candidate),
        'ROUGE-L': calculate_rouge_l(reference, candidate),
        'BERTScore': calculate_bert_score(reference, candidate)
    }

def calculate_scores(data):
    scores_per_layer = {}
    
    for entry in tqdm(data):
        reference_answer = entry['reference_answer']
        generated_outputs = entry['generated_outputs']
        
        for layer, output in generated_outputs.items():
            # Ensure output is a string
            if not isinstance(output, str):
                print(f"Error: Output for layer {layer} is not a string.")
                continue
            
            evaluation_results = evaluate_text_quality(reference_answer, output)
            
            if layer not in scores_per_layer:
                scores_per_layer[layer] = {'BLEU': 0, 'ROUGE-L': 0, 'BERTScore': 0, 'count': 0}
            
            for key in ['BLEU', 'ROUGE-L', 'BERTScore']:
                scores_per_layer[layer][key] += evaluation_results[key]
            
            scores_per_layer[layer]['count'] += 1

    # Average the scores
    average_scores_per_layer = {}
    for layer, scores in scores_per_layer.items():
        if scores['count'] == 0:
            continue
        average_scores_per_layer[layer] = {key: scores[key] / scores['count'] for key in ['BLEU', 'ROUGE-L', 'BERTScore']}
    
    return average_scores_per_layer

# Load results from JSON file
with open('results.json', 'r') as file:
    data = json.load(file)

# Calculate and print the scores
scores_model = calculate_scores(data)
print("Layer-wise Average Scores:", json.dumps(scores_model, indent=4))


  0%|          | 0/6 [00:00<?, ?it/s]Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-

Error: Output for layer Layer 24 is not a string.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 50%|█████     | 3/6 [00:24<00:23,  7.95s/it]Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at 

Layer-wise Average Scores: {
    "Layer 8": {
        "BLEU": 0.0,
        "ROUGE-L": 0.005847953216374269,
        "BERTScore": 0.7701817949612936
    },
    "Layer 16": {
        "BLEU": 2.131077415413766e-09,
        "ROUGE-L": 0.011695906432748537,
        "BERTScore": 0.7627338667710623
    },
    "Layer 24": {
        "BLEU": 0.10354390070393855,
        "ROUGE-L": 0.2345180023228804,
        "BERTScore": 0.8339886784553527
    },
    "Layer 32": {
        "BLEU": 0.1729871495321498,
        "ROUGE-L": 0.3130216251010039,
        "BERTScore": 0.8921485543251038
    }
}



