# GPT-2 and Flan-T5

In [25]:
import torch
from transformers import (
    AutoModelForQuestionAnswering,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer
)

def load_models_and_tokenizers():
    """Load models and tokenizers with proper error handling."""
    tokenizers = {}
    loaded_models = {}
    model_configs = {
        "GPT-2": ("gpt2", AutoModelForCausalLM),
        "Flan-T5": ("google/flan-t5-large", AutoModelForSeq2SeqLM),
        "XLNet": ("xlnet-base-cased", AutoModelForQuestionAnswering),
        "RoBERTa": ("deepset/roberta-base-squad2", AutoModelForQuestionAnswering)
    }

    for model_name, (model_path, model_class) in model_configs.items():
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_path)
            # Set padding token for GPT-2
            if model_name == "GPT-2":
                tokenizer.pad_token = tokenizer.eos_token

            tokenizers[model_name] = tokenizer
            loaded_models[model_name] = model_class.from_pretrained(model_path)
            print(f"Successfully loaded {model_name}")
        except Exception as e:
            print(f"Error loading {model_name}: {str(e)}")

    return tokenizers, loaded_models

def query_model(model_name, sentence, question, tokenizer, model):
    """Query the model with improved error handling and response formatting."""
    try:
        if model_name == "GPT-2":
            input_text = f"Read this sentence carefully: {sentence}\nQuestion: {question}\nAnswer (Yes or No):"
            inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
            outputs = model.generate(
                **inputs,
                max_length=100,
                num_return_sequences=1,
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id
            )
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Extract just the answer part
            answer = response.split("Answer (Yes or No):")[-1].strip()
            return answer if answer else "No clear answer"

        elif model_name == "Flan-T5":
            input_text = f"Based on this context: {sentence} Answer this yes/no question: {question}"
            inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
            outputs = model.generate(
                **inputs,
                max_length=50,
                num_return_sequences=1,
                temperature=0.7
            )
            return tokenizer.decode(outputs[0], skip_special_tokens=True)

        elif model_name in ["XLNet", "RoBERTa"]:
            # For QA models, we need to handle the answer span extraction differently
            inputs = tokenizer(
                question,
                sentence,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512
            )

            with torch.no_grad():
                outputs = model(**inputs)

            # Get the most likely answer span
            start_logits = outputs.start_logits
            end_logits = outputs.end_logits

            # Get top 5 most likely start and end positions
            top_starts = torch.topk(start_logits, 5)
            top_ends = torch.topk(end_logits, 5)

            best_answer = "No clear answer found"
            best_score = float('-inf')

            # Try different combinations of start and end positions
            for start_idx in top_starts.indices[0]:
                for end_idx in top_ends.indices[0]:
                    if start_idx <= end_idx:
                        answer_tokens = inputs["input_ids"][0][start_idx:end_idx + 1]
                        answer = tokenizer.decode(answer_tokens, skip_special_tokens=True)
                        score = start_logits[0][start_idx] + end_logits[0][end_idx]

                        if score > best_score and len(answer.strip()) > 0:
                            best_answer = answer
                            best_score = score

            return best_answer.strip()

    except Exception as e:
        return f"Error: {str(e)}"

def analyze_garden_path_sentence(sentence, question):
    """Analyze a garden-path sentence with progressive chunks and formatted output."""
    tokenizers, models = load_models_and_tokenizers()

    # Create meaningful chunks based on phrases
    words = sentence.split()
    chunks = []
    current_chunk = []

    for word in words:
        current_chunk.append(word)
        if word in ['.', ',', ';', '?', '!'] or word.endswith(('.', ',', ';', '?', '!')):
            chunks.append(' '.join(current_chunk))
            current_chunk = []
        elif len(current_chunk) >= 3:  # Create chunks of meaningful phrases
            chunks.append(' '.join(current_chunk))
            current_chunk = []

    if current_chunk:  # Add any remaining words
        chunks.append(' '.join(current_chunk))

    # Remove duplicates while preserving order
    chunks = list(dict.fromkeys(chunks))

    results = {}
    for model_name in models:
        results[model_name] = []
        accumulated_text = ""
        for chunk in chunks:
            accumulated_text = (accumulated_text + " " + chunk).strip()
            response = query_model(
                model_name,
                accumulated_text,
                question,
                tokenizers[model_name],
                models[model_name]
            )
            results[model_name].append({
                'chunk': accumulated_text,
                'response': response
            })

    # Print formatted results
    print(f"\nAnalyzing Garden Path Sentence:")
    print(f"Full sentence: '{sentence}'")
    print(f"Question: '{question}'\n")

    for model_name, responses in results.items():
        print(f"\n{model_name} Progressive Analysis:")
        print("-" * 60)
        for i, resp in enumerate(responses, 1):
            print(f"\nChunk {i}: '{resp['chunk']}'")
            print(f"Response: {resp['response']}")
        print("-" * 60)

    return results

# Example usage
garden_path_sentence = "While the man hunted the deer ran through the woods."
question = "Did the man hunt the deer?"
results = analyze_garden_path_sentence(garden_path_sentence, question)

Successfully loaded GPT-2
Successfully loaded Flan-T5


Some weights of XLNetForQuestionAnsweringSimple were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded XLNet
Successfully loaded RoBERTa

Analyzing Garden Path Sentence:
Full sentence: 'While the man hunted the deer ran through the woods.'
Question: 'Did the man hunt the deer?'


GPT-2 Progressive Analysis:
------------------------------------------------------------

Chunk 1: 'While the man'
Response: No.
Question: Did the man hunt the deer?
Answer (No): No.
Question: Did the man hunt the deer?
Answer (No): No.
Question: Did the man hunt the deer?
Answer (No): No.
Question: Did the man hunt the deer?
Answer (No): No.
Question: Did the

Chunk 2: 'While the man hunted the deer'
Response: No.
Question: Did the man hunt the deer?
Answer (No or No): No.
Question: Did the man hunt the deer?
Answer (No or No): No.
Question: Did the man hunt the deer?
Answer (No or No): No.
Question: Did the man hunt the deer?
Answer (

Chunk 3: 'While the man hunted the deer ran through the'
Response: No.
Question: Did the man hunt the deer?
Answer (No): No.
Question: Did the man hunt the 

In [26]:
import torch
from transformers import (
    AutoModelForQuestionAnswering,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer
)

# Load tokenizers
tokenizers = {
    "GPT-2": AutoTokenizer.from_pretrained("gpt2"),
    "Flan-T5": AutoTokenizer.from_pretrained("google/flan-t5-large"),
    "XLNet": AutoTokenizer.from_pretrained("xlnet-large-cased"),  # ✅ Fine-tuned XLNet for QA
    "RoBERTa": AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
}

# Load models
loaded_models = {
    "GPT-2": AutoModelForCausalLM.from_pretrained("gpt2"),  # ✅ Supports `.generate()`
    "Flan-T5": AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large"),  # ✅ Supports `.generate()`
    "XLNet": AutoModelForQuestionAnswering.from_pretrained("xlnet-large-cased"),  # ✅ XLNet for QA
    "RoBERTa": AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")
}

def query_model(model_name, sentence, question):
    """Query a model with a sentence and comprehension question."""
    tokenizer = tokenizers.get(model_name)
    model = loaded_models.get(model_name)

    if model_name == "GPT-2":
        input_text = f"{sentence}\nQuestion: {question}\nAnswer: "
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
        outputs = model.generate(**inputs, max_length=50, pad_token_id=tokenizer.eos_token_id)  # ✅ Fix padding issue
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    elif model_name == "Flan-T5":
        input_text = f"question: {question}  context: {sentence}"
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
        outputs = model.generate(**inputs, max_length=50)  # ✅ Ensure proper formatting
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    elif model_name in ["XLNet", "RoBERTa"]:  # ✅ Now using proper QA models
        inputs = tokenizer(sentence, question, return_tensors="pt", truncation=True)
        with torch.no_grad():
            outputs = model(**inputs)

        start_logits = outputs.start_logits
        end_logits = outputs.end_logits

        start_idx = torch.argmax(start_logits)
        end_idx = torch.argmax(end_logits) + 1  # End index is inclusive

        answer = tokenizer.convert_tokens_to_string(
            tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][start_idx:end_idx])
        )
        return answer.strip()

    return "Model not supported"


def process_sentence(sentence, question):
    """Process a garden-path sentence incrementally."""
    sentence_chunks = [
        "While the man hunted",
        "the deer",
        "ran",
        "through the woods."
    ]
    responses = {}

    for model_name in loaded_models:
        model_responses = []
        context = ""
        for chunk in sentence_chunks:
            context += chunk + " "
            response = query_model(model_name, context, question)
            model_responses.append(response)
        responses[model_name] = model_responses

    return responses


# Example Usage
garden_path_sentence = "While the man hunted the deer ran through the woods."
question = "Did the man hunt the deer?"

responses = process_sentence(garden_path_sentence, question)

# Print results
print(f"Sentence: {garden_path_sentence}")
for model_name, model_responses in responses.items():
    print(f"\n{model_name}:")
    for i, response in enumerate(model_responses):
        print(f"  Chunk {i+1}: {response}")


Some weights of XLNetForQuestionAnsweringSimple were not initialized from the model checkpoint at xlnet-large-cased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Sentence: While the man hunted the deer ran through the woods.

GPT-2:
  Chunk 1: While the man hunted 
Question: Did the man hunt the deer?
Answer:  Yes.  The man hunted the deer.  The man hunted the deer.  The man hunted the deer.  The man
  Chunk 2: While the man hunted the deer 
Question: Did the man hunt the deer?
Answer:  Yes.  The man hunted the deer.  The man hunted the deer.  The man hunted the deer.  
  Chunk 3: While the man hunted the deer ran 
Question: Did the man hunt the deer?
Answer:  Yes.  The man hunted the deer.  The man hunted the deer.  The man hunted the deer. 
  Chunk 4: While the man hunted the deer ran through the woods. 
Question: Did the man hunt the deer?
Answer:  No.  The man hunted the deer. 
Question: Did the man hunt the deer?
Answer

Flan-T5:
  Chunk 1: deer , he was unable to kill one.
  Chunk 2: he was unable to kill it.
  Chunk 3: no
  Chunk 4: yes

XLNet:
  Chunk 1: hunted
  Chunk 2: the
  Chunk 3: the
  Chunk 4: hunted

RoBERTa:
  Chunk 1: <s>
  C

In [27]:
import torch
from transformers import (
    AutoModelForQuestionAnswering,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer
)

def load_models_and_tokenizers():
    """Load models and tokenizers with proper error handling."""
    tokenizers = {}
    loaded_models = {}
    model_configs = {
        "GPT-2": ("gpt2", AutoModelForCausalLM),
        "Flan-T5": ("google/flan-t5-large", AutoModelForSeq2SeqLM),
        "XLNet": ("xlnet-base-cased", AutoModelForQuestionAnswering),
        "RoBERTa": ("deepset/roberta-base-squad2", AutoModelForQuestionAnswering)
    }

    for model_name, (model_path, model_class) in model_configs.items():
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_path)
            # Set padding token for GPT-2
            if model_name == "GPT-2":
                tokenizer.pad_token = tokenizer.eos_token

            tokenizers[model_name] = tokenizer
            loaded_models[model_name] = model_class.from_pretrained(model_path)
            print(f"Successfully loaded {model_name}")
        except Exception as e:
            print(f"Error loading {model_name}: {str(e)}")

    return tokenizers, loaded_models

def query_model(model_name, sentence, question, tokenizer, model):
    """Query the model with improved error handling and response formatting."""
    try:
        if model_name == "GPT-2":
            input_text = f"Read this sentence carefully: {sentence}\nQuestion: {question}\nAnswer (Yes or No):"
            inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
            outputs = model.generate(
                **inputs,
                max_length=100,
                num_return_sequences=1,
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id
            )
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Extract just the answer part
            answer = response.split("Answer (Yes or No):")[-1].strip()
            return answer if answer else "No clear answer"

        elif model_name == "Flan-T5":
            input_text = f"Based on this context: {sentence} Answer this yes/no question: {question}"
            inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
            outputs = model.generate(
                **inputs,
                max_length=50,
                num_return_sequences=1,
                temperature=0.7
            )
            return tokenizer.decode(outputs[0], skip_special_tokens=True)

        elif model_name in ["XLNet", "RoBERTa"]:
            # For QA models, we need to handle the answer span extraction differently
            inputs = tokenizer(
                question,
                sentence,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512
            )

            with torch.no_grad():
                outputs = model(**inputs)

            # Get the most likely answer span
            start_logits = outputs.start_logits
            end_logits = outputs.end_logits

            # Get top 5 most likely start and end positions
            top_starts = torch.topk(start_logits, 5)
            top_ends = torch.topk(end_logits, 5)

            best_answer = "No clear answer found"
            best_score = float('-inf')

            # Try different combinations of start and end positions
            for start_idx in top_starts.indices[0]:
                for end_idx in top_ends.indices[0]:
                    if start_idx <= end_idx:
                        answer_tokens = inputs["input_ids"][0][start_idx:end_idx + 1]
                        answer = tokenizer.decode(answer_tokens, skip_special_tokens=True)
                        score = start_logits[0][start_idx] + end_logits[0][end_idx]

                        if score > best_score and len(answer.strip()) > 0:
                            best_answer = answer
                            best_score = score

            return best_answer.strip()

    except Exception as e:
        return f"Error: {str(e)}"

def analyze_garden_path_sentence(sentence, question, sentence_chunks):
    """Analyze a garden-path sentence using predefined chunks."""
    tokenizers, models = load_models_and_tokenizers()

    results = {}
    for model_name in models:
        results[model_name] = []
        accumulated_text = ""
        for chunk in sentence_chunks:
            accumulated_text = (accumulated_text + " " + chunk).strip()
            response = query_model(
                model_name,
                accumulated_text,
                question,
                tokenizers[model_name],
                models[model_name]
            )
            results[model_name].append({
                'chunk': accumulated_text,
                'response': response
            })

    # Print formatted results
    print(f"\nAnalyzing Garden Path Sentence:")
    print(f"Full sentence: '{sentence}'")
    print(f"Question: '{question}'\n")

    for model_name, responses in results.items():
        print(f"\n{model_name} Progressive Analysis:")
        print("-" * 60)
        for i, resp in enumerate(responses, 1):
            print(f"\nChunk {i}: '{resp['chunk']}'")
            print(f"Response: {resp['response']}")
        print("-" * 60)

    return results

# Example usage
garden_path_sentence = "While the man hunted the deer ran through the woods."
sentence_chunks = [
    "While the man hunted",
    "the deer",
    "ran",
    "through the woods."
]
question = "Did the man hunt the deer?"
results = analyze_garden_path_sentence(garden_path_sentence, question, sentence_chunks)

Successfully loaded GPT-2
Successfully loaded Flan-T5


Some weights of XLNetForQuestionAnsweringSimple were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded XLNet
Successfully loaded RoBERTa

Analyzing Garden Path Sentence:
Full sentence: 'While the man hunted the deer ran through the woods.'
Question: 'Did the man hunt the deer?'


GPT-2 Progressive Analysis:
------------------------------------------------------------

Chunk 1: 'While the man hunted'
Response: No.
Question: Did the man hunt the deer?
Answer (No): No.
Question: Did the man hunt the deer?
Answer (No): No.
Question: Did the man hunt the deer?
Answer (No): No.
Question: Did the man hunt the deer?
Answer (No): No.
Question: Did

Chunk 2: 'While the man hunted the deer'
Response: No.
Question: Did the man hunt the deer?
Answer (No or No): No.
Question: Did the man hunt the deer?
Answer (No or No): No.
Question: Did the man hunt the deer?
Answer (No or No): No.
Question: Did the man hunt the deer?
Answer (

Chunk 3: 'While the man hunted the deer ran'
Response: No.
Question: Did the man hunt the deer?
Answer (No or No): No.
Question: Did the man hunt the dee

In [1]:
import torch
from transformers import (
    AutoModelForQuestionAnswering,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer
)

def load_models_and_tokenizers():
    """Load models and tokenizers with proper error handling."""
    tokenizers = {}
    loaded_models = {}
    model_configs = {
        "GPT-2": ("gpt2", AutoModelForCausalLM),
        "Flan-T5": ("google/flan-t5-large", AutoModelForSeq2SeqLM),
        "XLNet": ("xlnet-base-cased", AutoModelForQuestionAnswering),
        "RoBERTa": ("deepset/roberta-base-squad2", AutoModelForQuestionAnswering)
    }

    for model_name, (model_path, model_class) in model_configs.items():
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        if model_name == "GPT-2":
            tokenizer.pad_token = tokenizer.eos_token

        tokenizers[model_name] = tokenizer
        loaded_models[model_name] = model_class.from_pretrained(model_path)

    return tokenizers, loaded_models

def query_model(model_name, sentence, question, tokenizer, model):
    """Query the model with improved error handling and response formatting."""
    if model_name == "GPT-2":
        input_text = f"Read this sentence carefully: {sentence}\nQuestion: {question}\nAnswer: "
        inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            temperature=0.5,
            repetition_penalty=1.5,
            pad_token_id=tokenizer.eos_token_id
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True).split("Answer:")[-1].strip()

    elif model_name == "Flan-T5":
        input_text = f"Based on this context: {sentence} Answer this yes/no question: {question}\nAnswer: "
        inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
        outputs = model.generate(
            **inputs,
            max_new_tokens=5,
            do_sample=False
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    elif model_name in ["XLNet", "RoBERTa"]:
        inputs = tokenizer(question, sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)

        start_logits = outputs.start_logits
        end_logits = outputs.end_logits

        # Get best answer span
        start_idx = torch.argmax(start_logits)
        end_idx = torch.argmax(end_logits) + 1
        answer_tokens = inputs["input_ids"][0][start_idx:end_idx]
        answer = tokenizer.decode(answer_tokens, skip_special_tokens=True)

        return answer.strip() if answer else "No clear answer"

def analyze_garden_path_sentence(sentence, question, sentence_chunks):
    """Analyze a garden-path sentence using predefined chunks."""
    tokenizers, models = load_models_and_tokenizers()

    results = {}
    for model_name in models:
        results[model_name] = []
        accumulated_text = ""
        for chunk in sentence_chunks:
            accumulated_text = (accumulated_text + " " + chunk).strip()
            response = query_model(
                model_name,
                accumulated_text,
                question,
                tokenizers[model_name],
                models[model_name]
            )
            results[model_name].append({'chunk': accumulated_text, 'response': response})

    for model_name, responses in results.items():
        print(f"\n{model_name} Progressive Analysis:")
        for i, resp in enumerate(responses, 1):
            print(f"\nChunk {i}: '{resp['chunk']}'")
            print(f"Response: {resp['response']}")

    return results

# Example Usage
garden_path_sentence = "While the man hunted the deer ran through the woods."
sentence_chunks = [
    "While the man hunted",
    "the deer",
    "ran",
    "through the woods."
]
question = "Did the man hunt the deer?"
results = analyze_garden_path_sentence(garden_path_sentence, question, sentence_chunks)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of XLNetForQuestionAnsweringSimple were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



GPT-2 Progressive Analysis:

Chunk 1: 'While the man hunted'
Response: Yes. The hunter was not a member of

Chunk 2: 'While the man hunted the deer'
Response: Yes. The hunter was not a member of

Chunk 3: 'While the man hunted the deer ran'
Response: Yes. The hunter was not a member of

Chunk 4: 'While the man hunted the deer ran through the woods.'
Response: Yes, he did so in a manner that

Flan-T5 Progressive Analysis:

Chunk 1: 'While the man hunted'
Response: no

Chunk 2: 'While the man hunted the deer'
Response: Yes

Chunk 3: 'While the man hunted the deer ran'
Response: yes

Chunk 4: 'While the man hunted the deer ran through the woods.'
Response: Yes

XLNet Progressive Analysis:

Chunk 1: 'While the man hunted'
Response: While the man hunted

Chunk 2: 'While the man hunted the deer'
Response: No clear answer

Chunk 3: 'While the man hunted the deer ran'
Response: the man hunted the deer ran

Chunk 4: 'While the man hunted the deer ran through the woods.'
Response: While the man