In [1]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import Trainer, TrainingArguments
import pandas as pd

# Load dataset
data = """John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow."""
# data = pd.read_csv('your_dataset.csv')  # Assuming a CSV with 'paragraph' and 'dialogue' columns

# Tokenization
tokenizer = T5Tokenizer.from_pretrained('t5-base')
inputs = tokenizer(data['paragraph'].tolist(), return_tensors='pt', padding=True, truncation=True)
labels = tokenizer(data['dialogue'].tolist(), return_tensors='pt', padding=True, truncation=True)

# Model initialization
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Training setup
training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=8,
    num_train_epochs=3,
    save_steps=10_000,
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=inputs,  # Make sure to create a proper dataset class
)

# Train the model
trainer.train()

# Generate dialogue from a new paragraph
def generate_dialogue(paragraph):
    input_ids = tokenizer(paragraph, return_tensors='pt').input_ids
    outputs = model.generate(input_ids)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage
print(generate_dialogue("Your paragraph here"))


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


TypeError: string indices must be integers, not 'str'

In [2]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with refined few-shot examples and clear instructions
examples = """
Convert the following **Narrative** into a structured **Dialogue** with speaker names:
Example 1:
Narrative: Jane entered the room and smiled warmly at Mike. "Good to see you," she said. Mike grinned back. "Same here."
Dialogue:
Jane: "Good to see you."
Mike: "Same here."

Example 2:
Narrative: Sarah walked up to Tom with a worried expression. "What happened today?" she asked. Tom hesitated, then sighed. "It's complicated."
Dialogue:
Sarah: "What happened today?"
Tom: "It's complicated."

Now convert this:
Narrative: John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
Dialogue:
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Generate dialogue using the T5 model
with torch.no_grad():
    outputs = model.generate(
        input_ids,
        max_length=250,
        num_beams=4,  # For diverse generation
        repetition_penalty=1.2,  # To reduce repetitive text
        temperature=0.7,  # To control randomness
        length_penalty=1.0,  # To ensure reasonable output length
        early_stopping=True
    )

# Decode and clean the generated tokens
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Post-process the output to clean and structure it
def clean_dialogue_output(raw_output):
    lines = raw_output.splitlines()
    cleaned_lines = []
    for line in lines:
        if ':' in line and line.strip():  # Keep valid dialogue lines
            cleaned_lines.append(line.strip())
    return "\n".join(cleaned_lines)

processed_output = clean_dialogue_output(raw_output)

# Print the processed dialogue
print("Generated Dialogue:")
print(processed_output)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Generated Dialogue:
Convert the following **Narrative** into a structured **Dialogue** with speaker names: Example 1: Narrative: Jane smiled warmly at Mike. "Good to see you." Dialogue: Sarah walked up to Tom with a worried expression. "It's complicated." Example 3: Narrative: John smiled weakly and sat down on the couch. "I don't know what to do."


In [3]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import subprocess

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with refined few-shot examples and clear instructions
examples = """
Convert the following **Narrative** into a structured **Dialogue** with speaker names:
Example 1:
Narrative: Jane entered the room and smiled warmly at Mike. "Good to see you," she said. Mike grinned back. "Same here."
Dialogue:
Jane: "Good to see you."
Mike: "Same here."

Example 2:
Narrative: Sarah walked up to Tom with a worried expression. "What happened today?" she asked. Tom hesitated, then sighed. "It's complicated."
Dialogue:
Sarah: "What happened today?"
Tom: "It's complicated."

Now convert this:
Narrative: John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
Dialogue:
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Generate dialogue using the T5 model
with torch.no_grad():
    outputs = model.generate(
        input_ids,
        max_length=250,
        num_beams=4,  # For diverse generation
        repetition_penalty=1.2,  # To reduce repetitive text
        temperature=0.7,  # To control randomness
        length_penalty=1.0,  # To ensure reasonable output length
        early_stopping=True
    )

# Decode and clean the generated tokens
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Post-process the output to clean and structure it
def clean_dialogue_output(raw_output):
    lines = raw_output.splitlines()
    cleaned_lines = []
    for line in lines:
        if ':' in line and line.strip():  # Keep valid dialogue lines
            cleaned_lines.append(line.strip())
    return "\n".join(cleaned_lines)

processed_output = clean_dialogue_output(raw_output)

# Write the processed output to the terminal
subprocess.run(["echo", f"Generated Dialogue:\n{processed_output}"], shell=True)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


CompletedProcess(args=['echo', 'Generated Dialogue:\nConvert the following **Narrative** into a structured **Dialogue** with speaker names: Example 1: Narrative: Jane smiled warmly at Mike. "Good to see you." Dialogue: Sarah walked up to Tom with a worried expression. "It\'s complicated." Example 3: Narrative: John smiled weakly and sat down on the couch. "I don\'t know what to do."'], returncode=0)

In [7]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

def generate_dialogue(narrative_text):
    # Initialize the T5 tokenizer and model
    tokenizer = T5Tokenizer.from_pretrained('t5-base')
    model = T5ForConditionalGeneration.from_pretrained('t5-base')

    # Prepare the prompt with clear instructions
    prompt = f"""
    Convert the following narrative into a structured dialogue with speaker names:
    {narrative_text}
    Dialogue:
    """
    # Tokenize input
    input_ids = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).input_ids

    # Generate dialogue
    outputs = model.generate(
        input_ids,
        max_length=256,  # Adjust based on the expected length of the output
        num_beams=5,     # Beam search for better results
        repetition_penalty=1.2,
        temperature=0.7,
        length_penalty=1.0,
        early_stopping=True
    )

    # Decode the output
    raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Post-process the output to extract clean dialogue
    def clean_dialogue_output(raw_output):
        lines = raw_output.splitlines()
        cleaned_lines = []
        for line in lines:
            if ':' in line and line.strip():  # Retain only valid dialogue lines
                cleaned_lines.append(line.strip())
        return "\n".join(cleaned_lines)

    return clean_dialogue_output(raw_output)

# Test the function with your narrative
narrative = """
John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said, "Everything will be okay. We'll figure this out together."
John smiled weakly and said, "I don't know what to do," his voice trembling.
Mary replied, "We will start looking for new opportunities tomorrow."
"""

# Generate dialogue
dialogue = generate_dialogue(narrative)
print("Generated Dialogue:")
print(dialogue)
print("Raw Output:")
print(raw_output)

Generated Dialogue:

Raw Output:
True


In [11]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

def generate_dialogue(narrative_text):
    # Initialize the T5 tokenizer and model
    tokenizer = T5Tokenizer.from_pretrained('t5-base')
    model = T5ForConditionalGeneration.from_pretrained('t5-base')

    # Prepare the prompt with clear instructions
    prompt = """
Convert the following narrative into a structured dialogue with speaker names:

Example:
Narrative: Jane walked into the room and said hello to Mike. Mike replied, "Hello, Jane! How have you been?"
Dialogue:
Jane: "Hello, Mike."
Mike: "Hello, Jane! How have you been?"

Narrative:
John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said, "Everything will be okay. We'll figure this out together."
John smiled weakly and said, "I don't know what to do," his voice trembling.
Mary replied, "We will start looking for new opportunities tomorrow."

Dialogue:
"""

    # Tokenize input
    input_ids = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).input_ids

    # Generate dialogue
    outputs = model.generate(
    input_ids,
    max_length=256,
    num_beams=5,  # Increase for higher-quality results
    repetition_penalty=2.0,  # Penalize repetitive sequences
    temperature=0.9,  # Slightly increase creativity
    length_penalty=1.0,
    early_stopping=True
)

    # Decode the output
    raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Post-process the output to extract clean dialogue
    def clean_dialogue_output(raw_output):
        lines = raw_output.splitlines()
        cleaned_lines = []
        for line in lines:
            if ':' in line and line.strip():  # Retain only valid dialogue lines
                cleaned_lines.append(line.strip())
        return "\n".join(cleaned_lines)

    return clean_dialogue_output(raw_output)

# Test the function with your narrative
narrative = """
Narrative: Sarah walked up to Tom with a worried expression. She asked, "What happened today?" 
Tom hesitated and then replied, "It's complicated."
"""
# Generate dialogue
dialogue = generate_dialogue(narrative)
print("Generated Dialogue:")
print(dialogue)
print("Raw Output Tokens:", outputs)
print("Decoded Raw Output:", tokenizer.decode(outputs[0], skip_special_tokens=True))


Generated Dialogue:

Raw Output Tokens: tensor([[    0, 10998,     1]])
Decoded Raw Output: True


In [3]:
pip install transformers


Note: you may need to restart the kernel to use updated packages.


In [3]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import logging
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="`do_sample` is set to `False`")

# Your code to generate dialogue here
logging.basicConfig(level=logging.ERROR)

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with few-shot examples
examples = """Convert the following **Narrative** into a structured **Dialogue** with speaker names:
Example 1:
Narrative: Jane entered the room and smiled warmly at Mike. "Good to see you," she said. Mike grinned back. "Same here."
Dialogue:
Jane: "Good to see you."
Mike: "Same here."

Example 2:
Narrative: Sarah walked up to Tom with a worried expression. "What happened today?" she asked. Tom hesitated, then sighed. "It's complicated."
Dialogue:
Sarah: "What happened today?"
Tom: "It's complicated."

Now convert this:
Narrative: John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
Dialogue:
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Check if GPU is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to the device (GPU if available)
model.to(device)

# Generate dialogue using the T5 model
with torch.no_grad():
    outputs = model.generate(
        input_ids.to(device),
        max_length=250,
        num_beams=4,
        repetition_penalty=1.2,
        temperature=0.7,
        length_penalty=1.0,
        early_stopping=True
    )

# Decode and clean the generated tokens
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
lines = raw_output.splitlines()

# Remove duplicate lines and empty lines
unique_lines = []
for line in lines:
    if line not in unique_lines and line.strip():
        unique_lines.append(line)

# Combine the cleaned lines
processed_output = "\n".join(unique_lines)

# Print the processed dialogue
# print("Generated Dialogue:")
# print(processed_output)

# output = """Generated Dialogue:
# Convert the following **Narrative** into a structured **Dialogue** with speaker names: Example 1: Narrative: Jane smiled warmly at Mike. "Good to see you." Dialogue: Sarah walked up to Tom with a worried expression. "It's complicated." Example 3: Narrative: John smiled weakly and sat down on the couch. "I don't know what to do.""""
output = processed_output
# Extract the dialogue text
dialogue_text = output.split("Generated Dialogue:")[1].strip()

# Split the dialogue text into individual lines
lines = dialogue_text.splitlines()

# Initialize an empty list to store the formatted dialogue
formatted_dialogue = []

# Iterate through the lines and format the dialogue
for line in lines:
    if line.startswith("Narrative:"):
        continue
    elif line.startswith("Dialogue:"):
        continue
    else:
        # Extract the speaker name and quote
        speaker_name = line.split(":")[0].strip()
        quote = line.split(":")[1].strip().strip('"')
        
        # Format the dialogue
        formatted_dialogue.append(f"{speaker_name}: \"{quote}\"")

# Join the formatted dialogue into a single string
formatted_dialogue_str = "\n".join(formatted_dialogue)

print(formatted_dialogue_str)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


IndexError: list index out of range

In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import logging
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="`do_sample` is set to `False`")

# Your code to generate dialogue here
logging.basicConfig(level=logging.ERROR)

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with few-shot examples
examples = """Convert the following **Narrative** into a structured **Dialogue** with speaker names:
Example 1:
Narrative: Jane entered the room and smiled warmly at Mike. "Good to see you," she said. Mike grinned back. "Same here."
Dialogue:
Jane: "Good to see you."
Mike: "Same here."

Example 2:
Narrative: Sarah walked up to Tom with a worried expression. "What happened today?" she asked. Tom hesitated, then sighed. "It's complicated."
Dialogue:
Sarah: "What happened today?"
Tom: "It's complicated."

Now convert this:
Narrative: John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
Dialogue:
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Check if GPU is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to the device (GPU if available)
model.to(device)

# Generate dialogue using the T5 model
with torch.no_grad():
    outputs = model.generate(
        input_ids.to(device),
        max_length=250,
        num_beams=4,
        repetition_penalty=1.2,
        temperature=0.7,
        length_penalty=1.0,
        early_stopping=True
    )

# Decode and clean the generated tokens
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
lines = raw_output.splitlines()

# Remove duplicate lines and empty lines
unique_lines = []
for line in lines:
    if line not in unique_lines and line.strip():
        unique_lines.append(line)

# Combine the cleaned lines
processed_output = "\n".join(unique_lines)

# Print the processed dialogue
# print("Generated Dialogue:")
# print(processed_output)

# output = """Generated Dialogue:
# Convert the following **Narrative** into a structured **Dialogue** with speaker names: Example 1: Narrative: Jane smiled warmly at Mike. "Good to see you." Dialogue: Sarah walked up to Tom with a worried expression. "It's complicated." Example 3: Narrative: John smiled weakly and sat down on the couch. "I don't know what to do.""""
output = processed_output
# Extract the dialogue text
dialogue_text = output.split("Generated Dialogue:")[1].strip()

# Split the dialogue text into individual lines
lines = dialogue_text.splitlines()

# Initialize an empty list to store the formatted dialogue
formatted_dialogue = []

# Iterate through the lines and format the dialogue
for line in lines:
    if line.startswith("Narrative:"):
        continue
    elif line.startswith("Dialogue:"):
        continue
    else:
        # Extract the speaker name and quote
        speaker_name = line.split(":")[0].strip()
        quote = line.split(":")[1].strip().strip('"')
        
        # Format the dialogue
        formatted_dialogue.append(f"{speaker_name}: \"{quote}\"")

# Join the formatted dialogue into a single string
formatted_dialogue_str = "\n".join(formatted_dialogue)

print(formatted_dialogue_str)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


IndexError: list index out of range

In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import logging
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="`do_sample` is set to `False`")

# Your code to generate dialogue here
logging.basicConfig(level=logging.ERROR)

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with few-shot examples
examples = """Convert the following **Narrative** into a structured **Dialogue** with speaker names:
Example 1:
Narrative: Jane entered the room and smiled warmly at Mike. "Good to see you," she said. Mike grinned back. "Same here."
Dialogue:
Jane: "Good to see you."
Mike: "Same here."

Example 2:
Narrative: Sarah walked up to Tom with a worried expression. "What happened today?" she asked. Tom hesitated, then sighed. "It's complicated."
Dialogue:
Sarah: "What happened today?"
Tom: "It's complicated."

Now convert this:
Narrative: John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
Dialogue:
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Check if GPU is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to the device (GPU if available)
model.to(device)

# Generate dialogue using the T5 model
with torch.no_grad():
    outputs = model.generate(
        input_ids.to(device),
        max_length=250,
        num_beams=4,
        repetition_penalty=1.2,
        temperature=0.7,
        length_penalty=1.0,
        early_stopping=True
    )

# Decode and clean the generated tokens
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
lines = raw_output.splitlines()

# Remove duplicate lines and empty lines
unique_lines = []
for line in lines:
    if line not in unique_lines and line.strip():
        unique_lines.append(line)

# Combine the cleaned lines
processed_output = "\n".join(unique_lines)

# Print the processed dialogue
# print("Generated Dialogue:")
# print(processed_output)

# output = """Generated Dialogue:
# Convert the following **Narrative** into a structured **Dialogue** with speaker names: Example 1: Narrative: Jane smiled warmly at Mike. "Good to see you." Dialogue: Sarah walked up to Tom with a worried expression. "It's complicated." Example 3: Narrative: John smiled weakly and sat down on the couch. "I don't know what to do.""""
output = processed_output
# Extract the dialogue text
dialogue_text = output.split("Generated Dialogue:")[1].strip()

# Split the dialogue text into individual lines
lines = dialogue_text.splitlines()

# Initialize an empty list to store the formatted dialogue
formatted_dialogue = []

# Iterate through the lines and format the dialogue
for line in lines:
    if line.startswith("Narrative:"):
        continue
    elif line.startswith("Dialogue:"):
        continue
    else:
        # Extract the speaker name and quote
        speaker_name = line.split(":")[0].strip()
        quote = line.split(":")[1].strip().strip('"')
        
        # Format the dialogue
        formatted_dialogue.append(f"{speaker_name}: \"{quote}\"")

# Join the formatted dialogue into a single string
formatted_dialogue_str = "\n".join(formatted_dialogue)

print(formatted_dialogue_str)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


IndexError: list index out of range

In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import logging
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="`do_sample` is set to `False`")

# Your code to generate dialogue here
logging.basicConfig(level=logging.ERROR)

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with few-shot examples
examples = """Convert the following **Narrative** into a structured **Dialogue** with speaker names:
Example 1:
Narrative: Jane entered the room and smiled warmly at Mike. "Good to see you," she said. Mike grinned back. "Same here."
Dialogue:
Jane: "Good to see you."
Mike: "Same here."

Example 2:
Narrative: Sarah walked up to Tom with a worried expression. "What happened today?" she asked. Tom hesitated, then sighed. "It's complicated."
Dialogue:
Sarah: "What happened today?"
Tom: "It's complicated."

Now convert this:
Narrative: John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
Dialogue:
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Check if GPU is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to the device (GPU if available)
model.to(device)

# Generate dialogue using the T5 model
with torch.no_grad():
    outputs = model.generate(
        input_ids.to(device),
        max_length=250,
        num_beams=4,
        repetition_penalty=1.2,
        temperature=0.7,
        length_penalty=1.0,
        early_stopping=True
    )

# Decode and clean the generated tokens
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
lines = raw_output.splitlines()

# Remove duplicate lines and empty lines
unique_lines = []
for line in lines:
    if line not in unique_lines and line.strip():
        unique_lines.append(line)

# Combine the cleaned lines
processed_output = "\n".join(unique_lines)

# Print the processed dialogue
# print("Generated Dialogue:")
# print(processed_output)

# output = """Generated Dialogue:
# Convert the following **Narrative** into a structured **Dialogue** with speaker names: Example 1: Narrative: Jane smiled warmly at Mike. "Good to see you." Dialogue: Sarah walked up to Tom with a worried expression. "It's complicated." Example 3: Narrative: John smiled weakly and sat down on the couch. "I don't know what to do.""""
output = processed_output
# Extract the dialogue text
dialogue_text = output.split("Generated Dialogue:")[1].strip()

# Split the dialogue text into individual lines
lines = dialogue_text.splitlines()

# Initialize an empty list to store the formatted dialogue
formatted_dialogue = []

# Iterate through the lines and format the dialogue
for line in lines:
    if line.startswith("Narrative:"):
        continue
    elif line.startswith("Dialogue:"):
        continue
    else:
        # Extract the speaker name and quote
        speaker_name = line.split(":")[0].strip()
        quote = line.split(":")[1].strip().strip('"')
        
        # Format the dialogue
        formatted_dialogue.append(f"{speaker_name}: \"{quote}\"")

# Join the formatted dialogue into a single string
formatted_dialogue_str = "\n".join(formatted_dialogue)

print(formatted_dialogue_str)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


IndexError: list index out of range

In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import logging
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="`do_sample` is set to `False`")

# Your code to generate dialogue here
logging.basicConfig(level=logging.ERROR)

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with few-shot examples
examples = """Convert the following **Narrative** into a structured **Dialogue** with speaker names:
Example 1:
Narrative: Jane entered the room and smiled warmly at Mike. "Good to see you," she said. Mike grinned back. "Same here."
Dialogue:
Jane: "Good to see you."
Mike: "Same here."

Example 2:
Narrative: Sarah walked up to Tom with a worried expression. "What happened today?" she asked. Tom hesitated, then sighed. "It's complicated."
Dialogue:
Sarah: "What happened today?"
Tom: "It's complicated."

Now convert this:
Narrative: John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
Dialogue:
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Check if GPU is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to the device (GPU if available)
model.to(device)

# Generate dialogue using the T5 model
with torch.no_grad():
    outputs = model.generate(
        input_ids.to(device),
        max_length=250,
        num_beams=4,
        repetition_penalty=1.2,
        temperature=0.7,
        length_penalty=1.0,
        early_stopping=True
    )

# Decode and clean the generated tokens
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
lines = raw_output.splitlines()

# Remove duplicate lines and empty lines
unique_lines = []
for line in lines:
    if line not in unique_lines and line.strip():
        unique_lines.append(line)

# Combine the cleaned lines
processed_output = "\n".join(unique_lines)

# Print the processed dialogue
# print("Generated Dialogue:")
# print(processed_output)

# output = """Generated Dialogue:
# Convert the following **Narrative** into a structured **Dialogue** with speaker names: Example 1: Narrative: Jane smiled warmly at Mike. "Good to see you." Dialogue: Sarah walked up to Tom with a worried expression. "It's complicated." Example 3: Narrative: John smiled weakly and sat down on the couch. "I don't know what to do.""""
output = processed_output
# Extract the dialogue text
dialogue_text = output.split("Generated Dialogue:")[1].strip()

# Split the dialogue text into individual lines
lines = dialogue_text.splitlines()

# Initialize an empty list to store the formatted dialogue
formatted_dialogue = []

# Iterate through the lines and format the dialogue
for line in lines:
    if line.startswith("Narrative:"):
        continue
    elif line.startswith("Dialogue:"):
        continue
    else:
        # Extract the speaker name and quote
        speaker_name = line.split(":")[0].strip()
        quote = line.split(":")[1].strip().strip('"')
        
        # Format the dialogue
        formatted_dialogue.append(f"{speaker_name}: \"{quote}\"")

# Join the formatted dialogue into a single string
formatted_dialogue_str = "\n".join(formatted_dialogue)

print(formatted_dialogue_str)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


IndexError: list index out of range

In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import logging
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="`do_sample` is set to `False`")

# Your code to generate dialogue here
logging.basicConfig(level=logging.ERROR)

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with few-shot examples
examples = """Convert the following **Narrative** into a structured **Dialogue** with speaker names:
Example 1:
Narrative: Jane entered the room and smiled warmly at Mike. "Good to see you," she said. Mike grinned back. "Same here."
Dialogue:
Jane: "Good to see you."
Mike: "Same here."

Example 2:
Narrative: Sarah walked up to Tom with a worried expression. "What happened today?" she asked. Tom hesitated, then sighed. "It's complicated."
Dialogue:
Sarah: "What happened today?"
Tom: "It's complicated."

Now convert this:
Narrative: John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
Dialogue:
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Check if GPU is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to the device (GPU if available)
model.to(device)

# Generate dialogue using the T5 model
with torch.no_grad():
    outputs = model.generate(
        input_ids.to(device),
        max_length=250,
        num_beams=4,
        repetition_penalty=1.2,
        temperature=0.7,
        length_penalty=1.0,
        early_stopping=True
    )

# Decode and clean the generated tokens
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
lines = raw_output.splitlines()

# Remove duplicate lines and empty lines
unique_lines = []
for line in lines:
    if line not in unique_lines and line.strip():
        unique_lines.append(line)

# Combine the cleaned lines
processed_output = "\n".join(unique_lines)

# Print the processed dialogue
# print("Generated Dialogue:")
# print(processed_output)

# output = """Generated Dialogue:
# Convert the following **Narrative** into a structured **Dialogue** with speaker names: Example 1: Narrative: Jane smiled warmly at Mike. "Good to see you." Dialogue: Sarah walked up to Tom with a worried expression. "It's complicated." Example 3: Narrative: John smiled weakly and sat down on the couch. "I don't know what to do.""""
output = processed_output
# Extract the dialogue text
dialogue_text = output.split("Generated Dialogue:")[1].strip()

# Split the dialogue text into individual lines
lines = dialogue_text.splitlines()

# Initialize an empty list to store the formatted dialogue
formatted_dialogue = []

# Iterate through the lines and format the dialogue
for line in lines:
    if line.startswith("Narrative:"):
        continue
    elif line.startswith("Dialogue:"):
        continue
    else:
        # Extract the speaker name and quote
        speaker_name = line.split(":")[0].strip()
        quote = line.split(":")[1].strip().strip('"')
        
        # Format the dialogue
        formatted_dialogue.append(f"{speaker_name}: \"{quote}\"")

# Join the formatted dialogue into a single string
formatted_dialogue_str = "\n".join(formatted_dialogue)

print(formatted_dialogue_str)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


IndexError: list index out of range

In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import logging
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="`do_sample` is set to `False`")

# Your code to generate dialogue here
logging.basicConfig(level=logging.ERROR)

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with few-shot examples
examples = """Convert the following **Narrative** into a structured **Dialogue** with speaker names:
Example 1:
Narrative: Jane entered the room and smiled warmly at Mike. "Good to see you," she said. Mike grinned back. "Same here."
Dialogue:
Jane: "Good to see you."
Mike: "Same here."

Example 2:
Narrative: Sarah walked up to Tom with a worried expression. "What happened today?" she asked. Tom hesitated, then sighed. "It's complicated."
Dialogue:
Sarah: "What happened today?"
Tom: "It's complicated."

Now convert this:
Narrative: John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
Dialogue:
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Check if GPU is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to the device (GPU if available)
model.to(device)

# Generate dialogue using the T5 model
with torch.no_grad():
    outputs = model.generate(
        input_ids.to(device),
        max_length=250,
        num_beams=4,
        repetition_penalty=1.2,
        temperature=0.7,
        length_penalty=1.0,
        early_stopping=True
    )

# Decode and clean the generated tokens
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
lines = raw_output.splitlines()

# Remove duplicate lines and empty lines
unique_lines = []
for line in lines:
    if line not in unique_lines and line.strip():
        unique_lines.append(line)

# Combine the cleaned lines
processed_output = "\n".join(unique_lines)

# Print the processed dialogue
# print("Generated Dialogue:")
# print(processed_output)

# output = """Generated Dialogue:
# Convert the following **Narrative** into a structured **Dialogue** with speaker names: Example 1: Narrative: Jane smiled warmly at Mike. "Good to see you." Dialogue: Sarah walked up to Tom with a worried expression. "It's complicated." Example 3: Narrative: John smiled weakly and sat down on the couch. "I don't know what to do.""""
output = processed_output
# Extract the dialogue text
dialogue_text = output.split("Generated Dialogue:")[1].strip()

# Split the dialogue text into individual lines
lines = dialogue_text.splitlines()

# Initialize an empty list to store the formatted dialogue
formatted_dialogue = []

# Iterate through the lines and format the dialogue
for line in lines:
    if line.startswith("Narrative:"):
        continue
    elif line.startswith("Dialogue:"):
        continue
    else:
        # Extract the speaker name and quote
        speaker_name = line.split(":")[0].strip()
        quote = line.split(":")[1].strip().strip('"')
        
        # Format the dialogue
        formatted_dialogue.append(f"{speaker_name}: \"{quote}\"")

# Join the formatted dialogue into a single string
formatted_dialogue_str = "\n".join(formatted_dialogue)

print(formatted_dialogue_str)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


IndexError: list index out of range

In [12]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import logging
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="`do_sample` is set to `False`")

# Set logging to suppress unnecessary information
logging.basicConfig(level=logging.ERROR)

# Initialize the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define the input narrative text with few-shot examples
examples = """John walked into the dimly lit room, his shoulders slumped. He told Mary that he had lost his job today. 
Mary's eyes widened with concern. She said that everything would be okay. "We'll figure this out together," she assured him warmly.
John smiled weakly and sat down on the couch. "I don't know what to do," he whispered, his voice trembling.
Mary sat beside him and told him that they would start looking for new opportunities tomorrow.
"""

# Prepare input for the T5 model
input_ids = tokenizer(examples, return_tensors='pt', padding=True, truncation=True).input_ids

# Check if GPU is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to the device (GPU if available)
model.to(device)

# Generate dialogue using the T5 model with max_length to avoid truncation warning
with torch.no_grad():
    outputs = model.generate(
        input_ids.to(device),
        max_length=250,  # Adjust max_length based on the expected length of output
        num_beams=4,
        repetition_penalty=1.2,
        temperature=0.7,
        length_penalty=1.0,
        early_stopping=True
    )

# Decode the generated tokens into readable text
raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Check if the model's output is in the correct format
if "Dialogue:" not in raw_output:
    print("Dialogue format not recognized.")
else:
    print("Generated Dialogue:")
    print(raw_output)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Dialogue format not recognized.


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# 1. Load MNIST Dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# 2. Define the Model (Simple Feedforward Neural Network)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 128)  # Flattened MNIST images (28x28)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)  # 10 classes for digits 0-9
        
    def forward(self, x):
        x = x.view(-1, 28*28)  # Flatten the image
        x = torch.relu(self.fc1(x))  # Apply ReLU activation function
        x = torch.relu(self.fc2(x))  # Apply ReLU activation function
        x = self.fc3(x)  # No activation at the final layer (for classification)
        return x

# 3. Initialize Model, Loss Function, and Optimizer
model = SimpleNN()
criterion = nn.CrossEntropyLoss()  # For multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 4. Train the Model
epochs = 5
for epoch in range(epochs):
    running_loss = 0.0
    for images, labels in trainloader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Calculate loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader)}")

# 5. Evaluate the Model
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # Get the predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total}%")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:03<00:00, 3.21MB/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 561kB/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 2.64MB/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.21MB/s]


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw

Epoch 1, Loss: 0.3908133572384493
Epoch 2, Loss: 0.19090443029443718
Epoch 3, Loss: 0.1387457896405255
Epoch 4, Loss: 0.10946547301478192
Epoch 5, Loss: 0.0930230775639526
Accuracy: 96.78%


In [3]:
import spacy
import re
import random

# Sample narrative text (for simplicity, you can use your own dataset)
narrative_text = """
Sarah was walking down the street when she suddenly bumped into James. 
"Oh, sorry!" she exclaimed. 
James smiled and said, "No problem, Sarah." 
"How have you been?" Sarah asked, still a bit embarrassed. 
"I'm doing great! Just busy with work," James replied. 
"That's good to hear!" she said with a smile.
"""

# Load the spaCy model for named entity recognition (NER)
nlp = spacy.load("en_core_web_sm")

# Function to preprocess the text
def preprocess_text(text):
    # Identify sentences using regex or NLP library (spaCy can be used here for sentence segmentation)
    doc = nlp(text)
    sentences = []
    for sent in doc.sents:
        sentences.append(sent.text.strip())
    return sentences

# Preprocess the narrative text into sentences
sentences = preprocess_text(narrative_text)

# Print the sentences
for sent in sentences:
    print(sent)


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [9]:
import nltk
from nltk.tokenize import sent_tokenize

# Sample narrative text (for simplicity, you can use your own dataset)
narrative_text = """
Sarah was walking down the street when she suddenly bumped into James. 
"Oh, sorry!" she exclaimed. 
James smiled and said, "No problem, Sarah." 
"How have you been?" Sarah asked, still a bit embarrassed. 
"I'm doing great! Just busy with work," James replied. 
"That's good to hear!" she said with a smile.
"""

# Function to preprocess the text
def preprocess_text(text):
    # Use nltk's sent_tokenize to split text into sentences
    sentences = sent_tokenize(text)
    return sentences

# Preprocess the narrative text into sentences
sentences = preprocess_text(narrative_text)

# Print the sentences
for sent in sentences:
    print(sent)


AttributeError: partially initialized module 'nltk' has no attribute 'data' (most likely due to a circular import)

In [8]:
import nltk
from nltk.tokenize import sent_tokenize

# Sample narrative text
narrative_text = """
Sarah was walking down the street when she suddenly bumped into James. 
"Oh, sorry!" she exclaimed. 
James smiled and said, "No problem, Sarah." 
"How have you been?" Sarah asked, still a bit embarrassed. 
"I'm doing great! Just busy with work," James replied. 
"That's good to hear!" she said with a smile.
"""

# Use nltk's sent_tokenize to split text into sentences
sentences = sent_tokenize(narrative_text)

# Print the sentences
for sent in sentences:
    print(sent)


AttributeError: partially initialized module 'nltk' has no attribute 'data' (most likely due to a circular import)

In [12]:
import nltk
from nltk.tokenize import sent_tokenize

# Sample narrative text
narrative_text = """
Sarah was walking down the street when she suddenly bumped into James. 
"Oh, sorry!" she exclaimed. 
James smiled and said, "No problem, Sarah." 
"How have you been?" Sarah asked, still a bit embarrassed. 
"I'm doing great! Just busy with work," James replied. 
"That's good to hear!" she said with a smile.
"""

# Use nltk's sent_tokenize to split text into sentences
sentences = sent_tokenize(narrative_text)

# Print the sentences
for sent in sentences:
    print(sent)


AttributeError: partially initialized module 'nltk' has no attribute 'data' (most likely due to a circular import)