In [1]:
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate

# Single-line smoke test: send one Hamlet stage direction through the
# prompt → model pipeline and show the input next to the model's answer.

# Sample input (a stage direction from Hamlet)
text = "FRANCISCO at his post. Enter to him BERNARDO"

# The "mistral" model, accessed through Ollama
llm = OllamaLLM(model="mistral")

# Prompt with a single {text} placeholder; the trailing "Paraphrase:" cue
# invites the model to continue with its rewrite.
prompt = PromptTemplate.from_template(
    "Paraphrase the following text in natural modern English:"
    "\n\n{text}\n\n"
    "Paraphrase:"
)

# Pipe the rendered prompt into the model
chain = prompt | llm

# Fill the placeholder and run the pipeline once
paraphrased = chain.invoke({"text": text})

# Show input and output
print("Original:\n", text)
print("\nParaphrased:\n", paraphrased)


Original:
 FRANCISCO at his post. Enter to him BERNARDO

Paraphrased:
  Francisco is on duty. Bernardo approaches him.

In this context, "Francisco at his post" means that Francisco is on guard or performing his duties, and "Enter to him BERNARDO" implies that Bernardo entered the scene and approached Francisco.


In [3]:
import pandas as pd
from tqdm import tqdm
import time
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate

# Batch-paraphrase every row of the Hamlet dataset with checkpointing.
#
# Reads the "dialogue" column, sends each line through the prompt → model
# chain, and stores the answer in a "paraphrased" column. Progress is written
# to OUTPUT_CSV after every batch, and a re-run of this cell picks up from
# that checkpoint instead of starting over. Delete OUTPUT_CSV to force a
# full re-run.

# --- Configuration ---
INPUT_CSV = "hamlet.csv"
OUTPUT_CSV = "hamlet_paraphrased.csv"
BATCH_SIZE = 10          # rows processed between checkpoint saves
ERROR_PREFIX = "[ERROR]"  # marker written when a model call fails

# --- Load data, preferring an existing checkpoint ---
# The checkpoint is what actually contains earlier results; the raw input
# never does, so resuming only works if the checkpoint is read first.
try:
    df = pd.read_csv(OUTPUT_CSV)
except FileNotFoundError:
    df = pd.read_csv(INPUT_CSV)

# Add a new column for paraphrased text
if "paraphrased" not in df.columns:
    df["paraphrased"] = None

# A column that is entirely empty is read back as float64; force object dtype
# so string results can be assigned without dtype errors/warnings.
df["paraphrased"] = df["paraphrased"].astype(object)

# Rows that failed in a previous run hold an error marker, not a paraphrase.
# Clear them so they are retried instead of being treated as finished.
failed_mask = df["paraphrased"].astype(str).str.startswith(ERROR_PREFIX)
df.loc[failed_mask, "paraphrased"] = None

# --- Initialize LangChain + Ollama ---
llm = OllamaLLM(model="mistral")
prompt = PromptTemplate.from_template(
     "Rewrite the following line in modern English. Do not explain or annotate. Just provide the modern version:\n\n{text}"
)
chain = prompt | llm

# --- Find where to start ---
# Work with positions (not index labels) because the batches below are sliced
# with iloc; argmax on a boolean array yields the first pending position.
pending = df["paraphrased"].isna().to_numpy()

if not pending.any():
    # Previously this case produced a NaN start index and crashed in range().
    print("Nothing to do: every row already has a paraphrase.")
else:
    start_idx = int(pending.argmax())

    # --- Process in batches ---
    for i in tqdm(range(start_idx, len(df), BATCH_SIZE), desc="Paraphrasing"):
        batch = df.iloc[i:i + BATCH_SIZE]

        for idx, row in batch.iterrows():
            if pd.notnull(df.at[idx, "paraphrased"]):
                continue  # already processed

            try:
                text = row["dialogue"]
                response = chain.invoke({"text": text})
                df.at[idx, "paraphrased"] = response
            except Exception as e:
                # Best effort: record the failure and keep going; the marker
                # is cleared on the next run so the row gets retried.
                df.at[idx, "paraphrased"] = f"{ERROR_PREFIX} {e}"

        # Save progress after every batch
        df.to_csv(OUTPUT_CSV, index=False)

        # Optional pause to give CPU/GPU a breather
        time.sleep(0.2)


Paraphrasing: 100%|████████████████████████████████████████████████████████████████| 153/153 [2:08:26<00:00, 50.37s/it]
