In [None]:
import os
import json
from langchain_ollama import ChatOllama
from langchain.chains import LLMChain 
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence

# === CONFIGURATION ===
# Name of the Ollama model handed to ChatOllama below.
MODEL_NAME = "llama3"  # Or any model supported by Ollama
# Directory that main() scans for ".txt" input files.
INPUT_DIR = "input"
# Directory where save_results() writes its JSON files (created on demand).
OUTPUT_DIR = "output"

# === LLM SETUP ===
# Single chat-model instance reused for every paragraph request.
llm = ChatOllama(model=MODEL_NAME)

# === PROMPT TEMPLATE ===
# The "Corrected Arabic:" / "English Translation:" labels requested here are
# the markers process_paragraph() uses to parse the model reply, so the prompt
# wording and that parser must stay in sync.
template = """
You are an expert Arabic linguist and translator.
1. Correct any grammatical, orthographic, or linguistic errors in the Arabic input.
2. Translate the corrected text into fluent English.

Return the result in this format:
Corrected Arabic: ...
English Translation: ...

Arabic Text:
{input_text}
"""
prompt = PromptTemplate.from_template(template=template)
# Compose prompt and model; the result is invoked with {"input_text": ...}.
chain = prompt | llm

# === HELPER FUNCTIONS ===

def load_text_file(filepath):
    """Read a UTF-8 text file and return its whole contents as a string.

    The file is decoded with ``utf-8-sig`` so that a leading byte-order mark
    (often written by Windows editors) is dropped. With plain ``utf-8`` the
    BOM survives as U+FEFF, which ``str.strip()`` does not remove, so it would
    end up inside the first paragraph. Files without a BOM decode exactly as
    they would with plain UTF-8.

    Args:
        filepath: Path of the file to read.

    Returns:
        The decoded text of the entire file.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the contents are not valid UTF-8.
    """
    with open(filepath, "r", encoding="utf-8-sig") as f:
        return f.read()

def split_paragraphs(text):
    """Return the non-blank lines of *text*, trimmed, in their original order.

    Each newline-separated line counts as one paragraph; lines that are empty
    or contain only whitespace are dropped.
    """
    paragraphs = []
    for raw_line in text.split('\n'):
        trimmed = raw_line.strip()
        if trimmed:
            paragraphs.append(trimmed)
    return paragraphs

def _parse_llm_response(response_text):
    """Extract the two labelled sections from a model reply.

    Returns a ``(corrected_arabic, english_translation)`` tuple of stripped
    strings, or ``None`` when the "Corrected Arabic:" label followed by the
    "English Translation:" label cannot be found.
    """
    _, corrected_label, remainder = response_text.partition("Corrected Arabic:")
    corrected, translation_label, translation = remainder.partition(
        "English Translation:"
    )
    if not corrected_label or not translation_label:
        return None
    return corrected.strip(), translation.strip()


def process_paragraph(paragraph, llm_chain=None):
    """Correct and translate one Arabic paragraph via the LLM chain.

    Args:
        paragraph: Arabic text substituted for ``input_text`` in the prompt.
        llm_chain: Optional object exposing ``invoke(dict)``. When omitted,
            the module-level ``chain`` is used.

    Returns:
        On success, a dict with the keys ``original``, ``corrected_arabic``
        and ``english_translation``. On failure, a dict with ``original`` and
        ``error``. If the model replied but the reply lacks the expected
        labels, the dict also carries ``raw_response`` so the reply is not
        lost.
    """
    try:
        active_chain = chain if llm_chain is None else llm_chain
        result = active_chain.invoke({"input_text": paragraph})
    except Exception as e:
        # Deliberate best-effort boundary: one failing paragraph is recorded
        # as an error entry instead of aborting the rest of the file.
        return {
            "original": paragraph,
            "error": str(e)
        }

    # Chat models return a message object with ``.content``; anything else is
    # stringified so the parser always receives text.
    content = getattr(result, "content", result)
    response_text = content if isinstance(content, str) else str(content)

    parsed = _parse_llm_response(response_text)
    if parsed is None:
        return {
            "original": paragraph,
            "error": (
                "Model response is missing the 'Corrected Arabic:' / "
                "'English Translation:' labels"
            ),
            "raw_response": response_text
        }

    corrected, translation = parsed
    return {
        "original": paragraph,
        "corrected_arabic": corrected,
        "english_translation": translation
    }

def process_file(filepath):
    """Run every paragraph of one input file through the LLM.

    Announces the file on stdout, reads it, splits it into paragraphs and
    returns the list of per-paragraph result dicts in file order.
    """
    print(f"Processing: {filepath}")
    outcomes = []
    for chunk in split_paragraphs(load_text_file(filepath)):
        outcomes.append(process_paragraph(chunk))
    return outcomes

def save_results(filename, results, output_dir=None):
    """Write *results* as pretty-printed UTF-8 JSON named after *filename*.

    The output file is ``<stem>_output.json``, where ``<stem>`` is the base
    name of *filename* without its final extension. The target directory is
    created if it does not exist, and the written path is printed.

    Args:
        filename: Name or path of the input file the results belong to; only
            its base name is used.
        results: JSON-serialisable object to store.
        output_dir: Optional directory to write into. When omitted, the
            module-level ``OUTPUT_DIR`` is used.

    Raises:
        OSError: If the directory or file cannot be created or written.
        TypeError: If *results* is not JSON-serialisable.
    """
    target_dir = OUTPUT_DIR if output_dir is None else output_dir
    os.makedirs(target_dir, exist_ok=True)
    # splitext swaps only the final extension. str.replace(".txt", ...) would
    # rewrite every ".txt" inside the name and leave a name without ".txt"
    # untouched, so the JSON would be saved under the input's own file name.
    stem, _ext = os.path.splitext(os.path.basename(filename))
    output_path = os.path.join(target_dir, f"{stem}_output.json")
    with open(output_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps the Arabic text readable in the file.
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"Saved: {output_path}")

# === MAIN ===

def main():
    """Process every ".txt" file in INPUT_DIR and save one JSON file per input."""
    for entry in os.listdir(INPUT_DIR):
        # Only plain-text inputs are handled; everything else is ignored.
        if not entry.endswith(".txt"):
            continue
        source_path = os.path.join(INPUT_DIR, entry)
        save_results(entry, process_file(source_path))

# Run the batch only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


Processing: input\surah1-ayat1.txt
