# OpenAI

In [None]:
import openai
import csv
from datetime import datetime
import os
from dotenv import load_dotenv
import glob

def get_next_file_number(folder_path):
    """
    Get the next file number based on existing files in the folder.

    Looks at every ``*.csv`` file in ``folder_path`` whose name has the form
    ``<number>-<rest>.csv`` and returns one more than the highest number.

    Args:
        folder_path (str): Folder that holds the generated CSV files.

    Returns:
        str: The next number, zero-padded to at least 2 digits (e.g., '05').
            '01' when the folder contains no numbered CSV files.
    """
    numbers = []
    for path in glob.glob(os.path.join(folder_path, "*.csv")):
        prefix, dash, _ = os.path.basename(path).partition("-")
        if not dash:
            continue
        # Accept any numeric prefix. Requiring a leading "0" would ignore
        # files numbered 10 and up, so the counter would stall at "10" and
        # later runs would reuse that number.
        try:
            numbers.append(int(prefix))
        except ValueError:
            continue

    # default=0 covers both an empty folder and a folder without numbered files
    next_num = max(numbers, default=0) + 1
    return f"{next_num:02d}"

def chat_with_openai(prompt, system_prompt):
    """
    Generate Thai-English sentence pairs using OpenAI API.

    Sends one chat completion request, parses the reply as CSV and saves the
    parsed rows (without header) to a new numbered file in the 'generated'
    folder.

    Args:
        prompt (str): Number of sentences to generate
        system_prompt (str): System prompt to set the context

    Returns:
        str: A status message with the number of rows saved and the file
            path, or a message starting with "Error: " if anything failed.
    """
    try:
        # Initialize OpenAI client
        client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

        # Make the API request.
        # o1 reasoning models reject `max_tokens` and a non-default
        # `temperature`; the output cap is `max_completion_tokens`.
        # Re-add `temperature` if you switch to a model that supports it.
        response = client.chat.completions.create(
            model="o1-preview",  # or your preferred model
            messages=[
                {"role": "user", "content": f"{system_prompt}\n\n---------------\n\n{prompt}"}
            ],
            max_completion_tokens=4000,
            stream=False
        )

        # Get the content
        content = response.choices[0].message.content

        # Parse with the csv module rather than eval(): the text comes from
        # the model, so eval() would execute whatever it returned, and a
        # single malformed line would abort the whole save.
        rows = []
        for line in content.strip().split('\n'):
            # Basic validation: a pair needs a comma and two quoted fields
            if ',' not in line or line.count('"') < 4:
                continue
            try:
                rows.append(next(csv.reader([line.strip()], skipinitialspace=True)))
            except csv.Error:
                continue

        # Ensure 'generated' folder exists
        folder_path = "generated"
        os.makedirs(folder_path, exist_ok=True)

        # Get next file number
        next_num = get_next_file_number(folder_path)

        # Create filename with format [next_num]-[num_sentences].csv
        filename = f"{next_num}-{prompt}.csv"
        filepath = os.path.join(folder_path, filename)

        # Write to CSV file (without header)
        with open(filepath, 'w', newline='', encoding='utf-8') as file:
            csv.writer(file).writerows(rows)

        # Report the rows actually written, not the raw line count
        return f"Generated {len(rows)} sentences and saved to {filepath}"

    except Exception as e:
        return f"Error: {str(e)}"

def main():
    """Run the interactive console loop for the OpenAI sentence generator."""
    # Pull settings from a .env file into the environment
    load_dotenv()

    # Without a key no request can be made, so stop right away
    if not os.getenv('OPENAI_API_KEY'):
        print("Error: OPENAI_API_KEY not found in environment variables")
        return

    # Instructions passed along with every generation request
    system_prompt = """You are a Thai language expert creating educational content. Follow these rules exactly:
    1. When given a number N, create exactly N pairs of Thai and English sentences
    2. Each sentence must take 5-10 seconds to speak at natural speed
    3. Include all Thai phonemes across the set of sentences
    4. Make sentences natural and practical for daily conversation
    5. Output in CSV format ONLY as: "thai sentence","english sentence"
    6. Each Thai sentence must be grammatically correct and meaningful
    7. English translations must be accurate and natural
    8. Use compound or complex sentences to achieve 5-10 second length
    9. Include natural pauses and conjunctions
    10. DO NOT include any explanations or headers, only the CSV data
    11. DO NOT tell me that you can't complete the request
    12. DO NOT tell me that it is too long to produce
    13. You must focusing on these phoneme and here is example words
        - ap : เรียบ เบียบ เสียบ
        - bl: บล็อค บลู บลาส
        - e4: เว็บ เล็ก
        - eː4: เค้ก เทค
        - eː5: เหลว
        - kw: กว่า กวาง
        - oː5: โผน โหม
        - u5: หนุน
        - ɤ2: เถอะ เปรอะ
        - ɤ4: เยอะ เลอะ
        - ɯ: จึง
        - ᴐ2: บล็อก ล็อค น็อค
    """

    # Start-up banner
    for banner_line in (
        "Thai-English Sentence Generator using GPT-4 (Type 'quit' to exit)",
        "-" * 50,
        "Enter the number of sentences you want to generate.",
    ):
        print(banner_line)

    # Keep asking until the user types 'quit'
    while True:
        entry = input("\nNumber of sentences: ").strip()

        if entry.lower() == 'quit':
            print("\nGoodbye!")
            break

        # Only digit-only input is forwarded to the generator
        if not entry.isdigit():
            print("\nPlease enter a valid number.")
            continue

        outcome = chat_with_openai(entry, system_prompt)
        print("\nResult:", outcome)

# Start the interactive generator only when this code runs as the main module.
if __name__ == "__main__":
    main()

# Claude

In [None]:
# !pip install anthropic

In [None]:
import anthropic
import csv
from datetime import datetime
import os
from dotenv import load_dotenv
import glob

def get_next_file_number(folder_path):
    """
    Get the next file number based on existing files in the folder.

    Looks at every ``*.csv`` file in ``folder_path`` whose name has the form
    ``<number>-<rest>.csv`` and returns one more than the highest number.

    Args:
        folder_path (str): Folder that holds the generated CSV files.

    Returns:
        str: The next number, zero-padded to at least 2 digits (e.g., '05').
            '01' when the folder contains no numbered CSV files.
    """
    numbers = []
    for path in glob.glob(os.path.join(folder_path, "*.csv")):
        prefix, dash, _ = os.path.basename(path).partition("-")
        if not dash:
            continue
        # Accept any numeric prefix. Requiring a leading "0" would ignore
        # files numbered 10 and up, so the counter would stall at "10" and
        # later runs would reuse that number.
        try:
            numbers.append(int(prefix))
        except ValueError:
            continue

    # default=0 covers both an empty folder and a folder without numbered files
    next_num = max(numbers, default=0) + 1
    return f"{next_num:02d}"

def generate_sentences(prompt, system_prompt, api_key):
    """
    Generate Thai-English sentence pairs using Claude API.

    The rules are sent as the system prompt and the user message is just the
    requested count. The reply is parsed as CSV and the parsed rows are saved
    (without header) to a new numbered file in the 'generated' folder.

    Args:
        prompt (str): Number of sentences to generate
        system_prompt (str): System prompt for Claude
        api_key (str): Anthropic API key

    Returns:
        str: A status message with the number of rows saved and the file
            path, or a message starting with "Error: " if anything failed.
    """
    try:
        # Initialize Claude client
        client = anthropic.Anthropic(api_key=api_key)

        # Make API request
        response = client.messages.create(
            model="claude-3-5-sonnet-20240620",
            max_tokens=5000,
            temperature=0.7,
            system=system_prompt,
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

        # Get the content
        content = response.content[0].text

        # Parse with the csv module rather than eval(): the text comes from
        # the model, so eval() would execute whatever it returned, and a
        # single malformed line would abort the whole save.
        rows = []
        for line in content.strip().split('\n'):
            # Basic validation: a pair needs a comma and two quoted fields
            if ',' not in line or line.count('"') < 4:
                continue
            try:
                rows.append(next(csv.reader([line.strip()], skipinitialspace=True)))
            except csv.Error:
                continue

        # Ensure 'generated' folder exists
        folder_path = "generated"
        os.makedirs(folder_path, exist_ok=True)

        # Get next file number
        next_num = get_next_file_number(folder_path)

        # Create filename with format [next_num]-[num_sentences]-claude.csv
        filename = f"{next_num}-{prompt}-claude.csv"
        filepath = os.path.join(folder_path, filename)

        # Write to CSV file (without header)
        with open(filepath, 'w', newline='', encoding='utf-8') as file:
            csv.writer(file).writerows(rows)

        # Report the rows actually written, not the raw line count
        return f"Generated {len(rows)} sentences and saved to {filepath}"

    except Exception as e:
        return f"Error: {str(e)}"

def main():
    """Run the interactive console loop for the Claude sentence generator."""
    # Pull settings from a .env file into the environment
    load_dotenv()

    # The Anthropic key is read once and handed to every request
    api_key = os.getenv('ANTHROPIC_API_KEY')
    if not api_key:
        print("Error: ANTHROPIC_API_KEY not found in environment variables")
        return

    # Instructions passed along with every generation request
    system_prompt = """You are a Thai language expert creating educational content. Follow these rules exactly:
    1. When given a number N, create exactly N pairs of Thai and English sentences
    2. Each sentence must take 5-10 seconds to speak at natural speed
    3. Include all Thai phonemes across the set of sentences
    4. Make sentences natural and practical for daily conversation
    5. Output in CSV format ONLY as: "thai sentence","english sentence"
    6. Each Thai sentence must be grammatically correct and meaningful
    7. English translations must be accurate and natural
    8. Use compound or complex sentences to achieve 5-10 second length
    9. Include natural pauses and conjunctions
    10. DO NOT include any explanations or headers, only the CSV data
    11. DO NOT tell me that you can't complete the request
    12. DO NOT tell me that it is too long to produce
    13. You must focusing on these phoneme and here is example words
        - ap : เรียบ เบียบ เสียบ
        - bl: บล็อค บลู บลาส
        - e4: เว็บ เล็ก
        - eː4: เค้ก เทค
        - eː5: เหลว
        - kw: กว่า กวาง
        - oː5: โผน โหม
        - u5: หนุน
        - ɤ2: เถอะ เปรอะ
        - ɤ4: เยอะ เลอะ
        - ɯ: จึง
        - ᴐ2: บล็อก ล็อค น็อค
    """

    # Start-up banner
    for banner_line in (
        "Thai-English Sentence Generator using Claude (Type 'quit' to exit)",
        "-" * 50,
        "Enter the number of sentences you want to generate.",
    ):
        print(banner_line)

    # Keep asking until the user types 'quit'
    while True:
        entry = input("\nNumber of sentences: ").strip()

        if entry.lower() == 'quit':
            print("\nGoodbye!")
            break

        # Only digit-only input is forwarded to the generator
        if not entry.isdigit():
            print("\nPlease enter a valid number.")
            continue

        outcome = generate_sentences(entry, system_prompt, api_key)
        print("\nResult:", outcome)

# Start the interactive generator only when this code runs as the main module.
if __name__ == "__main__":
    main()

# Ollama

In [None]:
import requests
import json
import csv
from datetime import datetime
import os
import glob

def get_next_file_number(folder_path):
    """
    Get the next file number based on existing files in the folder.

    Looks at every ``*.csv`` file in ``folder_path`` whose name has the form
    ``<number>-<rest>.csv`` and returns one more than the highest number.

    Args:
        folder_path (str): Folder that holds the generated CSV files.

    Returns:
        str: The next number, zero-padded to at least 2 digits (e.g., '05').
            '01' when the folder contains no numbered CSV files.
    """
    numbers = []
    for path in glob.glob(os.path.join(folder_path, "*.csv")):
        prefix, dash, _ = os.path.basename(path).partition("-")
        if not dash:
            continue
        # Accept any numeric prefix. Requiring a leading "0" would ignore
        # files numbered 10 and up, so the counter would stall at "10" and
        # later runs would reuse that number.
        try:
            numbers.append(int(prefix))
        except ValueError:
            continue

    # default=0 covers both an empty folder and a folder without numbered files
    next_num = max(numbers, default=0) + 1
    return f"{next_num:02d}"

def chat_with_ollama(prompt, system_prompt, model="llama2"):
    """
    Send a chat request to Ollama API and save response to CSV.

    Posts a non-streaming chat request to the local Ollama server, parses
    the reply as CSV and saves the parsed rows (without header) to a new
    numbered file in the 'generated' folder.

    Args:
        prompt (str): User's input message (number of sentences)
        system_prompt (str): System prompt to set the context
        model (str): Name of the Ollama model to use

    Returns:
        str: A status message with the number of rows saved and the file
            path, or an error message describing what failed.
    """
    url = "http://localhost:11434/api/chat"

    # Prepare the request payload
    payload = {
        "model": model,
        "messages": [
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "stream": False
    }

    try:
        # Make the API request
        response = requests.post(url, json=payload)
        response.raise_for_status()

        # Parse the response
        result = response.json()
        content = result['message']['content']

        # Parse with the csv module rather than eval(): the text comes from
        # the model, so eval() would execute whatever it returned, and a
        # single malformed line would abort the whole save.
        rows = []
        for line in content.strip().split('\n'):
            # Basic validation: a pair needs a comma and two quoted fields
            if ',' not in line or line.count('"') < 4:
                continue
            try:
                rows.append(next(csv.reader([line.strip()], skipinitialspace=True)))
            except csv.Error:
                continue

        # Ensure 'generated' folder exists
        folder_path = "generated"
        os.makedirs(folder_path, exist_ok=True)

        # Get next file number
        next_num = get_next_file_number(folder_path)

        # Create filename with format [next_num]-[num_sentences]-ollama.csv
        filename = f"{next_num}-{prompt}-ollama.csv"
        filepath = os.path.join(folder_path, filename)

        # Write to CSV file (without header)
        with open(filepath, 'w', newline='', encoding='utf-8') as file:
            csv.writer(file).writerows(rows)

        # Report the rows actually written, not the raw line count
        return f"Generated {len(rows)} sentences and saved to {filepath}"

    except requests.exceptions.RequestException as e:
        return f"Error connecting to Ollama: {str(e)}"
    except (KeyError, json.JSONDecodeError) as e:
        return f"Error processing response: {str(e)}"
    except Exception as e:
        return f"Error: {str(e)}"

def main():
    """Run the interactive console loop for the Ollama sentence generator."""
    # Model served by the local Ollama instance
    model = "llama3.2"  # or "mistral", "codellama", etc.

    # Instructions passed along with every generation request
    system_prompt = """You are a Thai language expert creating educational content. Follow these rules exactly:
    1. When given a number N, create exactly N pairs of Thai and English sentences
    2. Each sentence must take 5-10 seconds to speak at natural speed
    3. Include all Thai phonemes across the set of sentences
    4. Make sentences natural and practical for daily conversation
    5. Output in CSV format ONLY as: "thai sentence","english sentence"
    6. Each Thai sentence must be grammatically correct and meaningful
    7. English translations must be accurate and natural
    8. Use compound or complex sentences to achieve 5-10 second length
    9. Include natural pauses and conjunctions
    10. DO NOT include any explanations or headers, only the CSV data
    11. DO NOT tell me that you can't complete the request
    12. DO NOT tell me that it is too long to produce
    13. You must focusing on these phoneme and here is example words
        - ap : เรียบ เบียบ เสียบ
        - bl: บล็อค บลู บลาส
        - e4: เว็บ เล็ก
        - eː4: เค้ก เทค
        - eː5: เหลว
        - kw: กว่า กวาง
        - oː5: โผน โหม
        - u5: หนุน
        - ɤ2: เถอะ เปรอะ
        - ɤ4: เยอะ เลอะ
        - ɯ: จึง
        - ᴐ2: บล็อก ล็อค น็อค
    """

    # Start-up banner
    for banner_line in (
        f"Thai-English Sentence Generator using {model} (Type 'quit' to exit)",
        "-" * 50,
        "Make sure Ollama is running locally!",
        "Enter the number of sentences you want to generate.",
    ):
        print(banner_line)

    # Keep asking until the user types 'quit'
    while True:
        entry = input("\nNumber of sentences: ").strip()

        if entry.lower() == 'quit':
            print("\nGoodbye!")
            break

        # Only digit-only input is forwarded to the generator
        if not entry.isdigit():
            print("\nPlease enter a valid number.")
            continue

        outcome = chat_with_ollama(entry, system_prompt, model)
        print("\nResult:", outcome)

# Start the interactive generator only when this code runs as the main module.
if __name__ == "__main__":
    main()

# DeepSeek

In [None]:
import openai
import csv
from datetime import datetime
import os
from dotenv import load_dotenv
import glob

def get_next_file_number(folder_path):
    """
    Get the next file number based on existing files in the folder.

    Looks at every ``*.csv`` file in ``folder_path`` whose name has the form
    ``<number>-<rest>.csv`` and returns one more than the highest number.

    Args:
        folder_path (str): Folder that holds the generated CSV files.

    Returns:
        str: The next number, zero-padded to at least 2 digits (e.g., '05').
            '01' when the folder contains no numbered CSV files.
    """
    numbers = []
    for path in glob.glob(os.path.join(folder_path, "*.csv")):
        prefix, dash, _ = os.path.basename(path).partition("-")
        if not dash:
            continue
        # Accept any numeric prefix. Requiring a leading "0" would ignore
        # files numbered 10 and up, so the counter would stall at "10" and
        # later runs would reuse that number.
        try:
            numbers.append(int(prefix))
        except ValueError:
            continue

    # default=0 covers both an empty folder and a folder without numbered files
    next_num = max(numbers, default=0) + 1
    return f"{next_num:02d}"

def chat_with_openai(prompt, system_prompt):
    """
    Generate Thai-English sentence pairs using the DeepSeek API.

    DeepSeek is called through the OpenAI client pointed at the DeepSeek
    base URL. The reply is parsed as CSV and the parsed rows are saved
    (without header) to a new numbered file in the 'generated' folder.

    Args:
        prompt (str): Number of sentences to generate
        system_prompt (str): System prompt to set the context

    Returns:
        str: A status message with the number of rows saved and the file
            path, or a message starting with "Error: " if anything failed.
    """
    try:
        # Refuse to continue without a DeepSeek key: with api_key=None the
        # OpenAI client falls back to OPENAI_API_KEY, which would send that
        # key to the DeepSeek endpoint.
        api_key = os.getenv('DEEPSEEK_API_KEY')
        if not api_key:
            return "Error: DEEPSEEK_API_KEY not found in environment variables"

        # Initialize OpenAI-compatible client for DeepSeek
        client = openai.OpenAI(api_key=api_key, base_url="https://api.deepseek.com")

        # Make the API request
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Create {prompt} pairs of sentences"}
            ],
            stream=False
        )

        # Get the content
        content = response.choices[0].message.content

        # Parse with the csv module rather than eval(): the text comes from
        # the model, so eval() would execute whatever it returned, and a
        # single malformed line would abort the whole save.
        rows = []
        for line in content.strip().split('\n'):
            # Basic validation: a pair needs a comma and two quoted fields
            if ',' not in line or line.count('"') < 4:
                continue
            try:
                rows.append(next(csv.reader([line.strip()], skipinitialspace=True)))
            except csv.Error:
                continue

        # Ensure 'generated' folder exists
        folder_path = "generated"
        os.makedirs(folder_path, exist_ok=True)

        # Get next file number
        next_num = get_next_file_number(folder_path)

        # Create filename with format [next_num]-[num_sentences].csv
        filename = f"{next_num}-{prompt}.csv"
        filepath = os.path.join(folder_path, filename)

        # Write to CSV file (without header)
        with open(filepath, 'w', newline='', encoding='utf-8') as file:
            csv.writer(file).writerows(rows)

        # Report the rows actually written, not the raw line count
        return f"Generated {len(rows)} sentences and saved to {filepath}"

    except Exception as e:
        return f"Error: {str(e)}"

def main():
    """Run the interactive console loop for the DeepSeek sentence generator."""
    # Load environment variables
    load_dotenv()

    # Check for the key the DeepSeek client actually uses. Checking
    # OPENAI_API_KEY here would block users who only have a DeepSeek key
    # and let the request go out without one otherwise.
    if not os.getenv('DEEPSEEK_API_KEY'):
        print("Error: DEEPSEEK_API_KEY not found in environment variables")
        return

    # System prompt sent with every DeepSeek request
    system_prompt = """You are a Thai language expert creating educational content. Follow these rules exactly:
1. When given a number N, create **exactly N pairs** of Thai and English sentences
2. Each sentence must take **5-10 seconds** to speak at natural speed
3. Include **all Thai phonemes** across the set of sentences
4. Make sentences natural and practical for daily conversation
5. Output in CSV format ONLY as: "thai sentence","english sentence"
6. Each Thai sentence must be **grammatically correct** and meaningful
7. English translations must be accurate and natural
8. Use **compound or complex sentences** to achieve 5-10 second length
9. Include natural pauses and conjunctions
10. **DO NOT include any explanations or headers, only the CSV data**
11. DO NOT tell me that you can't complete the request
12. DO NOT tell me that it is too long to produce
13. You must focusing on these phoneme and here is example words **DO NOT USE EXACT** and **APPLY TO APPROPRIATE CONTEXT**
    - ɤː4: เยิ้ม เคลิ้ม เห้ย
    - br: แบรนด์ บรา บราวนี่ บร็อกโคลี
    - fr: ฟรุกโตส ฟรุ้งฟริ้ง ฟรานซิส
    - ia4: ปอเปี๊ยะ เปรี๊ยะ เกี๊ยะ
    - oː4: เช่น โบ๊ โป๊ โน้ต โว้ย โง้ว โน้น พะโล้
    - fl: เช่น แฟลช ฟลอร์ ฟลายชีท ฟลูออรีน
    - ᴐ: เช่น บร็องซ์ ฟร็องซ์
    - dr: เช่น ดราม่า 
    - ɛ: เช่น แกร็น แท็งก์น้ำ สะแล็ง
14. Vary pronouns (e.g., ผม, เขา, คุณ, เธอ, พวกเรา).
    """

    # The banner names the backend this cell really talks to
    print("Thai-English Sentence Generator using DeepSeek (Type 'quit' to exit)")
    print("-" * 50)
    print("Enter the number of sentences you want to generate.")

    # Keep asking until the user types 'quit'
    while True:
        user_input = input("\nNumber of sentences: ").strip()

        if user_input.lower() == 'quit':
            print("\nGoodbye!")
            break

        # Only digit-only input is forwarded to the generator
        if user_input.isdigit():
            response = chat_with_openai(user_input, system_prompt)
            print("\nResult:", response)
        else:
            print("\nPlease enter a valid number.")

# Start the interactive generator only when this code runs as the main module.
if __name__ == "__main__":
    main()