# Install Dependencies

In [1]:
%pip install cerebras-cloud-sdk pandas gradio sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
Collecting portalocker (from sacrebleu)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting regex (from sacrebleu)
  Downloading regex-2025.11.3-cp313-cp313-win_amd64.whl.metadata (41 kB)
Collecting tabulate>=0.8.9 (from sacrebleu)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting lxml (from sacrebleu)
  Downloading lxml-6.0.2-cp313-cp313-win_amd64.whl.metadata (3.7 kB)
Collecting pywin32>=226 (from portalocker->sacrebleu)
  Using cached pywin32-311-cp313-cp313-win_amd64.whl.metadata (10 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Downloading lxml-6.0.2-cp313-cp313-win_amd64.whl (4.0 MB)
   ---------------------------------------- 0.0/4.0 MB ? eta -:--:--
   ------------------ --------------------- 1.8/4.0 MB 8.7 MB/s eta 0:00:01
   ------------------------------------ --- 3.7/4.0 MB 8.5 


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
pip install openai-whisper gTTS


Collecting openai-whisper
  Downloading openai_whisper-20250625.tar.gz (803 kB)
     ---------------------------------------- 0.0/803.2 kB ? eta -:--:--
     ---------------------------------------- 803.2/803.2 kB 9.7 MB/s  0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting more-itertools (from openai-whisper)
  Downloading more_itertools-10.8.0-py3-none-any.whl.metadata (39 kB)
Collecting numba (from openai-whisper)
  Downloading numba-0.63.1-cp313-cp313-win_amd64.whl.metadata (3.0 kB)
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.12.0-cp313-cp313-win_amd64.whl.metadata (6.9 kB)
Collecting request


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


# Tenby10 Backend Experiment


In [3]:
import os
from cerebras.cloud.sdk import Cerebras
import pandas as pd
import json
import gradio as gr
import sacrebleu


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import os
from cerebras.cloud.sdk import Cerebras
import pandas as pd
import json
import gradio as gr
import sacrebleu


# Shared Cerebras client used by the LLM helper functions in this notebook.
# The API key is read from the CEREBRAS_API_KEY_DANI environment variable; if the
# variable is not set, the lookup raises KeyError here, before any request is made.
client = Cerebras(api_key=os.environ["CEREBRAS_API_KEY_DANI"])

def _extract_json_object(content):
    """Return the JSON object (as a dict) found in ``content``, or ``None``.

    The raw text is tried first. If that fails, the span between the first ``{``
    and the last ``}`` is tried, which recovers replies wrapped in Markdown code
    fences or surrounded by extra prose. Anything that is not a JSON object
    (lists, bare strings, numbers) is rejected so callers can rely on ``.get``.
    """
    candidates = [content]
    start, end = content.find("{"), content.rfind("}")
    if 0 <= start < end:
        candidates.append(content[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def generate_vocab_flashcards(vocab_list, target_language="Spanish"):
    """Ask the LLM for one flashcard per term and return them as a DataFrame.

    Parameters
    ----------
    vocab_list : iterable of str
        Source terms, one flashcard is produced for each.
    target_language : str
        Language to translate into. For "Japanese" and "Chinese"
        (case-insensitive) a "Reading" column is requested and returned as well.

    Returns
    -------
    pandas.DataFrame
        Columns "Grammar/Vocab", ["Reading",] "Translation", "Example Sentence",
        one row per input term, in input order. The columns are present even
        when ``vocab_list`` is empty.

    Notes
    -----
    A failure for one term (network error, unexpected response shape) never
    aborts the batch: that term's row carries ``"Error: ..."`` in "Translation".
    If the reply contains no parseable JSON object, its first line is used as
    the translation.
    """
    has_reading = target_language.lower() in ["japanese", "chinese"]
    columns = ["Grammar/Vocab", "Translation", "Example Sentence"]
    if has_reading:
        columns.insert(1, "Reading")

    results = []
    for word in vocab_list:
        # Prompt asks for reading if Japanese or Chinese
        if has_reading:
            prompt = f"""
            You are a multilingual language learning assistant.
            For the term \"{word}\", translate it into {target_language}, give the pronunciation (reading,
            such as pinyin for Chinese or furigana for Japanese in [brackets] for the reading and example sentence),
            and provide one natural example sentence in
            the input and target language.
            Respond ONLY in valid JSON with this exact structure:
            {{
                "term": \"{word}\",
                "translation": "...",
                "reading": "...",
                "example_sentence": "..."
            }}
            """
        else:
            prompt = f"""
            You are a multilingual language learning assistant.
            For the term \"{word}\", translate it into {target_language}, and give one natural example
            sentence in the input language and target language.
            Respond ONLY in valid JSON with this exact structure:
            {{
                "term": \"{word}\",
                "translation": "...",
                "example_sentence": "..."
            }}
            """
        try:
            completion = client.chat.completions.create(
                model="qwen-3-235b-a22b-instruct-2507",
                messages=[
                    {"role": "system", "content": "You are a helpful multilingual assistant."},
                    {"role": "user", "content": prompt}
                ],
                max_completion_tokens=500,
                temperature=0.7,
                top_p=0.5,
                stream=False
            )
            content = completion.choices[0].message.content.strip()

            data = _extract_json_object(content)
            if data is None:
                # Fallback: no usable JSON, keep the first line as the translation.
                data = {"translation": content.split("\n")[0]}

            row = {
                "Grammar/Vocab": data.get("term", word),
                "Reading": data.get("reading", ""),
                "Translation": data.get("translation", ""),
                "Example Sentence": data.get("example_sentence", ""),
            }
        except Exception as e:
            # Best effort per term: record the failure in the row and move on.
            row = {
                "Grammar/Vocab": word,
                "Reading": "",
                "Translation": f"Error: {e}",
                "Example Sentence": "",
            }

        # Keep only the columns relevant for this language, in a fixed order.
        results.append({column: row[column] for column in columns})

    return pd.DataFrame(results, columns=columns)

In [5]:
import os
import csv
import json

# CSV file that accumulates grammar flashcards (path is relative to the working directory).
GRAMMAR_CSV_PATH = "grammar_flashcards.csv"
# Column headers of the grammar CSV, in output order. append_grammar_row maps the
# model's snake_case JSON keys (e.g. "grammar_point") onto these names.
GRAMMAR_COLUMNS = [
    "grammar point",
    "grammar point translation",
    "conjugation rules",
    "example sentence",
    "example sentence translation",
]


In [6]:
def append_grammar_row(data):
    """Append one grammar flashcard to the CSV at ``GRAMMAR_CSV_PATH``.

    Parameters
    ----------
    data : dict
        Flashcard as parsed from the model's JSON. The keys read are
        "grammar_point", "grammar_point_translation", "conjugation_rules",
        "example_sentence" and "example_sentence_translation"; missing keys
        are written as empty strings.

    The header row is written first whenever the file does not exist yet or
    exists but is empty, so the CSV never ends up with data rows and no header.
    """
    row = {
        "grammar point": data.get("grammar_point", ""),
        "grammar point translation": data.get("grammar_point_translation", ""),
        "conjugation rules": data.get("conjugation_rules", ""),
        "example sentence": data.get("example_sentence", ""),
        "example sentence translation": data.get("example_sentence_translation", ""),
    }

    # A zero-byte file (e.g. left by an interrupted write) still needs a header.
    needs_header = (
        not os.path.isfile(GRAMMAR_CSV_PATH)
        or os.path.getsize(GRAMMAR_CSV_PATH) == 0
    )

    with open(GRAMMAR_CSV_PATH, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=GRAMMAR_COLUMNS)
        if needs_header:
            writer.writeheader()
        writer.writerow(row)


In [7]:
import os
from gtts import gTTS
import tempfile
import whisper

def _parse_grammar_flashcard_json(json_text):
    """Return the flashcard dict encoded in ``json_text``, or ``None``.

    Tries the stripped text as-is, then the span from the first ``{`` to the
    last ``}`` so that Markdown code fences or trailing remarks around the
    object do not discard an otherwise valid flashcard. Only JSON objects are
    accepted, because the CSV writer looks fields up by key.
    """
    stripped = json_text.strip()
    candidates = [stripped]
    start, end = stripped.find("{"), stripped.rfind("}")
    if 0 <= start < end:
        candidates.append(stripped[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def generate_grammar_flashcard_and_explanation(
    user_message,
    target_language="Spanish",
    native_language="English",
):
    """Answer a learner's grammar question and extract a flashcard from the reply.

    Parameters
    ----------
    user_message : str
        The learner's question, inserted verbatim into the prompt.
    target_language : str
        Language being learned.
    native_language : str
        Language used for translations and short notes.

    Returns
    -------
    tuple (explanation, flashcard_data)
        ``explanation`` is the text to show in chat: the part of the reply
        before ``[FLASHCARD_JSON]`` with the ``[EXPLANATION]`` tag removed, or
        the whole reply when the marker is absent.
        ``flashcard_data`` is the parsed flashcard dict, or ``None`` when the
        marker is missing or no JSON object can be parsed after it.

    Exceptions raised by the API call propagate to the caller.
    """

    prompt = f"""
    You are a grammar tutor helping a learner whose native language is {native_language}.
    The target language is {target_language}.

    The learner asked:
    "{user_message}"

    Your tasks:

    1. Identify ONE main grammar point in the target language that best matches this question.
       Examples:
         - For Spanish: "Present tense of 'tener' (to have)".
         - For Japanese 'to have', discuss nuance between いる, ある, 飼っている, 持っている.
    2. Provide the translation of that grammar point into {native_language}.
    3. Describe the conjugation rules or usage patterns clearly.
       - For verbs like Spanish "tener", list the key forms (tengo, tienes, tiene, etc.).
       - For Japanese "to have", explain when to use いる, ある, 飼っている, 持っている and why.
    4. Provide ONE good example sentence in the target language.
    5. Provide the {native_language} translation of that example.

    You must respond in the following exact format:

    [EXPLANATION]
    (Write a friendly explanation for the learner. Use both {target_language} and short notes in {native_language}.)

    [FLASHCARD_JSON]
    {{
      "grammar_point": "...",
      "grammar_point_translation": "...",
      "conjugation_rules": "...",
      "example_sentence": "...",
      "example_sentence_translation": "..."
    }}
    """

    completion = client.chat.completions.create(
        model="qwen-3-235b-a22b-instruct-2507",
        messages=[
            {"role": "system", "content": "You are a patient grammar tutor."},
            {"role": "user", "content": prompt},
        ],
        max_completion_tokens=700,
        temperature=0.6,
        top_p=0.9,
        stream=False,
    )

    content = completion.choices[0].message.content.strip()

    # Without the marker there is nothing to parse: show the whole reply.
    if "[FLASHCARD_JSON]" not in content:
        return content, None

    explanation_part, json_part = content.split("[FLASHCARD_JSON]", 1)
    explanation = explanation_part.replace("[EXPLANATION]", "").strip()
    flashcard_data = _parse_grammar_flashcard_json(json_part)

    return explanation, flashcard_data

def conversation_partner_chat(
    user_text,
    history,
    target_language="Spanish",
    native_language="English",
):
    """Run one text turn with the LLM conversation partner.

    ``history`` is a list of ``(user, bot)`` pairs (``None`` is treated as an
    empty list). Earlier turns are replayed to the model, the new ``user_text``
    is sent, and the stripped reply is appended to ``history`` as
    ``(user_text, reply)``. The same list object is returned.
    """
    if history is None:
        history = []

    system_prompt = f"""
            You are a friendly conversation partner helping a learner practice {target_language}.
            - Reply primarily in {target_language}.
            - Keep replies short and conversational (1–3 sentences).
            - If the learner makes a clear mistake, correct it naturally.
            - Optionally add a very short note in {native_language} in parentheses when a grammar point is important.
            """
    messages = [{"role": "system", "content": system_prompt}]

    # Replay previous turns; empty/None halves of a pair are skipped.
    for turn in history:
        learner_text, partner_text = turn
        for role, text in (("user", learner_text), ("assistant", partner_text)):
            if text:
                messages.append({"role": role, "content": text})

    messages.append({"role": "user", "content": user_text})

    request = dict(
        model="qwen-3-235b-a22b-instruct-2507",
        messages=messages,
        max_completion_tokens=200,
        temperature=0.7,
        top_p=0.9,
        stream=False,
    )
    response = client.chat.completions.create(**request)

    partner_reply = response.choices[0].message.content.strip()
    history.append((user_text, partner_reply))
    return history


import os
from gtts import gTTS
import tempfile
import whisper

# Load the Whisper ASR model once, when this cell runs, so that every call to
# transcribe_audio reuses the same instance. This uses the "small" checkpoint;
# the cell's recorded output shows a 461M download on first load.
whisper_model = whisper.load_model("small")


def transcribe_audio(audio_path, source_language="auto"):
    """Transcribe an audio file to text with the module-level Whisper model.

    audio_path: path of the recording (as produced by gr.Audio(type="filepath"))
    source_language: "auto" to pass no language to Whisper, otherwise the
        value is forwarded as Whisper's ``language`` argument

    Returns the stripped transcript, "(No audio detected)" when the path is
    None or does not exist, or "(Unable to transcribe audio)" when the
    transcript is empty.
    """
    audio_missing = audio_path is None or not os.path.exists(audio_path)
    if audio_missing:
        return "(No audio detected)"

    # "auto" means: do not pin a language for decoding.
    language_hint = source_language if source_language != "auto" else None
    transcription = whisper_model.transcribe(audio_path, language=language_hint)

    spoken_text = transcription.get("text", "").strip()
    return spoken_text or "(Unable to transcribe audio)"


def synthesize_speech(text, target_language="Spanish"):
    """Convert ``text`` to speech with gTTS and return the path of the MP3.

    Parameters
    ----------
    text : str
        Text to speak. Empty or whitespace-only text returns ``None``.
    target_language : str
        Human-readable language name; mapped to a gTTS language code.
        Names not in the map fall back to English ("en").

    Returns
    -------
    str or None
        Path of a newly created MP3 in the system temp directory, or ``None``
        if there was nothing to say or synthesis failed (the error is printed).

    Each call writes to its own uniquely named file. A single fixed file name
    would be overwritten by every request, so concurrent sessions could
    receive each other's audio.
    """
    if not text or text.strip() == "":
        return None

    # Map human language names -> gTTS language codes
    lang_map = {
        "English": "en",
        "Spanish": "es",
        "French": "fr",
        "Japanese": "ja",
        "Chinese": "zh-CN",
    }

    lang_code = lang_map.get(target_language, "en")  # default English

    out_path = None
    try:
        # mkstemp creates the file atomically with a unique name; gTTS reopens
        # it by path, so the descriptor is closed right away.
        fd, out_path = tempfile.mkstemp(prefix="tts_reply_", suffix=".mp3")
        os.close(fd)

        tts = gTTS(text=text, lang=lang_code)
        tts.save(out_path)
    except Exception as e:
        print("TTS error:", e)
        # Do not leave an empty or partial file behind on failure.
        if out_path is not None and os.path.exists(out_path):
            try:
                os.remove(out_path)
            except OSError:
                pass
        return None

    return out_path


def conversation_partner_voice_step(
    audio_path,
    history,
    target_language="Spanish",
    native_language="English",
):
    """Run one voice turn: speech-to-text, LLM reply, text-to-speech.

    Parameters
    ----------
    audio_path : str or None
        Path of the learner's recording. ``None`` means nothing was recorded.
    history : list or None
        Conversation so far as ``(user, bot)`` pairs; ``None`` is treated as empty.
    target_language : str
        Language being practised; used for the reply and for speech synthesis.
    native_language : str
        Learner's native language, forwarded to the chat step for short notes.

    Returns
    -------
    tuple (history, reply_audio_path)
        Updated history and the path of the spoken reply. The path is ``None``
        when no audio was supplied or synthesis failed.
    """
    if history is None:
        history = []

    if audio_path is None:
        # No audio recorded
        return history, None

    # 1) Audio -> text
    # The learner is practising the target language and may fall back to the
    # native one, so let Whisper detect the spoken language. Pinning decoding
    # to native_language would decode target-language speech as the wrong language.
    user_text = transcribe_audio(audio_path, source_language="auto")

    # 2) Text conversation partner
    history = conversation_partner_chat(
        user_text=user_text,
        history=history,
        target_language=target_language,
        native_language=native_language,
    )

    # Last assistant reply
    _, bot_reply = history[-1]

    # 3) Text -> audio
    reply_audio_path = synthesize_speech(
        bot_reply,
        target_language=target_language,
    )

    return history, reply_audio_path



100%|███████████████████████████████████████| 461M/461M [00:30<00:00, 15.6MiB/s]


In [8]:
import sacrebleu

def evaluate_translation_bleu(vocab_list, target_language, reference_translations):
    """Score the model's term translations against references with BLEU.

    vocab_list: source terms (strings)
    target_language: language to translate into, e.g. "Spanish"
    reference_translations: gold translations, aligned with vocab_list

    Returns a dict with the corpus BLEU score ("bleu"), the unigram-only
    score ("bleu_1"), the full corpus BLEU result object ("details"), the
    generated flashcard frame ("df"), and both translation lists.
    """
    # Translate every term, then keep just the "Translation" column as hypotheses.
    flashcards = generate_vocab_flashcards(vocab_list, target_language)
    hypotheses = flashcards["Translation"].tolist()

    # sacrebleu takes a list of reference streams; there is a single stream here.
    corpus_result = sacrebleu.corpus_bleu(hypotheses, [reference_translations])

    # Same corpus scored on unigrams only.
    unigram_scorer = sacrebleu.metrics.BLEU(max_ngram_order=1)
    unigram_result = unigram_scorer.corpus_score(hypotheses, [reference_translations])

    report = {}
    report["bleu"] = corpus_result.score  # may delete later
    report["bleu_1"] = unigram_result.score
    report["details"] = corpus_result
    report["df"] = flashcards
    report["system_translations"] = hypotheses
    report["reference_translations"] = reference_translations
    return report



In [9]:
# Example for Spanish -> English
'''
vocab_list = [
    "la organización",
    "el animal",
    "Esto se está volviendo ridículo."
]

reference_translations = [
    "the organization",
    "the animal",
    "This is getting stupid."
]
'''

'''
vocab_list = [
    "l'organisation",
    "l'animal",
    "Ça devient stupide."
]

reference_translations = [
    "the organization",
    "the animal",
    "It's becoming ridiculous."
]
'''

# Example for Chinese -> English
vocab_list = [
    "组织",
    "动物",
    "这变得有点荒谬了。"
]

reference_translations = [
    "the organization",
    "the animal",
    "This is getting ridiculous."
]


# Translate the sample terms into English and score them against the references.
# Running this cell calls the LLM once per term via generate_vocab_flashcards.
results = evaluate_translation_bleu(vocab_list, "English", reference_translations)

# The value printed here is the unigram BLEU score ("bleu_1"), not a
# classification accuracy, despite the label.
print("BLEU-1 Accuracy:", results["bleu_1"])

# Show each source term next to the model hypothesis and its reference.
for src, hyp, ref in zip(
    vocab_list,
    results["system_translations"],
    results["reference_translations"]
):
    print(f"SRC: {src}")
    print(f"HYP: {hyp}")
    print(f"REF: {ref}")
    print("-----")

BLEU-1 Accuracy: 77.77777777777777
SRC: 组织
HYP: organization
REF: the organization
-----
SRC: 动物
HYP: animal
REF: the animal
-----
SRC: 这变得有点荒谬了。
HYP: This is getting a bit ridiculous.
REF: This is getting ridiculous.
-----


In [10]:
def run_flashcard_generator(text, language):
    """Build vocab flashcards from newline-separated terms and save them as CSV.

    Blank lines are ignored and each term is stripped. The resulting frame is
    restricted to the display columns for ``language`` ("Reading" is included
    only for Japanese and Chinese), written to "anki_flashcards.csv" in the
    working directory, and returned together with that path.
    """
    terms = list(filter(None, map(str.strip, text.split("\n"))))
    cards = generate_vocab_flashcards(terms, language)

    # Column layout depends on whether the language has a separate reading.
    display_cols = ["Grammar/Vocab", "Translation", "Example Sentence"]
    if language.lower() in ("japanese", "chinese"):
        display_cols.insert(2, "Reading")

    # reindex keeps exactly these columns, so a stray "Reading" column is
    # dropped for other languages and a missing one is added as empty.
    cards = cards.reindex(columns=display_cols)

    csv_path = "anki_flashcards.csv"
    cards.to_csv(csv_path, index=False)
    return cards, csv_path


def grammar_chatbot_fn(
    user_message,
    history,
    target_language="Spanish",
    native_language="English",
):
    """Handle one grammar-tutor turn for the chat UI.

    Requests an explanation and a flashcard for ``user_message``, appends the
    flashcard to the grammar CSV when one was produced, then adds
    ``(user_message, explanation)`` to ``history`` (a new list if ``None``)
    and returns it.
    """
    history = history if history is not None else []

    explanation, flashcard_data = generate_grammar_flashcard_and_explanation(
        user_message=user_message,
        target_language=target_language,
        native_language=native_language,
    )

    # Only persist when the model's JSON was parsed into something non-empty.
    if flashcard_data:
        append_grammar_row(flashcard_data)

    turn = (user_message, explanation)
    history.append(turn)
    return history


def get_grammar_csv():
    """Return the grammar flashcard CSV path for the Gradio download.

    When the file is not there yet, it is first created containing only the
    header row, so there is always a file to hand back.
    """
    if os.path.exists(GRAMMAR_CSV_PATH):
        return GRAMMAR_CSV_PATH

    with open(GRAMMAR_CSV_PATH, "w", newline="", encoding="utf-8") as handle:
        csv.DictWriter(handle, fieldnames=GRAMMAR_COLUMNS).writeheader()
    return GRAMMAR_CSV_PATH


# Gradio UI with three tabs: vocab flashcard generation, a grammar tutor chat
# that also builds a grammar CSV, and a voice conversation partner.
with gr.Blocks() as demo:
    # ---------- TAB 1: VOCAB FLASHCARDS ----------
    with gr.Tab("Vocab Flashcards"):
        gr.Markdown("### TEN by10 – AI Vocab Flashcard Generator")

        vocab_text = gr.Textbox(
            label="Enter vocab terms (one per line)",
            lines=8,
            placeholder="e.g.\nla organización\nel animal\nla tecnología",
        )

        vocab_target_lang = gr.Radio(
            ["Spanish", "French", "Japanese", "Chinese", "English"],
            label="Target Language",
            value="Spanish",
        )

        vocab_button = gr.Button("Generate Vocab Flashcards")

        vocab_table = gr.Dataframe(label="Generated Flashcards")
        vocab_csv = gr.File(label="Download CSV")

        # run_flashcard_generator returns (DataFrame, csv_path), matching the two outputs.
        vocab_button.click(
            fn=run_flashcard_generator,
            inputs=[vocab_text, vocab_target_lang],
            outputs=[vocab_table, vocab_csv],
        )

    # ---------- TAB 2: GRAMMAR CHATBOT ----------
    with gr.Tab("Grammar Chatbot"):
        gr.Markdown(
            "### Grammar Tutor (builds grammar_flashcards.csv automatically)"
        )

        target_lang_dropdown = gr.Dropdown(
            ["Spanish", "French", "Japanese", "Chinese", "English"],
            label="Target language",
            value="Spanish",
        )
        native_lang_dropdown = gr.Dropdown(
            ["English", "Spanish", "French", "Japanese", "Chinese"],
            label="Your native language",
            value="English",
        )

        grammar_chat = gr.Chatbot(label="Ask about grammar")
        grammar_input = gr.Textbox(
            label="Type your grammar question",
            placeholder=(
                "Examples:\n"
                "- How do you conjugate 'tener' in Spanish?\n"
                "- How do you express 'to have' in Japanese (いる／ある／飼ってる／持ってる)?"
            ),
        )

        # Conversation reset, CSV download trigger, and the file slot the CSV is returned into.
        grammar_clear = gr.Button("Clear conversation")
        grammar_download = gr.Button("Download grammar CSV")
        grammar_file = gr.File(label="Grammar CSV")

        def grammar_wrapper(message, history, target_language, native_language):
            """Adapt the positional Gradio inputs to grammar_chatbot_fn's keyword arguments."""
            return grammar_chatbot_fn(
                user_message=message,
                history=history,
                target_language=target_language,
                native_language=native_language,
            )

        # Pressing Enter in the textbox sends the question; the chatbot shows the returned history.
        grammar_input.submit(
            fn=grammar_wrapper,
            inputs=[grammar_input, grammar_chat, target_lang_dropdown, native_lang_dropdown],
            outputs=grammar_chat,
        )

        # Clearing replaces the chat history with an empty list; the CSV on disk is not touched.
        grammar_clear.click(
            fn=lambda: [],
            inputs=None,
            outputs=grammar_chat,
        )

        # get_grammar_csv returns the CSV path, which the File component offers for download.
        grammar_download.click(
            fn=get_grammar_csv,
            inputs=None,
            outputs=grammar_file,
        )


    # ---------- TAB 3: CONVERSATION PARTNER (VOICE) ----------
    with gr.Tab("Conversation Partner (Voice)"):
        gr.Markdown(
            "### Voice conversation partner\n"
            "Record your voice, get a spoken reply, and see the transcript."
        )

        conv_target_lang = gr.Dropdown(
            ["Spanish", "French", "Japanese", "Chinese", "English"],
            label="Target language",
            value="Spanish",
        )
        conv_native_lang = gr.Dropdown(
            ["English", "Spanish", "French", "Japanese", "Chinese"],
            label="Your native language",
            value="English",
        )

        voice_chat = gr.Chatbot(label="Conversation transcript")

        voice_input = gr.Audio(
            sources=["microphone"], # `sources` is a list; the microphone is the only input offered
            type="filepath",
            label="Press to record, then release",
        )

        voice_send = gr.Button("Send voice")
        voice_clear = gr.Button("Clear conversation")

        voice_reply_audio = gr.Audio(
            label="Assistant reply (audio)",
            interactive=False,
        )

        def voice_wrapper(
            audio_path,
            history,
            target_language,
            native_language,
        ):
            """Adapt the positional Gradio inputs to conversation_partner_voice_step's keyword arguments."""
            return conversation_partner_voice_step(
                audio_path=audio_path,
                history=history,
                target_language=target_language,
                native_language=native_language,
            )

        # The voice step returns (history, reply_audio_path), matching the two outputs.
        voice_send.click(
            fn=voice_wrapper,
            inputs=[
                voice_input,
                voice_chat,
                conv_target_lang,
                conv_native_lang,
            ],
            outputs=[voice_chat, voice_reply_audio],
        )

        # Reset both the transcript and the reply audio player.
        voice_clear.click(
            fn=lambda: ([], None),
            inputs=None,
            outputs=[voice_chat, voice_reply_audio],
        )

# Start the Gradio server for the interface defined above.
demo.launch()

  grammar_chat = gr.Chatbot(label="Ask about grammar")
  voice_chat = gr.Chatbot(label="Conversation transcript")


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


