In [4]:
import os, copy, json, time, pickle
import pandas as pd
from openai import OpenAI



In [32]:

# Read the OpenAI API key from a local file kept next to the notebook.
# The file is closed as soon as it is read, and surrounding whitespace is
# stripped because text editors usually append a trailing newline, which would
# otherwise become part of the key.
with open('openai_api_key.txt', 'r') as key_file:
    api_key = key_file.read().strip()
client = OpenAI(api_key=api_key)

# Vocabulary to learn, one word per line in the form
#   chinese_chars,pinyin_pronunciation,english_meaning
# The file is decoded explicitly as UTF-8 (the "-sig" variant also tolerates a
# leading byte-order mark) because the platform default encoding cannot be
# relied on for Chinese text. Blank lines are skipped so they do not produce
# one-field rows that would break the lookups below.
with open('chinese_words.txt', encoding='utf-8-sig') as words_file:
    ALL_WORDS = [[p.strip() for p in l.split(',')] for l in words_file if l.strip()]

# Lookup tables keyed by the Chinese word (first column).
ch2pinyin = {p[0]: p[1] for p in ALL_WORDS}
ch2english = {p[0]: p[2] for p in ALL_WORDS}


# Number of leading ALL_WORDS entries that start out in known_words.
N_KNOWN = 30
# Offset past N_KNOWN that ends the ALL_WORDS slice used for the initial target_words.
N_LEARNING = 5
# Number of sentences requested from the model per round (the `n` passed to get_lines).
N_TARGET_SENTENCES = 5
BATCH_SIZE = 20   # ask for several at once; not referenced by the cells visible here


In [33]:
# Variable state

# Words the learner already knows: the first N_KNOWN entries of ALL_WORDS.
known_words = [wb[0] for wb in ALL_WORDS[:N_KNOWN]]
# Words currently being learned: the next N_LEARNING entries. The slice starts
# exactly at index N_KNOWN so that no word is skipped between the two lists and
# the list really holds N_LEARNING words.
target_words = [wb[0] for wb in ALL_WORDS[N_KNOWN:N_KNOWN + N_LEARNING]]

# Success/fail record for each word: word -> list of 'success' / 'fail'
# entries, appended to by the practice loop.
perf = {wb[0]: [] for wb in ALL_WORDS}

In [34]:
# Prompt sent to the model by get_lines(). Placeholders filled via str.format:
#   {known_words_s}  - comma-separated known words
#   {target_words_s} - comma-separated target words
#   {n}              - number of sentences to write
# The reply is parsed line by line as `target|sentence|translation`, so the
# example lines must show exactly that shape, including the spaces between
# words that the instructions ask for.
PROMPT_TEMPLATE = """
Here is a list of "Known words" that you can use as much as you want,
and a list of "Target words" - you can only use one of them in a sentence.
The idea is that every sentence should have exactly one "Target" word
and have all the other words be "Known" words.
You may ONLY use words from the "Known words" list and the "Target words" list.
Do NOT use any other words.
You may repeat words.

Known words:
{known_words_s}

Target words:
{target_words_s}

Task:
Write {n} idiomatic, natural, everyday Chinese sentences.
Each sentence must contain exactly one word from the "Target words" list
and have the rest be from the "Known words" list.

Each line should have the target word, the sentence (with spaces between words), and an English translation.
Separated by pipes.

Example lines:
喜欢|我 喜欢 你|I like you
朋友|你 是 我 的 朋友|You are my friend
"""
def get_lines(known_words: list[str], target_words: list[str], n: int, model: str = "gpt-5-mini") -> list[list[str]]:
    """Ask the model for practice sentences and parse its reply.

    Args:
        known_words: Words the sentences may use freely.
        target_words: Words of which each sentence should contain exactly one.
        n: Number of sentences to request.
        model: Name of the model passed to the Responses API.

    Returns:
        One ``[target_word, sentence, translation]`` list per usable reply
        line, with surrounding whitespace removed from every field. Lines that
        do not have exactly three pipe-separated fields, or whose first field
        is not one of ``target_words``, are discarded, so fewer than ``n``
        items (possibly none) may come back.
    """
    # Fill the placeholders explicitly instead of passing locals(), so the
    # template only ever sees the three values it is meant to use.
    prompt = PROMPT_TEMPLATE.format(
        known_words_s=','.join(known_words),
        target_words_s=','.join(target_words),
        n=n,
    )
    response = client.responses.create(model=model, input=prompt)

    allowed_targets = set(target_words)
    parsed = []
    for raw_line in response.output_text.strip().splitlines():
        # Strip each field: replies often put spaces around the pipes, which
        # would otherwise make the target-word lookup fail.
        fields = [field.strip() for field in raw_line.split('|')]
        if len(fields) == 3 and fields[0] in allowed_targets:
            parsed.append(fields)
    return parsed
#
#get_lines(known_words, target_words, 5, model="gpt-5-mini")

In [36]:


# Number of consecutive successes after which a target word counts as known.
PROMOTION_STREAK = 5


def _parse_missed(raw, n_sentences):
    """Turn the learner's answer into a set of missed sentence indices.

    With at most ten sentences every index is a single digit, so each digit is
    read on its own and separators are optional: "02", "0 2" and "0, 2" all
    mean {0, 2}. With more sentences, digits are grouped into numbers and any
    non-digit character acts as a separator.
    """
    if n_sentences <= 10:
        return {int(ch) for ch in raw if ch.isdecimal()}
    digits_only = ''.join(ch if ch.isdecimal() else ' ' for ch in raw)
    return {int(token) for token in digits_only.split()}


# Interactive practice session: show a batch of sentences, ask which ones were
# missed, record the result per target word, and promote words that have been
# answered correctly PROMOTION_STREAK times in a row. Typing "end" stops.
while True:
    if not target_words:
        # Every word has been promoted and there is nothing left to practise.
        print('No target words left - session finished.')
        break
    print('\n\n** NEW ROUND **')
    sentences = get_lines(known_words, target_words, N_TARGET_SENTENCES)
    for i, (tw, s, trans) in enumerate(sentences):
        print(f"{i}. {s.replace(' ', '')}")
    # Presumably gives the notebook time to render the sentences before the
    # input box appears - TODO confirm.
    time.sleep(1)
    errors = input("Which numbers did you miss?").strip()
    if errors.lower() == 'end':
        # End session
        break
    errors = _parse_missed(errors, len(sentences))
    for i, (tw, s, trans) in enumerate(sentences):
        if i in errors:
            print('Failed', tw)
            print(s, '\n', trans)
            perf[tw].append('fail')
        else:
            perf[tw].append('success')

        history = perf[tw]
        mastered = (
            len(history) >= PROMOTION_STREAK
            and all(result == 'success' for result in history[-PROMOTION_STREAK:])
        )
        # The same target word can appear in several sentences of one round;
        # only promote it while it is still a target, otherwise the second
        # occurrence would append a duplicate and fail on remove().
        if mastered and tw in target_words:
            known_words.append(tw)
            target_words.remove(tw)
            # Next unseen word: neither known nor already being learned.
            replacement = next(
                (wb[0] for wb in ALL_WORDS
                 if wb[0] not in known_words and wb[0] not in target_words),
                None,
            )
            if replacement is not None:
                target_words.append(replacement)




** NEW ROUND **
0. 我没去那
1. 我们去吧
2. 你把这看一看
3. 我跟你来
4. 他没来


Which numbers did you miss? 0


Failed 没
我没去那 
 I didn't go there


** NEW ROUND **
0. 我没来这儿
1. 我们去吧
2. 我把那看了
3. 我跟你来
4. 你没想好


Which numbers did you miss? end
