In [1]:
import os, copy, json, time
import pandas as pd
from openai import OpenAI



In [15]:

client = OpenAI(api_key=open('openai_api_key.txt', 'r').read())

ALL_WORDS = [[p.strip() for p in l.split(',')] for l in open('chinese_words.txt').readlines()]

N_KNOWN = 30
N_LEARNING = 5
KNOWN_WORDS = ALL_WORDS[:N_KNOWN]
TARGET_WORDS = ALL_WORDS[N_KNOWN+1:N_KNOWN+N_LEARNING]
print(KNOWN_WORDS)
print(TARGET_WORDS)

KNOWN_WORDS_CH = [p[0] for p in KNOWN_WORDS]
TARGET_WORDS_CH = [p[0] for p in TARGET_WORDS]

N_TARGET_SENTENCES = 5
BATCH_SIZE = 20   # ask for several at once

def is_valid(sentence, allowed):
    words = sentence.lower().split('')
    return all(word in allowed for word in words)

def get_sentences(known_words: list[str], target_words: list[str], n, model="gpt-5-mini") -> list[str]:
    prompt = f"""
Here are a list of "Known words" that you can use as much as you want,
and a list of "Target words" - you can only use one of them in a sentence.
That idea is that every sentence should have exactly one "Target" word
and have all the other words be "Known" words.
You may ONLY use words from the "Known words" list and the "Target words" list.
Do NOT use any other words.
You may repeat words.

Known words:
{",".join(known_words)}

Target words:
{",".join(target_words)}

Task:
Write {n} idiomatic, natural, everyday Chinese sentences.
Each sentence must contain exactly one word from the "Target words" list
and have the rest be from the "Known words" list.
Put spaces between the words.
One sentence per line.
"""
    response = client.responses.create(
        model = model,
        input=prompt
    )
    lines = response.output_text.strip().splitlines()
    sentences = []
    for line in lines:
        words_in_line = line.split()
        n_target = len([w for w in words_in_line if w in target_words])
        n_known = len([w for w in words_in_line if w in known_words])
        #print(n_target, '   ', line)
        if n_target == 1 and n_known == len(words_in_line)-1:
            sentences.append(line)
    return sentences


#while len(sentences) < TARGET_SENTENCES:
if False:
    sentences = get_sentences(KNOWN_WORDS_CH, TARGET_WORDS_CH, N_TARGET_SENTENCES)
    print('Sentences', sentences)
    # Done
    print('\n')
    for i, s in enumerate(sentences):
        print(f"{i}. {s}")


[['的', 'de', 'grammatical particle indicating possession or description'], ['在', 'zài', 'at; in; located at'], ['有', 'yǒu', 'to have; there is'], ['一', 'yī', 'one'], ['个', 'gè', 'general measure word'], ['我', 'wǒ', 'I; me'], ['不', 'bù', 'not'], ['是', 'shì', 'to be'], ['这', 'zhè', 'this'], ['他', 'tā', 'he; him'], ['了', 'le', 'completed action or change‑of‑state particle'], ['你', 'nǐ', 'you'], ['们', 'men', 'plural marker for people'], ['也', 'yě', 'also'], ['说', 'shuō', 'to say; to speak'], ['就', 'jiù', 'then; only'], ['人', 'rén', 'person; people'], ['都', 'dōu', 'all; both'], ['和', 'hé', 'and; with'], ['来', 'lái', 'to come'], ['上', 'shàng', 'on; above'], ['去', 'qù', 'to go'], ['看', 'kàn', 'to look; to see'], ['为', 'wèi', 'for'], ['到', 'dào', 'to arrive; to reach'], ['能', 'néng', 'can; to be able to'], ['这儿', 'zhèr', 'here'], ['那', 'nà', 'that'], ['好', 'hǎo', 'good'], ['想', 'xiǎng', 'to want; to think']]
[['没', 'méi', 'not have; didn’t'], ['吧', 'ba', 'sentence‑final question particle'], ['

In [None]:
import time 

ch2pinyin = dict((p[0], p[1]) for p in ALL_WORDS)
ch2english = dict((p[0], p[2]) for p in ALL_WORDS)

def get_error_report(s, known, target):
    words = s.split()
    ret = [s]
    ret.append(' '.join([ch2pinyin[w] for w in words]))
    for w in words:
        ret.append(f'   {w}  {ch2pinyin[w]}  {ch2english[w]}')
    return '\n'.join(ret)


known_words = copy.deepcopy(KNOWN_WORDS)
target_words = copy.deepcopy(TARGET_WORDS)


perf = {w[0]:[] for w in ALL_WORDS}

while True:
    print('\n\n** NEW ROUND **')
    known_words_ch = [p[0] for p in known_words]
    target_words_ch = [p[0] for p in target_words]
    # print('target_words_ch', target_words_ch)
    sentences = get_sentences(known_words_ch, target_words_ch, N_TARGET_SENTENCES)
    # print('sentences', sentences)
    for i, s in enumerate(sentences):
        print(f"{i}. {s.replace(' ', '')}")
    time.sleep(1)
    errors = input("Which numbers did you miss?")
    # print('errors', errors)
    if errors=='end': break
    errors = [int(x) for x in errors]
    for i, s in enumerate(sentences):
        words = s.split()
        # print('words', words)
        hits = [w for w in words if w in target_words_ch]
        if len(hits) ==0:
                print(target_words)
                print(target_words_ch)
                print('Perf', perf)
                print('target_words', target_words)
        tw = hits[0]
        if i in errors:
            print('Failed', tw)
            print(get_error_report(sentences[i], known_words, target_words))
            perf[tw].append('fail')
        else:
            # print('Suceeded', tw)
            perf[tw].append('success')
        if len(perf[tw])>5 and set(perf[tw][-5:])==set(['success']):
            print('Learned word', tw)
            tw_hits = [wb for wb in target_words if wb[0]==tw]
            if len(tw_hits) != 1:
                print(target_words)
                print(target_words_ch)
                print('Perf', perf)
                print('target_words', target_words)
            tw_blob = tw_hits[0]
            known_words.append(tw_blob)
            target_words.remove(tw_blob)
            new_wb = [wb for wb in ALL_WORDS if wb not in known_words and wb not in target_words][0]
            print('New word:', new_wb)
            target_words.append(new_wb)




** NEW ROUND **
0. 我没看这个
1. 我们去吧
2. 你把那个看好
3. 我跟你说
4. 他没来


** NEW ROUND **
0. 我没来这儿
1. 我们去看吧
2. 你把这个看了
3. 我跟他来
4. 他没在这儿


** NEW ROUND **
0. 我没看到他
1. 你们去吧
2. 我把他看到了
3. 我跟你说
4. 这儿没人
Learned word 没
New word: ['个儿', 'gèr', 'measure word variant']


** NEW ROUND **
0. 我们去吧
1. 你跟我来
2. 你把他看好
3. 这个儿好
4. 我想你来看这儿吧


** NEW ROUND **
0. 你们来看我吧
1. 我把这个看了
2. 你跟我来
3. 这儿有个儿好
4. 你把这看上了
Learned word 吧
New word: ['又', 'yòu', 'again']
Learned word 把
New word: ['把握', 'bǎwò', 'to grasp; to hold']


** NEW ROUND **
0. 我跟你去看了
1. 这个儿看上去不好
2. 他又来了
3. 我没把握吧
4. 你跟我们来看吧
Learned word 跟
New word: ['啥', 'shá', 'what (colloquial)']


IndexError: list index out of range

In [1]:
x = input("foo: ")
print("You typed:", x)


You typed: foobar


# OLD

In [None]:
# Example prompt
from openai import OpenAI

client = OpenAI()

allowed_words = [
    "the",
    "cat",
    "sat",
    "on",
    "mat",
    "and",
    "slept"
]

prompt = f"""
You may ONLY use words from the following list.
Do NOT use any other words.
You may repeat words.
Do NOT use punctuation that implies new words.

Allowed words:
{", ".join(allowed_words)}

Task:
Write 3 simple sentences.
"""

response = client.responses.create(
    model="gpt-4.1-mini",
    input=prompt
)

print(response.output_text)


OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [None]:
# First Draft

from openai import OpenAI

client = OpenAI(api_key=open('openai_api_key.txt', 'r').read())

ALL_WORDS = [[p.strip() for p in l.split(',')] for l in open('chinese_words.txt').readlines()]
KNOWN_WORDS = ALL_WORDS[:20]
WORKING_ON = ALL_WORDS[21:25]


TOP_100_CHARACTERS = ("的一是不在人有我他这个们中来上大为和国地到以说时"
                      "要就出会可也你对生能而子那得于着下自之年过发后作"
                      "里用道行所然家种事成方多经么去法学如都同现当没动"
                      "面起看定天分还进好小部其些主样理心她本前开但因只从想实")

WORKING_ON = ("为国地到以时"
                      "要就出会可也对生能而子那得于着自之年过发后作"
                      "里道行所然家种事成方经么去法学如都同现当没动"
                      "面起看定分还进好小部其些主样理心她本前开但因只从实")

COMMON_WORDS = [
    "的", "我", "你", "是", "了", "不", "在", "他", "我们", "这",
    "有", "会", "个", "们", "来", "上", "大", "和", "要", "去",
    "说", "也", "为", "她", "吗", "那", "可以", "知道", "你们", "现在",
    "时间", "看", "好", "喜欢", "想", "对", "还是", "为什么", "怎么", "一点",
    "怎么了", "一点儿", "因为", "所以", "应该", "知道吗", "觉得", "呢", "自己的", "觉得呢",
    "觉得吗", "孩子", "老师", "朋友", "学校", "公司", "事情", "地方", "家", "家庭",
    "工作", "生活", "孩子们", "东西", "问题", "可能", "人们", "社会", "别人", "世界",
    "学习", "帮助", "已经", "一起", "开始", "结束", "继续", "第一次", "最后", "更多",
    "少", "很多", "每个", "每个人", "所有", "只有", "真的", "可能吗", "需要", "得到",
    "最好", "非常", "特别", "觉得很", "感到", "听说", "明白", "理解", "看见", "听见",
    "告诉", "问", "回答", "担心", "希望", "感觉", "记得", "忘记", "带来", "关心"
]

KNOWN_WORDS = [
    "的", "我", "你", "是", "了", "不", "在", "他", "我们", "这",
    "有", "会", "个", "们", "来", "上", "大", "和", "要", "去",
    "说", "也", "为", "她", "吗", "那", "可以", "知道", "你们", "现在",
    "一点", "好", "朋友", "还是",
    # "时间", "看", "喜欢", "想", "对", "为什么", "怎么", 
    # "怎么了", "一点儿", "因为", "所以", "应该", "知道吗", "觉得", "呢", "自己的", "觉得呢",
    # "觉得吗", "孩子", "老师", "学校", "公司", "事情", "地方", "家", "家庭",
]

TARGET_WORDS = [
    "们", "来", "上", "去",
    "那", "可以", "知道", "你们", "现在",
    #"时间", "看", "好", "喜欢", "想", "对", "还是", "为什么", "怎么", "一点",
    # "怎么了", "一点儿", "因为", "所以", "应该", "知道吗", "觉得", "呢", "自己的", "觉得呢",
    # "觉得吗", "孩子", "老师", "朋友", "学校", "公司", "事情", "地方", "家", "家庭",
    # "工作", "生活", "孩子们", "东西", "问题", "可能", "人们", "社会", "别人", "世界",
    # "学习", "帮助", "已经", "一起", "开始", "结束", "继续", "第一次", "最后", "更多",
    # "少", "很多", "每个", "每个人", "所有", "只有", "真的", "可能吗", "需要", "得到",
    # "最好", "非常", "特别", "觉得很", "感到", "听说", "明白", "理解", "看见", "听见",
    # "告诉", "问", "回答", "担心", "希望", "感觉", "记得", "忘记", "带来", "关心"
]


TARGET_SENTENCES = 5
BATCH_SIZE = 20   # ask for several at once

def is_valid(sentence, allowed):
    words = sentence.lower().split('')
    return all(word in allowed for word in words)

def get_prompt(known_words, target_words, n):
    prompt = f"""
You may ONLY use words from the "Known words" list
and the "Target words" list.
Do NOT use any other words.
You may repeat words.
Make sure that each sentence has exactly ONE word from the
"Target words" list, and the rest are from the "Known words" list.
Do NOT use punctuation, but DO put a space between the words in a sentence.

Known words:
{",".join(known_words)}

Target words:
{",".join(target_words)}

Task:
Write {n} idiomatic, normal-sounding sentences.
Each sentence must contain exactly one word from the "Target words" list
and have the rest be from the "Known words" list.
One sentence per line.
"""
    return prompt

sentences = []

#while len(sentences) < TARGET_SENTENCES:
if True:
    #remaining = TARGET_SENTENCES - len(sentences)
    prompt = get_prompt(KNOWN_WORDS, TARGET_WORDS,TARGET_SENTENCES )

    response = client.responses.create(
        # model="gpt-4.1-mini",
        model = "gpt-4.1-nano",
        input=prompt
    )

    lines = response.output_text.strip().splitlines()
    print(lines)

    for line in lines:
        words_in_line = line.split()
        n_target = len([w for w in words_in_line if w in TARGET_WORDS])
        print(n_target, '   ', line)
        if n_target == 1:
            sentences.append(line)

# Done
print('\n')
for i, s in enumerate(sentences, 1):
    print(f"{i}. {s}")
