# Import libraries

In [1]:
import re
import time
import json
from bs4 import BeautifulSoup
from openai import OpenAI
import pprint

In [2]:
# Create the OpenAI API client that chat() uses for its completion requests.
# NOTE(review): no API key is passed here, so it is presumably read from the
# environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

# Read the XML file

### Find the example sentences to translate in the Webster dictionary

`<id>`: the word \
`<stems>`: inflectional paradigm \
`<sense>`: includes definitions and examples \
`<dt>`: definition; may include two or more examples of different types \
`<vi>`: example 

In [3]:
def getData(alphabet):
    """Read one dictionary XML file and return its contents as a string.

    Args:
        alphabet: Value substituted into the file name
            ``../data/MW_Learners/LD_{alphabet}.xml``. The path is relative to
            the current working directory.

    Returns:
        The full text of the file.
    """
    xml_path = f"../data/MW_Learners/LD_{alphabet}.xml"
    with open(xml_path, "r") as xml_file:
        return xml_file.read()

In [4]:
def getExampleSentence(soup, target_word, num_word):
  """Collect cleaned example sentences for consecutive dictionary entries.

  Scans the ``<entry>`` elements of ``soup`` in document order, skips everything
  before the first entry whose ``<id>`` text equals ``target_word``, and then
  processes ``num_word`` entries starting with that one. For every processed
  entry the text of each ``<vi>`` element is stripped of markup annotations.

  Args:
    soup: Parsed document exposing ``find_all('entry')``; every entry must
      expose ``find('id')`` and ``find_all('vi')`` (e.g. a BeautifulSoup tree).
    target_word: ``<id>`` text of the first entry to process.
    num_word: Number of entries to process, counted from the target entry.
      Entries without examples count towards this limit as well.

  Returns:
    dict mapping entry id -> list of cleaned example strings. Entries without
    any ``<vi>`` element are omitted. Empty if ``target_word`` is never found.
  """
  # Each bracketed gloss "[=...]" is matched on its own: [^\]]* cannot run past
  # the closing bracket, so text between two glosses in one sentence survives.
  # (A greedy ".*" would delete everything from the first "[=" to the last "]".)
  pattern = r"\{ldquo\}|\{rdquo\}|\{it\}|\{phrase\}|\{/phrase\}|\[=[^\]]*\]|\n"
  input_dict = {}
  word_count = 0
  find = False
  for word in soup.find_all('entry'):
    # Stop once the requested number of entries has been processed
    if find and word_count == num_word:
      break

    # Entries without an <id> cannot be keyed, so skip them instead of crashing
    id_tag = word.find('id')
    if id_tag is None:
      continue
    word_id = id_tag.text

    # Skip everything that precedes the target word
    if not find:
      if word_id != target_word:
        continue
      find = True

    # Get example sentences
    examples = word.find_all('vi')
    if examples != []:
      input_dict[word_id] = []
      # Delete annotations
      for sentence in examples:
        example = re.sub(pattern, "", sentence.text)
        # "{/it}" is not covered by the pattern above, remove it separately
        example = example.replace(r"{/it}", "")
        input_dict[word_id].append(example)
      print(f"ID: {word_id}")
      print(f"examples: {input_dict[word_id]}")
    print("---\n")
    word_count += 1
  return input_dict

### Get example sentences from the Macmillan dictionary

In [5]:
# Load the Macmillan sample XML (path is relative to the working directory) and parse it.
macmillan_xml = str()
with open(f"../data/test.xml", "r") as f:
  # Read the whole file into memory as a single string.
  macmillan_xml = f.read()
# 'xml' asks BeautifulSoup for its XML parser rather than an HTML one.
macmillan_soup = BeautifulSoup(macmillan_xml, 'xml')

In [6]:
# Sanity check: count the HOMOGRAPH elements and eyeball a small slice of them.
log_list =  macmillan_soup.find_all("HOMOGRAPH")
print(len(log_list))
pprint.pprint(log_list[15:20])

1042
[<HOMOGRAPH ID="363831"><HEAD><BASE-FORM><ENTRY0>ugh</ENTRY0><PRONS><PRON ID="0">x</PRON></PRONS><PRONS><PRON ID="1">g</PRON></PRONS></BASE-FORM><PART-OF-SPEECH>interjection</PART-OF-SPEECH></HEAD><SENSE><DEFINITION>used for writing the sound people make when they think something is extremely unpleasant</DEFINITION><trans lang="zh_HK">噁（表示極度厭惡）</trans><EXAMPLES><EXAMPLE>Ugh! That tastes disgusting!</EXAMPLE><trans lang="zh_HK">噁！真難吃！</trans></EXAMPLES></SENSE></HOMOGRAPH>,
 <HOMOGRAPH ID="363841"><HEAD><BASE-FORM><ENTRY13>ugly</ENTRY13><PRONS><PRON ID="0">gli</PRON></PRONS></BASE-FORM><PART-OF-SPEECH>adj</PART-OF-SPEECH><STARS/></HEAD><SENSE45><SENSE-NUM>1</SENSE-NUM><DEFINITION>someone who is ugly has a face that is unpleasant to look at<USAGUC>Ugly is an unkind word, so people often use the word <VOCAB>plain</VOCAB> instead, to avoid causing offence</USAGUC></DEFINITION><trans lang="zh_HK">（人）難看的，醜陋的（不禮貌用語，常用plain來替代）</trans><EXAMPLES><EXAMPLE>The other man was hideously ugly.</

In [25]:
# Build macmillan_dict: headword -> list of [example, translation] pairs taken
# from the Macmillan XML. Only the headword "ugly" is collected here.
macmillan_dict = {}
for log in macmillan_soup.find_all('HOMOGRAPH'):
  # The headword element carries a suffix in its tag name (ENTRY0, ENTRY13, ...),
  # so it is looked up with a regex instead of a fixed tag name.
  entry_tag = log.find(re.compile(r"ENTRY.+$"))
  if entry_tag is None or entry_tag.text != "ugly":
    continue
  word_id = entry_tag.text

  # One [example, translation] pair per <EXAMPLES> group: the first <EXAMPLE>
  # and the first <trans> inside it. Groups missing either part are skipped
  # rather than raising AttributeError on None.
  pairs = []
  for sentence in log.find_all('EXAMPLES'):
    example_tag = sentence.find("EXAMPLE")
    trans_tag = sentence.find("trans")
    if example_tag is None or trans_tag is None:
      continue
    pairs.append([example_tag.text, trans_tag.text])

  if pairs:
    # Extend rather than overwrite, so a second HOMOGRAPH with the same
    # headword does not discard the pairs collected from the first one.
    macmillan_dict.setdefault(word_id, []).extend(pairs)
    print(f"ID: {word_id}")
    pprint.pprint(f"examples: {macmillan_dict[word_id]}", width = 120)
  print("---\n")

ID: ugly
("examples: [['The other man was hideously ugly.', '另一個男人奇醜無比。'], ['She wont wear T-shirts because she thinks her arms "
 "are ugly.', '她不願穿T恤，因為覺得手臂很難看。'], ['She was rich and powerful and ugly as sin.', '極其醜陋的'], ['an ugly concrete "
 "bridge', '難看的混凝土橋'], ['Do you think its frame makes the picture look ugly?', '你覺得這畫框是不是讓畫變醜了？'], ['an ugly rumour', "
 "'卑劣的謠言'], ['ugly racist propaganda', '卑劣的種族主義宣傳'], ['Critics have described the policy as the ugly face of "
 "Republicanism.', '事物令人厭惡的方面'], ['There were ugly scenes following the match.', '比賽後發生了暴力事件。'], ['an ugly "
 "confrontation', '暴力衝突'], ['The situation turned ugly when one of the police officers was hit by a rock.', "
 "'變得令人不愉快'], ['The problem reared its ugly head again a few weeks later.', '這個惱人的問題幾週後又再出現了。']]")
---



### Prompts

In [36]:
# Zero-prompt
def queryZero(word, examples):
  """Build the translation prompt that contains no worked example.

  Args:
    word: Headword interpolated into the instructions.
    examples: Sentences/phrases to translate; formatted into the fenced block
      at the end of the prompt.

  Returns:
    The prompt text.
  """
  return fr"""Let us do it step by step to reach the best results.
Act as if you are an English teacher and translate the following sentences/phrases into traditional Mandarin Chinese (NOT simplified Chinese) for non-native English learners to learn how to understand and translate the word "{word}" in a sentence.
Please consult Cambridge English–Chinese (Traditional) Dictionary to find out about the range of translations of "{word}".

Present the translations in a list format, providing both the original sentence/phrase, and the translation(exclude pinyin).
Now, translate the following examples.
```
{examples}
```"""


# With Cambridge dictionary
def queryCambridge(word, examples):
  """Build the translation prompt with two hard-coded "ugly" sample translations.

  Args:
    word: Headword interpolated into the instructions.
    examples: Sentences/phrases to translate; formatted into the fenced block
      at the end of the prompt.

  Returns:
    The prompt text.
  """
  return fr"""Let us do it step by step to reach the best results.
Act as if you are an English teacher and translate the following sentences/phrases into traditional Mandarin Chinese (NOT simplified Chinese) for non-native English learners to learn how to understand and translate the word "{word}" in a sentence.
Please consult Cambridge English–Chinese (Traditional) Dictionary to find out about the range of translations of "{word}".

Present the translations in a list format, providing both the original sentence/phrase, and the translation(exclude pinyin).
For example, if the word is "ugly", the output should resemble:

1. I think a lot of modern architecture is very ugly.
我覺得很多現代建築都非常難看。
2. Yesterday I saw the ugliest baby I've ever seen in my life.
昨天我看見了有生以來見過的最醜的嬰兒。

Now, translate the following examples.
```
{examples}
```"""

# With Macmillan dictionary
def queryMacmillan(word, examples):
  """Build the translation prompt with two sample translations from ``macmillan_dict``.

  Reads the first two ``[example, translation]`` pairs stored under
  ``macmillan_dict["ugly"]``, so that global must be populated before calling.

  Args:
    word: Headword interpolated into the instructions.
    examples: Sentences/phrases to translate; formatted into the fenced block
      at the end of the prompt.

  Returns:
    The prompt text.
  """
  return fr"""Let us do it step by step to reach the best results.
Act as if you are an English teacher and translate the following sentences/phrases into traditional Mandarin Chinese (NOT simplified Chinese) for non-native English learners to learn how to understand and translate the word "{word}" in a sentence.

Present the translations in a list format, providing both the original sentence/phrase, and the translation(exclude pinyin).
For example, if the word is "ugly", the output should resemble:

1. {macmillan_dict["ugly"][0][0]}
{macmillan_dict["ugly"][0][1]}
2. {macmillan_dict["ugly"][1][0]}
{macmillan_dict["ugly"][1][1]}

Now, translate the following examples.
```
{examples}
```"""

# With Cambridge and Macmillan dictionary
def queryMix(word, examples):
  """Build the prompt combining the Cambridge hint with ``macmillan_dict`` samples.

  Reads the first two ``[example, translation]`` pairs stored under
  ``macmillan_dict["ugly"]``, so that global must be populated before calling.

  Args:
    word: Headword interpolated into the instructions.
    examples: Sentences/phrases to translate; formatted into the fenced block
      at the end of the prompt.

  Returns:
    The prompt text.
  """
  return fr"""Let us do it step by step to reach the best results.
Act as if you are an English teacher and translate the following sentences/phrases into traditional Mandarin Chinese (NOT simplified Chinese) for non-native English learners to learn how to understand and translate the word "{word}" in a sentence.
Please consult Cambridge English–Chinese (Traditional) Dictionary to find out about the range of translations of "{word}".

Present the translations in a list format, providing both the original sentence/phrase, and the translation(exclude pinyin).
For example, if the word is "ugly", the output should resemble:

1. {macmillan_dict["ugly"][0][0]}
{macmillan_dict["ugly"][0][1]}
2. {macmillan_dict["ugly"][1][0]}
{macmillan_dict["ugly"][1][1]}

Now, translate the following examples.
```
{examples}
```"""

In [9]:

def chat(word, examples, prompt_type, version):
  """Build the prompt for ``prompt_type`` and return the model's reply text.

  Args:
    word: Headword forwarded to the prompt builder.
    examples: Example sentences forwarded to the prompt builder.
    prompt_type: "Zero", "Cambridge" or "Macmillan" select the matching
      builder; any other value falls back to ``queryMix``.
    version: Model name passed to the chat-completions request.

  Returns:
    The ``content`` of the first choice's message in the completion.
  """
  # Builders are tried in order; anything unrecognised uses the mixed prompt.
  builders = (
    ("Zero", queryZero),
    ("Cambridge", queryCambridge),
    ("Macmillan", queryMacmillan),
  )
  build_prompt = next(
    (builder for name, builder in builders if prompt_type == name),
    queryMix,
  )
  prompt = build_prompt(word, examples)

  # Single-turn request: the whole prompt is sent as one user message.
  completion = client.chat.completions.create(
    model=version,
    messages=[{"role": "user", "content": f"{prompt}"}],
  )
  return completion.choices[0].message.content

def oneWordTranslate(word, examples, prompt_type, version="gpt-3.5-turbo"):
  """Run one ``chat`` request and measure how long it takes.

  Args:
    word: Headword forwarded to ``chat``.
    examples: Example sentences forwarded to ``chat``.
    prompt_type: Prompt variant name forwarded to ``chat``.
    version: Model name forwarded to ``chat``.

  Returns:
    Tuple ``(respond, duration)``: the value returned by ``chat`` and the
    elapsed time in seconds measured with ``time.monotonic()``.
  """
  started = time.monotonic()
  reply = chat(word, examples, prompt_type, version)
  elapsed = time.monotonic() - started
  return (reply, elapsed)

def writeToFile(word, respond, duration):
  """Append one translation result to ``{word}_result.txt`` in the working directory.

  Args:
    word: Headword; used both as the file-name prefix and as the record's first line.
    respond: Translation text to store.
    duration: Value written into the ``[duration: ...]`` line.
  """
  # The responses contain Chinese text, so the encoding is fixed to UTF-8
  # instead of depending on the platform's locale default.
  with open(f"{word}_result.txt", 'a', encoding="utf-8") as f:
    f.write(f"{word}\n{respond}\n[duration: {duration}]\n---\n\n")

def writeToFileNoduration(word, respond):
  """Append one translation result, without timing, to ``transtion_result.txt``.

  Args:
    word: Headword written as the record's first line.
    respond: Translation text to store.
  """
  # The responses contain Chinese text, so the encoding is fixed to UTF-8
  # instead of depending on the platform's locale default.
  with open("transtion_result.txt", 'a', encoding="utf-8") as f:
    f.write(f"{word}\n{respond}\n\n=====\n")

# Parsing

In [10]:
# Load the dictionary XML for the letter "a" and parse it with BeautifulSoup's XML parser.
f = getData("a")
soup = BeautifulSoup(f, 'xml')

In [11]:
# Extract the examples of a single entry each (num_word=1): "ability" and "absence".
input_dict = getExampleSentence(soup, "ability", 1)
absence_dict = getExampleSentence(soup, "absence", 1)

ID: ability
examples: ['a young woman with many remarkable musical/artistic/athletic abilities', 'a teacher with an ability to inspire his students', 'Does he have the ability  to fire employees?', "a writer's ability  to interest readers", 'a young woman of great musical/artistic/athletic ability', 'She has shown some ability with foreign languages.', 'He always works to the best of his ability. ']
---

ID: absence
examples: ['There was an absence ', 'The products showed a remarkable absence of defects.', 'In the absence of reform , progress was slow.', 'With the (continued) absence of rain , crops have begun to dry up.', 'He had many absences from work.', 'I expected to see her and was surprised by her absence.', 'He was conspicuous by his absence. ', 'She returned to the company after a long/prolonged absence.', "She'll be away for a month, but you know what they say—absence makes the heart grow fonder.", 'The study was completed in her absence.', "He was asked to speak in his broth

In [20]:
# Merge the "absence" examples into input_dict (in place) and show the combined dict.
input_dict.update(absence_dict)
pprint.pprint(input_dict, width = 120)

{'ability': ['a young woman with many remarkable musical/artistic/athletic abilities',
             'a teacher with an ability to inspire his students',
             'Does he have the ability  to fire employees?',
             "a writer's ability  to interest readers",
             'a young woman of great musical/artistic/athletic ability',
             'She has shown some ability with foreign languages.',
             'He always works to the best of his ability. '],
 'absence': ['There was an absence ',
             'The products showed a remarkable absence of defects.',
             'In the absence of reform , progress was slow.',
             'With the (continued) absence of rain , crops have begun to dry up.',
             'He had many absences from work.',
             'I expected to see her and was surprised by her absence.',
             'He was conspicuous by his absence. ',
             'She returned to the company after a long/prolonged absence.',
             "She'll be away

In [13]:
# Prompt variants to compare. chat() has explicit branches for the first three
# names and routes every other value, including "Mix", to queryMix.
prompt_type_list = ["Zero", "Cambridge", "Macmillan", "Mix"]

In [34]:
# Preview the Macmillan-style prompt generated for each word; no API request is made here.
for key, value in input_dict.items():
  prompt = queryMacmillan(key, value)
  print(prompt)
  print("---")

Let us do it step by step to reach the best results.
Act as if you are an English teacher and translate the following sentences/phrases into traditional Mandarin Chinese (NOT simplified Chinese) for non-native English learners to learn how to understand and translate the word "ability" in a sentence.

Present the translations in a list format, providing both the original sentence/phrase, and the translation(exclude pinyin).
For example, if the word is "ugly", the output should resemble:

1. The other man was hideously ugly.
另一個男人奇醜無比。
2. She wont wear T-shirts because she thinks her arms are ugly.
她不願穿T恤，因為覺得手臂很難看。

Now, translate the following examples.
```
['a young woman with many remarkable musical/artistic/athletic abilities', 'a teacher with an ability to inspire his students', 'Does he have the ability  to fire employees?', "a writer's ability  to interest readers", 'a young woman of great musical/artistic/athletic ability', 'She has shown some ability with foreign languages.', 

In [14]:
# respond, duration = oneWordTranslate('u', input_dict['u'])

In [15]:
# respond

In [16]:
# duration

In [17]:
# `count` is only referenced by commented-out code in the translation loop.
count = 0
# Accumulator filled by the translation loop: word -> {prompt type: [respond, duration]}.
tranlated_dict = {}

In [37]:
# Translate every word with each prompt variant and store [respond, duration]
# per variant. This issues one API request per (word, prompt type) pair.
for key, value in input_dict.items():
  single_word_trans = {}
  # The loop variable is named prompt_type rather than `type`, so the built-in
  # type() is not overwritten for the rest of the kernel session.
  for prompt_type in prompt_type_list:
    respond, duration = oneWordTranslate(key, value, prompt_type, "gpt-4-1106-preview")
    single_word_trans[prompt_type] = [respond, duration]
  tranlated_dict[key] = single_word_trans

In [38]:
# Shallow copy: the per-word inner dicts are still shared with tranlated_dict.
copy = tranlated_dict.copy()

In [40]:
# Pretty-print the collected translations for inspection.
pprint.pprint(copy)

{'ability': {'Cambridge': ['1. a young woman with many remarkable '
                           'musical/artistic/athletic abilities\n'
                           '一位具有許多了不起的音樂／藝術／運動才能的年輕女性。\n'
                           '\n'
                           '2. a teacher with an ability to inspire his '
                           'students\n'
                           '一位有激勵學生能力的老師。\n'
                           '\n'
                           '3. Does he have the ability to fire employees?\n'
                           '他有解僱員工的能力嗎？\n'
                           '\n'
                           "4. a writer's ability to interest readers\n"
                           '作家吸引讀者興趣的能力。\n'
                           '\n'
                           '5. a young woman of great '
                           'musical/artistic/athletic ability\n'
                           '一位具有極高音樂／藝術／運動才華的年輕女性。\n'
                           '\n'
                           '6. She has shown some ability with foreign '
  

In [44]:
# for key, value in tranlated_dict.items():
#   print(key, value[1])
#   word = key + "2"
#   writeToFile(key + "2", value[0], value[1])

In [45]:
# Persist the translations as JSON in the working directory.
with open("tranlated_test.json", "w") as f:
  json.dump(tranlated_dict, f)

In [9]:
# Load previously saved translations.
# NOTE(review): this reads "tranlated.json", not the "tranlated_test.json" written
# by the dump cell — presumably the result of an earlier run; confirm it is the intended file.
with open("tranlated.json", "r") as f:
  t_dict = json.load(f)

In [None]:
# Display the loaded translations as the cell output.
t_dict