## Install packages

- `kenlm`
- `datasets`
- `pycorrector`

```shell
pip install datasets
pip install kenlm
pip install -U pycorrector
```

In [1]:
# Colab
!pip install -U pycorrector

Collecting pycorrector
  Downloading pycorrector-1.0.0.tar.gz (4.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.4/4.4 MB 16.9 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting pypinyin (from pycorrector)
  Downloading pypinyin-0.49.0-py2.py3-none-any.whl (1.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 28.0 MB/s eta 0:00:00
Collecting loguru (from pycorrector)
  Downloading loguru-0.7.2-py3-none-any.whl (62 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.5/62.5 kB 7.7 MB/s eta 0:00:00
Collecting kenlm (from pycorrector)
  Downloading kenlm-0.2.0.tar.gz (427 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 427.4/427.4 kB 25.6 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done

In [2]:
! pip install datasets

Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 521.2/521.2 kB 5.3 MB/s eta 0:00:00
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 115.3/115.3 kB 8.7 MB/s eta 0:00:00
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.8/134.8 kB 8.7 MB/s eta 0:00:00
Installing collected packages: pyarrow-hotfix, dill, multiprocess, datasets
Successfully installed datasets-2.15.0 dill-0.3.7 multiprocess-0.70.15 pyarrow-hotfix-0.6


## Start to correct

In [5]:
from pycorrector import Corrector
import pycorrector

class EmailHelper(Corrector):
    """Corrector subclass with helpers for Traditional-Chinese e-mail text.

    The helpers are meant to be chained around the inherited ``correct_batch``:
    ``tra2sim`` -> ``correct_batch`` -> ``get_target`` -> ``sim2tra``.
    """

    def tra2sim(self, content):
        """Convert Traditional Chinese text to Simplified Chinese.

        Newlines and ASCII spaces are stripped from each text before conversion.

        Args:
            content: A single ``str``, or an iterable of ``str``.

        Returns:
            list[str]: The converted texts. A single string is wrapped in a
            one-element list, so the result is always a list.
        """
        # Dispatch on the input type explicitly. Relying on a bare ``except`` to
        # detect "not a string" would also hide genuine conversion failures and
        # then iterate over the characters of the string.
        texts = [content] if isinstance(content, str) else content
        return [
            pycorrector.traditional2simplified(text.replace("\n", '').replace(" ", ''))
            for text in texts
        ]

    def get_target(self, correct_dict):
        """Pull the corrected text out of correction results and record the errors.

        Args:
            correct_dict: Sequence of result dicts, each providing the keys
                ``"target"`` and ``"errors"``.

        Returns:
            The ``"target"`` value itself when exactly one result is given,
            otherwise a list with the ``"target"`` of every result.

        Side effects:
            Sets ``self.error`` to the single result's ``"errors"`` value, or,
            for any other number of results, to a list of labelled strings
            (one per result).

        Note:
            A one-element sequence is treated as the single-article case, so it
            yields a bare value rather than a one-element list.
        """
        if len(correct_dict) == 1:
            only_result = correct_dict[0]
            self.error = only_result["errors"]
            return only_result["target"]

        self.error = [f"第{i+1}篇錯誤：{j['errors']}" for i, j in enumerate(correct_dict)]
        return [result["target"] for result in correct_dict]

    def sim2tra(self, content):
        """Convert Simplified Chinese text to Traditional Chinese.

        Args:
            content: A single ``str``, or an iterable of ``str``.

        Returns:
            A ``str`` when ``content`` is a ``str``; otherwise a list with one
            converted string per input item.
        """
        if isinstance(content, str):
            return pycorrector.simplified2traditional(content)
        return [pycorrector.simplified2traditional(sim_content) for sim_content in content]

In [10]:
# Create the helper instance that the cells below reuse.
emailhelper = EmailHelper()

### 單篇文章
> input: str <br>
> output: str

In [28]:
# Sample e-mail in Traditional Chinese containing typos
# (e.g. 星期午, 置友, 處李, 感蟹) for the corrector to work on.
mail = """
    我寫這封信是為了向您於10/7星期午請一天假，

    因為我當天要參加一位置友的喪禮。

    我確認過公司的行事例了，

    我想我可以在那天之前把交辦事項處李好。

    感蟹您的諒解。
    """

In [29]:
# Each stage gets its own name so the raw `mail` is never overwritten:
# re-running this cell always starts from the original text and produces
# the same result and the same `emailhelper.error`.
mail_simplified = emailhelper.tra2sim(mail)                # str -> list with one simplified str
mail_results = emailhelper.correct_batch(mail_simplified)  # one result dict per text
mail_target = emailhelper.get_target(mail_results)         # corrected text; also sets emailhelper.error
mail_corrected = emailhelper.sim2tra(mail_target)          # back to Traditional Chinese
mail_corrected

'我寫這封信是為了向您於10/7星期五請一天假，因為我當天要參加一位摯友的喪禮。我確認過公司的行事例了，我想我可以在那天之前把交辦事項處理好。感謝您的諒解。'

In [30]:
# Inspect the errors recorded for the article by the last get_target() call
emailhelper.error

[('午请', '五请', 17), ('置友', '挚友', 33), ('李', '理', 67), ('感蟹', '感谢', 70)]

### 多篇文章
> input: list <br>
> output: list

In [31]:
# Two sample e-mails in Traditional Chinese: the first yields no corrections
# in the recorded run below; the second is the typo-containing text used in
# the single-article example.
mail_list = [
    """
    我寫這封信是為了向您於10/7星期五請一天假，

    因為我當天要參加一位摯友的喪禮。

    我確認過公司的行事曆了，

    我想我可以在那天之前把交辦事項處理好。

    感謝您的諒解。
    """,
    """
    我寫這封信是為了向您於10/7星期午請一天假，

    因為我當天要參加一位置友的喪禮。

    我確認過公司的行事例了，

    我想我可以在那天之前把交辦事項處李好。

    感蟹您的諒解。
    """
]

In [32]:
# Each stage gets its own name so the raw `mail_list` is never overwritten:
# re-running this cell always starts from the original texts and produces
# the same result and the same `emailhelper.error`.
mail_list_simplified = emailhelper.tra2sim(mail_list)                # list of simplified texts
mail_list_results = emailhelper.correct_batch(mail_list_simplified)  # one result dict per text
mail_list_targets = emailhelper.get_target(mail_list_results)        # corrected texts; also sets emailhelper.error
mail_list_corrected = emailhelper.sim2tra(mail_list_targets)         # back to Traditional Chinese
mail_list_corrected

['我寫這封信是為了向您於10/7星期五請一天假，因為我當天要參加一位摯友的喪禮。我確認過公司的行事曆了，我想我可以在那天之前把交辦事項處理好。感謝您的諒解。',
 '我寫這封信是為了向您於10/7星期五請一天假，因為我當天要參加一位摯友的喪禮。我確認過公司的行事例了，我想我可以在那天之前把交辦事項處理好。感謝您的諒解。']

In [33]:
# 檢查文章之錯誤
for i in emailhelper.error:
    print(i)

第1篇錯誤：[]
第2篇錯誤：[('午请', '五请', 17), ('置友', '挚友', 33), ('李', '理', 67), ('感蟹', '感谢', 70)]
