# Localized Translation

In [None]:
from transformers import MarianMTModel, MarianTokenizer

from functools import lru_cache


@lru_cache(maxsize=4)
def _load_marian(model_name):
    """Load the Marian tokenizer/model pair for ``model_name`` once and reuse it."""
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_text(text, model_name):
    """Translate ``text`` with the MarianMT checkpoint ``model_name``.

    Args:
        text: Source-language text to translate.
        model_name: Hub identifier or local directory of a MarianMT
            checkpoint, passed straight to ``from_pretrained``.

    Returns:
        The decoded first generated sequence with special tokens removed,
        or ``""`` when ``text`` is an empty or whitespace-only string.

    Note:
        ``truncation=True`` is passed to the tokenizer, so over-long input is
        truncated rather than rejected.
    """
    # Nothing to translate: skip loading the model and running generation.
    if isinstance(text, str) and not text.strip():
        return ""

    # The tokenizer/model pair is cached per checkpoint, so repeated calls
    # do not reload the weights.
    tokenizer, model = _load_marian(model_name)
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage
from pathlib import Path


def _local_or_hub(local_dir, hub_id):
    """Return ``local_dir`` if that directory exists on this machine, else ``hub_id``."""
    return local_dir if Path(local_dir).is_dir() else hub_id


# Prefer the locally downloaded checkpoints; fall back to the Hub identifiers
# so this cell still runs on machines that do not have these directories.
chinese_model = _local_or_hub(
    '/Users/bytedance/Desktop/personal/nus/CS5246/projects/provide_support/models/opus-mt-en-zh/',
    "Helsinki-NLP/opus-mt-en-zh",
)
malay_model = _local_or_hub(
    '/Users/bytedance/Desktop/personal/nus/CS5246/projects/provide_support/models/opus-mt-zh-ms',
    "Helsinki-NLP/opus-mt-zh-ms",
)

# Short alternative input:
# summary_text = "Singapore's Prime Minister announced new climate policies."
summary_text = "Residents in Singapore can expect warm weather accompanied by short thundery showers, primarily during afternoons and occasionally extending into evenings, until the end of April. The Meteorological Service Singapore forecasts daily maximum temperatures between 33°C and 34°C, occasionally reaching 35°C. Sumatra squalls might cause widespread thundery showers and gusty winds during early mornings on certain days. Notably, heavy rainfall occurred in early April, especially around Yishun. Despite frequent rain, temperatures remained high, with Paya Lebar hitting 36.2°C on April 12. Rainfall varied significantly across locations, with Yio Chu Kang experiencing above-average rainfall."

# English -> Mandarin first; the Malay model takes Chinese input, so the
# Malay text is produced by pivoting through the Mandarin translation.
mandarin_translation = translate_text(summary_text, chinese_model)
# NOTE(review): the opus-mt-zh-ms model card appears to require a
# sentence-initial target-language token (e.g. ">>zsm_Latn<<"); confirm and
# prepend it to the input if the Malay output looks wrong.
malay_translation = translate_text(mandarin_translation, malay_model)

print(mandarin_translation)
print(malay_translation)

# Singlish Term Mapping

In [None]:
import spacy
# Load the small English spaCy pipeline, downloading it on first use.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # Catching only that keeps Ctrl-C and unrelated errors from silently
    # triggering a download.
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Sample input for the rule-based mapper.
sentence = "The weather today is very hot."

# Lookup table from lowercase English lemma to its Singlish replacement.
# "weather" maps to itself, so that entry leaves the text unchanged.
# "very" and "hot" both carry "sibei", so "very hot" yields "sibei sibei hot"
# when each word is substituted independently.
singlish_dict = dict(
    weather="weather",
    hot="sibei hot",
    very="sibei",
    tired="sian",
    delicious="shiok",
    expensive="atas",
)

def ner_singlish(sentence):
    """Replace words in ``sentence`` with their Singlish equivalents.

    Each token's lowercased lemma is looked up in ``singlish_dict``; matching
    tokens are swapped for the mapped phrase and all other tokens are kept
    verbatim. Despite the name, no entity labels are used, only lemmas.

    Args:
        sentence: English text to rewrite.

    Returns:
        The rewritten sentence with the original spacing preserved.
    """
    doc = nlp(sentence)
    pieces = []
    for token in doc:
        # Substitute per token rather than with str.replace on the whole
        # sentence: that would also rewrite substrings of other words
        # ("hot" inside "hotel") and re-rewrite earlier replacements when a
        # word occurs more than once.
        replacement = singlish_dict.get(token.lemma_.lower(), token.text)
        # whitespace_ is the trailing space (if any) that followed the token
        # in the input, so joining the pieces keeps the original spacing.
        pieces.append(replacement + token.whitespace_)
    return "".join(pieces)

# Usage example: run the rule-based mapper on the sample sentence and print
# the rewritten text.
singlish_output = ner_singlish(sentence)
print(singlish_output)

# Singlish LLM Rephrasing

In [None]:
from transformers import pipeline

# Local directory of the fine-tuned checkpoint used for Singlish rephrasing.
model_name = (
    "/Users/bytedance/Desktop/personal/nus/CS5246/projects/provide_support"
    "/models/danube2-singlish-finetuned"
)

# Input sentences to rephrase, one prompt per list entry.
sentences = [
    (
        "Residents in Singapore can expect hot weather accompanied by short "
        "thundery showers, primarily during afternoons and occasionally "
        "extending into evenings, until the end of April."
    ),
    (
        "The Meteorological Service Singapore forecasts daily maximum "
        "temperatures between 33°C and 34°C, occasionally reaching 35°C."
    ),
    (
        "Sumatra squalls might cause widespread thundery showers and gusty "
        "winds during early mornings on certain days."
    ),
    (
        "Notably, heavy rainfall occurred in early April, especially around "
        "Yishun. Despite frequent rain, temperatures remained high, with Paya "
        "Lebar hitting 36.2°C on April 12."
    ),
    (
        "Rainfall varied significantly across locations, with Yio Chu Kang "
        "experiencing above-average rainfall."
    ),
]

# Options for building the generation pipeline from the local checkpoint.
# No task name is passed, so the pipeline type is left for `pipeline` to
# work out from the checkpoint itself.
_pipeline_options = {
    "model": model_name,
    "torch_dtype": "auto",
    "trust_remote_code": True,
    "use_fast": True,
    "token": True,
}
generate_text = pipeline(**_pipeline_options)

# Generation settings are the same for every sentence, so build them once.
_generation_options = {
    "min_new_tokens": 2,
    "max_new_tokens": 256,
    "do_sample": True,
    "num_beams": 3,
    "temperature": 0.8,
    "repetition_penalty": 1.0,
    "renormalize_logits": True,
}

# Prompt the model once per sentence and print the text of the first result.
for sentence in sentences:
    prompt = f"Translate the following to Singlish: {sentence}\n"
    res = generate_text(prompt, **_generation_options)
    print(res[0]["generated_text"])