Translate to the following languages:
- en
- tl
- vi
- th
- zh

For zero-shot languages:
- bn
- fa
- gu
- mr
- pa
- no
- si
- sq
- ru
- az
- pt
- nl
- fr

In [1]:
# Language code for this augmentation run. It is passed to translate_text as
# the target language and is also used as the suffix of the new
# "augmented_<lang>" column added to train_df.
target_lang = "zh"

In [2]:
from google.cloud import translate

# Module-level alias for the batch-translation request type. None of the
# visible cells reference it; translate_text calls client.translate_text
# directly with a plain request dict.
BatchTranslateTextRequest = translate.BatchTranslateTextRequest
def translate_text(
    text: list[str] = ["YOUR_TEXT_TO_TRANSLATE"],
    project_id: str = "YOUR_PROJECT_ID",
    target_language_code: str = "tl",
) -> translate.TranslateTextResponse:
    """Translate a batch of strings with the Cloud Translation API.

    Args:
        text: Strings to translate; all of them are sent in a single request.
        project_id: Google Cloud project ID used to build the request parent.
        target_language_code: Code of the language to translate into
            (for example ``"zh"``).

    Returns:
        The response of ``client.translate_text``. Its ``translations``
        entries each expose a ``translated_text`` attribute.
    """
    # A fresh client is created on every call.
    client = translate.TranslationServiceClient()

    location = "global"
    parent = f"projects/{project_id}/locations/{location}"

    # No source language is included in the request, so inputs in mixed
    # languages can be sent together (the service is presumably left to
    # detect each source language -- confirm against the API reference).
    # Supported formats are listed here:
    # https://cloud.google.com/translate/docs/supported-formats
    response = client.translate_text(
        request={
            "parent": parent,
            # Copy the input so the shared default list object is never
            # handed to (and possibly retained or altered by) the client.
            "contents": list(text),
            "mime_type": "text/plain",  # mime types: text/plain, text/html
            "target_language_code": target_language_code,
        }
    )

    return response

In [3]:
import pandas as pd

# Load the training split; the file is tab-separated, which is read_table's
# default delimiter.
train_df = pd.read_table("xlm_fakenews/train_augmented.tsv")

In [4]:
# Plain Python list of the "claim" column, in row order, ready for batching.
train_list = train_df["claim"].tolist()

In [5]:
# Inspect the raw claims that will be sent for translation.
train_list

['Son 50 yılın siyasi tarihine bakın, tek başına iktidarlardaki Türkiye’nin büyüme oranı, koalisyon dönemlerindekinin iki katıdır.',
 'ევროპული ღირებულებები - იტალიის სამაშველო სამსახურებს მიგრანტების ჩაძირული გემების დახმარება აეკრძ...',
 'Aqui em São Paulo, nós estamos com 2,7 milhões de pessoas vivendo com os tais R$ 600 que viraram R$ 300 [do auxílio emergencial]',
 'ირაკლი ხახუბია, "ნაციონალური მოძრაობის" სპონსორი, დღეს "ოცნებაშია" გადაბარგებული',
 'Zumi Zola Jalan-Jalan Di Bandara Soekarno-Hatta',
 'No Twitter, o PSD escreve que a taxa de desemprego em Portugal está n......',
 '„U prvim mesecima 2011. godine imamo odlične pokazatelje izvoza poljoprivrednih

In [6]:
from torch.utils.data import DataLoader

In [7]:
# Group the claims into batches of 32, one batch per translation request.
# shuffle=False keeps the original row order, which the later assignment of
# the translations back onto train_df relies on.
batcher = DataLoader(train_list, batch_size=32, shuffle=False)

In [8]:
from tqdm import tqdm

# Translate the claims one batch at a time (tqdm shows progress) and collect
# the translated strings in the order the responses return them.
augmented_texts = []
for batch in tqdm(batcher):
    response = translate_text(
        text=batch,
        project_id="trusty-moment-407614",
        target_language_code=target_lang,
    )
    for item in response.translations:
        augmented_texts.append(item.translated_text)

  0%|          | 0/566 [00:00<?, ?it/s]

100%|██████████| 566/566 [16:06<00:00,  1.71s/it]


In [15]:
# Spot-check the collected translations.
augmented_texts

['чЬЛчЬЛш┐ЗхО╗50х╣┤чЪДцФ┐ц▓╗хОЖхП▓я╝МхНХф╕АцФ┐х║ЬцЧ╢цЬЯчЪДхЬЯшА│хЕ╢ч╗Пц╡ОхвЮщХ┐чОЗцШпшБФхРИцФ┐х║ЬцЧ╢цЬЯчЪДф╕дхАНуАВ',
 'цмзц┤▓ф╗╖хА╝шзВтАФтАФцДПхдзхИйцХСцП┤цЬНхКбшвлчжБцнвх╕охКйц▓Йц▓бчЪДчз╗ц░СшИ╣хПктАжтАж',
 'хЬихЬгф┐Эч╜Чя╝МцИСф╗мцЬЙ 270 ф╕Зф║║чЪДчФЯц┤╗ш┤╣чФ▒ 600 щЫ╖ф║Ъх░ФхПШцИРф║Ж 300 щЫ╖ф║Ъх░Фя╝Ич┤зцАецП┤хКйя╝Й',
 'тАЬц░СцЧПш┐РхКитАЭхПСш╡╖ф║║ф╝КцЛЙхЕЛхИй┬╖хУИшГбцпФф║ЪхжВф╗КтАЬхЬицвжф╕нтАЭ',
 'чеЦч▒│┬╖ф╜РцЛЙ (Zumi Zola) хЬишЛПхКашп║-хУИш╛╛цЬ║хЬ║цХгцне',
 'PSD хЬицОичЙ╣ф╕КхЖЩщБУя╝МшСбшРДчЙЩчЪДхд▒ф╕ЪчОЗф╕║......',
 'тАЬ2011х╣┤хд┤хЗаф╕кцЬИя╝МхбЮх░Фч╗┤ф║ЪхЖЬф║зхУБхЗ║хПгцМЗцаЗщЭЮх╕╕хЗ║шЙ▓я╝МцпФхО╗х╣┤хвЮщХ┐ф║Ж40%уАВтАЭ',
 'цДПхдзхИйхТМцмзц┤▓хЬичз╗ц░Сф╕КшК▒ш┤╣ф║ЖхдЪх░С',
 'шзЖщвСтАЬх╛╖шТВхЕЛ┬╖х╛╖шТВхЕЛ┬╖щймщ▓Бхдл┬╖щШ┐цШОш┐ФхЫЮх╣╢чФ▒цЩоцЛЙхНЪц▓ГцОецЫ┐я╝МцЛЕф╗╗ф╜РчзСхиБщЩкхРМчЪДхЙпцА╗ч╗ЯтАЭ',
 'Alfamart цНРш╡а 6000 х╝аф╝ШцГахИ╕ф╗ех╕охКйцКЧхЗ╗ COVID-19',
 'шО▒шМ╡-хЖЕхНбщУБш╖пя╝Ъхп╗ц▒Вх║ЗцКдшАЕхЕНш┤╣хЗ║шбМ',
 'чд╛ф║дхкТф╜Уф╕Кц╡Бф╝аф╕Аф╗╜хЗ║чЙИчЙйя╝Мш░┤ш┤гшСбшРДчЙЩ......',
 'чЬЛхИ░тАЬщймшп║х░Фхдз

In [14]:
import pandas as pd

In [16]:
# Preview the first rows before attaching the new translation column.
train_df.head()

Unnamed: 0,claim,label,augmented_en,augmented_tl,augmented_vi,augmented_th,augmented_zh
0,"Son 50 y─▒l─▒n siyasi tarihine bak─▒n, tek ba┼Я─▒na...",false,Look at the political history of the last 50 y...,Tingnan ang kasaysayang pampulitika sa nakalip...,"Nh├мn v├аo lс╗Лch sс╗н ch├нnh trс╗Л 50 n─Гm qua, tс╗Сc ─Сс╗Щ ...",р╕Фр╕╣р╕Ыр╕гр╕░р╕зр╕▒р╕Хр╕┤р╕ир╕▓р╕кр╕Хр╕гр╣Мр╕Бр╕▓р╕гр╣Ар╕бр╕╖р╕нр╕Зр╣Гр╕Щр╕Кр╣Ир╕зр╕З 50 р╕Ыр╕╡р╕Чр╕╡р╣Ир╕Ьр╣Ир╕▓р╕Щр╕бр╕▓ р╕н...,чЬЛчЬЛш┐ЗхО╗50х╣┤чЪДцФ┐ц▓╗хОЖхП▓я╝МхНХф╕АцФ┐х║ЬцЧ╢цЬЯчЪДхЬЯшА│хЕ╢ч╗Пц╡ОхвЮщХ┐чОЗцШпшБФхРИцФ┐х║ЬцЧ╢цЬЯчЪДф╕дхАНуАВ
1,сГФсГХсГасГЭсГЮсГгсГЪсГШ сГжсГШсГасГФсГСсГгсГЪсГФсГСсГФсГСсГШ - сГШсГвсГРсГЪсГШсГШсГб сГбсГРсГЫсГРсГисГХсГФсГЪсГЭ сГбсГРсГЫсГб...,partly true/misleading,European values тАЛтАЛ- Italian rescue services ar...,Mga halaga sa Europa - Ipinagbabawal ang mga s...,Gi├б trс╗Л ch├вu ├Вu - Dс╗Лch vс╗е cс╗йu hс╗Щ cс╗зa ├Э bс╗Л cс║еm ...,р╕Др╣Ир╕▓р╕Щр╕┤р╕вр╕бр╕вр╕╕р╣Вр╕гр╕Ы - р╕лр╣Йр╕▓р╕бр╣Др╕бр╣Ир╣Гр╕лр╣Йр╕лр╕Щр╣Ир╕зр╕вр╕Бр╕╣р╣Йр╕ар╕▒р╕вр╕Вр╕нр╕Зр╕нр╕┤р╕Хр╕▓р╕ер╕╡р╕К...,цмзц┤▓ф╗╖хА╝шзВтАФтАФцДПхдзхИйцХСцП┤цЬНхКбшвлчжБцнвх╕охКйц▓Йц▓бчЪДчз╗ц░СшИ╣хПктАжтАж
2,"Aqui em S├гo Paulo, n├│s estamos com 2,7 milh├╡es...",partly true/misleading,"Here in S├гo Paulo, we have 2.7 million people ...","Dito sa S├гo Paulo, mayroon kaming 2.7 milyong ...","Tс║бi S├гo Paulo, ch├║ng t├┤i c├│ 2,7 triс╗Зu ng╞░с╗Эi sс╗С...",р╕Чр╕╡р╣Ир╕Щр╕╡р╣Ир╣Гр╕Щр╣Ар╕Лр╕▓р╣Ар╕Ыр╕▓р╣Вр╕е р╣Ар╕гр╕▓р╕бр╕╡р╕Ьр╕╣р╣Йр╕Др╕Щ 2.7 р╕ер╣Йр╕▓р╕Щр╕Др╕Щр╕Чр╕╡р╣Ир╕нр╕▓р╕ир╕▒р╕в...,хЬихЬгф┐Эч╜Чя╝МцИСф╗мцЬЙ 270 ф╕Зф║║чЪДчФЯц┤╗ш┤╣чФ▒ 600 щЫ╖ф║Ъх░ФхПШцИРф║Ж 300 щЫ╖ф║Ъх░Фя╝Ич┤зцАецП┤хКйя╝Й
3,"сГШсГасГРсГЩсГЪсГШ сГосГРсГосГгсГСсГШсГР, ""сГЬсГРсГксГШсГЭсГЬсГРсГЪсГгсГасГШ сГЫсГЭсГлсГасГРсГЭсГСсГШсГб"" сГбсГЮсГЭсГЬсГбсГЭ...",true,"Irakli Khakhubia, the sponsor of the ""National...","Si Irakli Khakhubia, ang sponsor ng ""National ...","Irakli Khakhubia, ng╞░с╗Эi bс║гo trс╗г cho тАЬPhong tr├а...",р╕нр╕┤р╕гр╕▓р╕Др╕ер╕╡ р╕Др╕▓р╕Др╕╣р╣Ар╕Ър╕╡р╕в р╕Ьр╕╣р╣Йр╕кр╕Щр╕▒р╕Ър╕кр╕Щр╕╕р╕Щ тАЬр╕Вр╕Ър╕зр╕Щр╕Бр╕▓р╕гр╣Бр╕лр╣Ир╕Зр╕Кр╕▓р╕Хр╕┤тАЭ...,тАЬц░СцЧПш┐РхКитАЭхПСш╡╖ф║║ф╝КцЛЙхЕЛхИй┬╖хУИшГбцпФф║ЪхжВф╗КтАЬхЬицвжф╕нтАЭ
4,Zumi Zola Jalan-Jalan Di Bandara Soekarno-Hatta,false,Zumi Zola Takes a Walk at Soekarno-Hatta Airport,Naglalakad si Zumi Zola sa Soekarno-Hatta Airport,Zumi Zola ─Сi dс║бo tс║бi s├вn bay Soekarno-Hatta,Zumi Zola р╣Ар╕Фр╕┤р╕Щр╣Ар╕ер╣Ир╕Щр╕Чр╕╡р╣Ир╕кр╕Щр╕▓р╕бр╕Ър╕┤р╕Щ Soekarno-Hatta,чеЦч▒│┬╖ф╜РцЛЙ (Zumi Zola) хЬишЛПхКашп║-хУИш╛╛цЬ║хЬ║цХгцне


In [17]:
# Store the translations alongside their source claims; the column name is
# derived from the language code (e.g. "augmented_zh").
train_df[f"augmented_{target_lang}"] = augmented_texts

In [18]:
# Preview again to confirm the augmented column is present.
train_df.head()

Unnamed: 0,claim,label,augmented_en,augmented_tl,augmented_vi,augmented_th,augmented_zh
0,"Son 50 y─▒l─▒n siyasi tarihine bak─▒n, tek ba┼Я─▒na...",false,Look at the political history of the last 50 y...,Tingnan ang kasaysayang pampulitika sa nakalip...,"Nh├мn v├аo lс╗Лch sс╗н ch├нnh trс╗Л 50 n─Гm qua, tс╗Сc ─Сс╗Щ ...",р╕Фр╕╣р╕Ыр╕гр╕░р╕зр╕▒р╕Хр╕┤р╕ир╕▓р╕кр╕Хр╕гр╣Мр╕Бр╕▓р╕гр╣Ар╕бр╕╖р╕нр╕Зр╣Гр╕Щр╕Кр╣Ир╕зр╕З 50 р╕Ыр╕╡р╕Чр╕╡р╣Ир╕Ьр╣Ир╕▓р╕Щр╕бр╕▓ р╕н...,чЬЛчЬЛш┐ЗхО╗50х╣┤чЪДцФ┐ц▓╗хОЖхП▓я╝МхНХф╕АцФ┐х║ЬцЧ╢цЬЯчЪДхЬЯшА│хЕ╢ч╗Пц╡ОхвЮщХ┐чОЗцШпшБФхРИцФ┐х║ЬцЧ╢цЬЯчЪДф╕дхАНуАВ
1,сГФсГХсГасГЭсГЮсГгсГЪсГШ сГжсГШсГасГФсГСсГгсГЪсГФсГСсГФсГСсГШ - сГШсГвсГРсГЪсГШсГШсГб сГбсГРсГЫсГРсГисГХсГФсГЪсГЭ сГбсГРсГЫсГб...,partly true/misleading,European values тАЛтАЛ- Italian rescue services ar...,Mga halaga sa Europa - Ipinagbabawal ang mga s...,Gi├б trс╗Л ch├вu ├Вu - Dс╗Лch vс╗е cс╗йu hс╗Щ cс╗зa ├Э bс╗Л cс║еm ...,р╕Др╣Ир╕▓р╕Щр╕┤р╕вр╕бр╕вр╕╕р╣Вр╕гр╕Ы - р╕лр╣Йр╕▓р╕бр╣Др╕бр╣Ир╣Гр╕лр╣Йр╕лр╕Щр╣Ир╕зр╕вр╕Бр╕╣р╣Йр╕ар╕▒р╕вр╕Вр╕нр╕Зр╕нр╕┤р╕Хр╕▓р╕ер╕╡р╕К...,цмзц┤▓ф╗╖хА╝шзВтАФтАФцДПхдзхИйцХСцП┤цЬНхКбшвлчжБцнвх╕охКйц▓Йц▓бчЪДчз╗ц░СшИ╣хПктАжтАж
2,"Aqui em S├гo Paulo, n├│s estamos com 2,7 milh├╡es...",partly true/misleading,"Here in S├гo Paulo, we have 2.7 million people ...","Dito sa S├гo Paulo, mayroon kaming 2.7 milyong ...","Tс║бi S├гo Paulo, ch├║ng t├┤i c├│ 2,7 triс╗Зu ng╞░с╗Эi sс╗С...",р╕Чр╕╡р╣Ир╕Щр╕╡р╣Ир╣Гр╕Щр╣Ар╕Лр╕▓р╣Ар╕Ыр╕▓р╣Вр╕е р╣Ар╕гр╕▓р╕бр╕╡р╕Ьр╕╣р╣Йр╕Др╕Щ 2.7 р╕ер╣Йр╕▓р╕Щр╕Др╕Щр╕Чр╕╡р╣Ир╕нр╕▓р╕ир╕▒р╕в...,хЬихЬгф┐Эч╜Чя╝МцИСф╗мцЬЙ 270 ф╕Зф║║чЪДчФЯц┤╗ш┤╣чФ▒ 600 щЫ╖ф║Ъх░ФхПШцИРф║Ж 300 щЫ╖ф║Ъх░Фя╝Ич┤зцАецП┤хКйя╝Й
3,"сГШсГасГРсГЩсГЪсГШ сГосГРсГосГгсГСсГШсГР, ""сГЬсГРсГксГШсГЭсГЬсГРсГЪсГгсГасГШ сГЫсГЭсГлсГасГРсГЭсГСсГШсГб"" сГбсГЮсГЭсГЬсГбсГЭ...",true,"Irakli Khakhubia, the sponsor of the ""National...","Si Irakli Khakhubia, ang sponsor ng ""National ...","Irakli Khakhubia, ng╞░с╗Эi bс║гo trс╗г cho тАЬPhong tr├а...",р╕нр╕┤р╕гр╕▓р╕Др╕ер╕╡ р╕Др╕▓р╕Др╕╣р╣Ар╕Ър╕╡р╕в р╕Ьр╕╣р╣Йр╕кр╕Щр╕▒р╕Ър╕кр╕Щр╕╕р╕Щ тАЬр╕Вр╕Ър╕зр╕Щр╕Бр╕▓р╕гр╣Бр╕лр╣Ир╕Зр╕Кр╕▓р╕Хр╕┤тАЭ...,тАЬц░СцЧПш┐РхКитАЭхПСш╡╖ф║║ф╝КцЛЙхЕЛхИй┬╖хУИшГбцпФф║ЪхжВф╗КтАЬхЬицвжф╕нтАЭ
4,Zumi Zola Jalan-Jalan Di Bandara Soekarno-Hatta,false,Zumi Zola Takes a Walk at Soekarno-Hatta Airport,Naglalakad si Zumi Zola sa Soekarno-Hatta Airport,Zumi Zola ─Сi dс║бo tс║бi s├вn bay Soekarno-Hatta,Zumi Zola р╣Ар╕Фр╕┤р╕Щр╣Ар╕ер╣Ир╕Щр╕Чр╕╡р╣Ир╕кр╕Щр╕▓р╕бр╕Ър╕┤р╕Щ Soekarno-Hatta,чеЦч▒│┬╖ф╜РцЛЙ (Zumi Zola) хЬишЛПхКашп║-хУИш╛╛цЬ║хЬ║цХгцне


In [13]:
# Write the augmented frame back to the same TSV it was loaded from, so the
# input file is overwritten in place. index=False stops pandas from writing
# the row index as an extra column.
# NOTE(review): the head() output already shows an "Unnamed: 0" column,
# presumably left over from an earlier save that did write the index --
# confirm whether it should be dropped before saving.
train_df.to_csv("xlm_fakenews/train_augmented.tsv", sep="\t", index=False)

In [1]:
import pandas as pd

# Reload the saved training split; the file is tab-separated, which is
# read_table's default delimiter.
train_df = pd.read_table("xlm_fakenews/train_augmented.tsv")