In [1]:
import torch
from transformers import BartTokenizer
from IPython.display import display

from model import Transformer, TransformerConfig
from utils import translate

In [2]:
lit_ckpt_path = "lightning_logs/version_3/checkpoints/epoch=9-step=154625.ckpt"
tokenizer_path = "pretrained_tokenizers/bart_bpe_opus_en_id_30000"

# Sizes that several config fields are derived from; named once so that
# ff_sz and head_sz cannot drift out of sync with emb_sz / n_heads.
EMB_SZ = 512
N_HEADS = 8

# Prefix carried by the checkpoint's state_dict keys that must be removed
# before the weights can be loaded into a bare Transformer.
STATE_DICT_PREFIX = "model."

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook does not crash on machines without CUDA.
# NOTE(review): assumes utils.translate follows the model's device - confirm.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load model from pytorch lightning ckpt
# NOTE(review): torch.load unpickles the file - only open trusted checkpoints.
ckpt = torch.load(lit_ckpt_path, map_location="cpu")

# Strip the prefix only from keys that actually carry it. Blindly slicing the
# first six characters would silently mangle any other key in the checkpoint.
model_state_dict = {
    key[len(STATE_DICT_PREFIX):]: weight
    for key, weight in ckpt["state_dict"].items()
    if key.startswith(STATE_DICT_PREFIX)
}

transformer_cfg = TransformerConfig(
    n_encoders=4,
    n_decoders=4,
    vocab_sz=30000,
    emb_sz=EMB_SZ,
    ff_sz=EMB_SZ * 4,
    n_heads=N_HEADS,
    head_sz=EMB_SZ // N_HEADS,
    pdrop=0.1,
)
# eval() puts the module in inference mode before the weights are loaded.
model = Transformer(transformer_cfg).to(device).eval()
model.load_state_dict(model_state_dict)

# load tokenizer
tokenizer = BartTokenizer.from_pretrained(tokenizer_path)

In [3]:
# Pin torch's global RNG to a fixed seed (presumably so the sampled
# translations below are repeatable). Comment this line out to start from a
# fresh seed instead.
torch.manual_seed(seed=0)

<torch._C.Generator at 0x7fc0bb612cd0>

In [4]:
# Short, everyday English sentences used as an easy check of the translator.
# The samples were generated with ChatGPT.
SIMPLE_TEXTS = [
    "The cat sat on the mat.",
    "I have a red ball.",
    "The sun is shining.",
    "Birds can fly in the sky.",
    "Mom bakes yummy cookies.",
    "My teddy bear is so cuddly.",
    "The frog jumped into the pond.",
    "We plant seeds in the soil.",
    "The big tree gives us shade.",
    "Let's read a fun story together.",
]

# Temperature (t) passed to translate():
#   t < 1  ->  more accurate
#   t > 1  ->  more "creative", but noticeably less coherent

# Show the English inputs first, then one translated list per temperature
# (low temperature first, high temperature second).
display(SIMPLE_TEXTS)
for temperature in (0.5, 1.2):
    display(translate(model, tokenizer, max_gen_length=128, text=SIMPLE_TEXTS, t=temperature))

['The cat sat on the mat.',
 'I have a red ball.',
 'The sun is shining.',
 'Birds can fly in the sky.',
 'Mom bakes yummy cookies.',
 'My teddy bear is so cuddly.',
 'The frog jumped into the pond.',
 'We plant seeds in the soil.',
 'The big tree gives us shade.',
 "Let's read a fun story together."]

['Kucing duduk di matt.',
 'Aku punya bola merah.',
 'Matahari bersinar.',
 'Burung bisa terbang di langit.',
 'Ibu menyertakan kue.',
 'Beruang membosankanku begitu ceria.',
 'Si katak melompat ke dalam kolam.',
 'Kita memasang daging buah di tanah.',
 'Pohon besar memberi kami tempat yang bagus.',
 'Mari kita membaca cerita yang menyenangkan bersama-sama.']

['Kucing hantu sepuluh untuk bertambah.',
 'Aku punya memungkinkanBer keluargamuwa merah',
 'Bau matahariangannya bersinar.',
 ' Tapi -- M OrangPangeran bisa terbangke langit ini. berkeliling bintang...!',
 'Ibuotan akan mencengadah kusyuk.',
 'uo beruangSkydyAr vaksinku C mencongk Araknya.',
 ' apapan kali pembalasan bergerak keberteriak.',
 "Kita memasang 'E terluka' di tanah.",
 ' roti terbesar kiamat.',
 'Mari malaikat berb at memperingatkan bersama-sama.']

In [5]:
# Longer, domain-heavy English sentences used as a harder check of the
# translator. The samples were generated with ChatGPT.
HARD_TEXTS = [
    "The phenomenon of quantum entanglement challenges classical notions of reality.",
    "The economic implications of globalization have sparked widespread debate.",
    "Shakespeare's intricate use of language adds layers of meaning to his plays.",
    "The principles of genetic engineering have revolutionized the field of biotechnology.",
    "The theory of relativity reshaped our understanding of space, time, and gravity.",
    "The ethical dilemma of artificial intelligence centers around its potential autonomy.",
    "Postmodern literature often blurs the line between fiction and reality.",
    "Inflation targeting has become a popular strategy for central banks in recent years.",
    "The psychological effects of childhood trauma can have far-reaching consequences in adulthood.",
    "Environmental sustainability requires a collective effort to mitigate climate change.",
]

# Temperature (t) passed to translate():
#   t < 1  ->  more accurate
#   t > 1  ->  more "creative", but noticeably less coherent

# Show the hard English inputs first, then one translated list per
# temperature (low temperature first, high temperature second).
display(HARD_TEXTS)
for temperature in (0.5, 1.2):
    display(translate(model, tokenizer, max_gen_length=128, text=HARD_TEXTS, t=temperature))

['The phenomenon of quantum entanglement challenges classical notions of reality.',
 'The economic implications of globalization have sparked widespread debate.',
 "Shakespeare's intricate use of language adds layers of meaning to his plays.",
 'The principles of genetic engineering have revolutionized the field of biotechnology.',
 'The theory of relativity reshaped our understanding of space, time, and gravity.',
 'The ethical dilemma of artificial intelligence centers around its potential autonomy.',
 'Postmodern literature often blurs the line between fiction and reality.',
 'Inflation targeting has become a popular strategy for central banks in recent years.',
 'The psychological effects of childhood trauma can have far-reaching consequences in adulthood.',
 'Environmental sustainability requires a collective effort to mitigate climate change.']

['Format dari-lipadil tantangan tidak ada kenyataan.',
 'Nomer ekonomi impres manis telah meluap wasi wetis.',
 'Sandi She menggunakan bahasa-fungsi bahasa artinya untuk permainan.',
 'Produkasi dari bahan kimia telah melakukan... dengan efek biadologi.',
 'Teori dari hubungan kami dengan beberapa masa lalu, dan gravitasi.',
 'Menurut penyebaran intelijen yang dapat dieksporasi di sekeliling potensial.',
 'Baris-baris sering untuk menghapus batas antara fBahan dan kenyataan.',
 'Dalam beberapa tahun yang lalu, pengejaran yang menjadi strategi populer bagi bank pusat dalam beberapa tahun ini.',
 'E.L... Dm. S.I.D.D.D. L.A.A.R.D. L.S. M. T.P.S. Al-Q.S.S.',
 'Enval tetap membutuhkan upaya untuk memperbesar perubahan perubahan bahkan perubahan.']

['Fefon sekali Van hidup UnitedLihatlah dulu dari tidak cahaya sifat kenyataan.',
 ' Bisa tolong kemari. mencoba mer makanan mer keluar perintahkan kami',
 ' menjelaskan hasil bersatu kuliah yang penuh arti untuk permainan melawan dua semua dapatkan.',
 'PrYAahan haveerualIest dibawah sebuahTuan Dalam kemasibMasih punya kIS saat tersebut',
 'T detektifZi menulisnya dari giliran papan keluar hidup kita, waktu, dan gravitasi.',
 'kita semua mendapat kupu bekerja Benarkah, Sama dengan kel Apamu?',
 ' macam-macam bertuangkan garis antara berp tebak dan Arukan.',
 'diaApalagi mendSayang menjadi strategi inti mohon gue TV selama keimanan ini',
 'A Node kehidupan awal bisa menyebabkanKalau sayang kehilangan Ad mulai berk./ M 10 detik.',
 ' sepanjang masalahmu Dia butuh rencana mey sel gagal,']