In [1]:
import torch
import torch.nn as nn
from torchvision.utils import save_image

torch.manual_seed(0)

# --- Encoder: Text → Latent h ---
class Encoder(nn.Module):
    def __init__(self, vocab_size, embed_dim=256, latent_dim=512, n_layers=4, n_heads=8):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=n_heads)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)
        self.to_latent = nn.Linear(embed_dim, latent_dim)

    def forward(self, input_ids):
        # input_ids: [seq_len]
        x = self.embed(input_ids).unsqueeze(1)      # [seq,1,embed]
        x = self.transformer(x).squeeze(1)          # [seq,embed]
        h_feat = x.mean(dim=0)                      # [embed]
        h = self.to_latent(h_feat)                  # [latent]
        return h

# --- Stylizer: Latent h → Abstract PNG Image ---
class Stylizer(nn.Module):
    def __init__(self, latent_dim=512, img_size=64):
        super().__init__()
        self.fc = nn.Linear(latent_dim, 3 * img_size * img_size)
        self.img_size = img_size

    def forward(self, h):
        img_flat = self.fc(h)                       # [3*H*W]
        img = img_flat.view(3, self.img_size, self.img_size)
        return torch.sigmoid(img)

# --- Tokenizer Stub ---
def simple_tokenize(text, vocab_size=10000):
    return torch.tensor([ord(c) % vocab_size for c in text], dtype=torch.long)

# --- End-to-End Example ---
if __name__ == '__main__':
    text = "HELLO CUBIST CIPHER"
    input_ids = simple_tokenize(text)

    # Instantiate modules
    encoder = Encoder(vocab_size=10000)
    stylizer = Stylizer(latent_dim=512, img_size=64)

    # Forward pass: Text → h → Image
    h = encoder(input_ids)
    img = stylizer(h)                           # Tensor shape: [3,64,64]

    # Save image
    save_image(img, 'cipher_output.png')
    print(f"Encrypted image saved as cipher_output.png")

    # h is the secret byproduct
    print(f"Secret h vector (first 5 dims): {h[:5].tolist()}")

Encrypted image saved as cipher_output.png
Secret h vector (first 5 dims): [0.00852050632238388, -0.05593032389879227, 0.34271544218063354, 0.1163710206747055, -0.023216672241687775]




In [None]:
import torch
import torch.nn as nn
from torchvision.utils import save_image

# --- Decoder: Latent h → Recovered Text ---
class Decoder(nn.Module):
    def __init__(self, latent_dim=512, embed_dim=256, vocab_size=10000, n_layers=4, n_heads=8):
        super().__init__()
        self.fc = nn.Linear(latent_dim, embed_dim)
        decoder_layer = nn.TransformerDecoderLayer(d_model=embed_dim, nhead=n_heads)
        self.transformer = nn.TransformerDecoder(decoder_layer, num_layers=n_layers)
        self.to_logits = nn.Linear(embed_dim, vocab_size)

    def forward(self, h, seq_len):
        # h: [latent_dim], seq_len: 원하는 복원 길이
        # 1) Latent → 임시 메모리 Representation
        memory = self.fc(h).unsqueeze(0).unsqueeze(1)  # [1,1,embed_dim]
        # 2) 빈 타깃 시퀀스 생성
        tgt = torch.zeros(seq_len, 1, memory.size(-1))
        # 3) Transformer Decoder 통과
        out = self.transformer(tgt, memory).squeeze(1)  # [seq_len, embed_dim]
        # 4) 어휘 토큰 로짓으로 변환
        logits = self.to_logits(out)                     # [seq_len, vocab_size]
        return logits

# --- 통합 예제 (Encoder→Stylizer→Decoder) ---
if __name__ == '__main__':
    from torchvision.utils import save_image

    # 1) Text → Token IDs
    text = "HELLO CUBIST CIPHER"
    input_ids = simple_tokenize(text)

    # 2) 모듈 인스턴스화
    encoder = Encoder(vocab_size=10000)
    stylizer = Stylizer(latent_dim=512, img_size=64)
    decoder = Decoder(latent_dim=512, embed_dim=256, vocab_size=10000)

    # 3) 암호화: Text → h → Image
    h = encoder(input_ids)
    img = stylizer(h)
    save_image(img, 'cipher_output.png')
    print("🔒 암호문 이미지: cipher_output.png 생성 완료")

    # 4) 복호화: h + 모델 → Text
    logits = decoder(h, seq_len=len(input_ids))
    pred_ids = logits.argmax(dim=-1)
    recovered = ''.join(chr(int(pid) % 128) for pid in pred_ids)
    print("🔓 복호 결과 (근사):", recovered)


🔒 암호문 이미지: cipher_output.png 생성 완료
🔓 복호 결과 (근사): fV6VVVVmVVVVVVmmVi


In [11]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import save_image

# 고정 시드
torch.manual_seed(0)

# --- Simple Tokenizer ---
def simple_tokenize(text, vocab_size=128, max_len=16):
    ids = [ord(c) % vocab_size for c in text][:max_len]
    ids += [0] * (max_len - len(ids))
    print(f"[Tokenizer] text='{text}' -> ids={ids}")
    return torch.tensor(ids, dtype=torch.long)

# --- Encoder: Text Batch -> Latent Batch ---
class Encoder(nn.Module):
    def __init__(self, vocab_size=128, embed_dim=256, latent_dim=512, n_layers=4, n_heads=8):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=n_heads)
        self.transformer = nn.TransformerEncoder(layer, num_layers=n_layers)
        self.to_latent = nn.Linear(embed_dim, latent_dim)

    def forward(self, input_ids):
        # input_ids: [batch, seq_len]
        x = self.embed(input_ids)                # [batch, seq_len, embed_dim]
        print(f"[Encoder] embed output shape: {x.shape}")
        x = x.permute(1, 0, 2)                    # [seq_len, batch, embed_dim]
        x = self.transformer(x)                  # [seq_len, batch, embed_dim]
        h_feat = x.mean(dim=0)                   # [batch, embed_dim]
        h = self.to_latent(h_feat)               # [batch, latent_dim]
        print(f"[Encoder] latent h shape: {h.shape}, sample h[0][:5]={h[0,:5].tolist()}")
        return h

# --- Noise & Decoy ---
def obfuscate(h, sigma=1.0, decoy_dim=0):
    noise = torch.randn_like(h) * sigma
    h_noisy = h + noise
    print(f"[Obfuscate] noise mean={noise.mean().item():.4f}, std={noise.std().item():.4f}")
    if decoy_dim > 0:
        batch = h_noisy.size(0)
        d = torch.randn(batch, decoy_dim)
        h_mix = torch.cat([h_noisy, d], dim=1)
        print(f"[Obfuscate] added decoy of dim {decoy_dim}, h_mix shape: {h_mix.shape}")
        return h_mix
    return h_noisy

# --- Stylizer: Latent Batch -> Image Batch ---
class Stylizer(nn.Module):
    def __init__(self, latent_dim=512, img_size=64):
        super().__init__()
        self.fc = nn.Linear(latent_dim, 3 * img_size * img_size)
        self.img_size = img_size

    def forward(self, h):
        img_flat = self.fc(h)                    # [batch, 3*H*W]
        b, _ = img_flat.shape
        img = img_flat.view(b, 3, self.img_size, self.img_size)
        img = torch.sigmoid(img)
        print(f"[Stylizer] generated image batch shape: {img.shape}")
        return img

# --- Decoder: Latent Batch -> Logits Batch ---
class Decoder(nn.Module):
    def __init__(self, latent_dim=512, embed_dim=256, vocab_size=128, n_layers=4, n_heads=8):
        super().__init__()
        self.fc = nn.Linear(latent_dim, embed_dim)
        layer = nn.TransformerDecoderLayer(d_model=embed_dim, nhead=n_heads)
        self.transformer = nn.TransformerDecoder(layer, num_layers=n_layers)
        self.to_logits = nn.Linear(embed_dim, vocab_size)

    def forward(self, h, seq_len):
        b = h.size(0)
        memory = self.fc(h).unsqueeze(0)         # [1, batch, embed_dim]
        tgt = torch.zeros(seq_len, b, memory.size(-1))
        out = self.transformer(tgt, memory)      # [seq_len, batch, embed_dim]
        out = out.permute(1, 0, 2)               # [batch, seq_len, embed_dim]
        logits = self.to_logits(out)             # [batch, seq_len, vocab]
        print(f"[Decoder] logits shape: {logits.shape}")
        return logits

# --- Dataset for Autoencoder Training ---
class TextDataset(Dataset):
    def __init__(self, texts, max_len=16):
        self.texts = texts
        self.max_len = max_len
    def __len__(self): return len(self.texts)
    def __getitem__(self, idx):
        ids = simple_tokenize(self.texts[idx], max_len=self.max_len)
        return ids, ids

# --- Main Training & Inference ---
def main():
    # Settings
    vocab_size = 128
    latent_dim = 512
    seq_len = 16
    batch_size = 32
    epochs = 20
    sigma = 1.5

    print(f"[Config] vocab_size={vocab_size}, latent_dim={latent_dim}, seq_len={seq_len}, batch_size={batch_size}, epochs={epochs}, sigma={sigma}")

    # Data
    texts = ["HELLO CUBIST", "NOISE DECOY", "AI ENCRYPTION"] * 200
    dataset = TextDataset(texts, max_len=seq_len)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    print(f"[Data] Loaded {len(dataset)} examples, {len(dataloader)} batches per epoch")

    # Model
    encoder = Encoder(vocab_size=vocab_size, latent_dim=latent_dim)
    stylizer = Stylizer(latent_dim=latent_dim, img_size=64)
    decoder = Decoder(latent_dim=latent_dim, embed_dim=256, vocab_size=vocab_size)

    # Optimizer & Loss
    optim = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=1e-3)
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    # Training
    for ep in range(epochs):
        total_loss = 0
        for i, (inp, tgt) in enumerate(dataloader):
            optim.zero_grad()
            h = encoder(inp)
            logits = decoder(h, seq_len)
            loss = criterion(logits.view(-1, vocab_size), tgt.view(-1))
            loss.backward()
            optim.step()
            total_loss += loss.item()
            if i % 10 == 0:
                print(f"[Train] Ep{ep+1} Batch{i}/{len(dataloader)}, Loss={loss.item():.4f}")
        print(f"[Train] Epoch {ep+1} Average Loss: {total_loss/len(dataloader):.4f}")

    # Inference
    test_text = "SECURE TEST"
    print(f"[Inference] test_text='{test_text}'")
    ids = simple_tokenize(test_text, vocab_size=vocab_size, max_len=seq_len).unsqueeze(0)
    h = encoder(ids)
    h_mix = obfuscate(h, sigma=sigma, decoy_dim=0)
    img = stylizer(h_mix)
    save_image(img, 'cipher_final.png')
    print("[Inference] Encrypted image saved: cipher_final.png")

    logits = decoder(h, seq_len=len(test_text))
    pred = logits.argmax(-1)[0]
    recovered = ''.join(chr(int(x) % 128) for x in pred)
    print(f"[Inference] Decrypted after training: '{recovered}'")

if __name__ == '__main__':
    main()

[Config] vocab_size=128, latent_dim=512, seq_len=16, batch_size=32, epochs=20, sigma=1.5
[Data] Loaded 600 examples, 19 batches per epoch
[Tokenizer] text='NOISE DECOY' -> ids=[78, 79, 73, 83, 69, 32, 68, 69, 67, 79, 89, 0, 0, 0, 0, 0]
[Tokenizer] text='AI ENCRYPTION' -> ids=[65, 73, 32, 69, 78, 67, 82, 89, 80, 84, 73, 79, 78, 0, 0, 0]
[Tokenizer] text='NOISE DECOY' -> ids=[78, 79, 73, 83, 69, 32, 68, 69, 67, 79, 89, 0, 0, 0, 0, 0]
[Tokenizer] text='AI ENCRYPTION' -> ids=[65, 73, 32, 69, 78, 67, 82, 89, 80, 84, 73, 79, 78, 0, 0, 0]
[Tokenizer] text='HELLO CUBIST' -> ids=[72, 69, 76, 76, 79, 32, 67, 85, 66, 73, 83, 84, 0, 0, 0, 0]
[Tokenizer] text='NOISE DECOY' -> ids=[78, 79, 73, 83, 69, 32, 68, 69, 67, 79, 89, 0, 0, 0, 0, 0]
[Tokenizer] text='NOISE DECOY' -> ids=[78, 79, 73, 83, 69, 32, 68, 69, 67, 79, 89, 0, 0, 0, 0, 0]
[Tokenizer] text='AI ENCRYPTION' -> ids=[65, 73, 32, 69, 78, 67, 82, 89, 80, 84, 73, 79, 78, 0, 0, 0]
[Tokenizer] text='NOISE DECOY' -> ids=[78, 79, 73, 83, 69, 32, 6

In [12]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# --- Character-Level Tokenizer ---
class CharTokenizer:
    def __init__(self):
        self.chars = [chr(i) for i in range(32, 127)]  # printable ASCII
        self.stoi = {c:i for i,c in enumerate(self.chars, start=1)}
        self.itos = {i:c for c,i in self.stoi.items()}
        self.vocab_size = len(self.stoi) + 1  # +1 for pad
    def encode(self, text, max_len=32):
        ids = [self.stoi.get(c, 0) for c in text][:max_len]
        ids += [0]*(max_len-len(ids))
        return torch.tensor(ids, dtype=torch.long)
    def decode(self, ids):
        return ''.join(self.itos.get(i, '') for i in ids if i>0)

# --- Simple Autoencoder ---
class SimpleAE(nn.Module):
    def __init__(self, vocab_size, embed_dim=64, hidden_dim=128, seq_len=32):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.encoder = nn.GRU(embed_dim, hidden_dim, batch_first=True)
        self.decoder = nn.GRU(embed_dim, hidden_dim, batch_first=True)
        self.out = nn.Linear(hidden_dim, vocab_size)
        self.seq_len = seq_len
    def forward(self, x):
        # x: [batch, seq_len]
        emb = self.embed(x)
        _, h = self.encoder(emb)
        # Prepare decoder input: start tokens (zeros)
        dec_in = torch.zeros_like(x)
        emb_dec = self.embed(dec_in)
        out, _ = self.decoder(emb_dec, h)
        logits = self.out(out)  # [batch, seq_len, vocab]
        return logits

# --- Dataset ---
class TextDataset(Dataset):
    def __init__(self, texts, tokenizer, max_len):
        self.texts = texts
        self.tok = tokenizer
        self.max_len = max_len
    def __len__(self): return len(self.texts)
    def __getitem__(self, i):
        ids = self.tok.encode(self.texts[i], self.max_len)
        return ids, ids

# --- Run Prototype ---
def run_simple_ae():
    # Prepare
    tok = CharTokenizer()
    texts = ["HELLO WORLD", "TEST STRING", "CUBIST CIPHER"] * 100
    ds = TextDataset(texts, tok, max_len=32)
    dl = DataLoader(ds, batch_size=16, shuffle=True)
    model = SimpleAE(tok.vocab_size, seq_len=32)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    crit = nn.CrossEntropyLoss(ignore_index=0)

    # Train
    for epoch in range(10):
        total=0
        for inp, tgt in dl:
            opt.zero_grad()
            logits = model(inp)
            loss = crit(logits.view(-1, tok.vocab_size), tgt.view(-1))
            loss.backward(); opt.step()
            total+=loss.item()
        print(f"Epoch {epoch+1}, Loss: {total/len(dl):.4f}")

    # Test
    sample = "HELLO CIPHER"
    ids = tok.encode(sample, 32).unsqueeze(0)
    with torch.no_grad():
        logits = model(ids)
        pred = logits.argmax(-1)[0]
        print("Decoded:", tok.decode(pred.tolist()))

if __name__ == '__main__':
    run_simple_ae()

Epoch 1, Loss: 4.2519
Epoch 2, Loss: 2.6721
Epoch 3, Loss: 2.4578
Epoch 4, Loss: 2.2895
Epoch 5, Loss: 2.1112
Epoch 6, Loss: 1.9121
Epoch 7, Loss: 1.7144
Epoch 8, Loss: 1.5108
Epoch 9, Loss: 1.2942
Epoch 10, Loss: 1.0680
Decoded: HELLT  ORIDERRRRRRRRRRRRRRRRRRRR


In [38]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import save_image

torch.manual_seed(0)

# --- Char Tokenizer ---
class CharTokenizer:
    def __init__(self, max_len=32):
        self.chars = [chr(i) for i in range(32, 127)]
        self.stoi = {c: i for i, c in enumerate(self.chars, start=1)}  # pad=0
        self.itos = {i: c for c, i in self.stoi.items()}
        self.vocab_size = len(self.stoi) + 1
        self.max_len = max_len

    def encode(self, text):
        ids = [self.stoi.get(c, 0) for c in text][: self.max_len]
        ids += [0] * (self.max_len - len(ids))
        return torch.tensor(ids, dtype=torch.long)

    def decode(self, ids):
        return ''.join(self.itos.get(int(i), '') for i in ids).strip()

# --- Dummy Dataset ---
class TextImageDataset(Dataset):
    def __init__(self, texts, tokenizer):
        self.tokenizer = tokenizer
        self.data = [tokenizer.encode(t) for t in texts]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# --- Models ---
class NoiseInjector(nn.Module):
    def __init__(self, noise_level=0.1):
        super().__init__()
        self.noise_level = noise_level
    def forward(self, x):
        return x + torch.randn_like(x) * self.noise_level if self.training else x

class TextEncoder(nn.Module):
    def __init__(self, vocab_size, emb=64, hid=128, lat=256):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb, padding_idx=0)
        self.noise = NoiseInjector(0.2)
        self.lstm = nn.LSTM(emb, hid, batch_first=True)
        self.fc = nn.Linear(hid, lat)
    def forward(self, x):
        e = self.noise(self.embed(x))
        _, (h,_) = self.lstm(e)
        return self.fc(h[-1])

class AbstractionGenerator(nn.Module):
    def __init__(self, lat=256, ch=3):
        super().__init__()
        self.fc = nn.Linear(lat, 128*8*8)
        self.deconv = nn.Sequential(
            nn.ConvTranspose2d(128,64,4,2,1), nn.ReLU(True),
            nn.ConvTranspose2d(64, ch,4,2,1), nn.Tanh()
        )
    def forward(self, z):
        f = self.fc(z).view(-1,128,8,8)
        return self.deconv(f), f

class DecryptionModel(nn.Module):
    def __init__(self, ch=3, hid_feat=(128,8,8), lat=256, hid=128, vocab=100):
        super().__init__()
        c,h,w = hid_feat
        self.conv = nn.Sequential(
            nn.Conv2d(ch,64,4,2,1), nn.ReLU(True),
            nn.Conv2d(64,128,4,2,1), nn.ReLU(True)
        )
        self.fc_img = nn.Linear(128*h*w, lat)
        self.fc_hid = nn.Linear(128*h*w, lat)
        self.fc_comb = nn.Linear(lat*2, hid)
        self.lstm = nn.LSTM(hid, hid, batch_first=True)
        self.out = nn.Linear(hid, vocab)
    def forward(self, img, hid, tgt_len):
        img_z = self.fc_img(self.conv(img).view(img.size(0),-1))
        hid_z = self.fc_hid(hid.view(hid.size(0),-1))
        comb = torch.tanh(self.fc_comb(torch.cat([img_z, hid_z],1)))
        init = comb.unsqueeze(0)
        inp = torch.zeros(img.size(0), tgt_len, comb.size(-1), device=img.device)
        o,_ = self.lstm(inp,(init,init))
        return self.out(o)

# --- Training Pipeline ---
def train_autoencoder(texts, epochs=20, bs=4, lr=1e-3):
    tok = CharTokenizer(max_len=32)
    ds = TextImageDataset(texts, tok)
    dl = DataLoader(ds, batch_size=bs, shuffle=True)

    enc = TextEncoder(tok.vocab_size)
    gen = AbstractionGenerator(lat=256, ch=3)
    dec = DecryptionModel(ch=3, hid_feat=(128,8,8), lat=256, hid=128, vocab=tok.vocab_size)

    opt = torch.optim.Adam(list(enc.parameters())+list(gen.parameters())+list(dec.parameters()), lr=lr)
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    enc.train(); gen.train(); dec.train()
    for ep in range(epochs):
        total_loss = 0
        for batch in dl:
            opt.zero_grad()
            img, hid = gen(enc(batch))
            logits = dec(img, hid, tok.max_len)
            loss = criterion(logits.view(-1, tok.vocab_size), batch.view(-1))
            loss.backward(); opt.step()
            total_loss += loss.item()
        print(f"Epoch {ep+1}/{epochs}, Loss: {total_loss/len(dl):.4f}")

    return tok, enc, gen, dec

if __name__=='__main__':
    texts = [
        'HELLO WORLD', 'PICASSO CRYPTO', 'ABSTRACT CODE', 'DEEP LEARNING',
        'TORCH PYTHON', 'AUTOMATED ENCODING'
    ]
    tok, enc, gen, dec = train_autoencoder(texts)

    # Test
    enc.eval(); gen.eval(); dec.eval()
    batch = torch.stack([tok.encode(t) for t in texts])
    imgs, hid = gen(enc(batch))
    save_image(imgs, 'encrypted_images.png', normalize=True)
    print("Saved encrypted_images.png after training.")
    with torch.no_grad():
        out = dec(imgs, hid, tok.max_len)
    for i,ids in enumerate(out.argmax(-1)):
        print(f"Recovered[{i}]:", tok.decode(ids))


Epoch 1/20, Loss: 4.5898
Epoch 2/20, Loss: 4.5298
Epoch 3/20, Loss: 4.4597
Epoch 4/20, Loss: 4.3903
Epoch 5/20, Loss: 4.2877
Epoch 6/20, Loss: 4.1253
Epoch 7/20, Loss: 3.8987
Epoch 8/20, Loss: 3.5898
Epoch 9/20, Loss: 3.3079
Epoch 10/20, Loss: 3.1463
Epoch 11/20, Loss: 3.0095
Epoch 12/20, Loss: 3.0010
Epoch 13/20, Loss: 2.8970
Epoch 14/20, Loss: 2.8540
Epoch 15/20, Loss: 2.8279
Epoch 16/20, Loss: 2.8411
Epoch 17/20, Loss: 2.8190
Epoch 18/20, Loss: 2.8161
Epoch 19/20, Loss: 2.7971
Epoch 20/20, Loss: 2.7904
Saved encrypted_images.png after training.
Recovered[0]: AEEOCCCCCCCCCCCCCCCCCCCCCCCCCCCC
Recovered[1]: AEEOCCCCCCCCCCCCCCCCCCCCCCCCCCCC
Recovered[2]: AEEOCCCCCCCCCCCCCCCCCCCCCCCCCCCC
Recovered[3]: AEEOCCCCCCCCCCCCCCCCCCCCCCCCCCCC
Recovered[4]: AEEOCCCCCCCCCCCCCCCCCCCCCCCCCCCC
Recovered[5]: AEEOCCCCCCCCCCCCCCCCCCCCCCCCCCCC
