In [1]:
"""
Reusing a Saved Encoder-Decoder Model and Tokenizer in Another Notebook

Steps:
1) Load your saved tokenizer.
2) Construct the same architecture for the encoder-decoder.
3) Load the model state_dict from disk.
4) Write a simple function to generate the remainder (partial -> remainder).
5) Test on a partial snippet to confirm the model works.
"""


'\nReusing a Saved Encoder-Decoder Model and Tokenizer in Another Notebook\n\nSteps:\n1) Load your saved tokenizer.\n2) Construct the same architecture for the encoder-decoder.\n3) Load the model state_dict from disk.\n4) Write a simple function to generate the remainder (partial -> remainder).\n5) Test on a partial snippet to confirm the model works.\n'

In [2]:
# ========================================
# 1) Imports
# ========================================
import torch
import torch.nn as nn
import math
from transformers import BertTokenizer   # or whichever tokenizer you used
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


  from .autonotebook import tqdm as notebook_tqdm


Using device: cpu


In [4]:
# ========================================
# 2) Load the Saved Tokenizer
# ========================================
tokenizer_dir = "C:/Users/DELL/Desktop/VOIP_Phishing_Attacks/Repos/convoPredict/conversation-prediction/research/3. MidEvaluation/encoder-decoder/conversation_model_bert_tokenizer"  # or the path you used for saving
tokenizer = BertTokenizer.from_pretrained(tokenizer_dir) 
pad_token_id = tokenizer.pad_token_id
vocab_size = len(tokenizer)

print("Loaded tokenizer from:", tokenizer_dir)
print("Vocabulary size:", vocab_size)
print("Pad token:", tokenizer.pad_token, "(", pad_token_id, ")")


Loaded tokenizer from: C:/Users/DELL/Desktop/VOIP_Phishing_Attacks/Repos/convoPredict/conversation-prediction/research/3. MidEvaluation/encoder-decoder/conversation_model_bert_tokenizer
Vocabulary size: 30522
Pad token: [PAD] ( 0 )


In [5]:
# ========================================
# 3) Reconstruct the Encoder-Decoder Architecture
# ========================================
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0,d_model,2)*(-math.log(10000.0)/d_model))
        pe[:,0::2] = torch.sin(position*div_term)
        pe[:,1::2] = torch.cos(position*div_term)
        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x):
        seq_len = x.size(1)
        return x + self.pe[:, :seq_len, :].to(x.device)

class TransformerEncoderDecoder(nn.Module):
    def __init__(
        self,
        vocab_size,
        d_model=128,
        nhead=4,
        num_encoder_layers=2,
        num_decoder_layers=2,
        pad_token_id=0
    ):
        super().__init__()
        self.d_model = d_model
        self.pad_token_id = pad_token_id

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_enc = PositionalEncoding(d_model)
        self.pos_dec = PositionalEncoding(d_model)

        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            batch_first=True
        )
        self.out_fc = nn.Linear(d_model, vocab_size)

    def _generate_causal_mask(self, sz):
        mask = torch.triu(torch.ones(sz, sz))==1
        mask = mask.transpose(0,1).masked_fill(mask==1, float('-inf'))
        return mask

    def forward(self, src, tgt):
        enc_emb = self.embedding(src)*math.sqrt(self.d_model)
        enc_emb = self.pos_enc(enc_emb)

        dec_emb = self.embedding(tgt)*math.sqrt(self.d_model)
        dec_emb = self.pos_dec(dec_emb)

        src_key_padding_mask = (src==self.pad_token_id)
        tgt_key_padding_mask = (tgt==self.pad_token_id)

        seq_len_dec = tgt.size(1)
        causal_mask = self._generate_causal_mask(seq_len_dec).to(src.device)

        out = self.transformer(
            src=enc_emb,
            tgt=dec_emb,
            src_key_padding_mask=src_key_padding_mask,
            tgt_key_padding_mask=tgt_key_padding_mask,
            tgt_mask=causal_mask
        )
        logits = self.out_fc(out)
        return logits


In [6]:
# ========================================
# 4) Load the Model State from Disk
# ========================================
model = TransformerEncoderDecoder(
    vocab_size=vocab_size,
    d_model=128,
    nhead=4,
    num_encoder_layers=2,
    num_decoder_layers=2,
    pad_token_id=pad_token_id
)
model_path = os.path.join(tokenizer_dir, "model_state.pt")  # e.g., "model_state.pt"
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()
print("Model loaded from:", model_path)


  model.load_state_dict(torch.load(model_path, map_location=device))


Model loaded from: C:/Users/DELL/Desktop/VOIP_Phishing_Attacks/Repos/convoPredict/conversation-prediction/research/3. MidEvaluation/encoder-decoder/conversation_model_bert_tokenizer\model_state.pt


In [7]:
# ========================================
# 5) Inference: Generating the Remainder
# ========================================
def generate_remainder(model, tokenizer, partial_text, max_new_tokens=50):
    """
    Use a greedy decoding approach.
    - partial_text is the input snippet for the encoder.
    - We'll start decoder with a single pad or dummy token and
      produce next tokens until we hit the pad token or max steps.
    """
    model.eval()
    with torch.no_grad():
        # encode partial
        enc_partial = tokenizer.encode(
            partial_text,
            add_special_tokens=False
        )
        src = torch.tensor([enc_partial], dtype=torch.long).to(device)

        # Start the decoder with [PAD], or define your own BOS approach
        dec_in = [tokenizer.pad_token_id]  
        dec_tensor = torch.tensor([dec_in], dtype=torch.long).to(device)

        for _ in range(max_new_tokens):
            logits = model(src, dec_tensor)
            # next token from the last step
            next_token_logits = logits[0, -1, :]
            next_id = torch.argmax(next_token_logits).item()
            dec_in.append(next_id)
            dec_tensor = torch.tensor([dec_in], dtype=torch.long).to(device)

            # If we produce PAD again, we can stop, or define a custom EOS
            if next_id == tokenizer.pad_token_id:
                break

        # skip the first token (the dummy "start" token)
        generated_ids = dec_in[1:]
        text_out = tokenizer.decode(generated_ids, skip_special_tokens=True)
    return text_out


In [8]:
# ========================================
# 6) Test on a Partial Snippet
# ========================================
test_partial = "Hello, I'm Sam. I saw an ad about a photography workshop."
print("=== PARTIAL TEXT ===")
print(test_partial)

predicted_rem = generate_remainder(model, tokenizer, test_partial, max_new_tokens=30)
print("\n=== PREDICTED REMAINDER ===")
print(predicted_rem)


=== PARTIAL TEXT ===
Hello, I'm Sam. I saw an ad about a photography workshop.


  output = torch._nested_tensor_from_mask(




=== PREDICTED REMAINDER ===

