In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 9 2025

@author: Yaning
"""

# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
# import torch.nn.functional as F # mainly for ReLU
import numpy as np
import copy
import re
import torch.nn.functional as F


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, GPT2Model

tokenizer = AutoTokenizer.from_pretrained("dbmdz/german-gpt2")
model = AutoModelForCausalLM.from_pretrained("dbmdz/german-gpt2")
model.eval()

  from .autonotebook import tqdm as notebook_tqdm


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50265, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50265, bias=False)
)

In [3]:
# Define the Transition Layer (T)
class TransitionLayer(nn.Module):
    def __init__(self, embedding_dim):
        super(TransitionLayer, self).__init__()
        # Trainable transition matrix to map standard to dialect
        self.transition_matrix = nn.Parameter(torch.randn(embedding_dim, embedding_dim))
    
    def forward(self, standard_embeddings):
        # Apply the transformation: H_dialect = T * H_standard
        return torch.matmul(standard_embeddings, self.transition_matrix)

In [4]:
embedding_dim = model.config.hidden_size
# Initialize the Transition Layer with the same embedding dimension
transition_layer = TransitionLayer(embedding_dim)

# Load the saved transition matrix into the model
transition_layer.load_state_dict(torch.load('transition_matrix_new.pth'))


  transition_layer.load_state_dict(torch.load('transition_matrix_new.pth'))


<All keys matched successfully>

In [5]:
def ask(question, model, tokenizer, max_length=100, device="cpu"):
    # Tokenize the input question
    inputs = tokenizer(question, return_tensors="pt").to(device)
    
    # Generate the response
    with torch.no_grad():
        output = model.generate(
            inputs.input_ids,
            max_length=max_length,
            temperature=0.7,  # Controls randomness (lower = more deterministic)
            top_k=50,         # Top-k sampling for diversity
            do_sample=True,    # Enable sampling for less repetitive responses
            output_hidden_states=True,
            return_dict_in_generate=True 
        )
    
    hidden_states = output.hidden_states  # List of hidden states from each layer
    # last_layer_hidden_state = hidden_states[-1]
    # Decode the response
    response = tokenizer.decode(output.sequences[0], skip_special_tokens=True)
    return response, hidden_states

In [5]:
import torch.nn.functional as F

def ask_with_transition(question, model, tokenizer, transition_layer, max_length=100, device="cpu"):
    # Tokenize the input question
    inputs = tokenizer(question, return_tensors="pt").to(device)
    
    # Initialize the input ids and attention mask
    input_ids = inputs.input_ids
    attention_mask = inputs.attention_mask
    
    # Use the model to generate responses while controlling the generation loop
    model.eval()  # Set the model to evaluation mode
    generated_ids = input_ids
    for _ in range(max_length - input_ids.shape[1]):
        # Generate the logits and hidden states for the current input
        outputs = model(input_ids=generated_ids, attention_mask=attention_mask, output_hidden_states=True)
        
        # Get the last hidden state from the outputs
        hidden_states = outputs.hidden_states
        last_hidden_state = hidden_states[-1]  # [batch_size, seq_len, hidden_size]
        
        # Apply the transition matrix to the last hidden state
        transformed_hidden_state = torch.matmul(last_hidden_state[:, -1, :], transition_layer.transition_matrix)
        
        # Transform the hidden state to logits
        # logits = model.lm_head(transformed_hidden_state)  # Project the transformed hidden state to logits
        logits = model.lm_head(transformed_hidden_state)
        # Apply softmax to get probabilities for the next token
        next_token_logits = logits[:,:]  # Get the logits for the next token
        next_token_probs = F.softmax(next_token_logits, dim=-1)
        
        # Sample the next token from the probabilities
        next_token_id = torch.multinomial(next_token_probs, 1)
        
        # Append the predicted token to the generated sequence
        generated_ids = torch.cat([generated_ids, next_token_id], dim=1)
    
    # Decode the generated tokens into text
    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    
    # return response, transformed_hidden_state
    return response


In [29]:
transition_layer.transition_matrix.shape

torch.Size([768, 768])

In [None]:
last output[0][-2]

13

In [24]:
last = torch.matmul(output[12][-1].squeeze(), transition_layer.transition_matrix)

In [8]:
last.shape

torch.Size([4, 768])

In [25]:
logits = model.lm_head(last)

In [76]:
logits.shape

torch.Size([4, 50265])

In [26]:
probabilities = F.softmax(logits, dim=-1)

In [27]:
predicted_token_id = torch.argmax(probabilities, dim=-1)

In [28]:
predicted_token = tokenizer.decode(predicted_token_id)

In [29]:
predicted_token

'ergie'

In [54]:
model.config.num_hidden_layers

12

In [25]:
output*transition_layer.transition_matrix

TypeError: only integer tensors of a single element can be converted to an index

In [6]:
# Example: Ask a question
question = "Geschichte der TU Dresden"
response, output = ask(question, model, tokenizer)
print("LLaMA's Response:", response)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


LLaMA's Response: Geschichte der TU Dresden
Das Projekt "Wissenschaftliches Arbeiten im Studium" wurde durch das Bundesministerium für Bildung und Forschung (BMBF) ins Leben gerufen, um die Qualität der Doktorandenausbildung und der forschungsnahen Forschung in Deutschland zu verbessern.
Das Projekt wurde am 1. September 2003 beim BMBF gestartet und umfasste ein Forschungsprogramm mit folgenden Schwerpunkten:
Das Forschungsvorhaben bestand aus zwei Teilen:
Im ersten Teil werden die Doktoranden der TU Dresden in der Forschung und Lehre ausgebildet. In


In [6]:
# Example: Ask a question
question = "Geschichte der TU Dresden"
response = ask_with_transition(question, model, tokenizer, transition_layer)
print("LLaMA's Response:", response)

LLaMA's Response: Geschichte der TU DresdenignembLetztesTschüssamngkselfkilometerKlasseKlasse«.ükenbergüksgesamtüksgesamtükälüs««.ük«fla MilliarbeleOZsgesamtükälabit LippeselfüksgesamtükäliegelsselfükselfükselfükselfükselfükselfükReservKlasse«.ük«ük«ükOZOZkilometerükük«ük«ük«.ük«ük«.ükselfükükükükükükükükükükükükükükükükükReserv«.ükself
