In [36]:
from training import alibi_positional_encoding
import torch
import torch.nn.functional as F
from transformer import Transformer
from beam_searcher import BeamSearcher
from train_final_model import character_ecode_decode, read_in

In [37]:
# Load the training corpus and build the codec object whose `encode` / `decode`
# methods are used by the inference cells further down.
file_path = "goe_full.txt"
full_text = read_in(file_path)
# The codec is constructed from the full corpus text (presumably so that its
# symbol table matches the one used during training -- TODO confirm in
# train_final_model).
encoder = character_ecode_decode(full_text)
# NOTE(review): every inference cell visible in this notebook rebinds
# `encoded_text` to the encoded prompt before reading it, so this corpus-wide
# encoding looks unused here -- confirm before removing.
encoded_text = encoder.encode(full_text)

In [38]:
# Candidate prompts. Previously all three were assigned to `input_text` in a
# row, so only the last one ever took effect; they are now named explicitly
# and the active one is selected in a single place.
PROMPT_100_A = "Dies ist ein exakt einhundert Zeichen langer Text, der genau die gewünschte Länge einhält. Perfekt! "
PROMPT_100_B = " Des Lebens Fluss strömt ewig fort, in Licht und Schatten wechselnd, gleich dem wandelnden Geschick." # 100
# 256-character prompt. Parenthesised literal concatenation replaces the
# backslash continuations inside the string, which silently break if any
# whitespace follows the backslash or the continuation lines get indented.
PROMPT_256 = (
    "Des Lebens Wogen steigen und sinken, "
    "ein ewiges Streben, ein flüchtiges Hoffen, "
    "gleich dem Wind, der durch kahle Zweige fährt, "
    "doch nimmer ruht; so irrt der Mensch, "
    "von Sehnsucht getrieben, vom Schicksal geführt, bis einst der Schleier fällt und Stille ihn"
) # seq_length 256

# Prompt that the models below are asked to continue.
input_text = PROMPT_256
print(len(input_text))

256


In [39]:
# ---- Inference configuration ------------------------------------------------
seq_length = 256  # same value as the prompt length printed by the previous cell
device = 'cpu'  # alternative: torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_heads = 8
# One value is handed to both the model and the beam searcher
# (presumably the two must agree -- TODO confirm against BeamSearcher).
embedding_dim = 80

# Lower-triangular matrix of ones, passed to the model as its attention mask.
causal_padding_mask = torch.ones((seq_length, seq_length)).tril()
# Bias tensor produced by the project's ALiBi helper for this head count / length.
alibi_positional_encoding_tensor = alibi_positional_encoding(n_heads, seq_length)

# ---- Model; its weights are replaced by checkpoints in the cells below --------
model = Transformer(
    embedding_dim=embedding_dim,
    num_layers=12,
    n_heads=n_heads,
    device=device,
    net_expansion_factor=4,
    attention_type='dot_product',
    alibi_bias=alibi_positional_encoding_tensor,
    mask=causal_padding_mask,
)

# ---- Beam search wrapper around the same model instance -----------------------
searcher = BeamSearcher(
    model=model,
    device=device,
    embedding_dimension=embedding_dim,
    prediction_length=40,
    beam_width=2,
    max_candidates=6,
)

In [40]:
# Model 1 (run1, lr=0.001): load the checkpoint into `model`, then let the beam
# searcher continue `input_text`.
checkpoint_path = (
    "out/run1/Transformer,"
    "lr_schedule=OneCycleLR, "
    "loss_criterion=CrossEntropyLoss(), "
    "net_expansion_factor=4, "
    "n_layers=12, "
    "n_heads=8, "
    "attention_type=dot_product, "
    "lr=0.001, "
    "OneCycleLR, mask yes.pt"
)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs; it avoids executing arbitrary pickled code and
# silences the torch.load FutureWarning this cell used to emit.
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
# Evaluation mode: dropout / normalisation layers (if the model has any) must
# not behave as in training while generating text. Harmless if already set.
model.eval()

# Encode the prompt as a batch of one sequence and convert it to one-hot form.
encoded_text = torch.tensor([encoder.encode(input_text)], dtype=torch.int64)
input_batch = model.one_hot_encode(encoded_text)
# Decoding needs no gradients; skip autograd bookkeeping during the search.
with torch.no_grad():
    searcher.do_search(input_batch)
# Pick the candidate with the highest score and keep only the newly generated tail.
candidate_ranking = searcher.sequence_candidate_probabilities
best_candidate = searcher.sequence_candidates[torch.argmax(candidate_ranking)]
decoded_text = encoder.decode(model.one_hot_decode(best_candidate))
model_1_prediction_beam_search = decoded_text[-(searcher.prediction_sequence_length):]

  torch.load(


In [41]:
# Model 2 (run1, lr=0.0005): load the checkpoint into `model`, then let the beam
# searcher continue `input_text`.
checkpoint_path = (
    "out/run1/Transformer,"
    "lr_schedule=OneCycleLR, "
    "loss_criterion=CrossEntropyLoss(), "
    "net_expansion_factor=4, "
    "n_layers=12, "
    "n_heads=8, "
    "attention_type=dot_product, "
    "lr=0.0005, "
    "OneCycleLR, mask yes.pt"
)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs; it avoids executing arbitrary pickled code and
# silences the torch.load FutureWarning this cell used to emit.
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
# Evaluation mode: dropout / normalisation layers (if the model has any) must
# not behave as in training while generating text. Harmless if already set.
model.eval()

# Encode the prompt as a batch of one sequence and convert it to one-hot form.
encoded_text = torch.tensor([encoder.encode(input_text)], dtype=torch.int64)
input_batch = model.one_hot_encode(encoded_text)
# Decoding needs no gradients; skip autograd bookkeeping during the search.
with torch.no_grad():
    searcher.do_search(input_batch)
# Pick the candidate with the highest score and keep only the newly generated tail.
candidate_ranking = searcher.sequence_candidate_probabilities
best_candidate = searcher.sequence_candidates[torch.argmax(candidate_ranking)]
decoded_text = encoder.decode(model.one_hot_decode(best_candidate))
model_2_prediction_beam_search = decoded_text[-(searcher.prediction_sequence_length):]

  torch.load(


In [42]:
# Model 3 (run2, lr=0.0001): load the checkpoint into `model`, then let the beam
# searcher continue `input_text`.
checkpoint_path = (
    "out/run2/Transformer,"
    "lr_schedule=OneCycleLR, "
    "loss_criterion=CrossEntropyLoss(), "
    "net_expansion_factor=4, "
    "n_layers=12, "
    "n_heads=8, "
    "attention_type=dot_product, "
    "lr=0.0001, "
    "OneCycleLR, mask yes.pt"
)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs; it avoids executing arbitrary pickled code and
# silences the torch.load FutureWarning this cell used to emit.
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
# Evaluation mode: dropout / normalisation layers (if the model has any) must
# not behave as in training while generating text. Harmless if already set.
model.eval()

# Encode the prompt as a batch of one sequence and convert it to one-hot form.
encoded_text = torch.tensor([encoder.encode(input_text)], dtype=torch.int64)
input_batch = model.one_hot_encode(encoded_text)
# Decoding needs no gradients; skip autograd bookkeeping during the search.
with torch.no_grad():
    searcher.do_search(input_batch)
# Pick the candidate with the highest score and keep only the newly generated tail.
candidate_ranking = searcher.sequence_candidate_probabilities
best_candidate = searcher.sequence_candidates[torch.argmax(candidate_ranking)]
decoded_text = encoder.decode(model.one_hot_decode(best_candidate))
model_3_prediction_beam_search = decoded_text[-(searcher.prediction_sequence_length):]

  torch.load(


In [43]:
# Model 4 (run2, lr=0.0005): load the checkpoint into `model`, then let the beam
# searcher continue `input_text`.
checkpoint_path = (
    "out/run2/Transformer,"
    "lr_schedule=OneCycleLR, "
    "loss_criterion=CrossEntropyLoss(), "
    "net_expansion_factor=4, "
    "n_layers=12, "
    "n_heads=8, "
    "attention_type=dot_product, "
    "lr=0.0005, "
    "OneCycleLR, mask yes.pt"
)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs; it avoids executing arbitrary pickled code and
# silences the torch.load FutureWarning this cell used to emit.
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
# Evaluation mode: dropout / normalisation layers (if the model has any) must
# not behave as in training while generating text. Harmless if already set.
model.eval()

# Encode the prompt as a batch of one sequence and convert it to one-hot form.
encoded_text = torch.tensor([encoder.encode(input_text)], dtype=torch.int64)
input_batch = model.one_hot_encode(encoded_text)
# Decoding needs no gradients; skip autograd bookkeeping during the search.
with torch.no_grad():
    searcher.do_search(input_batch)
# Pick the candidate with the highest score and keep only the newly generated tail.
candidate_ranking = searcher.sequence_candidate_probabilities
best_candidate = searcher.sequence_candidates[torch.argmax(candidate_ranking)]
decoded_text = encoder.decode(model.one_hot_decode(best_candidate))
model_4_prediction_beam_search = decoded_text[-(searcher.prediction_sequence_length):]

  torch.load(


In [44]:
# Model 5 (run3, lr=0.0005): load the checkpoint into `model`, then let the beam
# searcher continue `input_text`.
checkpoint_path = (
    "out/run3/Transformer,"
    "lr_schedule=OneCycleLR, "
    "loss_criterion=CrossEntropyLoss(), "
    "net_expansion_factor=4, "
    "n_layers=12, "
    "n_heads=8, "
    "attention_type=dot_product, "
    "lr=0.0005, "
    "OneCycleLR, mask yes.pt"
)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs; it avoids executing arbitrary pickled code and
# silences the torch.load FutureWarning this cell used to emit.
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
# Evaluation mode: dropout / normalisation layers (if the model has any) must
# not behave as in training while generating text. Harmless if already set.
model.eval()

# Encode the prompt as a batch of one sequence and convert it to one-hot form.
encoded_text = torch.tensor([encoder.encode(input_text)], dtype=torch.int64)
input_batch = model.one_hot_encode(encoded_text)
# Decoding needs no gradients; skip autograd bookkeeping during the search.
with torch.no_grad():
    searcher.do_search(input_batch)
# Pick the candidate with the highest score and keep only the newly generated tail.
candidate_ranking = searcher.sequence_candidate_probabilities
best_candidate = searcher.sequence_candidates[torch.argmax(candidate_ranking)]
decoded_text = encoder.decode(model.one_hot_decode(best_candidate))
model_5_prediction_beam_search = decoded_text[-(searcher.prediction_sequence_length):]

  torch.load(


| Model  | Learning Rate | Epochs | Layers | Sequence Length |
|--------|--------------|--------|--------|--|
| Model 1 | 0.001        | 2      | 12      | 100 |
| Model 2 | 0.0005       | 4      | 12      | 100 |
| Model 3 | 0.0001       | 2     | 12      | 256 |
| Model 4 | 0.0005       | 2      | 12      | 256 |
| Model 5 | 0.0005       | 7      | 12      | 256 |


In [45]:
# Show the prompt, a separator, and then each model's beam-search continuation.
beam_search_predictions = (
    model_1_prediction_beam_search,
    model_2_prediction_beam_search,
    model_3_prediction_beam_search,
    model_4_prediction_beam_search,
    model_5_prediction_beam_search,
)

print(input_text)
print("...")
for model_number, prediction in enumerate(beam_search_predictions, start=1):
    # Two arguments on purpose: print's separator adds the second space after the colon.
    print(f"Model {model_number}: ", prediction)

Des Lebens Wogen steigen und sinken, ein ewiges Streben, ein flüchtiges Hoffen, gleich dem Wind, der durch kahle Zweige fährt, doch nimmer ruht; so irrt der Mensch, von Sehnsucht getrieben, vom Schicksal geführt, bis einst der Schleier fällt und Stille ihn
...
Model 1:  , daß ich dieser Schiffen unter der Schi
Model 2:   einer groß und die Schreitten und die S
Model 3:   der der derer sich der sich der sichte 
Model 4:   der Gesten der Geschaften sich der Gesc
Model 5:   seinen Geschichten und die Geschichten 
