In [1]:
# 📦 TensorFlow ve Keras
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Dense, Embedding, Dropout, LayerNormalization,
    MultiHeadAttention, Add, Lambda
)
from tensorflow.keras import layers

# 🧪 Sıralı bloklar için
from tensorflow.keras import Sequential

# 🧠 Tokenizer ve padding için
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 📊 Veri işleme (eğer kullanıyorsan)
import pandas as pd
import numpy as np


In [2]:
# Read the example prompt/reply pairs from the CSV (path is relative to the
# working directory), then show the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="örnek_set.csv")
df.head(5)

Unnamed: 0,input,output
0,Merhaba,"Merhaba, size nasıl yardımcı olabilirim?"
1,Nasılsın?,"İyiyim, teşekkür ederim. Siz nasılsınız?"
2,Adın ne?,Ben bir yapay zekâ asistanıyım. Adım yok ama y...
3,Kaç yaşındasın?,"Benim yaşım yok, dijitalim!"
4,Bugün günlerden ne?,"Maalesef tarih bilgim yok, ama sistem saatinde..."


In [3]:
# Keep only the rows where both the prompt and the reply are present.
# Without this, astype(str) turns a missing cell into the literal text "nan",
# which the tokenizers would then learn as if it were a real word. Dropping
# on both columns at once keeps the two lists aligned row by row.
pairs = df[['input', 'output']].dropna()

# Force plain Python strings (cells may have been parsed as numbers).
input_texts = pairs['input'].astype(str).to_list()
target_texts = pairs['output'].astype(str).to_list()

### TOKENİZER

In [4]:
# Add the start and end markers: the decoder input is each reply prefixed
# with '<start>', the training target is the same reply followed by '<end>'.
target_texts_in = [f'<start> {reply}' for reply in target_texts]
target_texts_out = [f'{reply} <end>' for reply in target_texts]

In [5]:
# The Keras Tokenizer removes every character listed in `filters` before it
# splits a text into words. The default list contains '<' and '>', which
# would reduce the '<start>' / '<end>' markers to the ordinary words
# 'start' / 'end' (so word_index has no '<start>' key and the markers can
# collide with real vocabulary). The target side therefore uses the default
# filter list with only the two angle brackets removed.
TARGET_FILTERS = '!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n'

# Source-side vocabulary: built from the prompts only.
input_tokenizer = Tokenizer(oov_token='<OOV>')
input_tokenizer.fit_on_texts(input_texts)

# Target-side vocabulary: fitted on both decoder views so that '<start>' and
# '<end>' are both part of it.
target_tokenizer = Tokenizer(oov_token='<OOV>', filters=TARGET_FILTERS)
target_tokenizer.fit_on_texts(target_texts_in + target_texts_out)

In [6]:
# Convert every text into a list of integer word ids.
input_seqs = input_tokenizer.texts_to_sequences(input_texts)
decoder_in = target_tokenizer.texts_to_sequences(target_texts_in)
decoder_out = target_tokenizer.texts_to_sequences(target_texts_out)

# Longest sequence on the source side and on the target side (both decoder
# views are considered), then a single shared length for all padded arrays.
max_len_input = max(map(len, input_seqs))
max_len_output = max(map(len, decoder_in + decoder_out))
max_len = max_len_input if max_len_input >= max_len_output else max_len_output

In [7]:
# Pad all three id lists at the end ('post') up to the shared length max_len.
encoder_input, decoder_input, decoder_target = (
    pad_sequences(seqs, padding='post', maxlen=max_len)
    for seqs in (input_seqs, decoder_in, decoder_out)
)

# Vocabulary size = number of entries in the word index plus one extra slot,
# so that id 0 (the value the padded positions carry) is a valid index too.
input_vocab_size = 1 + len(input_tokenizer.word_index)
target_vocab_size = 1 + len(target_tokenizer.word_index)

print(input_vocab_size, target_vocab_size, sep='\n')

101
237


#### 📌 2. position_encoding() -- 📌 3. token_and_position_embedding()

In [8]:
class PositionalEncoding(layers.Layer):
    """Adds a fixed sinusoidal position table to a sequence of embeddings.

    A table with ``max_len`` rows (positions) and ``d_model`` columns
    (features) is computed once in the constructor: even columns hold sines,
    odd columns hold cosines of ``pos / 10000 ** (2 * (i // 2) / d_model)``.
    ``call`` adds the first ``seq_len`` rows of that table to its input.

    Args:
        max_len: number of positions in the precomputed table.
        d_model: number of columns in the table; must match the size of the
            last axis of the tensors passed to ``call``.
        **kwargs: forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, max_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Remember the constructor arguments so get_config() can report them;
        # without that the layer cannot be rebuilt from a saved configuration.
        self.max_len = max_len
        self.d_model = d_model

        # (max_len, 1) positions against (1, d_model) feature indices
        # broadcast to a (max_len, d_model) table of angles.
        angle_rads = self._get_angles(
            np.arange(max_len)[:, np.newaxis],
            np.arange(d_model)[np.newaxis, :],
            d_model
        )
        # Sine on even feature columns, cosine on odd ones (in place).
        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
        # Leading axis of size 1 so the table broadcasts over the batch.
        self.pos_encoding = tf.constant(angle_rads[np.newaxis, ...], dtype=tf.float32)

    def _get_angles(self, pos, i, d_model):
        """Return ``pos / 10000 ** (2 * (i // 2) / d_model)`` element-wise."""
        return pos / np.power(10000, (2 * (i//2)) / np.float32(d_model))

    def call(self, x):
        """Add the first ``seq_len`` rows of the table to ``x``.

        ``seq_len`` is read from axis 1 of ``x`` at run time.
        """
        seq_len = tf.shape(x)[1]
        return x + self.pos_encoding[:, :seq_len, :]

    def get_config(self):
        """Return the layer configuration including the constructor arguments."""
        config = super().get_config()
        config.update({"max_len": self.max_len, "d_model": self.d_model})
        return config


### 📌 4. add_and_norm() --- 📌 5. feed_forward_network()

In [9]:
def add_and_norm(x, sublayer, dp=0.1):
    """Residual connection followed by layer normalization.

    Applies dropout with rate ``dp`` to ``sublayer``, adds the result to
    ``x`` and layer-normalizes the sum (epsilon 1e-6). New Dropout, Add and
    LayerNormalization layers are created on every call.

    Args:
        x: tensor on the skip path.
        sublayer: output of the sub-layer that was applied to ``x``.
        dp: dropout rate applied to ``sublayer`` before the addition.

    Returns:
        The normalized sum of ``x`` and the dropped-out ``sublayer``.
    """
    dropped = Dropout(rate=dp)(sublayer)
    residual = Add()([x, dropped])
    normalizer = LayerNormalization(epsilon=1e-6)
    return normalizer(residual)

def feed_forward_network(d_model, dp=0.1):
    """Build the position-wise feed-forward sub-network of a block.

    The network is Dense(4 * d_model, relu) -> Dropout(dp) -> Dense(d_model).

    Args:
        d_model: width of the output projection; the hidden layer is four
            times as wide.
        dp: dropout rate between the two Dense layers. Defaults to 0.1, the
            value that used to be hard-coded, so existing callers are
            unaffected; pass the model-wide rate to keep it consistent.

    Returns:
        A new, unbuilt ``Sequential`` model.
    """
    # No explicit layer/model names: a fixed name would be duplicated when
    # this factory is called once per block, so Keras picks unique names.
    return Sequential([
        layers.Dense(d_model * 4, activation='relu'),
        Dropout(dp),
        layers.Dense(d_model),
    ])

-------
------

### ✅  Mask Fonksiyonu – Toplu Kod

In [10]:
# Padding mask for Keras MultiHeadAttention.
# Keras reads `attention_mask` as 1 = "may attend", 0 = "blocked", so the
# mask must be 1 at real tokens (id != 0) and 0 at padding (id == 0). The
# previous version marked the padding with 1, which pointed attention at the
# padded positions instead of away from them.
# Output is float32 with two singleton axes inserted after the batch axis,
# i.e. (batch, 1, 1, seq_len), so it broadcasts over heads and query positions.
padding_mask_layer = Lambda(
    lambda seq: tf.cast(tf.math.not_equal(seq, 0), tf.float32)[:, tf.newaxis, tf.newaxis, :],
    name="padding_mask"
)

def _combined_mask(seq):
    """Causal + padding mask for decoder self-attention.

    Keras MultiHeadAttention reads `attention_mask` as 1 = "may attend",
    0 = "blocked". A query position may therefore attend to a key position
    only if the key is a real token (id != 0) AND the key is not in the
    future (key index <= query index). The previous version produced the
    opposite polarity (1 at padding / future positions).

    Args:
        seq: token ids; axis 1 is the sequence axis and id 0 is padding.

    Returns:
        float32 tensor that broadcasts to (batch, 1, seq_len, seq_len).
    """
    seq_len = tf.shape(seq)[1]
    # 1 where the key is a real token; two singleton axes after the batch axis.
    keep = tf.cast(tf.math.not_equal(seq, 0), tf.float32)[:, tf.newaxis, tf.newaxis, :]
    # Lower triangle (diagonal included): key index <= query index.
    causal = tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
    causal = causal[tf.newaxis, tf.newaxis, :, :]
    # Both conditions must hold; on 0/1 values the minimum is a logical AND.
    return tf.minimum(keep, causal)


combined_mask_layer = Lambda(_combined_mask, name="combined_mask")

-----
-------

### 📌 6. encoder_block()

In [11]:
def encoder_block(x, d_model, num_heads, dp, mask):
    """One encoder layer: self-attention, then a feed-forward network.

    Each sub-layer output goes through ``add_and_norm`` together with the
    sub-layer's input.

    Args:
        x: input tensor of the block.
        d_model: passed as ``key_dim`` to the attention layer and as the
            width argument to ``feed_forward_network``.
        num_heads: number of attention heads.
        dp: dropout rate handed to ``add_and_norm``.
        mask: forwarded unchanged as ``attention_mask`` to the attention call.

    Returns:
        The output tensor of the block.
    """
    # NOTE(review): Keras documents key_dim as the size of *each* head;
    # d_model // num_heads is the usual choice — confirm d_model is intended.
    self_attention = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
    attended = add_and_norm(x, self_attention(x, x, attention_mask=mask), dp)
    transformed = feed_forward_network(d_model)(attended)
    return add_and_norm(attended, transformed, dp)

### 📌 7. decoder_block()

In [12]:
def decoder_block(x, enc_out, d_model, num_heads, dp, look_mask, pad_mask):
    """One decoder layer: self-attention, cross-attention, feed-forward.

    Every sub-layer output goes through ``add_and_norm`` together with the
    sub-layer's input.

    Args:
        x: decoder-side input tensor of the block.
        enc_out: tensor used as the value (and key) of the second attention.
        d_model: passed as ``key_dim`` to both attention layers and as the
            width argument to ``feed_forward_network``.
        num_heads: number of attention heads.
        dp: dropout rate handed to ``add_and_norm``.
        look_mask: ``attention_mask`` of the first attention (over ``x``).
        pad_mask: ``attention_mask`` of the second attention (over ``enc_out``).

    Returns:
        The output tensor of the block.
    """
    # 1) attention of the decoder sequence over itself
    self_attention = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
    after_self = add_and_norm(x, self_attention(x, x, attention_mask=look_mask), dp)

    # 2) attention of the decoder sequence over the encoder output
    cross_attention = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
    after_cross = add_and_norm(
        after_self,
        cross_attention(after_self, enc_out, attention_mask=pad_mask),
        dp,
    )

    # 3) feed-forward network
    transformed = feed_forward_network(d_model)(after_cross)
    return add_and_norm(after_cross, transformed, dp)


### 📌 8. build_encoder()

In [13]:
def build_encoder(vocab_size, max_len, d_model, num_heads, num_layers, dp):
    """Assemble the encoder as a functional Keras model.

    Pipeline: token ids -> Embedding -> PositionalEncoding -> ``num_layers``
    stacked ``encoder_block`` calls, all sharing one mask computed from the
    token ids by ``padding_mask_layer``.

    Args:
        vocab_size: number of rows of the embedding table.
        max_len: number of positions handed to ``PositionalEncoding``.
        d_model: embedding width, also passed to every block.
        num_heads: attention heads per block.
        num_layers: how many encoder blocks to stack.
        dp: dropout rate passed to every block.

    Returns:
        A ``Model`` named 'TransformerEncoder' with one input
        ('encoder_input', variable length) and the last block's output.
    """
    token_ids = Input(shape=(None,), name='encoder_input')
    pad_mask = padding_mask_layer(token_ids)

    hidden = Embedding(vocab_size, d_model)(token_ids)
    hidden = PositionalEncoding(max_len, d_model)(hidden)

    remaining = num_layers
    while remaining > 0:
        hidden = encoder_block(hidden, d_model, num_heads, dp, pad_mask)
        remaining -= 1

    return Model(inputs=token_ids, outputs=hidden, name='TransformerEncoder')


### 📌 9. build_decoder()

In [14]:
def build_decoder(vocab_size, max_len, d_model, num_heads, num_layers, dp):
    """Assemble the decoder as a functional Keras model.

    Pipeline: decoder token ids -> Embedding -> PositionalEncoding ->
    ``num_layers`` stacked ``decoder_block`` calls. Self-attention in every
    block uses the mask produced by ``combined_mask_layer`` from the decoder
    token ids.

    Args:
        vocab_size: number of rows of the embedding table.
        max_len: number of positions handed to ``PositionalEncoding``.
        d_model: embedding width; also the feature size of 'encoder_output'.
        num_heads: attention heads per block.
        num_layers: how many decoder blocks to stack.
        dp: dropout rate passed to every block.

    Returns:
        A ``Model`` named 'TransformerDecoder' with inputs
        ['decoder_input', 'encoder_output'] and the last block's output.
    """
    dec_in  = Input(shape=(None,), name='decoder_input')
    enc_out = Input(shape=(None, d_model), name='encoder_output')
    look    = combined_mask_layer(dec_in)

    # Cross-attention mask: the last axis of that mask indexes *encoder*
    # positions, so it has to be derived from the encoder token ids. This
    # model only receives the encoder *output*, and a mask built from
    # `dec_in` (as was done before) hides the wrong positions and only
    # broadcasts when decoder and encoder lengths happen to be equal.
    # Until the encoder ids (or a ready-made mask) are passed in as an extra
    # input, no mask is applied to cross-attention.
    # TODO: thread the encoder padding mask through to this model.
    cross_mask = None

    x       = Embedding(vocab_size, d_model)(dec_in)
    x       = PositionalEncoding(max_len, d_model)(x)
    for _ in range(num_layers):
        x = decoder_block(x, enc_out, d_model, num_heads, dp, look, cross_mask)
    return Model(inputs=[dec_in, enc_out], outputs=x, name='TransformerDecoder')

### 📌 10. build_transformer() 

In [15]:
def build_transformer(in_vocab, tar_vocab, max_len, d_model, num_heads, num_layers, dp):
    """Wire encoder, decoder and the output projection into one model.

    Args:
        in_vocab: vocabulary size handed to ``build_encoder``.
        tar_vocab: vocabulary size handed to ``build_decoder``; also the
            number of units of the final softmax layer.
        max_len, d_model, num_heads, num_layers, dp: forwarded unchanged to
            both ``build_encoder`` and ``build_decoder``.

    Returns:
        A ``Model`` named 'TransformerModel' taking
        ['encoder_inputs', 'decoder_inputs'] and returning, per decoder
        position, a softmax distribution over ``tar_vocab`` entries.
    """
    shared_args = (max_len, d_model, num_heads, num_layers, dp)

    enc_inputs = Input(shape=(None,), name='encoder_inputs')
    dec_inputs = Input(shape=(None,), name='decoder_inputs')

    encoder = build_encoder(in_vocab, *shared_args)
    memory = encoder(enc_inputs)

    decoder = build_decoder(tar_vocab, *shared_args)
    decoded = decoder([dec_inputs, memory])

    to_vocab = Dense(tar_vocab, activation='softmax', name='final_output')
    return Model(
        inputs=[enc_inputs, dec_inputs],
        outputs=to_vocab(decoded),
        name='TransformerModel',
    )

In [16]:
# Build the full model from the vocabulary sizes and the shared sequence
# length computed in the preprocessing cells.
transformer = build_transformer(
    in_vocab=input_vocab_size,
    tar_vocab=target_vocab_size,
    max_len=max_len,
    d_model=256,
    num_heads=4,
    num_layers=4,
    dp=0.1,
)

# The model ends in a softmax and the targets are integer ids, hence the
# sparse categorical cross-entropy.
# NOTE(review): the loss is averaged over every target position, including
# the zero padding — consider masking those positions; confirm intent.
transformer.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
transformer.summary()


