In [1]:
import json
import numpy as np
import pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

def load_data(filepath):
    """Load the intents dataset from a JSON file.

    Args:
        filepath: Path to the JSON file to read.

    Returns:
        The parsed JSON content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which would mis-decode non-ASCII text on non-UTF-8 locales.
    with open(filepath, encoding="utf-8") as file:
        return json.load(file)

def preprocess_data(data):
    """Flatten the intents structure into training inputs and encoded targets.

    Args:
        data: Mapping with an ``'intents'`` entry; each intent is read through
            its ``'patterns'``, ``'tag'`` and ``'responses'`` keys.

    Returns:
        A 5-tuple ``(training_sentences, encoded_labels, labels, responses,
        label_encoder)``:
          * training_sentences: every pattern, in file order.
          * encoded_labels: result of ``LabelEncoder.fit_transform`` applied to
            the per-pattern tags.
          * labels: distinct tags in order of first appearance.
          * responses: one entry per intent (the intent's ``'responses'``).
          * label_encoder: the fitted ``LabelEncoder`` instance.
    """
    sentences = []
    sentence_tags = []
    distinct_tags = []
    all_responses = []

    for entry in data['intents']:
        patterns = entry['patterns']
        for text in patterns:
            sentences.append(text)
            sentence_tags.append(entry['tag'])

        # Responses are collected once per intent, even when a tag repeats.
        all_responses.append(entry['responses'])

        tag = entry['tag']
        if tag not in distinct_tags:
            distinct_tags.append(tag)

    encoder = LabelEncoder()
    targets = encoder.fit_transform(sentence_tags)

    return sentences, targets, distinct_tags, all_responses, encoder



In [2]:
def tokenize_and_pad(sentences, vocab_size, max_len, oov_token):
    """Fit a tokenizer on the sentences and return them as padded sequences.

    Args:
        sentences: Texts used both to fit the tokenizer and to be encoded.
        vocab_size: Passed to ``Tokenizer`` as ``num_words``.
        max_len: Passed to ``pad_sequences`` as ``maxlen``.
        oov_token: Passed to ``Tokenizer`` as ``oov_token``.

    Returns:
        ``(tokenizer, padded_sequences)`` - the fitted tokenizer and the output
        of ``pad_sequences``.
    """
    text_tokenizer = Tokenizer(oov_token=oov_token, num_words=vocab_size)
    text_tokenizer.fit_on_texts(sentences)

    # Over-long sequences are cut at the end; the padding side is left at the
    # library default.
    padded = pad_sequences(
        text_tokenizer.texts_to_sequences(sentences),
        maxlen=max_len,
        truncating='post',
    )
    return text_tokenizer, padded

In [3]:
def build_model(vocab_size, embedding_dim, max_len, num_classes):
    """Build and compile the stacked-LSTM classifier.

    The input length is declared with an explicit ``keras.Input`` layer rather
    than the ``input_length`` argument of ``Embedding``, which recent Keras
    releases deprecate. Declaring the input up front also means the model is
    built as soon as it is constructed, so ``model.summary()`` can report
    output shapes and parameter counts before training.

    Args:
        vocab_size: Size of the embedding vocabulary (``input_dim``).
        embedding_dim: Dimensionality of each embedding vector.
        max_len: Number of token ids in each input sequence.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        keras.Input(shape=(max_len,)),
        Embedding(vocab_size, embedding_dim),
        # First LSTM emits the full sequence so the second LSTM can consume it.
        LSTM(128, return_sequences=True),
        LSTM(128),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    # Sparse loss: targets are integer class ids, not one-hot vectors.
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def train_model(model, padded_sequences, labels, epochs):
    """Fit the model on the padded sequences.

    Args:
        model: Object exposing ``fit(x, y, epochs=...)``.
        padded_sequences: Training inputs, passed to ``fit`` unchanged.
        labels: Training targets; converted with ``np.array`` before fitting.
        epochs: Number of epochs, forwarded to ``fit``.

    Returns:
        Whatever ``model.fit`` returns.
    """
    targets = np.array(labels)
    return model.fit(padded_sequences, targets, epochs=epochs)

def save_artifacts(model, tokenizer, label_encoder, model_path, tokenizer_path, encoder_path):
    """Persist the model, the tokenizer and the label encoder to disk.

    Args:
        model: Object exposing ``save(path)``; saved to ``model_path``.
        tokenizer: Object pickled to ``tokenizer_path``.
        label_encoder: Object pickled to ``encoder_path``.
        model_path: Destination passed to ``model.save``.
        tokenizer_path: Destination file for the pickled tokenizer.
        encoder_path: Destination file for the pickled label encoder.
    """
    model.save(model_path)

    # Tokenizer first, then encoder; each file is closed before the next opens.
    pickled_artifacts = (
        (tokenizer_path, tokenizer),
        (encoder_path, label_encoder),
    )
    for target_path, artifact in pickled_artifacts:
        with open(target_path, 'wb') as out_file:
            pickle.dump(artifact, out_file, protocol=pickle.HIGHEST_PROTOCOL)


In [4]:
# Main script: load the intents, train the classifier and save the artifacts.

# Seed NumPy and TensorFlow before any model is created so that repeated
# Restart & Run All executions are as reproducible as the backend allows.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

data = load_data("dataset.json")
training_sentences, encoded_labels, labels, responses, label_encoder = preprocess_data(data)

# Hyperparameters for tokenization and the embedding layer.
vocab_size = 1000
embedding_dim = 16
max_len = 30
oov_token = "<OOV>"

tokenizer, padded_sequences = tokenize_and_pad(training_sentences, vocab_size, max_len, oov_token)

# Size the output layer from the fitted encoder so every index the model can
# predict maps back to a tag; `labels` may also contain tags that have no
# training patterns and therefore no encoded class.
num_classes = len(label_encoder.classes_)
model = build_model(vocab_size, embedding_dim, max_len, num_classes)
model.summary()

epochs = 150
history = train_model(model, padded_sequences, encoded_labels, epochs)

save_artifacts(model, tokenizer, label_encoder, "chat_model.h5", "tokenizer.pickle", "label_encoder.pickle")



Epoch 1/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - accuracy: 0.0746 - loss: 2.6389
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.1384 - loss: 2.6338
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.1697 - loss: 2.6242
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.1384 - loss: 2.6179
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.1384 - loss: 2.6004
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.1384 - loss: 2.6116
Epoch 7/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.1489 - loss: 2.5860
Epoch 8/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.1593 - loss: 2.5694
Epoch 9/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

