In [1]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding, Dropout
from keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import os
from matplotlib import pyplot as plt

# Step 1: Load data
# Only the 'theme' column of the spreadsheet is used below.
df = pd.read_excel('diploma.xlsx')

# Step 2: Text preprocessing
# Combine all text data that the model should learn from into one long string.
# Series.str.cat omits missing values when no na_rep is given, so empty cells
# simply do not contribute to the corpus.
all_text = df['theme'].str.cat(sep=' ')  # "tema" = theme

# Tokenization: fit on the whole corpus, then encode it as one list of word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([all_text])
encoded = tokenizer.texts_to_sequences([all_text])[0]

# Determine vocabulary size (+1 because Keras word indices start at 1; 0 is left for padding)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size:', vocab_size)

# Create sequences: sliding windows of `sequence_length` input tokens plus 1 target token.
sequence_length = 11
if len(encoded) <= sequence_length:
    # Without this check the slicing below fails with an opaque
    # "too many indices for array" IndexError on an empty array.
    raise ValueError(
        f"Corpus has only {len(encoded)} tokens; at least {sequence_length + 1} "
        "are needed to build one training sequence."
    )
sequences = np.array([
    encoded[start:start + sequence_length + 1]
    for start in range(len(encoded) - sequence_length)
])

# Split into X (first `sequence_length` tokens of each window) and y (the token that follows)
X, y = sequences[:, :-1], sequences[:, -1]
# One hot encoding. float32 halves the memory of the default float64 matrix;
# Keras computes the loss in float32 anyway.
y = np.eye(vocab_size, dtype=np.float32)[y]

# Step 3: Model setup
# Next-word predictor: embedding -> two stacked LSTMs -> dense head with a
# softmax over the whole vocabulary.
model = Sequential([
    Embedding(vocab_size, 11, input_length=sequence_length),
    LSTM(50, return_sequences=True),  # keep every timestep so the second LSTM gets a sequence
    LSTM(50),
    Dense(50, activation='relu'),
    Dense(vocab_size, activation='softmax'),
])

# Compile the model (targets are one-hot vectors, hence categorical cross-entropy)
adam_optimizer = Adam(learning_rate=0.01)
model.compile(
    optimizer=adam_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Summarize the model
model.summary()

# Fit the model; stop early once the training loss has not improved for 5 epochs
stop_on_plateau = EarlyStopping(monitor='loss', patience=5)
model.fit(X, y, epochs=50, verbose=2, callbacks=[stop_on_plateau])

# Function to generate text
def generate_text(seed_text, next_words, model, max_sequence_len):
    """Greedily extend ``seed_text`` with up to ``next_words`` predicted words.

    On every step the current text is encoded with the module-level
    ``tokenizer``, padded on the left (or truncated) to ``max_sequence_len``
    tokens, and the word with the highest predicted score is appended.

    Args:
        seed_text: Text to start from.
        next_words: Maximum number of words to append.
        model: Object whose ``predict(batch, verbose=0)`` returns one row of
            per-word-index scores for each input sequence.
        max_sequence_len: Number of tokens fed to the model per step.

    Returns:
        The seed text followed by the generated words, separated by single
        spaces. Fewer than ``next_words`` words are appended if the model
        predicts an index that maps to no word.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len, padding='pre')
        predicted_probs = model.predict(token_list, verbose=0)
        predicted_index = int(np.argmax(predicted_probs, axis=-1)[0])
        # index_word is the tokenizer's index -> word mapping, so this is a
        # direct lookup instead of a scan over word_index on every step.
        output_word = tokenizer.index_word.get(predicted_index, '')
        if not output_word:
            # No word for this index (e.g. the padding index 0). Appending a
            # blank would leave the token sequence unchanged, so every further
            # step would repeat the same prediction and only add spaces.
            break
        seed_text += " " + output_word
    return seed_text

# Persist the trained model to 'thesis_generator_model.h5' (relative path).
# NOTE(review): the truncated `saving_api.save_model(` line in this cell's output looks
# like Keras' warning about the legacy HDF5 format - consider the native `.keras`
# format if nothing else depends on this file name; confirm before changing.
model.save('thesis_generator_model.h5')



Vocabulary Size: 1007
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 11, 11)            11077     
                                                                 
 lstm (LSTM)                 (None, 11, 50)            12400     
                                                                 
 lstm_1 (LSTM)               (None, 50)                20200     
                                                                 
 dense (Dense)               (None, 50)                2550      
                                                                 
 dense_1 (Dense)             (None, 1007)              51357     
                                                                 
Total params: 97584 (381.19 KB)
Trainable params: 97584 (381.19 KB)
Non-trainable params: 0 (0.00 Byte)
______________________________________________________________

  saving_api.save_model(


In [2]:
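# Generate a new theme (example call, currently commented out; the sample output shown
# below this cell appears to come from an earlier run with these lines enabled)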
#field1 = 'web'
#field2 = 'security'
#seed_text = field1
#seed_text = f"{field1} and {field2}"
#new_text=generate_text(seed_text, 11, model, sequence_length)
#print(new_text)

web and security from structure generation creation of an application for the management of


In [3]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Assuming your notebook is already running necessary imports and initializations

In [25]:
# Widgets for the interactive theme generator.
#field1_input = widgets.Text(description='Field 1:')
seed_text_input = widgets.Text(description='Field:')  # seed text typed by the user
# NOTE(review): the next three inputs are created but never displayed or read in the
# visible cells - the click handler passes fixed arguments instead. Wire them in or drop them.
next_words_input = widgets.Text(description='next_words:')
model_input = widgets.Text(description='Model:')
max_sequence_len_input = widgets.Text(description='max_sequence_len:')
gjenero_button = widgets.Button(description='Gjenero')  # "Gjenero" - presumably Albanian for "Generate"
#rekomando_button = widgets.Button(description='Rekomando')
output = widgets.Output()  # area the click handler prints into

def on_gjenero_clicked(b):
    """Handle a click on the 'Gjenero' button: generate a theme and show it.

    Reads the seed text from ``seed_text_input``, clears the ``output`` area
    and prints either the generated text or a hint when no seed was entered.

    Args:
        b: The clicked button instance passed by ipywidgets (unused).
    """
    with output:
        clear_output()
        seed_text = seed_text_input.value.strip()
        if not seed_text:
            # An empty seed is encoded as pure padding, so the model would have
            # nothing to condition on and the result would start with a blank.
            print("Please enter a seed text in the 'Field' box.")
            return
        # Append 11 words, feeding the model `sequence_length` tokens per step.
        result = generate_text(seed_text, 11, model, sequence_length)
        print(result)


gjenero_button.on_click(on_gjenero_clicked)


In [26]:
# Render the seed-text box, the generate button and the output area (in that order).
display(seed_text_input, gjenero_button, output)

Text(value='', description='Field:')

Button(description='Gjenero', style=ButtonStyle())

Output()