In [45]:
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

df = pd.read_csv('srpski.csv', sep='\t')

df = df.drop(columns=['Rbr', 'SR', 'sr/sr', 'Naslov', 'Jezik'])
for column in df.columns:
    if df[column].dtype == 'object':
        df[column] = df[column].str.lower()

max_text_length = df['Tekst'].apply(lambda x: len(str(x).split())).max()
max_text_length

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['Tekst'], df['Autor'], test_size=0.2, random_state=42, stratify=df['Autor'])
y_train_numerical = y_train.factorize()[0]
y_test_numerical = y_test.factorize()[0]



early_stopping = EarlyStopping(
    patience=3,  # Number of epochs with no improvement after which training will be stopped
    monitor='val_accuracy',  # Metric to monitor (e.g., validation loss)
    restore_best_weights=True  # Restore model weights from the epoch with the best value of the monitored metric
)


# Tokenization and sequence padding
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)
sequences = tokenizer.texts_to_sequences(X_train)
X = pad_sequences(sequences, maxlen=max_text_length)

optimizer = Adam(learning_rate=0.001)  

# Build the RNN model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=50, input_length=max_text_length))
model.add(LSTM(100, return_sequences=True))
model.add(Dense(len(set(df['Autor'])), activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train the model
model.fit(X, y_train_numerical, epochs=10, validation_split=0.2, callbacks=early_stopping)


new_sequences = tokenizer.texts_to_sequences(X_test)
new_X = pad_sequences(new_sequences, maxlen=max_text_length)

predictions = model.predict(new_X)

evaluation_result = model.evaluate(new_X, y_test_numerical)



Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main

  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/traitlets/config/application.py", line 1053, in launch_instance

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 737, in start

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/base_events.py", line 596, in run_forever

  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/base_events.py", line 1890, in _run_once

  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/events.py", line 80, in _run

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 524, in dispatch_queue

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 513, in process_one

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 418, in dispatch_shell

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 758, in execute_request

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 426, in do_execute

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3048, in run_cell

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3103, in _run_cell

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3308, in run_cell_async

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3490, in run_ast_nodes

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code

  File "/var/folders/9w/5w1pj3t928988w99hkbbct300000gn/T/ipykernel_2394/2477736002.py", line 51, in <module>

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1151, in train_step

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1209, in compute_loss

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/engine/compile_utils.py", line 277, in __call__

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/losses.py", line 143, in __call__

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/losses.py", line 270, in call

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/losses.py", line 2454, in sparse_categorical_crossentropy

  File "/Users/jelenalazovic/Desktop/5/IP2/env/lib/python3.9/site-packages/keras/src/backend.py", line 5775, in sparse_categorical_crossentropy

logits and labels must have the same first dimension, got logits shape [112000,15] and labels shape [32]
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_21114]

In [40]:
evaluation_result[1]

0.1071428582072258