In [4]:
import json

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Load data
data_path = 'C:\\Users\\USER\\Downloads\\bible_passage_finder\\BIble passage data.csv'
bible_data = pd.read_csv(data_path)

# Extract relevant columns.
# Rows missing either field are skipped first: astype(str) would otherwise turn
# a missing cell into the literal string 'nan', which would then be tokenized
# as a verse or treated as its own class. bible_data itself is left unmodified.
complete_rows = bible_data.dropna(subset=['BIBLE TEXT', 'VERSE'])
texts = complete_rows['BIBLE TEXT'].astype(str).values  # Verse text
labels = complete_rows['VERSE'].astype(str).values      # Verse references

# Tokenize text: fit the tokenizer on the corpus, then convert each text
# into its sequence of integer token ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad every sequence to the length of the longest one so the model
# receives inputs of uniform length.
max_sequence_length = max(map(len, sequences))
X = pad_sequences(sequences, maxlen=max_sequence_length)

# Encode labels
# The unique references are sorted so that the class-index assignment is
# deterministic. Iterating a set of strings follows hash order, which Python
# randomizes per process, so an unsorted mapping would differ between runs and
# could not be reconstructed when the saved model is loaded later.
distinct_labels = sorted(set(labels))
label_to_index = {label: idx for idx, label in enumerate(distinct_labels)}
indices_to_label = dict(enumerate(distinct_labels))

# Integer class ids first, then one-hot rows (one column per distinct label).
y = np.array([label_to_index[label] for label in labels])
y = to_categorical(y, num_classes=len(distinct_labels))

# Train-test split: hold out 20% of the samples, with a fixed seed so the
# partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Define RNN model, built up layer by layer:
# embedding -> LSTM -> dense hidden layer -> softmax over all verse references.
model = Sequential()
model.add(
    Embedding(
        # vocabulary size plus one extra slot (presumably for the 0 id used by padding)
        input_dim=len(tokenizer.word_index) + 1,
        output_dim=128,
        input_length=max_sequence_length,
    )
)
# return_sequences=False: only the final LSTM output feeds the layers below.
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# One output unit per distinct label, matching the one-hot width of y.
model.add(Dense(len(distinct_labels), activation='softmax'))

# Compile model: categorical cross-entropy pairs with the one-hot encoded targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train model; the held-out split is evaluated after every epoch.
epochs = 10
batch_size = 32
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
)

# Save the model and tokenizer
model.save('bible_verse_rnn_model.h5')
# Explicit encoding so the file does not depend on the platform default.
with open('tokenizer.json', 'w', encoding='utf-8') as f:
    f.write(tokenizer.to_json())

# Save the class-index -> verse-reference mapping next to the model.
# The network only outputs class indices; without this file the saved model's
# predictions cannot be translated back into verse references in a later
# session. Stored as a JSON list where position i holds the label of class i.
with open('label_index.json', 'w', encoding='utf-8') as f:
    json.dump(
        [indices_to_label[idx] for idx in range(len(indices_to_label))],
        f,
        ensure_ascii=False,
    )

# Display the model's summary
model.summary()




Epoch 1/10
4/4 ━━━━━━━━━━━━━━━━━━━━ 11s 791ms/step - accuracy: 0.1459 - loss: 3.2857 - val_accuracy: 0.9062 - val_loss: 3.2164
Epoch 2/10
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 290ms/step - accuracy: 0.6739 - loss: 3.1856 - val_accuracy: 0.9062 - val_loss: 2.7848
Epoch 3/10
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 216ms/step - accuracy: 0.7993 - loss: 2.4776 - val_accuracy: 0.9062 - val_loss: 0.6784
Epoch 4/10
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 253ms/step - accuracy: 0.7762 - loss: 1.4262 - val_accuracy: 0.9062 - val_loss: 0.6833
Epoch 5/10
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 253ms/step - accuracy: 0.7755 - loss: 1.6217 - val_accuracy: 0.9062 - val_loss: 0.6810
Epoch 6/10
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 233ms/step - accuracy: 0.7608 - loss: 1.5694 - val_accuracy: 0.9062 - val_loss: 0.6736
Epoch 7/10
4/4 ━━━━━━━━━━━

