In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical

In [3]:
# Read the verse table from the Excel workbook, discard rows containing
# missing values and fully duplicated rows, then give the three columns
# their working names (assigned by position).
df = (
    pd.read_excel('dataset/quran.xlsx', engine='openpyxl')
    .dropna()
    .drop_duplicates()
    .set_axis(['juzno', 'surahno', 'qurantext'], axis=1)
)

# Sanity check: show the last rows of the cleaned frame
print(df.tail())

      juzno  surahno                                qurantext
6231     30      114                          مَلِكِ النَّاسِ
6232     30      114                         إِلَٰهِ النَّاسِ
6233     30      114      مِنْ شَرِّ الْوَسْوَاسِ الْخَنَّاسِ
6234     30      114  الَّذِي يُوَسْوِسُ فِي صُدُورِ النَّاسِ
6235     30      114               مِنَ الْجِنَّةِ وَالنَّاسِ


In [4]:
# Learn the vocabulary from every verse in the cleaned frame.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['qurantext'])

# Turn each verse into a list of word ids and pad all of them to a common
# length (pad_sequences defaults: length of the longest sequence).
X = tf.keras.preprocessing.sequence.pad_sequences(
    tokenizer.texts_to_sequences(df['qurantext'])
)

# Report the resulting matrix shape: (number of verses, padded length)
print(X.shape)

(6144, 145)


In [6]:
# One encoder per target so predicted class ids can later be mapped back to
# the original juz / surah numbers with inverse_transform.
label_encoder_juzno, label_encoder_surahno = LabelEncoder(), LabelEncoder()

# Encode each target column to integer class ids, then expand the ids into
# one-hot rows for the categorical cross-entropy losses.
y_juzno = to_categorical(label_encoder_juzno.fit_transform(df['juzno']))
y_surahno = to_categorical(label_encoder_surahno.fit_transform(df['surahno']))

print(y_juzno.shape, y_surahno.shape)

(6144, 30) (6144, 114)


In [7]:
# Hold out 20% of the verses; the three arrays are split with the same
# shuffled row order, and the fixed seed makes the split repeatable.
(
    X_train, X_test,
    y_train_juzno, y_test_juzno,
    y_train_surahno, y_test_surahno,
) = train_test_split(X, y_juzno, y_surahno, test_size=0.2, random_state=42)

In [8]:
# Two-headed sequence classifier built with the functional API: a shared
# Embedding + stacked-LSTM encoder feeds one softmax head per target.
seq_len = X.shape[1]
# +1 because Tokenizer word ids start at 1; id 0 is the value pad_sequences
# fills with by default, so the embedding table needs a row for it as well.
vocab_size = len(tokenizer.word_index) + 1

input_layer = tf.keras.layers.Input(shape=(seq_len,))
embedding = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=128)(input_layer)
# First LSTM returns the full sequence so the second LSTM can consume it;
# the second LSTM returns only its final state.
lstm = tf.keras.layers.LSTM(128, return_sequences=True)(embedding)
lstm = tf.keras.layers.LSTM(64)(lstm)

# One output head per target; widths follow the one-hot label matrices.
juzno_output = tf.keras.layers.Dense(
    y_juzno.shape[1], activation='softmax', name='juzno_output'
)(lstm)
surahno_output = tf.keras.layers.Dense(
    y_surahno.shape[1], activation='softmax', name='surahno_output'
)(lstm)

# Assemble the model
model = tf.keras.Model(inputs=input_layer, outputs=[juzno_output, surahno_output])

# Both heads use the same loss and metric, keyed by output-layer name.
head_names = ('juzno_output', 'surahno_output')
model.compile(
    optimizer='adam',
    loss={head: 'categorical_crossentropy' for head in head_names},
    metrics={head: 'accuracy' for head in head_names},
)

# Print the layer-by-layer summary
model.summary()

In [None]:
# Targets are passed as dicts keyed by output-layer name so each head
# receives its own labels.
train_targets = {'juzno_output': y_train_juzno, 'surahno_output': y_train_surahno}
val_targets = {'juzno_output': y_test_juzno, 'surahno_output': y_test_surahno}

# The held-out split is used as validation data during training.
history = model.fit(
    x=X_train,
    y=train_targets,
    validation_data=(X_test, val_targets),
    batch_size=16,
    epochs=25,
)

Epoch 1/22
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 159ms/step - juzno_output_accuracy: 0.1040 - juzno_output_loss: 3.1377 - loss: 7.5073 - surahno_output_accuracy: 0.0511 - surahno_output_loss: 4.3696 - val_juzno_output_accuracy: 0.0000e+00 - val_juzno_output_loss: 3.1977 - val_loss: 7.1160 - val_surahno_output_accuracy: 0.0000e+00 - val_surahno_output_loss: 3.9182
Epoch 2/22
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 169ms/step - juzno_output_accuracy: 0.1442 - juzno_output_loss: 2.8303 - loss: 6.5913 - surahno_output_accuracy: 0.0857 - surahno_output_loss: 3.7610 - val_juzno_output_accuracy: 0.2000 - val_juzno_output_loss: 3.1049 - val_loss: 6.6210 - val_surahno_output_accuracy: 0.1000 - val_surahno_output_loss: 3.5161
Epoch 3/22
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 174ms/step - juzno_output_accuracy: 0.2137 - juzno_output_loss: 2.5227 - loss: 5.9550 - surahno_output_accuracy: 0.1138 - surahno_output_los

In [9]:
import joblib

# The network alone is not enough to run inference in a fresh session: the
# inference cells also need the fitted tokenizer (word -> id), the padded
# sequence length and both label encoders (class id -> juz / surah number).
# Store them in a companion file; restore with
#   prep = joblib.load("reciter_model2_preprocessing")
joblib.dump(
    {
        "tokenizer": tokenizer,
        "max_len": X.shape[1],
        "label_encoder_juzno": label_encoder_juzno,
        "label_encoder_surahno": label_encoder_surahno,
    },
    "reciter_model2_preprocessing",
)

# NOTE(review): this pickles the Keras model object through joblib. Keras
# documents model.save(...) as its native persistence format; consider
# switching. The path and format are left unchanged here so anything that
# already reads "reciter_model2" keeps working.
joblib.dump(model, "reciter_model2")

['reciter_model2']

In [10]:
# Evaluate the model on the held-out split.
# return_dict=True yields {metric name: value}, so each number is looked up
# by name instead of relying on the positional order in which a multi-output
# model reports its losses and metrics. The key names are the ones shown in
# the fit / evaluate progress log.
val_metrics = model.evaluate(
    X_test,
    {'juzno_output': y_test_juzno, 'surahno_output': y_test_surahno},
    verbose=1,
    return_dict=True,
)

val_loss = val_metrics['loss']
val_juzno_loss = val_metrics['juzno_output_loss']
val_surahno_loss = val_metrics['surahno_output_loss']
val_juzno_accuracy = val_metrics['juzno_output_accuracy']
val_surahno_accuracy = val_metrics['surahno_output_accuracy']

# Print validation metrics
print(f"Validation Total Loss: {val_loss}")
print(f"Validation Juzno Loss: {val_juzno_loss}")
print(f"Validation Surahno Loss: {val_surahno_loss}")
print(f"Validation Juzno Accuracy: {val_juzno_accuracy * 100:.2f}%")
print(f"Validation Surahno Accuracy: {val_surahno_accuracy * 100:.2f}%")

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 95ms/step - juzno_output_accuracy: 0.0258 - juzno_output_loss: 3.4025 - loss: 8.1390 - surahno_output_accuracy: 0.0024 - surahno_output_loss: 4.7365
Validation Total Loss: 8.138574600219727
Validation Juzno Loss: 3.4025285243988037
Validation Surahno Loss: 4.7360663414001465
Validation Juzno Accuracy: 2.60%
Validation Surahno Accuracy: 0.41%


In [11]:
# Run the model on the held-out split; a two-output model returns one
# probability array per head, in the order the outputs were declared
# (juz first, surah second).
predictions = model.predict(X_test)
juzno_probs, surahno_probs = predictions

# Most probable class per row, mapped back to the original juz / surah
# numbers through the fitted label encoders.
predicted_juzno = label_encoder_juzno.inverse_transform(np.argmax(juzno_probs, axis=1))
predicted_surahno = label_encoder_surahno.inverse_transform(np.argmax(surahno_probs, axis=1))

# Show up to the first 10 predictions. Slicing (rather than indexing with a
# fixed range(10)) avoids an IndexError when fewer than 10 rows are available.
for juz, surah in zip(predicted_juzno[:10], predicted_surahno[:10]):
    print(f"Predicted Juzno: {juz}, Predicted Surahno: {surah}")

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 99ms/step
Predicted Juzno: 11, Predicted Surahno: 44
Predicted Juzno: 5, Predicted Surahno: 59
Predicted Juzno: 16, Predicted Surahno: 31
Predicted Juzno: 16, Predicted Surahno: 48
Predicted Juzno: 16, Predicted Surahno: 16
Predicted Juzno: 5, Predicted Surahno: 106
Predicted Juzno: 11, Predicted Surahno: 96
Predicted Juzno: 20, Predicted Surahno: 44
Predicted Juzno: 11, Predicted Surahno: 31
Predicted Juzno: 16, Predicted Surahno: 85


In [12]:
import joblib

# Load a previously stored model from disk.
# NOTE: this rebinds `model`, so from here on the network built earlier in
# this notebook is no longer reachable under that name. The path also differs
# from the "reciter_model2" file written by the joblib.dump cell above.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
model = joblib.load("model/quran_model")

In [14]:
# Verse to classify
input_text = "أَلَمْ يَجْعَلْ كَيْدَهُمْ فِي تَضْلِيلٍ"     # Replace with your input text

# Encode the verse with the fitted tokenizer and pad it to the same width as
# the training matrix X.
sequence = tokenizer.texts_to_sequences([input_text])
padded_sequence = tf.keras.preprocessing.sequence.pad_sequences(
    sequence, maxlen=X.shape[1]
)

# One probability array per output head (juz first, surah second)
predictions = model.predict(padded_sequence)
juzno_probs, surahno_probs = predictions

# Index of the most probable class for each head
juzno_prediction = juzno_probs.argmax(axis=1)
surahno_prediction = surahno_probs.argmax(axis=1)

# Map class indices back to the original juz / surah numbers
predicted_juzno = label_encoder_juzno.inverse_transform(juzno_prediction)
predicted_surahno = label_encoder_surahno.inverse_transform(surahno_prediction)

# Report the single prediction
print(f"Predicted Juz No: {predicted_juzno[0]}")
print(f"Predicted Surah No: {predicted_surahno[0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 609ms/step
Predicted Juz No: 30
Predicted Surah No: 105
