In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd


data = pd.read_csv('/content/drive/MyDrive/Datasets/ecommerceDataset.csv', header=None, names=['Class', 'Text'])

class_counts = data['Class'].value_counts()
print("Class counts:\n", class_counts)


Class counts:
 Class
Household                 19313
Books                     11820
Electronics               10621
Clothing & Accessories     8671
Name: count, dtype: int64


In [None]:
data.head()

Unnamed: 0,Class,Text
0,Household,Paper Plane Design Framed Wall Hanging Motivat...
1,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
2,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
3,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
4,Household,Incredible Gifts India Wooden Happy Birthday U...


In [None]:


import re
import nltk
from nltk.corpus import stopwords

# Download stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def clean_text(text):

    if not isinstance(text, str):
        return ""

    text = re.sub(r'[^\w\s]', '', text)
    text = text.lower()

    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text


data['Text'] = data['Text'].apply(clean_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:


from sklearn.model_selection import train_test_split


train_data, test_data = train_test_split(
    data, test_size=0.3, stratify=data['Class'], random_state=42
)


train_texts = train_data['Text'].values
train_labels = train_data['Class'].values
test_texts = test_data['Text'].values
test_labels = test_data['Class'].values


In [None]:

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical


encoder = LabelEncoder()
train_labels = encoder.fit_transform(train_labels)
test_labels = encoder.transform(test_labels)


train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)


In [None]:
!pip install --upgrade tensorflow




In [None]:
# 1. using TextVectorization with one-gram multi_hot encoding, keeping max feature size = 10000

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.optimizers import Adam


vectorizer = TextVectorization(max_tokens=10000, output_mode='multi_hot', ngrams=1)
vectorizer.adapt(train_texts)

model = Sequential([
    vectorizer,
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(4, activation='softmax')
])


model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


history = model.fit(train_texts, train_labels, epochs=10, validation_split=0.1)


Epoch 1/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 47ms/step - accuracy: 0.8639 - loss: 0.4450 - val_accuracy: 0.9620 - val_loss: 0.1568
Epoch 2/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 47ms/step - accuracy: 0.9826 - loss: 21469798483361792.0000 - val_accuracy: 0.9640 - val_loss: 0.1513
Epoch 3/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 46ms/step - accuracy: 0.9943 - loss: 0.0246 - val_accuracy: 0.9669 - val_loss: 0.1830
Epoch 4/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 46ms/step - accuracy: 0.9968 - loss: 0.0156 - val_accuracy: 0.9669 - val_loss: 0.1799
Epoch 5/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 46ms/step - accuracy: 0.9972 - loss: 0.0107 - val_accuracy: 0.9691 - val_loss: 0.1963
Epoch 6/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 46ms/step - accuracy: 0.9976 - loss: 0.0092 - val_accuracy: 0.9666 - val_loss: 0.2086


In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd



train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.1, random_state=42)


val_predictions = model.predict(val_texts)

val_predictions = val_predictions.argmax(axis=1)

val_true_labels = val_labels.argmax(axis=1)


report = classification_report(val_true_labels, val_predictions, target_names=['Class 1', 'Class 2', 'Class 3', 'Class 4'], output_dict=True)

report_df = pd.DataFrame(report).transpose()


print(report_df)


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
              precision    recall  f1-score      support
Class 1        0.568733  0.931567  0.706276   453.000000
Class 2        0.000000  0.000000  0.000000   380.000000
Class 3        0.652244  0.922902  0.764319   441.000000
Class 4        0.851182  0.754624  0.800000   811.000000
accuracy       0.691127  0.691127  0.691127     0.691127
macro avg      0.518040  0.652273  0.567649  2085.000000
weighted avg   0.592606  0.691127  0.626287  2085.000000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# 2. using TextVectorization with two-gram multi_hot encoding
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.optimizers import Adam


vectorizer = TextVectorization(max_tokens=10000, output_mode='multi_hot', ngrams=2)
vectorizer.adapt(train_texts)

model = Sequential([
    vectorizer,
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(4, activation='softmax')
])


model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_texts, train_labels, epochs=10, validation_split=0.1)


Epoch 1/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 88ms/step - accuracy: 0.8548 - loss: nan - val_accuracy: 0.2320 - val_loss: nan
Epoch 2/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 96ms/step - accuracy: 0.2351 - loss: nan - val_accuracy: 0.2320 - val_loss: nan
Epoch 3/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 94ms/step - accuracy: 0.2355 - loss: nan - val_accuracy: 0.2320 - val_loss: nan
Epoch 4/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 97ms/step - accuracy: 0.2343 - loss: nan - val_accuracy: 0.2320 - val_loss: nan
Epoch 5/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 95ms/step - accuracy: 0.2395 - loss: nan - val_accuracy: 0.2320 - val_loss: nan
Epoch 6/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 94ms/step - accuracy: 0.2351 - loss: nan - val_accuracy: 0.2320 - val_loss: nan
Epoch 7/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd

train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.1, random_state=42)


val_predictions = model.predict(val_texts)


val_predictions = val_predictions.argmax(axis=1)


val_true_labels = val_labels.argmax(axis=1)

report = classification_report(val_true_labels, val_predictions, target_names=['Class 1', 'Class 2', 'Class 3', 'Class 4'], output_dict=True)

report_df = pd.DataFrame(report).transpose()

print(report_df)


[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
              precision    recall  f1-score      support
Class 1        0.607143  0.927948  0.734024   458.000000
Class 2        0.000000  0.000000  0.000000   333.000000
Class 3        0.660714  0.939086  0.775681   394.000000
Class 4        0.842276  0.749638  0.793262   691.000000
accuracy       0.699893  0.699893  0.699893     0.699893
macro avg      0.527533  0.654168  0.575742  1876.000000
weighted avg   0.597231  0.699893  0.634299  1876.000000


In [None]:
#3. using TextVectorization with two-gram tf_idf encoding
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.optimizers import Adam

vectorizer = TextVectorization(max_tokens=10000, output_mode='tf_idf', ngrams=2)
vectorizer.adapt(train_texts)


model = Sequential([
    vectorizer,
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(4, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


history = model.fit(train_texts, train_labels, epochs=10, validation_split=0.1)


Epoch 1/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 89ms/step - accuracy: 0.8622 - loss: 0.5376 - val_accuracy: 0.9598 - val_loss: 0.2051
Epoch 2/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 85ms/step - accuracy: 0.9821 - loss: 0.0840 - val_accuracy: 0.9615 - val_loss: 0.1879
Epoch 3/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 85ms/step - accuracy: 0.9908 - loss: 0.0411 - val_accuracy: 0.9654 - val_loss: 0.2213
Epoch 4/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 85ms/step - accuracy: 0.9919 - loss: 0.0331 - val_accuracy: 0.9575 - val_loss: 0.2599
Epoch 5/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 88ms/step - accuracy: 0.9916 - loss: 0.0391 - val_accuracy: 0.9657 - val_loss: 0.2466
Epoch 6/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 84ms/step - accuracy: 0.9953 - loss: 0.0242 - val_accuracy: 0.9674 - val_loss: 0.2262
Epoch 7/10
[

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd



train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.1, random_state=42)


val_predictions = model.predict(val_texts)

val_predictions = val_predictions.argmax(axis=1)


val_true_labels = val_labels.argmax(axis=1)


report = classification_report(val_true_labels, val_predictions, target_names=['Class 1', 'Class 2', 'Class 3', 'Class 4'], output_dict=True)


report_df = pd.DataFrame(report).transpose()


print(report_df)


[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
              precision    recall  f1-score      support
Class 1        0.609635  0.926768  0.735471   396.000000
Class 2        1.000000  0.003534  0.007042   283.000000
Class 3        0.660886  0.947514  0.778661   362.000000
Class 4        0.840989  0.735703  0.784831   647.000000
accuracy       0.703199  0.703199  0.703199     0.703199
macro avg      0.777878  0.653380  0.576501  1688.000000
weighted avg   0.774749  0.703199  0.641529  1688.000000


In [None]:
# 4. Define TextVectorization layer with max length 200, max tokens = 10000, and output mode = 'int'

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, TextVectorization
from tensorflow.keras.optimizers import Adam

vectorizer = TextVectorization(
    max_tokens=10000,
    output_mode='int',
    output_sequence_length=200
)

vectorizer.adapt(train_texts)

model = Sequential([
    vectorizer,
    Embedding(input_dim=10000, output_dim=32),
    LSTM(32),
    Dense(16, activation='relu'),
    Dense(4, activation='softmax')
])


model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


history = model.fit(train_texts, train_labels, epochs=5, validation_split=0.1)


Epoch 1/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.3964 - loss: 1.3321 - val_accuracy: 0.4255 - val_loss: 1.2953
Epoch 2/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 12ms/step - accuracy: 0.4871 - loss: 1.1798 - val_accuracy: 0.4159 - val_loss: 1.2925
Epoch 3/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - accuracy: 0.4585 - loss: 1.2263 - val_accuracy: 0.6813 - val_loss: 0.8134
Epoch 4/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 12ms/step - accuracy: 0.7264 - loss: 0.6868 - val_accuracy: 0.7841 - val_loss: 0.5607
Epoch 5/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 12ms/step - accuracy: 0.8987 - loss: 0.3867 - val_accuracy: 0.9280 - val_loss: 0.3540


In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd



train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.1, random_state=42)
val_predictions = model.predict(val_texts)

val_predictions = val_predictions.argmax(axis=1)

val_true_labels = val_labels.argmax(axis=1)
report = classification_report(val_true_labels, val_predictions, target_names=['Class 1', 'Class 2', 'Class 3', 'Class 4'], output_dict=True)

report_df = pd.DataFrame(report).transpose()


print(report_df)


[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
              precision    recall  f1-score      support
Class 1        0.571942  0.893258  0.697368   356.000000
Class 2        0.000000  0.000000  0.000000   259.000000
Class 3        0.684564  0.932927  0.789677   328.000000
Class 4        0.866538  0.776430  0.819013   577.000000
accuracy       0.705263  0.705263  0.705263     0.705263
macro avg      0.530761  0.650654  0.576515  1520.000000
weighted avg   0.610619  0.705263  0.644637  1520.000000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# 5. using GloVe
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, TextVectorization
from tensorflow.keras.optimizers import Adam

vectorizer = TextVectorization(
    max_tokens=10000,
    output_mode='int',
    output_sequence_length=200
)

vectorizer.adapt(train_texts)

def load_glove_embeddings(glove_file, embedding_dim=100):
    embeddings_index = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    return embeddings_index

glove_file = '/content/drive/MyDrive/Datasets/glove.6B.100d.txt'
embedding_dim = 100

embeddings_index = load_glove_embeddings(glove_file, embedding_dim)

embedding_matrix = np.zeros((10000, embedding_dim))
for i in range(10000):
    word = vectorizer.get_vocabulary()[i]
    if word in embeddings_index:
        embedding_matrix[i] = embeddings_index[word]

model = Sequential([
    vectorizer,
    Embedding(input_dim=10000, output_dim=embedding_dim,
              weights=[embedding_matrix], input_length=200, trainable=False),
    LSTM(32),
    Dense(16, activation='relu'),
    Dense(4, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_texts, train_labels, epochs=10, validation_split=0.1)




Epoch 1/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 12ms/step - accuracy: 0.4820 - loss: 1.1885 - val_accuracy: 0.6779 - val_loss: 0.8387
Epoch 2/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - accuracy: 0.6939 - loss: 0.7941 - val_accuracy: 0.7255 - val_loss: 0.7341
Epoch 3/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 11ms/step - accuracy: 0.6313 - loss: 0.8809 - val_accuracy: 0.6941 - val_loss: 0.8218
Epoch 4/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - accuracy: 0.7080 - loss: 0.7543 - val_accuracy: 0.7227 - val_loss: 0.7416
Epoch 5/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - accuracy: 0.6928 - loss: 0.7913 - val_accuracy: 0.6459 - val_loss: 0.9762
Epoch 6/10
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - accuracy: 0.7155 - loss: 0.7900 - val_accuracy: 0.7742 - val_loss: 0.6185
Epoch 7/10
[1m9

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd



train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.1, random_state=42)
val_predictions = model.predict(val_texts)

val_predictions = val_predictions.argmax(axis=1)

val_true_labels = val_labels.argmax(axis=1)


report = classification_report(val_true_labels, val_predictions, target_names=['Class 1', 'Class 2', 'Class 3', 'Class 4'], output_dict=True)

report_df = pd.DataFrame(report).transpose()

print(report_df)


[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
              precision    recall  f1-score      support
Class 1        0.587549  0.917933  0.716489   329.000000
Class 2        0.000000  0.000000  0.000000   236.000000
Class 3        0.698980  0.910299  0.790765   301.000000
Class 4        0.850649  0.782869  0.815353   502.000000
accuracy       0.708333  0.708333  0.708333     0.708333
macro avg      0.534294  0.652775  0.580652  1368.000000
weighted avg   0.607253  0.708333  0.645506  1368.000000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
 #6 with FastText
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, TextVectorization
from tensorflow.keras.optimizers import Adam

vectorizer = TextVectorization(
    max_tokens=10000,
    output_mode='int',
    output_sequence_length=200
)

vectorizer.adapt(train_texts)

def load_fasttext_embeddings(fasttext_file, embedding_dim=100):
    embeddings_index = {}
    with open(fasttext_file, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.rstrip().split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    return embeddings_index

fasttext_file = '/content/drive/MyDrive/Datasets/wiki-news-300d-1M.vec'
embedding_dim = 300

embeddings_index = load_fasttext_embeddings(fasttext_file, embedding_dim)

embedding_matrix = np.zeros((10000, embedding_dim))
for i in range(10000):
    word = vectorizer.get_vocabulary()[i]
    if word in embeddings_index:
        embedding_matrix[i] = embeddings_index[word]

model = Sequential([
    vectorizer,
    Embedding(input_dim=10000, output_dim=embedding_dim,
              weights=[embedding_matrix], input_length=200, trainable=False),
    LSTM(32),
    Dense(16, activation='relu'),
    Dense(4, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_texts, train_labels, epochs=5, validation_split=0.1)


Epoch 1/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.4225 - loss: 1.2780 - val_accuracy: 0.7221 - val_loss: 0.7849
Epoch 2/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12ms/step - accuracy: 0.6941 - loss: 0.7727 - val_accuracy: 0.7278 - val_loss: 0.7724
Epoch 3/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - accuracy: 0.6647 - loss: 0.8043 - val_accuracy: 0.5929 - val_loss: 0.9728
Epoch 4/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 12ms/step - accuracy: 0.4793 - loss: 1.1818 - val_accuracy: 0.5649 - val_loss: 1.0088
Epoch 5/5
[1m993/993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 20ms/step - accuracy: 0.5906 - loss: 0.9535 - val_accuracy: 0.7000 - val_loss: 0.7521


In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd


train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.1, random_state=42)

val_predictions = model.predict(val_texts)

val_predictions = val_predictions.argmax(axis=1)

val_true_labels = val_labels.argmax(axis=1)

report = classification_report(val_true_labels, val_predictions, target_names=['Class 1', 'Class 2', 'Class 3', 'Class 4'], output_dict=True)

report_df = pd.DataFrame(report).transpose()
print(report_df)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
              precision    recall  f1-score     support
Class 1        0.609544  0.936667  0.738502   300.00000
Class 2        1.000000  0.004630  0.009217   216.00000
Class 3        0.624277  0.919149  0.743546   235.00000
Class 4        0.836879  0.737500  0.784053   480.00000
accuracy       0.692120  0.692120  0.692120     0.69212
macro avg      0.767675  0.649486  0.568829  1231.00000
weighted avg   0.769513  0.692120  0.629261  1231.00000
