In [146]:
import tensorflow as tf
import os
import numpy as np 
from sklearn.utils import shuffle
import pandas as pd 
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

# Data

In [147]:
# Load the two CSV splits: seed.csv is used as the training set, test.csv as the test set.
train_df = pd.read_csv("seed.csv")
test_df = pd.read_csv('test.csv')

# Stack both splits so the label vocabulary covers every label seen in either file.
total_df = pd.concat([train_df, test_df])

# Assign each distinct label an integer id, in order of first appearance.
label_to_id = {
    name: index
    for index, name in enumerate(total_df['label'].unique())
}

# Encode the test labels as integer ids (overwrites the 'label' column of test_df).
test_df['label'] = test_df['label'].map(label_to_id)

# Work with the first 150 test rows only.
examples = test_df['example'][:150]
labels = test_df['label'][:150]

# Text Vectorization Layer

In [148]:
# Vectorizer settings.
vocab_size = 10_000         # passed as max_tokens: cap on the vocabulary size
max_sequence_length = 150   # every example is output as a row of exactly this many ids

# Layer that turns raw strings into fixed-length integer token-id sequences.
text_vec_layer = tf.keras.layers.TextVectorization(
    max_tokens=vocab_size,
    output_sequence_length=max_sequence_length,
)

In [149]:
# Important step: build the layer's vocabulary from the training examples.
# This has to happen before the layer is applied to any text.
text_vec_layer.adapt(train_df['example'])
# Important step: convert the selected test examples into integer token ids
# using the vocabulary learned above.
vectorized_examples = text_vec_layer(examples)

- Example: the vectorized test examples

In [150]:
# Display the vectorized batch: one row of token ids per example.
vectorized_examples

<tf.Tensor: shape=(150, 150), dtype=int64, numpy=
array([[  2,  84,   4, ...,   0,   0,   0],
       [ 13, 301,   2, ...,   0,   0,   0],
       [  2,  22,   6, ...,   0,   0,   0],
       ...,
       [  3,   8,  26, ...,   0,   0,   0],
       [ 83, 135,   2, ...,   0,   0,   0],
       [  9, 349,  11, ...,   0,   0,   0]], dtype=int64)>

# Embedding Layer

In [151]:
# Size of the dense vector that represents each token id.
embed_size = 128

# Embedding lookup sized to the vectorizer's vocabulary (input_dim matches max_tokens).
embedding_layer = tf.keras.layers.Embedding(
    input_dim=vocab_size,
    output_dim=embed_size,
)

# Look up a vector for every token id; this appends an axis of length embed_size.
embedded_examples = embedding_layer(vectorized_examples)

# Compare the shapes before and after the embedding lookup.
print("TextVectorization Output Shape:", vectorized_examples.shape)
print("Embedding Output Shape:", embedded_examples.shape)

TextVectorization Output Shape: (150, 150)
Embedding Output Shape: (150, 150, 128)
