In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SpatialDropout1D, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import Accuracy

# Load data from CSV
data = pd.read_csv('data.csv')

# Split data into features (X) and target (y)
X = data['Text']
y = data['Target']

# Initialize and fit tokenizer; only the `vocab_size` most frequent words are
# kept when texts are converted to sequences.
# NOTE(review): the tokenizer is fitted on the full corpus before the
# train/test split, so the vocabulary also reflects the test texts.
vocab_size = 700
tokenizer = Tokenizer(num_words=vocab_size, split=' ')
tokenizer.fit_on_texts(X.values)

# Convert text data to integer sequences and pad them to a common length
X = tokenizer.texts_to_sequences(X.values)
X = pad_sequences(X)

# One-hot encode the target (one column per class). The float dtype is set
# explicitly because the default dtype of get_dummies differs between pandas
# versions (bool in pandas >= 2.0).
y = pd.get_dummies(y, dtype='float32')

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Build the model: embedding -> spatial dropout -> LSTM -> softmax over classes
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=120, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(150, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(y.shape[1], activation='softmax'))  # one probability per class

# Compile the model.
# A softmax output over one-hot labels pairs with categorical cross-entropy.
# The 'accuracy' string lets Keras pick categorical accuracy (argmax match);
# the Accuracy() metric class instead counts exact equality between predicted
# probabilities and labels, which reports ~0 for a softmax output.
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

# Train the model
# NOTE(review): the test split is also used as validation data here, so the
# final evaluation below is not on data unseen during training monitoring.
epochs = 10
batch_size = 64
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")


In [10]:
# Vocabulary size and embedding width, shared by the tokenizer and the
# Embedding layer so the two cannot drift apart
vocab_size = 700
embedding_dim = 120

# Creating the tokenizer limited to the `vocab_size` most frequent words
tokenizer = Tokenizer(num_words=vocab_size, split=' ')

tokenizer.fit_on_texts(data['Text'].values)

# Converting text to integer sequences
X = tokenizer.texts_to_sequences(data['Text'].values)

# Padding the sequences to a common length
X = pad_sequences(X)

# Creating the one-hot encoded target variable (one column per class)
y = pd.get_dummies(data['Target'])

# Splitting the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initializing the sequential model
model = Sequential()
# Embedding layer: vocab_size tokens, embedding_dim-wide vectors, input length of the train data
model.add(Embedding(vocab_size, embedding_dim, input_length=X_train.shape[1]))
# SpatialDropout1D with ratio of 0.2
model.add(SpatialDropout1D(0.2))
# LSTM layer with 150 units having both dropout and recurrent_dropout ratio of 0.2
model.add(LSTM(150, dropout=0.2, recurrent_dropout=0.2))
# Output layer with one sigmoid unit per target column (derived from y rather
# than hard-coded); together with binary_crossentropy each one-hot column is
# scored as an independent binary output
model.add(Dense(y.shape[1], activation='sigmoid'))

# Compile the model (the stray trailing line-continuation backslash is removed)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Summary of the model
model.summary()

# Specifying the batch size
batch_size = 64

# Fitting the model on the training data with 10 epochs
his = model.fit(X_train, y_train, epochs=10, batch_size=batch_size, verbose='auto')

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 167, 120)          84000     
                                                                 
 spatial_dropout1d_1 (Spatia  (None, 167, 120)         0         
 lDropout1D)                                                     
                                                                 
 lstm (LSTM)                 (None, 150)               162600    
                                                                 
 dense_1 (Dense)             (None, 2)                 302       
                                                                 
Total params: 246,902
Trainable params: 246,902
Non-trainable params: 0
_________________________________________________________________
