In [None]:
import random

import nltk
import numpy as np
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.utils import set_random_seed

In [None]:
# Fetch the "labeled_final" configuration of the "paws" dataset; the result is
# indexed by split name ('train', 'validation', 'test') further down.
dataset = load_dataset(path="paws", name="labeled_final")

In [None]:
# Shared sentence encoder; get_sentence_embedding() reads this module-level name.
embedding_model = SentenceTransformer(
    model_name_or_path='sentence-transformers/all-mpnet-base-v2',
)

In [None]:
def get_sentence_embedding(sentence):
    """Encode ``sentence`` with the module-level ``embedding_model``.

    Args:
        sentence: Input forwarded unchanged to ``embedding_model.encode``.

    Returns:
        Whatever ``embedding_model.encode`` returns for that input.
    """
    return embedding_model.encode(sentence)

def create_features(data, encoder=None):
    """Build paired-sentence feature vectors and labels from ``data``.

    All first sentences are encoded in a single call and all second sentences
    in another, instead of invoking the encoder once per sentence. The two
    embedding matrices are then joined column-wise, so row ``i`` of the result
    is ``[embedding(sentence1_i), embedding(sentence2_i)]``.

    Args:
        data: Iterable of mapping-like items providing the keys
            ``'sentence1'``, ``'sentence2'`` and ``'label'``.
        encoder: Optional callable that takes a list of sentences and returns
            a 2-D array-like with one row per sentence. Defaults to
            ``get_sentence_embedding``, which forwards the list to
            ``embedding_model.encode``.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays. ``features`` has one row
        per item; ``labels`` holds the items' ``'label'`` values in order.
        For empty ``data`` both are empty 1-D arrays.
    """
    if encoder is None:
        # Resolved at call time so the notebook-level helper is picked up.
        encoder = get_sentence_embedding

    first_sentences, second_sentences, labels = [], [], []
    for item in data:
        first_sentences.append(item['sentence1'])
        second_sentences.append(item['sentence2'])
        labels.append(item['label'])

    if not labels:
        # Nothing to encode; keep the historical empty-array return value.
        return np.array([]), np.array([])

    first_embeddings = np.asarray(encoder(first_sentences))
    second_embeddings = np.asarray(encoder(second_sentences))

    # axis=1 places the two embeddings of a pair side by side in one row.
    features = np.concatenate([first_embeddings, second_embeddings], axis=1)
    return features, np.array(labels)

In [None]:
# Take one shard out of 20 (index 0) from each split.
new_training_dataset, new_val_dataset, new_test_dataset = (
    dataset[split_name].shard(num_shards=20, index=0)
    for split_name in ('train', 'validation', 'test')
)

print(new_training_dataset, new_val_dataset, new_test_dataset, sep='\n')

# Turn each subset into (features, labels) arrays. Kept as separate statements
# so results already computed survive if a later call fails.
train_features, train_labels = create_features(data=new_training_dataset)
val_features, val_labels = create_features(data=new_val_dataset)
test_features, test_labels = create_features(data=new_test_dataset)


In [None]:
# Define the model
def create_model(input_shape):
    """Build and compile the feed-forward binary classifier.

    The network stacks three hidden stages of 256, 128 and 64 units. Each
    stage is a ReLU ``Dense`` layer followed by ``BatchNormalization`` and
    ``Dropout(0.3)``. A single sigmoid unit produces the output.

    Args:
        input_shape: Shape forwarded to ``Input(shape=...)``.

    Returns:
        A ``Model`` compiled with Adam (learning rate 0.001), the
        ``binary_crossentropy`` loss and the ``accuracy`` metric.
    """
    inputs = Input(shape=input_shape)

    hidden = inputs
    for units in (256, 128, 64):
        hidden = Dense(units, activation='relu')(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(0.3)(hidden)

    outputs = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=inputs, outputs=outputs)
    classifier.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [None]:
# Seed the Python, NumPy and TensorFlow random generators before the model is
# built, so weight initialisation, dropout masks and batch shuffling repeat on
# every Restart & Run All. (Some GPU kernels may still be non-deterministic.)
SEED = 42
set_random_seed(SEED)

model = create_model(train_features.shape[1:])

# Train the model; the validation split is only monitored, not used for stopping.
model.fit(
    train_features,
    train_labels,
    epochs=100,
    batch_size=32,
    validation_data=(val_features, val_labels),
)

# Evaluate the model on the held-out test subset.
test_loss, test_accuracy = model.evaluate(test_features, test_labels)
print(f'Test Accuracy: {test_accuracy}')