In [None]:
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Dense, SpatialDropout1D

In [None]:
# Load the labelled corpus; later cells read its 'text' and 'label' columns.
dataset_path = 'dataset.csv'
data = pd.read_csv(dataset_path)


In [None]:
# Map the raw values of the 'label' column to integer class ids.
# The encoder is kept so predictions can be mapped back to the raw labels later.
# NOTE(review): the column is overwritten in place, so re-running this cell
# re-fits the encoder on already-encoded values.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(data['label'])
data['label'] = encoded_labels

In [None]:
# Vocabulary cap passed to the tokenizer and reused as the embedding input size.
max_words = 10000

# Fit the tokenizer on every text in the dataset, then turn each text into a
# sequence of token ids and pad the sequences into a single 2-D array `X`.
corpus_texts = data['text'].values
tokenizer = Tokenizer(num_words=max_words, split=' ')
tokenizer.fit_on_texts(corpus_texts)
X = pad_sequences(tokenizer.texts_to_sequences(corpus_texts))

In [None]:
# Hold out 20% of the samples for the final evaluation; the seed is fixed so
# the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    data['label'].values,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Model hyperparameters.
embedding_dim = 128  # width of each token embedding vector
lstm_units = 100     # units passed to the LSTM wrapped by Bidirectional

# Architecture: embedding -> spatial dropout -> bidirectional LSTM -> 1-unit output.
model = Sequential()
model.add(Embedding(max_words, embedding_dim, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(Bidirectional(LSTM(lstm_units)))
# The output must be a probability in (0, 1): the model is trained with
# binary_crossentropy and predictions are thresholded at 0.5. ReLU is unbounded
# above and can get stuck at exactly 0, so sigmoid is the correct activation.
model.add(Dense(1, activation='sigmoid'))


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Training schedule.
batch_size = 64
epochs = 5

# Train on the training split, holding back 20% of it for per-epoch validation.
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)

In [None]:
# Serialize the trained model to disk with pickle.
# NOTE(review): the path is absolute and machine-specific, and pickling a Keras
# model may not be portable across library versions — consider the library's
# own save/load API together with the loading cell.
model_pickle_path = r'C:\Users\abhig\DPH Model\loaded_model\trained_bilstm.pkl'
with open(model_pickle_path, 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Score the model on the held-out test split; evaluate() yields the loss
# followed by the accuracy metric configured at compile time.
test_metrics = model.evaluate(X_test, y_test, verbose=1)
loss, accuracy = test_metrics
print(f'Test Accuracy: {accuracy * 100:.2f}%')

In [None]:
# Read the pickled model back from disk into `loaded_model`.
# NOTE(review): pickle.load executes code embedded in the file — only load
# pickles that were produced locally and are trusted.
model_pickle_path = r'C:\Users\abhig\DPH Model\loaded_model\trained_bilstm.pkl'
with open(model_pickle_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# Sample strings to classify.
new_texts = [
    '75% off Hurry! Sale ends soon',
    '12 seats remaining',
    'Almost Sold Out',
    'Hello',
    'goodbye',
]

# Encode them with the tokenizer fitted on the training corpus and pad to the
# same sequence length the model was built with.
sequences = tokenizer.texts_to_sequences(new_texts)
model_input_length = X.shape[1]
padded_sequences = pad_sequences(sequences, maxlen=model_input_length)

In [None]:
# Score the padded sample texts with the model reloaded from disk.
predictions = loaded_model.predict(padded_sequences)

# Threshold the scores at 0.5 to get encoded class ids, then map those ids
# back to the dataset's original label values.
predicted_class_ids = (predictions > 0.5).astype(int).flatten()
decoded_predictions = label_encoder.inverse_transform(predicted_class_ids)

# Keep the texts whose decoded label equals 1.
# NOTE(review): this compares the *decoded* label with the integer 1, which only
# matches if the dataset's original labels are numeric — confirm against dataset.csv.
dark_patterns = [
    sample for sample, label in zip(new_texts, decoded_predictions) if label == 1
]

In [None]:

# Report every sample text that was flagged, one per line.
print("These are Dark Patterns:")
for flagged_text in dark_patterns:
    print(flagged_text)