In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
import joblib
import requests 

In [21]:
data = pd.read_csv('dataset.csv')

In [22]:
data

Unnamed: 0,page_id,text,label,Pattern Category
0,1012,FLASH SALE | LIMITED TIME ONLY Shop Now,1,Urgency
1,158,Pillowcases & Shams,0,Not Dark Pattern
2,108,Write a review,0,Not Dark Pattern
3,1425,"To start your return, simply click on the foll...",0,Not Dark Pattern
4,1658,newsletter signup (privacy policy),0,Not Dark Pattern
...,...,...,...,...
2351,1304,Bra Panty Set,0,Not Dark Pattern
2352,1038,DressesFootwearHatsHoodies & SweatshirtsJersey...,0,Not Dark Pattern
2353,1081,Only 4 left in stock,1,Scarcity
2354,767,Trending Players,0,Not Dark Pattern


In [23]:
print(data.head())

   page_id                                               text  label  \
0     1012            FLASH SALE | LIMITED TIME ONLY Shop Now      1   
1      158                                Pillowcases & Shams      0   
2      108                                     Write a review      0   
3     1425  To start your return, simply click on the foll...      0   
4     1658                newsletter signup (privacy policy)       0   

   Pattern Category  
0           Urgency  
1  Not Dark Pattern  
2  Not Dark Pattern  
3  Not Dark Pattern  
4  Not Dark Pattern  


In [24]:
X_train, X_test, y_train, y_test = train_test_split(data['text'], data['label'], test_size=0.2, random_state=42)

In [25]:
vectorizer = CountVectorizer(stop_words='english')

In [26]:
model = MultinomialNB()

In [27]:
pipeline = Pipeline([
    ('vectorizer', vectorizer),
    ('classifier', model)
])

In [28]:
pipeline.fit(X_train, y_train)

In [29]:
y_pred = pipeline.predict(X_test)

In [30]:
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.8008474576271186
              precision    recall  f1-score   support

           0       0.96      0.64      0.77       244
           1       0.72      0.97      0.83       228

    accuracy                           0.80       472
   macro avg       0.84      0.81      0.80       472
weighted avg       0.84      0.80      0.80       472



In [31]:
joblib.dump(pipeline, 'dark_pattern_model.joblib')

['dark_pattern_model.joblib']

In [38]:
def predict_dark_pattern(text):
    model = joblib.load('dark_pattern_model.joblib')
    prediction = model.predict([text])
    return "Dark Pattern" if prediction[0] == 1 else "Not Dark Pattern"

In [39]:
sample_text = "FLASH SALE | LIMITED TIME ONLY Shop Now"

In [40]:
print(predict_dark_pattern(sample_text))

Dark Pattern


In [41]:
from bs4 import BeautifulSoup
import requests

In [42]:
def get_text_from_url(url):
    try:
        response = requests.get(url)
        response.raise_for_status()  
        soup = BeautifulSoup(response.text, 'html.parser')
        text = soup.get_text(separator=' ')
        return text
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        return ""


In [45]:
webpage_text =get_text_from_url('https://www.flipkart.com/6bo/ai3/0xm/~cs-a5y58608su/pr?sid=6bo%2Cai3%2C0xm&collection-tab-name=USB+Gadget&param=5641&hpid=d4EPbInicQqc_QH_VKsz0qp7_Hsxr70nj65vMAAFKlc%3D&ctx=eyJjYXJkQ29udGV4dCI6eyJhdHRyaWJ1dGVzIjp7InZhbHVlQ2FsbG91dCI6eyJtdWx0aVZhbHVlZEF0dHJpYnV0ZSI6eyJrZXkiOiJ2YWx1ZUNhbGxvdXQiLCJpbmZlcmVuY2VUeXBlIjoiVkFMVUVfQ0FMTE9VVCIsInZhbHVlcyI6WyJGcm9tIOKCuTE3OSJdLCJ2YWx1ZVR5cGUiOiJNVUxUSV9WQUxVRUQifX0sImhlcm9QaWQiOnsic2luZ2xlVmFsdWVBdHRyaWJ1dGUiOnsia2V5IjoiaGVyb1BpZCIsImluZmVyZW5jZVR5cGUiOiJQSUQiLCJ2YWx1ZSI6IlVTR0c0WjNLRVdIWVcyTUciLCJ2YWx1ZVR5cGUiOiJTSU5HTEVfVkFMVUVEIn19LCJ0aXRsZSI6eyJtdWx0aVZhbHVlZEF0dHJpYnV0ZSI6eyJrZXkiOiJ0aXRsZSIsImluZmVyZW5jZVR5cGUiOiJUSVRMRSIsInZhbHVlcyI6WyJVU0IgR2FkZ2V0cyJdLCJ2YWx1ZVR5cGUiOiJNVUxUSV9WQUxVRUQifX19fX0%3D')
print(predict_dark_pattern(webpage_text))

An error occurred: 500 Server Error: Internal Server Error for url: https://www.flipkart.com/6bo/ai3/0xm/~cs-a5y58608su/pr?sid=6bo%2Cai3%2C0xm&collection-tab-name=USB+Gadget&param=5641&hpid=d4EPbInicQqc_QH_VKsz0qp7_Hsxr70nj65vMAAFKlc%3D&ctx=eyJjYXJkQ29udGV4dCI6eyJhdHRyaWJ1dGVzIjp7InZhbHVlQ2FsbG91dCI6eyJtdWx0aVZhbHVlZEF0dHJpYnV0ZSI6eyJrZXkiOiJ2YWx1ZUNhbGxvdXQiLCJpbmZlcmVuY2VUeXBlIjoiVkFMVUVfQ0FMTE9VVCIsInZhbHVlcyI6WyJGcm9tIOKCuTE3OSJdLCJ2YWx1ZVR5cGUiOiJNVUxUSV9WQUxVRUQifX0sImhlcm9QaWQiOnsic2luZ2xlVmFsdWVBdHRyaWJ1dGUiOnsia2V5IjoiaGVyb1BpZCIsImluZmVyZW5jZVR5cGUiOiJQSUQiLCJ2YWx1ZSI6IlVTR0c0WjNLRVdIWVcyTUciLCJ2YWx1ZVR5cGUiOiJTSU5HTEVfVkFMVUVEIn19LCJ0aXRsZSI6eyJtdWx0aVZhbHVlZEF0dHJpYnV0ZSI6eyJrZXkiOiJ0aXRsZSIsImluZmVyZW5jZVR5cGUiOiJUSVRMRSIsInZhbHVlcyI6WyJVU0IgR2FkZ2V0cyJdLCJ2YWx1ZVR5cGUiOiJNVUxUSV9WQUxVRUQifX19fX0%3D
Dark Pattern
