In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
import joblib
import requests 

In [2]:
# Load the labelled text snippets from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='dataset.csv')

In [3]:
# Show the loaded DataFrame as this cell's output for a quick visual check.
data

Unnamed: 0,page_id,text,label,Pattern Category
0,1012,FLASH SALE | LIMITED TIME ONLY Shop Now,1,Urgency
1,158,Pillowcases & Shams,0,Not Dark Pattern
2,108,Write a review,0,Not Dark Pattern
3,1425,"To start your return, simply click on the foll...",0,Not Dark Pattern
4,1658,newsletter signup (privacy policy),0,Not Dark Pattern
...,...,...,...,...
2351,1304,Bra Panty Set,0,Not Dark Pattern
2352,1038,DressesFootwearHatsHoodies & SweatshirtsJersey...,0,Not Dark Pattern
2353,1081,Only 4 left in stock,1,Scarcity
2354,767,Trending Players,0,Not Dark Pattern


In [4]:
# Plain-text preview of the first five rows.
print(data.head(n=5))

   page_id                                               text  label  \
0     1012            FLASH SALE | LIMITED TIME ONLY Shop Now      1   
1      158                                Pillowcases & Shams      0   
2      108                                     Write a review      0   
3     1425  To start your return, simply click on the foll...      0   
4     1658                newsletter signup (privacy policy)       0   

   Pattern Category  
0           Urgency  
1  Not Dark Pattern  
2  Not Dark Pattern  
3  Not Dark Pattern  
4  Not Dark Pattern  


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

In [6]:
# Bag-of-words token counts, using scikit-learn's built-in English stop-word list.
vectorizer = CountVectorizer(
    stop_words='english',
)

In [7]:
# Multinomial naive Bayes classifier, constructed with its default settings.
model = MultinomialNB()

In [8]:
# Chain vectorisation and classification so the pipeline accepts raw strings
# and is fitted / persisted as a single object.
pipeline = Pipeline(steps=[('vectorizer', vectorizer), ('classifier', model)])

In [9]:
# Fit the vectorizer and the classifier on the training split only.
pipeline.fit(X=X_train, y=y_train)

In [10]:
# Predicted labels for the held-out split.
y_pred = pipeline.predict(X=X_test)

In [11]:
# Overall accuracy first, then the per-class precision / recall / F1 table.
print(
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep='\n',
)

0.8008474576271186
              precision    recall  f1-score   support

           0       0.96      0.64      0.77       244
           1       0.72      0.97      0.83       228

    accuracy                           0.80       472
   macro avg       0.84      0.81      0.80       472
weighted avg       0.84      0.80      0.80       472



In [12]:
# Persist the fitted pipeline (vectorizer + classifier) so it can be reloaded later.
joblib.dump(value=pipeline, filename='dark_pattern_model.joblib')

['dark_pattern_model.joblib']

In [13]:
def predict_dark_pattern(text, model=None, model_path='dark_pattern_model.joblib'):
    """Classify a piece of text as "Dark Pattern" or "Not Dark Pattern".

    Parameters
    ----------
    text : str
        The text to classify.
    model : object with a ``predict`` method, optional
        An already-loaded pipeline. Pass one when classifying many texts so
        the model file is not re-read on every call. When omitted, the model
        is loaded from ``model_path``.
    model_path : str, optional
        Location of the saved pipeline, used only when ``model`` is not given.

    Returns
    -------
    str
        ``"Dark Pattern"`` when the model predicts label 1, otherwise
        ``"Not Dark Pattern"``. Empty or whitespace-only text is reported as
        ``"Not Dark Pattern"`` without consulting the model.
    """
    # With no text there is nothing to classify; asking the model anyway would
    # produce an answer that does not depend on the input at all.
    if text is None or not text.strip():
        return "Not Dark Pattern"

    if model is None:
        # NOTE: joblib files are pickles -- only load model files you trust.
        model = joblib.load(model_path)

    # The pipeline expects an iterable of documents, hence the one-element list.
    prediction = model.predict([text])
    return "Dark Pattern" if prediction[0] == 1 else "Not Dark Pattern"

In [14]:
# Smoke-test input. NOTE(review): this string is identical to row 0 of the
# dataset shown above, so the model may have seen it during training -- it
# checks that the saved model loads and predicts, not how well it generalises.
sample_text = "FLASH SALE | LIMITED TIME ONLY Shop Now"

In [15]:
# Round-trip check: reload the saved model and classify the sample string.
print(predict_dark_pattern(text=sample_text))

Dark Pattern


In [16]:
from bs4 import BeautifulSoup
import requests

In [17]:
def get_text_from_url(url):
    """Download a web page and return the text extracted from its HTML.

    Parameters
    ----------
    url : str
        Address of the page to fetch.

    Returns
    -------
    str
        The page text, with a single space inserted between text fragments.
        An empty string is returned (after printing a message) when the
        response status is not 200, the request times out, or any other
        ``requests`` error occurs.
    """
    # A browser-like User-Agent is sent with the request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # Get the webpage content; the timeout stops a stalled server from
        # hanging the notebook.
        response = requests.get(url, headers=headers, timeout=10)
        # Check the status code
        if response.status_code != 200:
            print(f"Error fetching the webpage: Status code {response.status_code}")
            return ""
        # Parse from the raw bytes rather than response.text: when a text/*
        # response declares no charset, requests falls back to ISO-8859-1,
        # which garbles UTF-8 pages. Honour the header charset only when one
        # is really declared; otherwise let Beautiful Soup detect the encoding
        # (e.g. from the document's <meta> tags).
        content_type = response.headers.get('Content-Type', '')
        declared_encoding = response.encoding if 'charset' in content_type.lower() else None
        soup = BeautifulSoup(response.content, 'html.parser', from_encoding=declared_encoding)
        # Extract the text from the webpage
        return soup.get_text(separator=' ')
    except requests.Timeout as e:
        # Timeout is a RequestException subclass, so it must be handled first.
        print("The request timed out: ", e)
        return ""
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        return ""

In [None]:
# Fetch the page and classify its text. get_text_from_url returns "" when the
# download fails, so only run the classifier when some text actually came back;
# otherwise a failed fetch would be reported as if it were a real prediction.
# NOTE(review): the whole page is classified as one document here -- confirm
# that matches how the model is meant to be used.
webpage_text = get_text_from_url('https://arigec.com/')
if webpage_text.strip():
    print(predict_dark_pattern(webpage_text))
else:
    print("No text could be retrieved from the page; skipping prediction.")