In [5]:
import pandas as pd

selected_classification = "Pattern Category"

df = pd.read_csv('dark_patterns.csv')

df = df[pd.notnull(df["Pattern String"])]
col = ["Pattern String", selected_classification]
df = df[col]

print(df[selected_classification].value_counts())

Scarcity         678
Social Proof     314
Misdirection     237
Urgency          237
Obstruction       30
Sneaking          12
Forced Action      4
Name: Pattern Category, dtype: int64


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn import metrics
from joblib import dump

from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm, tree
from sklearn.linear_model import SGDClassifier, LogisticRegression


df1 = pd.read_csv('normie.csv')
df2 = pd.read_csv('dark_patterns.csv')

df1 = df1[pd.notnull(df1["Pattern String"])]
df1 = df1[df1["classification"] == 0]
df1["classification"] = "Not Dark"
df1.drop_duplicates(subset="Pattern String")

df2 = df2[pd.notnull(df2["Pattern String"])]
df2["classification"] = "Dark"
col = ["Pattern String", "classification"]
df2 = df2[col]

df = pd.concat([df1, df2])

X_train, X_test, y_train, y_test = train_test_split(
    df['Pattern String'], df["classification"], random_state=42, test_size=.3)
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

# Model creation

classifiers = []
accs = []
cms = []

classifiers.append(BernoulliNB())
classifiers.append(MultinomialNB())
classifiers.append(RandomForestClassifier())
classifiers.append(svm.SVC())
classifiers.append(tree.DecisionTreeClassifier())
classifiers.append(SGDClassifier())
classifiers.append(LogisticRegression())

for clf in classifiers:
    clf.fit(X_train_tfidf, y_train)
    y_pred = clf.predict(count_vect.transform(X_test))
    accs.append(metrics.accuracy_score(y_test, y_pred))
    cms.append(metrics.confusion_matrix(y_test, y_pred))

for i in range(len(classifiers)):
    print(f"{classifiers[i]} accuracy: {accs[i]}")
    print(f"Confusion Matris: {cms[i]}")

BernoulliNB() accuracy: 0.9686888454011742
Confusion Matris: [[410  19]
 [ 13 580]]
MultinomialNB() accuracy: 0.9334637964774951
Confusion Matris: [[419  10]
 [ 58 535]]
RandomForestClassifier() accuracy: 0.9452054794520548
Confusion Matris: [[402  27]
 [ 29 564]]
SVC() accuracy: 0.6956947162426614
Confusion Matris: [[120 309]
 [  2 591]]
DecisionTreeClassifier() accuracy: 0.9187866927592955
Confusion Matris: [[375  54]
 [ 29 564]]
SGDClassifier() accuracy: 0.9461839530332681
Confusion Matris: [[420   9]
 [ 46 547]]
LogisticRegression() accuracy: 0.9452054794520548
Confusion Matris: [[416  13]
 [ 43 550]]


In [4]:
import pandas as pd

# Load the dataset
df = pd.read_csv('dark_patterns.csv')

# Extract unique values of "Pattern Category"
pattern_categories = df['Pattern Category'].unique()

# Save unique pattern categories to another file
with open('pattern_categories.txt', 'w') as f:
    for category in pattern_categories:
        f.write(category + '\n')


In [2]:
import pandas as pd
import random

# Load the dataset
df = pd.read_csv('dark_patterns.csv')

# Get unique existing categories
existing_categories = df['Pattern Category'].dropna().unique()

# Fill blank or undefined categories with a random value
df['Pattern Category'] = df['Pattern Category'].apply(lambda x: random.choice(existing_categories) if pd.isna(x) else x)

# Save the updated dataframe
df.to_csv('darkpatterns_filled.csv', index=False)
