In [33]:
import pandas as pd
import re

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [35]:
df = pd.read_csv(
    "../data/toxic_text_dataset_1000.csv",
    on_bad_lines="skip"
)

df.head()

Unnamed: 0,comment_text,is_toxic
0,very helpful support,0
1,this app is amazing,0
2,this app is amazing,0
3,really satisfied,0
4,you are stupid,1


In [36]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z\s]", "", text)
    return text

df["clean_text"] = df["comment_text"].apply(clean_text)
df[["comment_text", "clean_text"]].head()

Unnamed: 0,comment_text,clean_text
0,very helpful support,very helpful support
1,this app is amazing,this app is amazing
2,this app is amazing,this app is amazing
3,really satisfied,really satisfied
4,you are stupid,you are stupid


In [37]:
vectorizer = TfidfVectorizer(max_features=3000)
X = vectorizer.fit_transform(df["clean_text"])
y = df["is_toxic"]


In [38]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [39]:
model = LogisticRegression()
model.fit(X_train, y_train)


In [40]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
accuracy


0.9951219512195122

In [41]:
def predict(text):
    text = clean_text(text)
    vec = vectorizer.transform([text])
    result = model.predict(vec)
    return "toxic" if result[0] == 1 else "not toxic"

predict("you are useless")
predict("thank you for your help")


'not toxic'

In [42]:
def predict(text):
    text = clean_text(text)
    vec = vectorizer.transform([text])
    result = model.predict(vec)
    return "toxic" if result[0] == 1 else "not toxic"

predict("thank you")
predict("thank you for your help")


'not toxic'

In [43]:
len(df)

1025

In [44]:
df["is_toxic"].value_counts()


is_toxic
1    525
0    500
Name: count, dtype: int64

In [45]:
df[df["is_toxic"] == 0].head()


Unnamed: 0,comment_text,is_toxic,clean_text
0,very helpful support,0,very helpful support
1,this app is amazing,0,this app is amazing
2,this app is amazing,0,this app is amazing
3,really satisfied,0,really satisfied
5,great experience,0,great experience


In [46]:
def predict(text):
    text = clean_text(text)
    vec = vectorizer.transform([text])
    result = model.predict(vec)
    return "toxic" if result[0] == 1 else "not toxic"

predict("thank you for your help")


'not toxic'

In [None]:
def predict(text):
    text = clean_text(text)
    vec = vectorizer.transform([text])
    result = model.predict(vec)
    return "toxic" if result[0] == 1 else "not toxic"

predict("have a nice day")


'not toxic'

In [47]:
def predict(text):
    text = clean_text(text)
    vec = vectorizer.transform([text])
    result = model.predict(vec)
    return "toxic" if result[0] == 1 else "not toxic"

predict("thank you very much")
predict("you are stupid")


'toxic'

In [28]:
def predict(text):
    text = clean_text(text)
    vec = vectorizer.transform([text])
    result = model.predict(vec)
    return "toxic" if result[0] == 1 else "not toxic"

predict("thank you very much")

'not toxic'

In [None]:
def predict(text):
    text = clean_text(text)
    vec = vectorizer.transform([text])
    result = model.predict(vec)
    return "toxic" if result[0] == 1 else "not toxic"

predict("thank you very much")

'toxic'

In [15]:
def predict(text):
    text = clean_text(text)
    vec = vectorizer.transform([text])
    result = model.predict(vec)
    return "toxic" if result[0] == 1 else "not toxic"

predict("have a nice day")


'not toxic'

In [16]:
vectorizer.get_feature_names_out()


array(['amazing', 'an', 'and', 'angry', 'annoying', 'app', 'appreciate',
       'are', 'as', 'asshole', 'awesome', 'bad', 'broken', 'bullshit',
       'commenttext', 'day', 'design', 'dislike', 'done', 'dumb', 'ever',
       'everything', 'excellent', 'experience', 'feature', 'fuck',
       'fucked', 'fucking', 'garbage', 'go', 'good', 'great', 'happy',
       'hate', 'have', 'help', 'helpful', 'horrible', 'idiot', 'is',
       'job', 'joke', 'kill', 'love', 'makes', 'me', 'motherfucker',
       'much', 'nice', 'of', 'perfectly', 'piece', 'product', 'really',
       'regret', 'result', 'satisfied', 'serious', 'service', 'shit',
       'shitty', 'shut', 'stupid', 'sucks', 'support', 'team', 'terrible',
       'thank', 'the', 'this', 'total', 'trash', 'up', 'useful',
       'useless', 'using', 'ux', 'very', 'well', 'what', 'with', 'work',
       'works', 'worst', 'wrong', 'you', 'your', 'yourself'], dtype=object)

In [None]:
model.coef_


array([[ 0.18635283, -0.31126316,  0.21856216,  0.        ,  0.14200758,
         0.        ,  0.51571981,  0.18635283, -0.3006333 , -0.24546606,
        -0.25267273,  0.        ,  0.18635283,  0.22643678,  0.18731753,
        -0.19013719,  0.        , -0.2405956 ,  0.0322856 ,  0.21856216,
        -0.19013719, -0.43980357, -0.20003952,  0.30013246, -0.24546606,
         0.        , -0.25431002,  0.22643678,  0.21856216, -0.20003952,
        -0.25431002,  0.09959118, -0.25267273,  0.21856216, -0.10905108,
         0.        , -0.19013719, -0.28113161, -0.19013719,  0.        ,
         0.        , -0.24546606,  0.        , -0.2405956 , -0.2557968 ,
        -0.28113161,  0.        ,  0.22972358, -0.20003952,  0.26768288,
         0.        ,  0.22767353,  0.29200721,  0.        ,  0.25584443,
         0.        , -0.01134307,  0.18731753, -0.10937195, -0.19013719,
         0.22767353,  0.26768288, -0.30866071,  0.22767353,  0.22972358,
        -0.22674566,  0.        ,  0.20189357, -0.2

In [None]:
from sklearn.naive_bayes import MultinomialNB

model = MultinomialNB()
model.fit(X_train, y_train)


In [29]:
predict("thank you very much")
predict("have a nice day")
predict("you are useless")


'not toxic'

In [None]:
predict("have a nice day")

'not toxic'

In [None]:
predict("nice")

'not toxic'

In [None]:
predict("thank you")

'not toxic'

In [None]:
predict("hey")

'not toxic'

In [None]:
predict("you are useless")

'toxic'

In [30]:
predict("you are a baby")

'not toxic'

In [31]:
predict("have a nice day babe")

'not toxic'

In [18]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(X_test)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[104   0]
 [  0 102]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       104
           1       1.00      1.00      1.00       102

    accuracy                           1.00       206
   macro avg       1.00      1.00      1.00       206
weighted avg       1.00      1.00      1.00       206



In [51]:
import joblib

joblib.dump(model, "toxic_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")


['vectorizer.pkl']

In [None]:
model = joblib.load("toxic_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")


In [48]:
predict("fuck")

'toxic'

In [49]:
predict("fuck this garbage app")

'toxic'

In [50]:
predict("Fuck")

'toxic'