In [None]:
import pandas as pd
import numpy as np
import fasttext
import re
import nltk
from nltk.tokenize.toktok import ToktokTokenizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer 
import unicodedata
from emoji import demojize

In [None]:
# Fetch every NLTK corpus the cleaning pipeline relies on in one place, so the
# notebook works top-to-bottom on a fresh kernel / fresh machine.
nltk.download('stopwords')
nltk.download('wordnet')  # data used by WordNetLemmatizer.lemmatize

# English stopwords held in a set so the membership test in text_cleaning is O(1).
stop_words = set(stopwords.words('english'))

# Shared lemmatizer instance, reused by every text_cleaning call.
lemmatizer = WordNetLemmatizer()

In [None]:
def text_cleaning(text_data):
  """Normalize raw text into a space-separated string of cleaned tokens.

  Steps, in order: emoji -> text names, accent/non-ASCII stripping,
  lower-casing, collapsing repeated punctuation, replacing everything except
  letters, '?' and '!' with a space, tokenization, English stopword removal,
  and verb lemmatization.

  Args:
    text_data: The raw input string.

  Returns:
    The cleaned tokens joined by single spaces. This is an empty string when
    nothing survives cleaning (e.g. stopword-only or symbol-only input).
  """

  # Demojize FIRST: the ASCII encode below discards every non-ASCII code point,
  # so running it before demojize would delete the emojis and leave nothing
  # to convert.
  # NOTE(review): if the classifier was trained on text cleaned with the old
  # ordering (emojis silently dropped), emoji names are new tokens at
  # inference time -- confirm the training pipeline matches.
  text_data = demojize(text_data)

  # Remove accented characters (and any remaining non-ASCII symbols)
  text_data = unicodedata.normalize('NFKD', text_data).encode('ascii', 'ignore').decode('utf-8', 'ignore')

  # Case conversion
  text_data = text_data.lower()

  # Reducing repeated punctuations ("!!!" -> "!")
  pattern_punct = re.compile(r'([.,/#!$%^&*?;:{}=_`~()+-])\1+')
  text_data = pattern_punct.sub(r'\1', text_data)

  # Collapse runs of spaces (matching only 2+ avoids replacing a single space
  # with itself)
  text_data = re.sub(' {2,}', ' ', text_data)

  # Remove special characters: keep only letters plus '?' and '!'
  text_data = re.sub(r"[^a-zA-Z?!]+", ' ', text_data)

  # Tokenization
  tokenizer = ToktokTokenizer()
  tokens = tokenizer.tokenize(text_data)

  # Removing stopwords
  tokens = [item for item in tokens if item not in stop_words]

  # Lemmatization (every token is treated as a verb)
  tokens = [lemmatizer.lemmatize(word=w, pos='v') for w in tokens]

  # Convert list of tokens back to a single string
  return ' '.join(tokens)

In [None]:
# Load the fastText profanity model from a .bin file in the working directory.
model = fasttext.load_model(
    'profanity_model_eng.bin'
)

In [None]:
# Fetch the WordNet corpus, the data behind NLTK's WordNetLemmatizer.
nltk.download(
    'wordnet'
)

In [None]:
# Interactive demo: read text, clean it, and print the model's score for each label.
# Type "stop" (any casing, surrounding whitespace ignored) to end the session.
while True:
    user_input = input("Enter something: ")

    # Check the RAW entry for the sentinel before doing any work. Comparing the
    # cleaned text instead would (a) classify the word "stop" itself before
    # exiting and (b) end the session on inputs such as "stopped" or
    # "the stop", which lemmatize / stopword-filter down to "stop".
    if user_input.strip().lower() == "stop":
        break

    print("\nYou entered:", user_input)

    # Keep raw and cleaned text in separate names so neither overwrites the other.
    cleaned_input = text_cleaning(user_input)

    # Stopword-only or symbol-only input cleans down to an empty string;
    # there is nothing meaningful to score in that case.
    if not cleaned_input:
        print("Nothing left to classify after cleaning.")
        continue

    # k=2 requests the top two labels with their probabilities.
    labels, probabilities = model.predict(cleaned_input, k=2)

    for label, probability in zip(labels, probabilities):
        # label[9:] drops the first 9 characters -- presumably fastText's
        # "__label__" prefix; a remaining "1" is treated as the profane class.
        verdict = "Profane" if label[9:] == "1" else "Clean"
        print(f'{verdict}: {round(probability*100, 1)}%')