<a href="https://colab.research.google.com/github/elliemci/chatbots/blob/main/toxicity_classification_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

NLP toxicity classification integrated into a Telegram chatbot that classifies any user message as toxic or not.

In [None]:
# install Python transformers development version
!pip install transformers[sentencepiece] pyTelegramBotAPI

Collecting pyTelegramBotAPI
  Downloading pyTelegramBotAPI-4.14.0.tar.gz (243 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.1/243.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentencepiece!=0.1.92,>=0.1.91 (from transformers[sentencepiece])
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pyTelegramBotAPI
  Building wheel for pyTelegramBotAPI (setup.py) ... [?25l[?25hdone
  Created wheel for pyTelegramBotAPI: filename=pyTelegramBotAPI-4.14.0-py3-none-any.whl size=215250 sha256=3d4e78eda6fbb7789423bb1138a6c12151818ad32a887366e7f41ab04ae89a45
  Stored in directory: /root/.cache/pip/wheels/25/51/2d/24b40a366c85c37928d5aa36ddf257e5a79fad25e1ecd11b2c
Successfully built pyTelegramBot

In [6]:
import telebot

from transformers import TextClassificationPipeline, AutoTokenizer, AutoModelForSequenceClassification

In [7]:
import os
from getpass import getpass

# Register a bot first: text /newbot to BotFather in Telegram and choose the
# bot's name and username. Then enter its token at the prompt below.
TOKEN = getpass('Enter your bot token: ')
# mirror the token into the process environment under TELEGRAM_BOT_TOKEN
os.environ['TELEGRAM_BOT_TOKEN'] = TOKEN

# build the TeleBot instance from the token
bot = telebot.TeleBot(TOKEN)

Enter your bot token: ··········


In [8]:
# Toxic comment classification with the toxic-bert model from Hugging Face:
# load the model and its tokenizer, then wrap both in a classification pipeline.
TOXICITY_MODEL = "unitary/toxic-bert"
toxicity_model = AutoModelForSequenceClassification.from_pretrained(TOXICITY_MODEL)
toxicity_tokenizer = AutoTokenizer.from_pretrained(TOXICITY_MODEL)
classifier = TextClassificationPipeline(model=toxicity_model, tokenizer=toxicity_tokenizer)

# conversation state per chat, keyed by chat id
conversation_state = dict()

# Score thresholds for the reply: above TOXIC_THRESHOLD the message is reported
# as toxic, below NON_TOXIC_THRESHOLD as non-toxic, anything in between as unsure.
TOXIC_THRESHOLD = 0.5
NON_TOXIC_THRESHOLD = 0.15


def _toxicity_reply(score):
    """Return the reply text that corresponds to a classification score."""
    if score > TOXIC_THRESHOLD:
        return "I consider this a toxic message"
    if score < NON_TOXIC_THRESHOLD:
        return "Message doesn't look toxic"
    return "I'm sorry, I am not sure of the message toxicity content."


# function that handles all incoming messages, filter always returns True
@bot.message_handler(func=lambda message: True)
def handle_message(message):
    """Greet a new chat, then classify every later message and reply with a verdict.

    The first message seen from a chat only triggers the greeting and moves the
    chat to the "waiting_for_input" state; it is not classified. Each later
    message is passed to ``classifier`` and the score of the first result
    decides which reply is sent.

    Args:
        message: the incoming Telegram message; ``message.chat.id`` and
            ``message.text`` are read.
    """
    chat_id = message.chat.id

    # register chats seen for the first time (the dict is only mutated, never
    # rebound, so no `global` declaration is needed)
    if chat_id not in conversation_state:
        conversation_state[chat_id] = "start"
        print(f"conversation state {conversation_state}")
    state = conversation_state[chat_id]

    if state == "start":
        bot.send_message(chat_id, "Hi, how can I help you?")
        conversation_state[chat_id] = "waiting_for_input"
    elif state == "waiting_for_input":
        # truncate over-long messages to the model's maximum input length so a
        # long message does not make the classifier raise
        classification = classifier(message.text, truncation=True)[0]
        label = classification["label"]
        score = classification["score"]
        print(f"classified as {label} with score {score}")
        # the chat stays in "waiting_for_input", so every further message is classified
        bot.send_message(chat_id, _toxicity_reply(score))

# start the bot: poll Telegram for updates so the registered handler receives messages
# NOTE(review): this call presumably blocks the cell until polling stops — confirm
bot.polling()

conversation state {6432666165: 'start'}
classified as toxic with score 0.9265441298484802
classified as toxic with score 0.0009226974216289818
classified as toxic with score 0.0007705428288318217
classified as toxic with score 0.0005617672577500343
