#  **Can mental health professionals trust AI-based systems to prevent suicide? Effects of educational intervention and explanations on Trust**

**Developer:** Adonias Caetano de Oliveira

**Version:** Interface Without XAI

## **Library installation and environment configuration**

In [None]:
# Mount Google Drive into the Colab runtime so files stored there can be read.
from google.colab import drive

# Mount point; MODEL_PATH further down is built from this prefix.
PATH = '/content/drive'
drive.mount(PATH)

Mounted at /content/drive


In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.40.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.112.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.2.0 (from gradio)
  Downloading gradio_client-1.2.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradi

In [None]:
!pip install Unidecode

Collecting Unidecode
  Downloading Unidecode-1.3.8-py3-none-any.whl.metadata (13 kB)
Downloading Unidecode-1.3.8-py3-none-any.whl (235 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Unidecode
Successfully installed Unidecode-1.3.8


In [None]:
!pip install transformers



## **Importing libraries**

In [None]:
# Auxiliaries
import pandas as pd
import random
import time
import datetime
import numpy as np
import io
from scipy.special import expit
import re

# Deep learning and BERT
import torch
from torch.utils.data import TensorDataset
from transformers import BertTokenizer
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification
from tqdm.notebook import tqdm

#NLP
from unidecode import unidecode
from string import punctuation

# Interface with Gradio
import gradio as gr

## **Reading the data set**

In [None]:
# Placeholder: replace with the Google Drive sharing link of the dataset CSV.
# The file id is taken as the second-to-last '/'-separated segment of the link,
# so the placeholder as written (no '/') raises IndexError until it is replaced.
url = '<link google drive of dataset CSV'
file_id = url.split('/')[-2]
# URL handed to pd.read_csv, built from the extracted file id.
read_url='https://drive.google.com/uc?id=' + file_id

# read the data
dataset = pd.read_csv(read_url)

# display the first 5 rows
dataset.head()

Unnamed: 0,text,target
0,Aquela vontade de acabar com a minha vida voltou,1
1,to triste e com vontade de acabar com a minha ...,1
2,Corinthians ta querendo acabar com minha vida ...,0
3,Alguém poderia por favor me dar um tiro a acab...,1
4,TAYLOR TU VAI acabar com a minha vida MULHER,0


In [None]:
# (rows, columns) of the loaded dataset
dataset.shape

(3788, 2)

## **Text pre-processing**

In [None]:
def clean(sentences):
  """Normalize a collection of raw texts.

  Each text goes through these steps, in this order: lowercase, remove
  @mentions, transliterate to ASCII with unidecode, remove HTML-like tags,
  remove digits, remove URLs, remove ASCII punctuation.

  Args:
    sentences: iterable of str.

  Returns:
    list of str with one cleaned text per input, in the same order.
  """
  # Raw strings: '\s' and '\.' inside a normal literal are invalid escape
  # sequences (a SyntaxWarning on recent Python versions). The patterns are
  # the same as before and are compiled once instead of once per text.
  mention_re = re.compile(r'@[^\s]+')
  tag_re = re.compile(r'<[^<]+?>')
  url_re = re.compile(r'((www\.[^\s]+)|(https?://[^\s]+)|(http?://[^\s]+))')

  new_texts = []

  for text in sentences:
    text = text.lower()
    text = mention_re.sub('', text)
    text = unidecode(text)
    text = tag_re.sub('', text)
    text = ''.join(c for c in text if not c.isdigit())
    # URLs must go before punctuation, otherwise '://' and '.' are already gone.
    text = url_re.sub('', text)
    text = ''.join(c for c in text if c not in punctuation)
    new_texts.append(text)

  return new_texts

In [None]:
# Overwrite the text column with the output of clean() for every row.
dataset['text'] = clean(dataset['text'].values)

In [None]:
def get_examples_sent():
  """Draw six random example texts from the dataset.

  Returns:
    list of str: three texts sampled from rows with target == 0, followed by
    three texts sampled from rows with target == 1.
  """
  examples = []
  for target_value in (0, 1):
    rows = dataset.loc[dataset['target'] == target_value]
    examples.extend(rows.sample(n = 3)['text'].values)
  return examples

## **Classification with BERT**

In [None]:
# Pretrained checkpoint name; used for the tokenizer here and for the model
# architecture/weights in get_bert_model.
PRETRAINED_LM = 'neuralmind/bert-large-portuguese-cased'
# NOTE(review): do_lower_case=True is requested for a checkpoint whose name
# says "cased" - confirm this matches how the model was fine-tuned.
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_LM, do_lower_case=True)
# Bare expression so the notebook displays the tokenizer configuration.
tokenizer

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/210k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/648 [00:00<?, ?B/s]

BertTokenizer(name_or_path='neuralmind/bert-large-portuguese-cased', vocab_size=29794, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [None]:
def encode(docs):
    """Tokenize a list of texts with the module-level tokenizer.

    Every text is padded or truncated to 128 tokens and special tokens are
    added.

    Args:
        docs: list of str to tokenize.

    Returns:
        Tuple (input_ids, attention_masks) of PyTorch tensors, as produced by
        the tokenizer with return_tensors='pt'.
    """
    tokenizer_options = dict(
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        return_attention_mask=True,
        truncation=True,
        return_tensors='pt',
    )
    batch = tokenizer.batch_encode_plus(docs, **tokenizer_options)
    return batch['input_ids'], batch['attention_mask']

In [None]:
def createDataloader(text):
  """Wrap a single text in a DataLoader.

  Args:
    text: the text to encode.

  Returns:
    DataLoader over a one-item TensorDataset holding the input ids, the
    attention mask and a placeholder label of 0, read sequentially.
  """
  BATCH_SIZE = 16
  input_ids, att_masks = encode([text])
  # The dataset needs a label tensor of matching length; 0 is only a filler.
  placeholder_label = torch.LongTensor([0])
  single_text_dataset = TensorDataset(input_ids, att_masks, placeholder_label)
  return DataLoader(
      single_text_dataset,
      sampler=SequentialSampler(single_text_dataset),
      batch_size=BATCH_SIZE,
  )

In [None]:
# Class names indexed by predicted class id: 0 -> 'negativo', 1 -> 'positivo'.
labels_names = ['negativo', 'positivo']

In [None]:
# Inference runs on CPU; predict moves each batch to this device.
device = torch.device("cpu")

In [None]:
# Weights file loaded by get_bert_model; it lives under the Drive mount point PATH.
MODEL_PATH = PATH + '/My Drive/Colab Notebooks/Interface XAI - BERT - LIME/Models/model_95.bin'
def get_bert_model():
    """Build the 2-label BERT classifier and load the weights stored at MODEL_PATH.

    Returns:
        BertForSequenceClassification created from PRETRAINED_LM with the
        state dict from MODEL_PATH applied on top.

    Raises:
        RuntimeError: if the checkpoint has no weights for the classification
            head, which would otherwise stay randomly initialized.
    """
    import warnings  # function-scope: not part of the notebook's import cell

    N_labels = 2
    model = BertForSequenceClassification.from_pretrained(PRETRAINED_LM,
                                                      num_labels=N_labels,
                                                      output_attentions=False,
                                                      output_hidden_states=False)

    # NOTE(review): torch.load unpickles the file; only load checkpoints from a
    # trusted source.
    state_dict = torch.load(MODEL_PATH, map_location=torch.device('cpu'))

    # strict=False is kept so that harmless key differences do not abort the
    # load, but the reported mismatches are no longer thrown away.
    load_result = model.load_state_dict(state_dict, strict=False)

    missing_head = [key for key in load_result.missing_keys if key.startswith('classifier.')]
    if missing_head:
        raise RuntimeError(
            f"Checkpoint {MODEL_PATH!r} has no weights for {missing_head}; "
            "the classification head would be randomly initialized.")

    if load_result.missing_keys or load_result.unexpected_keys:
        warnings.warn(
            "Checkpoint keys do not fully match the model: "
            f"missing={list(load_result.missing_keys)}, "
            f"unexpected={list(load_result.unexpected_keys)}")

    return model

# Loaded once here; predict uses this module-level model.
model = get_bert_model()

pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-large-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## **Classificação de Texto**

In [None]:
def predict(text):
  """Classify one text with the loaded BERT model.

  Args:
    text: the text to classify. It is tokenized as given; clean() is not
      applied here.
      NOTE(review): confirm this matches the preprocessing used when the
      model was fine-tuned.

  Returns:
    Tuple (index_pred, label, df):
      index_pred: int index of the most probable class.
      label: the matching entry of labels_names.
      df: DataFrame with columns 'Label' and 'Probabilidade', one row per
        class, in the order of labels_names.
  """
  model.eval()
  test_dataloader = createDataloader(text)

  with torch.no_grad():
    # createDataloader wraps a single text, so this loop sees one batch.
    # The third tensor is a placeholder label and is not fed to the model.
    for input_ids, att_mask, _placeholder_labels in tqdm(test_dataloader):
      output = model(input_ids=input_ids.to(device), attention_mask=att_mask.to(device))

      # Softmax over the two class logits gives probabilities that sum to 1.
      # An element-wise sigmoid scores each logit independently and does not.
      probabilities = torch.softmax(output.logits, dim=-1)[0].cpu().numpy()

  index_pred = int(np.argmax(probabilities))

  df = pd.DataFrame({'Label': labels_names, 'Probabilidade': probabilities})

  return index_pred, labels_names[index_pred], df

**Verifica se o texto é 100% negativo**

In [None]:
# Words that may indicate suicidal thoughts. A text containing none of them is
# labelled negative without running the model (see is_negative).
# Accented words also appear unaccented ("não"/"nao", "suicídio"/"suicidio"),
# because users often type without accents.
suicidal_word_list = ["suicida",  "suicídio", "suicidio", "sucidio", "matar", "bilhete", "carta",
             "despedida", "adeus", "acabar", "vida", "nunca", "acordar",
             "acorda", "não", "nao", "consigo", "continuar", "vale", "pena",
             "viver", "pronto", "para", "pular", "dormir", "sempre", "quero",
             "morrer", "morto", "melhor", "sem", "mim", "plano", "pacto",
             "cansado", "sozinho", "dormi", "sonho", "feliz", "só", "melhores",
             "ajuda", "valor", "entende", "bom", "escuridão", "ama", "amam", "confio", "passa", "passar", "vou", "outra"]

In [None]:
def words_in_string(word_list, a_string):
    """Return the entries of word_list that occur as whole words in a_string.

    Matching ignores letter case, accents and punctuation, so 'Quero morrer.'
    matches the entries 'quero' and 'morrer'. Exact token matches are kept as
    well, so every match found by a plain split-and-intersect is still found.

    Args:
        word_list: iterable of str to look for.
        a_string: the text to search.

    Returns:
        set of the word_list entries found in a_string (empty if none).
    """
    import unicodedata  # function-scope: not part of the notebook's import cell

    def _fold(text):
        # Lowercase and drop combining marks: 'NÃO', 'não' and 'nao' compare equal.
        decomposed = unicodedata.normalize('NFKD', text.lower())
        return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))

    # Punctuation becomes whitespace so 'morrer.' or 'vida,adeus' yield bare words.
    punctuation_to_space = str.maketrans(punctuation, ' ' * len(punctuation))

    tokens = set(a_string.split())
    tokens.update(_fold(a_string).translate(punctuation_to_space).split())

    return {word for word in word_list if word in tokens or _fold(word) in tokens}

def is_negative(text):
  """Tell whether a text can be labelled negative without running the model.

  Returns:
    True when text is empty/None, contains only whitespace, or contains none
    of the words in suicidal_word_list; False otherwise.
  """
  if not text:
    return True
  if not text.strip():
    return True
  return not words_in_string(suicidal_word_list, text)

**Classifica um texto**

In [None]:
def classify(text):
  """Classify a text and build the message shown next to the result.

  Texts for which is_negative is true are labelled negative directly; all
  other texts go through predict.

  Returns:
    Tuple (result, info): result maps "Negativo"/"Positivo" to a score, and
    info is an HTML snippet describing the outcome.
  """
  if is_negative(text):
    # Shortcut: no keyword found, so the model is not consulted.
    info = (
        "<h4>O sistema verificou que o texto digitado não apresenta termos que podem indicar pensamento suicida!<br/>"
        "Portanto, o sistema considerou o texto como 100% Negativo para ideação suicida.<br/>"
        "Não foi aplicado inteligência artificial neste caso.</h4>"
    )
    return {"Negativo": 1, "Positivo" : 0}, info

  # Model-based classification
  index_pred, _label, df = predict(text)
  scores = df['Probabilidade']

  result = {}
  for row_index, score in scores.to_dict().items():
    class_name = "Negativo" if row_index == 0 else "Positivo"
    result[class_name] = score

  top_score = scores.max()
  info = f"<h4>O sistema classificou o texto como {labels_names[index_pred]} para ideação suicida com probabilidade = {top_score*100:.2f}%.</h4>"

  return result, info

### **Interface Without XAI**

In [None]:
# Gradio page: a textbox with sample sentences, a button that runs classify,
# and two outputs (class scores and an explanatory message).
with gr.Blocks() as demo_no_xai:
  gr.Markdown(
    """
    # Interface de reconhecimento de ideação suicida sem explicação dos resultados
    Comece a digitar abaixo para ver o resultado.
    """)
  # Named input_text/output_label so the builtin input() is not shadowed.
  input_text = gr.Textbox(label="Entrada:", placeholder="Digite ou selecione uma frase positiva ou negativa para ideação suicida...")

  # Clickable sample sentences drawn from the dataset.
  gr.Examples(
        examples = get_examples_sent(),
        inputs = input_text
  )


  # Classification button
  classificar_btn = gr.Button("Classificar")
  output_label = gr.Label(label="Predicted Suicidal Ideation")
  info_classification = gr.Markdown("Acione o botão classificar")
  classificar_btn.click(fn=classify, inputs=input_text, outputs=[output_label, info_classification])

demo_no_xai.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://d9a4f27b66b83dc6bd.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


