In [1]:
import pandas as pd
import numpy as np
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, precision_score, recall_score

import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding, Dropout, GlobalMaxPooling1D, Conv1D, BatchNormalization, MaxPooling1D, Flatten, Input
from keras.optimizers import SGD
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.regularizers import l2

import prettytable

import matplotlib.pyplot as plt
%matplotlib inline

from word2vec import *

from data_preprocessing import preprocess_text

2023-08-31 20:59:35.337154: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load the best model and make predictions

In [2]:
def predict_classes(model, text_to_predict):
    probabilities = model.predict(text_to_predict)
    # This will yield a 2D array containing one probability - the probability of the text belonging to class 1
    return {'suicide': probabilities[0][0], 'non-suicide': 1 - probabilities[0][0]}

In [7]:
# Load the tokenizer and the best model
with open('Data/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

best_model = tf.keras.models.load_model('Models/CNN_best_model.h5')

texts_to_predict = [
    'I love you',
    'I want to kill myself',
    "I'm crying tears of joy!",
    "I don't want to live anymore",
    "I hate ice cream"
]

for txt in texts_to_predict:
    # Preprocess the text
    text_to_predict = preprocess_text(txt)

    # Tokenize the text
    text_to_predict = tokenizer.texts_to_sequences([txt])

    # Pad the sequences
    text_to_predict = pad_sequences(text_to_predict, maxlen=100, padding='post')

    # Predict the class of the text. print the probability of the text belonging to each class
    prediction = predict_classes(best_model, text_to_predict)
    print(f'{txt}: {prediction}')

I love you: {'suicide': 0.18202238, 'non-suicide': 0.8179776221513748}
I want to kill myself: {'suicide': 0.8856266, 'non-suicide': 0.11437338590621948}
I'm crying tears of joy!: {'suicide': 0.3011573, 'non-suicide': 0.6988427042961121}
I don't want to live anymore: {'suicide': 0.949522, 'non-suicide': 0.05047798156738281}
I hate ice cream: {'suicide': 0.00279928, 'non-suicide': 0.997200720012188}
