In [8]:
#Importing library

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

In [9]:
# Muat dataset
df = pd.read_csv('mental-health2.csv')

# Asumsi kolom 'text' berisi kalimat dan 'label' berisi label
texts = df['text'].values
labels = df['label'].values
df.head()

Unnamed: 0,text,label
0,This seroquel withdrawal sucks Problems sleepi...,1
1,Mental/emotional pain What do you do when you ...,1
2,Bipolar and different perspectives? Hi everyon...,1
3,2019 resolution: be S.T.A.B.L.E. Hey friends! ...,1
4,"Enjoy life with this one weird trick. Yes, you...",1


In [10]:
# Calculate class weights
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(labels), y=labels)
class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}

print("Class Weights: ", class_weights_dict)

Class Weights:  {0: 1.8856749311294765, 1: 5.014652014652015, 2: 0.44047619047619047}


In [11]:
def weighted_loss(weights):
    def loss(y_true, y_pred):
        y_true = tf.cast(y_true, tf.int32)
        sample_weights = tf.gather(weights, y_true)
        unweighted_losses = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
        weighted_losses = unweighted_losses * sample_weights
        return tf.reduce_mean(weighted_losses)
    return loss

In [12]:
# Preprocessing
vocab_size = 1000
embedding_dim = 16
max_length = 20
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)


In [13]:
# Membagi data menjadi training dan testing
train_sequences, test_sequences, train_labels, test_labels = train_test_split(padded, labels, test_size=0.2, random_state=42)

train_labels = train_labels.astype(int)
test_labels = test_labels.astype(int)



In [14]:
# Membuat model
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    GlobalAveragePooling1D(),
    Dense(24, activation='relu'),
    Dense(3, activation='softmax')  # 3 kelas untuk depresi, anxiety, dan bipolar
])
weights = tf.constant(class_weights, dtype=tf.float32)
model.compile(loss=weighted_loss(weights), optimizer='adam', metrics=['accuracy'])







In [15]:
# Melatih model
num_epochs = 10  
history = model.fit(train_sequences, train_labels, epochs=num_epochs, validation_data=(test_sequences, test_labels))



Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
# Evaluasi model
loss, accuracy = model.evaluate(test_sequences, test_labels)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')

Loss: 0.4295767545700073
Accuracy: 0.8476990461349487


In [52]:
# Prediksi menggunakan model
new_texts = '''diee
'''
new_sequences = tokenizer.texts_to_sequences(new_texts)
new_padded = pad_sequences(new_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)


In [53]:
predictions = model.predict(new_padded)
print(predictions)  #

[[0.43852186 0.0238173  0.5376608 ]
 [0.45506802 0.01705538 0.5278766 ]
 [0.43852186 0.0238173  0.5376608 ]
 [0.43852186 0.0238173  0.5376608 ]
 [0.47072288 0.01789435 0.51138276]]


In [47]:
# Konversi hasil prediksi ke persentase
predictions_percent = predictions * 100
for i, prediction in enumerate(predictions_percent[0]):
    print(f"Class {i}: {prediction:.2f}%")

# Interpretasi hasil prediksi
predicted_class = predictions.argmax(axis=1)
print(f'Predicted class: {predicted_class[0]}')

Class 0: 45.51%
Class 1: 1.71%
Class 2: 52.79%
Predicted class: 2


Data is imbalance. Bias so high