# Import Dependencies

In [None]:
# Download helper_functions.py from the tensorflow-deep-learning repository into
# the working directory, then import the utilities this notebook references.
# Of the imported names, only `unzip_data` is called in the cells shown here.
!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py
from helper_functions import unzip_data, create_tensorboard_callback, plot_loss_curves,compare_historys

--2023-11-27 07:16:24--  https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10246 (10K) [text/plain]
Saving to: ‘helper_functions.py’


2023-11-27 07:16:24 (57.6 MB/s) - ‘helper_functions.py’ saved [10246/10246]



In [None]:
import nltk
# Fetch the NLTK stopwords corpus; preprocess_text() reads the English list from it.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import string
import nltk

# Download the NLTK stopwords corpus (reported as already up-to-date when it is present)
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
from google.colab import files

# Upload kaggle.json through Colab's upload prompt; the file is saved into the
# working directory, where the Kaggle setup cell expects to find it.
uploaded = files.upload()

Saving kaggle.json to kaggle.json


In [None]:
# Install the Kaggle CLI and place the uploaded kaggle.json under ~/.kaggle/.
# NOTE: re-running this cell without uploading kaggle.json again makes `mv` fail,
# because the file has already been moved out of the working directory.
!pip install kaggle
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
# Mode 600: readable and writable by the owner only.
!chmod 600 ~/.kaggle/kaggle.json



In [None]:
def _load_text_resources():
    """Return the cached ``(stop_words, stemmer)`` pair used by ``preprocess_text``.

    The English stop-word set and the Porter stemmer are built on the first
    call and stored on the function object, so applying ``preprocess_text``
    row by row does not re-read the NLTK corpus for every document.
    """
    cached = getattr(_load_text_resources, '_cached', None)
    if cached is None:
        cached = (frozenset(stopwords.words('english')), PorterStemmer())
        _load_text_resources._cached = cached
    return cached


def preprocess_text(text):
    """Normalise a raw document into a space-joined string of stemmed tokens.

    Processing steps, in order: lower-case the text, delete every character in
    ``string.punctuation``, split on whitespace, drop English stop words and
    stem the remaining tokens with the Porter stemmer.

    Args:
        text: The document to clean. ``None`` and float NaN (a missing cell in
            a pandas column) are treated as an empty document; any other
            non-string value is converted with ``str`` first.

    Returns:
        str: The processed tokens joined by single spaces, or ``''`` when
        nothing is left after cleaning.
    """
    if not isinstance(text, str):
        # `text != text` is only true for NaN, so this catches missing values
        # without needing anything beyond the standard library.
        is_missing = text is None or (isinstance(text, float) and text != text)
        text = '' if is_missing else str(text)

    # Text normalisation
    text = text.lower()

    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Tokenization
    tokens = text.split()
    if not tokens:
        # Nothing to filter or stem; skip loading the NLTK resources.
        return ''

    # Stop-word removal and stemming, using resources that are built only once
    stop_words, stemmer = _load_text_resources()
    stemmed_tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]

    # Join the tokens back into a single string
    return ' '.join(stemmed_tokens)

# Get Data

In [None]:
# Download the dataset archive with the Kaggle CLI; it is saved into the current working directory.
!kaggle datasets download -d infamouscoder/depression-reddit-cleaned

Downloading depression-reddit-cleaned.zip to /content
  0% 0.00/979k [00:00<?, ?B/s]
100% 979k/979k [00:00<00:00, 86.3MB/s]


In [None]:
# Extract the downloaded archive with the imported helper.
# NOTE: /content is the directory the download cell reported saving to (Colab's
# working directory); adjust the path when running elsewhere.
# Presumably the files are extracted into the working directory, since the next
# cell opens the CSV by its bare file name - TODO confirm against helper_functions.py.
unzip_data('/content/depression-reddit-cleaned.zip')

# Data Exploration

In [None]:
# Load the extracted CSV into a DataFrame and preview the first rows.
df = pd.read_csv('depression_dataset_reddit_cleaned.csv')
df.head()

Unnamed: 0,clean_text,is_depression
0,we understand that most people who reply immed...,1
1,welcome to r depression s check in post a plac...,1
2,anyone else instead of sleeping more when depr...,1
3,i ve kind of stuffed around a lot in my life d...,1
4,sleep is my greatest and most comforting escap...,1


In [None]:
# Rename the columns to the shorter names ('text', 'target') used by the rest of the notebook.
df = df.rename(columns = {"clean_text":"text", "is_depression" :"target" })
df.head()

Unnamed: 0,text,target
0,we understand that most people who reply immed...,1
1,welcome to r depression s check in post a plac...,1
2,anyone else instead of sleeping more when depr...,1
3,i ve kind of stuffed around a lot in my life d...,1
4,sleep is my greatest and most comforting escap...,1


In [None]:
# Print five consecutive rows, starting at a randomly chosen position, together
# with a human-readable label for each target value.
import random

random_index = random.randint(0, len(df)-5)
sample_rows = df[['text', 'target']].iloc[random_index:random_index + 5]
for text, target in sample_rows.itertuples(index=False):
  label = '(depression)' if target > 0 else '(not depression)'
  print(f'Target: {target}', label)
  print(f'Text:\n{text}\n')
  print('---\n')

Target: 1 (depression)
Text:
my adult child with depression is very challenging to deal with i realize that she ha depression but she refuse to get a job amp expects u to pay for her living expense in an apartment when we bring it up she cut u off i feel bad because i know doing new thing can be difficult especially for someone with depression and anxiety but i can not afford to pay for my daughter s apartment and my apartment she is finished with school in august with a college degree i offered for her to move into my apartment but she won t now she isn t talking to me because of asking her to get a job but i m still paying for everything because i dont know what else to do

---

Target: 1 (depression)
Text:
i fell pain on my left chest and on my left back when i looking for an answer on google i feel it s a symptom of heart disease my mother also ha this and a far a i know heart disease can be inherited i didn t dare to go to the doctor because in our family there is a saying that sa

# Data Preprocessing

In [None]:
# Run preprocess_text on every entry of the 'text' column.
# NOTE: the raw text is overwritten in place, so re-running this cell would
# preprocess already-processed text; reload df before running it again.
df['text'] = df['text'].apply(preprocess_text)

In [None]:
# Features are the preprocessed text; labels are the 'target' column.
X = df['text']
y = df['target']

# Split the data into training and test sets (20% held out for testing);
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Convert the text to TF-IDF features (vocabulary capped at 5000 features,
# scikit-learn's built-in English stop-word list applied).
# The vectorizer is fitted on the training split only and then reused to
# transform the test split, so the test data does not influence the vocabulary.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Create Model

## KNN

In [None]:
# Initialise the KNN model (3 nearest neighbours)
knn_model = KNeighborsClassifier(n_neighbors=3)

# Train the model on the TF-IDF features of the training split
knn_model.fit(X_train_tfidf, y_train)

In [None]:
# Predict on the test data.
# NOTE: y_pred and accuracy are reassigned by the SVM and Naive Bayes evaluation cells.
y_pred = knn_model.predict(X_test_tfidf)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Classification report
print('\nClassification Report:\n', classification_report(y_test, y_pred))

Accuracy: 0.53

Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.99      0.68       783
           1       0.89      0.06      0.12       764

    accuracy                           0.53      1547
   macro avg       0.71      0.53      0.40      1547
weighted avg       0.70      0.53      0.40      1547



## SVM (the model used)

In [None]:
from sklearn.svm import SVC

# Initialise the SVM model with a linear kernel
svm_model = SVC(kernel='linear')

# Train the model on the TF-IDF features of the training split
svm_model.fit(X_train_tfidf, y_train)

In [None]:
# Predict on the test data (overwrites y_pred and accuracy from the KNN evaluation)
y_pred = svm_model.predict(X_test_tfidf)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Classification report
print('\nClassification Report:\n', classification_report(y_test, y_pred))

Accuracy: 0.95

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.97      0.95       783
           1       0.97      0.94      0.95       764

    accuracy                           0.95      1547
   macro avg       0.95      0.95      0.95      1547
weighted avg       0.95      0.95      0.95      1547



In [None]:
import joblib

# Persist the trained SVM classifier to 'svm_model.joblib' in the working directory
joblib.dump(svm_model, 'svm_model.joblib')

['svm_model.joblib']

In [None]:
# Save the fitted TF-IDF vectorizer so new text can be transformed with the
# same vocabulary the saved model was trained on
joblib.dump(vectorizer, 'tfidf_vectorizer.joblib')

['tfidf_vectorizer.joblib']

## Naive Bayes

In [None]:
from sklearn.naive_bayes import MultinomialNB

# Initialise the Naive Bayes model
nb_model = MultinomialNB()

# Train the model on the TF-IDF features of the training split
nb_model.fit(X_train_tfidf, y_train)

In [None]:
# Predict on the test data (overwrites y_pred and accuracy from the SVM evaluation)
y_pred = nb_model.predict(X_test_tfidf)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Classification report
print('\nClassification Report:\n', classification_report(y_test, y_pred))

Accuracy: 0.89

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.80      0.88       783
           1       0.83      0.97      0.89       764

    accuracy                           0.89      1547
   macro avg       0.90      0.89      0.89      1547
weighted avg       0.90      0.89      0.89      1547



# Prediction

In [None]:
# Custom text(s) to classify
custom_text = ["My life is hard, i want to die"]

# Clean each input with the same preprocessing that was applied to the dataset
preprocessed_custom_text = list(map(preprocess_text, custom_text))

# Vectorise with the fitted TF-IDF model, then classify with the SVM
custom_text_tfidf = vectorizer.transform(preprocessed_custom_text)
prediction = svm_model.predict(custom_text_tfidf)

# Show every input next to its predicted label
for text, pred in zip(custom_text, prediction):
    print(f"Text: {text}")
    print("Prediction: Depression" if pred == 1 else "Prediction: Not Depression")

Text: My life is hard, i want to die
Prediction: Depression
