__Decision Tree Model__

In [2]:
import pandas as pd
import numpy as np
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read and clean the dataset
df = pd.read_excel('C://Users//amins//Desktop//Cleaned Dataset.xlsx')


# Extract emotion features using VADER sentiment analysis
sia = SentimentIntensityAnalyzer()
df['sentiment_scores'] = df['Text'].apply(lambda x: sia.polarity_scores(' '.join(x)))
df['compound'] = df['sentiment_scores'].apply(lambda x: x['compound'])

# Extract statistical features
df['text_length'] = df['Text'].apply(len)
df['average_word_length'] = df['Text'].apply(lambda x: np.mean([len(word) for word in x]) if x else 0)

# Combine emotion and statistical features into a single DataFrame
feature_df = df[['compound', 'text_length', 'average_word_length']]

# Standardize features using StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(feature_df)

# Get the target labels
# Assuming your target label is in a column named 'Class'
y = df['Class'].values

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an Decision Tree classifier using Scikit-learn
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy: {:.2f}%".format(accuracy * 100))
print(classification_report(y_test, y_pred))

Test Accuracy: 74.05%
              precision    recall  f1-score   support

 non-suicide       0.73      0.76      0.75     23287
     suicide       0.75      0.72      0.74     23128

    accuracy                           0.74     46415
   macro avg       0.74      0.74      0.74     46415
weighted avg       0.74      0.74      0.74     46415



In [3]:
import joblib

model = clf 

joblib.dump(model, 'C://Users//amins//Desktop//decision_tree_model.pkl')

['C://Users//amins//Desktop//decision_tree_model.pkl']