In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import LabelBinarizer

# Load the dataset
df = pd.read_csv('data.csv')

# Combine 'post_title' and 'post_text' into a single column 'text'
df['text'] = df['post_title'] + ' ' + df['post_text']

# Define feature and target columns
feature_cols = ['text']
target_cols = df.columns[2:11]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df[feature_cols], df[target_cols], test_size=0.2, random_state=42)

# Vectorize the text data
vectorizer = TfidfVectorizer()
X_train_vect = vectorizer.fit_transform(X_train['text'])
X_test_vect = vectorizer.transform(X_test['text'])

# Initialize lists to store overall scores
overall_accuracy = []
overall_precision = []
overall_recall = []
overall_f1 = []
overall_auc_roc = []

# Iterate over each target column
for col in target_cols:
    print(f"Training model for: {col}")
    
    # Initialize and train the Gradient Boosting Classifier
    model = GradientBoostingClassifier()
    model.fit(X_train_vect, y_train[col])
    
    # Make predictions
    y_pred = model.predict(X_test_vect)
    
    # Calculate evaluation metrics
    accuracy = accuracy_score(y_test[col], y_pred)
    precision = precision_score(y_test[col], y_pred, pos_label='yes', zero_division=1)  
    recall = recall_score(y_test[col], y_pred, pos_label='yes', zero_division=1)  
    f1 = f1_score(y_test[col], y_pred, pos_label='yes', zero_division=1)  
    binarizer = LabelBinarizer()
    y_test_binarized = binarizer.fit_transform(y_test[col])
    y_pred_binarized = binarizer.transform(y_pred)
    auc_roc = roc_auc_score(y_test_binarized, y_pred_binarized)
    
    # Append individual scores to overall lists
    overall_accuracy.append(accuracy)
    overall_precision.append(precision)
    overall_recall.append(recall)
    overall_f1.append(f1)
    overall_auc_roc.append(auc_roc)
    
    # Print evaluation metrics
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")
    print(f"AUC-ROC: {auc_roc}\n")

# Calculate overall scores
overall_accuracy_avg = sum(overall_accuracy) / len(overall_accuracy)
overall_precision_avg = sum(overall_precision) / len(overall_precision)
overall_recall_avg = sum(overall_recall) / len(overall_recall)
overall_f1_avg = sum(overall_f1) / len(overall_f1)
overall_auc_roc_avg = sum(overall_auc_roc) / len(overall_auc_roc)

print("Overall Accuracy:", overall_accuracy_avg)
print("Overall Precision:", overall_precision_avg)
print("Overall Recall:", overall_recall_avg)
print("Overall F1 Score:", overall_f1_avg)
print("Overall AUC-ROC Score:", overall_auc_roc_avg)


Training model for: Feeling-bad-about-yourself-or-that-you-are-a-failure-or-have-let-yourself-or-your-family-down
Accuracy: 0.8379052369077307
Precision: 0.8421052631578947
Recall: 0.9940828402366864
F1 Score: 0.9118046132971506
AUC-ROC: 0.4970414201183432

Training model for: Feeling-down-depressed-or-hopeless
Accuracy: 0.8104738154613467
Precision: 0.8202531645569621
Recall: 0.9848024316109423
F1 Score: 0.8950276243093924
AUC-ROC: 0.49934566024991556

Training model for: Feeling-tired-or-having-little-energy
Accuracy: 0.7281795511221946
Precision: 0.7065217391304348
Recall: 0.4421768707482993
F1 Score: 0.5439330543933054
AUC-ROC: 0.6679388290749371

Training model for: Little-interest-or-pleasure-in-doing 
Accuracy: 0.5311720698254364
Precision: 0.5064102564102564
Recall: 0.41578947368421054
F1 Score: 0.4566473988439307
AUC-ROC: 0.525430281865802

Training model for: Moving-or-speaking-so-slowly-that-other-people-could-have-noticed-Or-the-opposite-being-so-fidgety-or-restless-that-yo