# Install required libraries

In [None]:
# Install the third-party packages listed below into the notebook environment.
!pip install transformers torch accelerate tensorflow-hub bert-tensorflow tensorflow tqdm

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM, Trainer, TrainingArguments, BertTokenizer, BertForSequenceClassification, MarianMTModel, MarianTokenizer, BertConfig
import torch
from transformers import BertTokenizer, BertForSequenceClassification, TFBertForSequenceClassification
from transformers import LlamaTokenizer, LlamaForCausalLM
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
import pandas as pd
import tensorflow_hub as hub
import tensorflow as tf
from datetime import datetime
from torch.utils.data import DataLoader
import re
import nltk
from nltk.corpus import wordnet
import random
from tqdm import tqdm
import concurrent.futures
from nltk.tokenize import sent_tokenize
import matplotlib.pyplot as plt
import json
import numpy as np
from google.colab import drive
drive.mount("/content/drive")


# Re-read sentences with supportive label

In [None]:
# CSV on the mounted Drive holding the sentences and their supportiveness labels.
input_csv_path = "/content/drive/My Drive/Diss_Dataset/predicted_supportiveness.csv"

# Load the whole file into a DataFrame; later cells read and extend this frame.
sentences_df = pd.read_csv(filepath_or_buffer=input_csv_path)


# Pre-load MentaLLaMA-7b and its tokenizer

In [None]:
# The tokenizer files and the model weights are read from the same Drive folder.
mentallama_dir = "/content/drive/My Drive/Diss_Dataset/Mental7b"

tokenizer = LlamaTokenizer.from_pretrained(mentallama_dir)

# device_map="auto" leaves the placement of the weights to the library.
model = LlamaForCausalLM.from_pretrained(mentallama_dir, device_map="auto")

# Define prompt for model

In [None]:
# Instruction template sent to the model for every sentence. The "{sentence}"
# placeholder is filled in with str.format; the reply is expected after the
# closing [/INST] tag.
# NOTE: the instruction verb was "clarify", a typo for "classify"; it is
# corrected here so the model is actually asked to pick a category.
prompt = """
[INST]
Please classify the following sentence into one of the following categories:

1: Patient is either supporting other patients, or does not mention any ongoing distress or negative feelings.
2: Patient mentions ongoing distress or negative feelings, including past trauma such as suicide of others.
3: Patient mentions thoughts of personally wanting to die or hurt themself but without any specific plan.
4: Patient mentions actions taken towards a potential suicide attempt or mentions ongoing self-harm.
5: Patient mentions recent actual suicide attempts, future planned attempts, or severe self-harm with a clear intent to die.

Only place the sentence in a higher category if it is clear that it belongs there.
Sentence: "{sentence}"

Category:
[/INST]
"""

# Provide model with prompt + sentence, to generate a rating

In [None]:
def get_classification(sentence):
    """Ask the language model for a 1-5 category for a single sentence.

    The sentence is substituted into the module-level ``prompt`` template,
    the model generates a continuation, and the first digit between 1 and 5
    that follows the closing ``[/INST]`` tag in the decoded text is returned.

    Args:
        sentence: Text to classify; inserted into the prompt via ``str.format``.

    Returns:
        int: The parsed category (1-5). When no such digit follows
        ``[/INST]`` the function still returns 1, but emits a
        ``RuntimeWarning`` so the fallback is not mistaken for a genuine
        category-1 answer.
    """
    import warnings

    # Send the inputs to the device the model reports rather than assuming
    # "cuda": the model is loaded with device_map="auto", so its placement is
    # decided by the library.
    encoded = tokenizer(
        prompt.format(sentence=sentence), return_tensors="pt"
    ).to(model.device)

    # NOTE: max_length bounds prompt tokens and generated tokens together.
    generated = model.generate(**encoded, max_length=4000, num_return_sequences=1)
    decoded = tokenizer.decode(generated[0].cpu(), skip_special_tokens=True).strip()

    # The decoded text still contains the prompt, so anchor on [/INST] and take
    # the first 1-5 digit after it; DOTALL lets ".*?" span line breaks.
    found = re.search(r'\[/INST\].*?([1-5])', decoded, re.DOTALL)
    if found is None:
        warnings.warn(
            "No category digit (1-5) found in the model output; defaulting to 1.",
            RuntimeWarning,
            stacklevel=2,
        )
        return 1
    return int(found.group(1))

# The model is not moved here: it was loaded with device_map="auto", which
# already places the weights. Calling .to("cuda") on such a model is redundant
# on a single GPU and is rejected or warned about when the weights have been
# dispatched/offloaded across devices.
torch.cuda.empty_cache()


# Rate only the rows whose predicted supportiveness label is 0; all other rows
# are skipped and receive no rating.
for index, row in tqdm(sentences_df.iterrows(), total=len(sentences_df), desc="Processing sentences"):
    if row["predicted_supportiveness_label"] != 0:
        continue
    sentences_df.at[index, "rating"] = get_classification(row["sentence"])

print(sentences_df.head())

# Save rating labels to a CSV file

In [None]:
# Show the first rows once more before writing the frame to disk.
print(sentences_df.head())

# Write the frame to the Drive dataset folder, omitting the index column.
csv_filenmame = "/content/drive/My Drive/Diss_Dataset/all_labels.csv"
sentences_df.to_csv(path_or_buf=csv_filenmame, index=False)