In [6]:
from transformers import pipeline

# Masked-language-model setup.
# The checkpoint below is the DepRoBERTa pre-trained language model; it is
# wrapped in a "fill-mask" pipeline so it can be asked to complete a masked word.
model_name = "rafalposwiata/deproberta-large-v1"
fill_mask = pipeline(task="fill-mask", model=model_name)

# Words treated as signals of depression.
# The classifier checks whether the model proposes any of these for the
# masked position. The list is not comprehensive, but it is good enough for
# the quick test being run here.
depression_keywords = {
    "depressed",
    "sad",
    "hopeless",
    "miserable",
    "worthless",
    "down",
    "tired",
    "empty",
    "lost",
    "lonely",
    "broken",
}

# Define the heuristic classifier function
def classify_depression(text, top_k=10, moderate_threshold=0.02, depressed_threshold=0.08,
                        *, keywords=None, mask_filler=None):
    """
    3-level depression classifier using a masked language model.

    The probe sentence "I feel <mask>." is appended to the input text, the
    fill-mask model is asked for its ``top_k`` candidates for the masked word,
    and the probabilities of the candidates that are depression keywords are
    summed into a score. The score is then bucketed with the two thresholds.

    Args:
        text (str): Input text
        top_k (int): How many top mask predictions to check
        moderate_threshold (float): Score threshold for 'moderate'
        depressed_threshold (float): Score threshold for 'depressed'
        keywords (iterable of str, optional): Words counted as
            depression-related (matched case-insensitively). Defaults to the
            module-level ``depression_keywords``.
        mask_filler (callable, optional): Called as
            ``mask_filler(prompt, top_k=top_k)``; must return a list of dicts
            carrying 'token_str' and 'score'. Defaults to the module-level
            ``fill_mask`` pipeline.

    Returns:
        label (str): 'not_depressed', 'moderate', or 'depressed'
        score (float): Sum of probabilities for depression words
        predictions (list): List of top_k predictions from the model
    """
    # NOTE: the three-level output is carried over from preliminary testing.
    filler = fill_mask if mask_filler is None else mask_filler
    if keywords is None:
        vocabulary = depression_keywords
    else:
        vocabulary = {word.strip().lower() for word in keywords}

    # Use the tokenizer's own mask token when the filler exposes one, so the
    # prompt stays valid for checkpoints whose mask token is not "<mask>".
    tokenizer = getattr(filler, "tokenizer", None)
    mask_token = getattr(tokenizer, "mask_token", None) or "<mask>"

    # A literal mask token inside the input would create a second mask; the
    # pipeline then returns one prediction list per mask and the scoring below
    # would fail. Drop such tokens so the appended probe is the only mask.
    body = text.strip()
    if mask_token in body:
        body = " ".join(body.replace(mask_token, " ").split())

    # Add a masked prompt at the end of the sentence
    prompt = f"{body} I feel {mask_token}."

    # Get the model's top predictions for the masked word
    preds = filler(prompt, top_k=top_k)

    # "Depression score": total probability mass of the keyword predictions.
    # Tokens come back with leading whitespace, hence strip() before lookup.
    score = sum(
        p['score'] for p in preds
        if p['token_str'].strip().lower() in vocabulary
    )

    # Assign a label based on thresholds (highest threshold checked first)
    if score >= depressed_threshold:
        label = "depressed"
    elif score >= moderate_threshold:
        label = "moderate"
    else:
        label = "not_depressed"

    return label, score, preds

# Sample sentences used to smoke-test the classifier.
# The final entry was taken directly from the data set to see whether the
# approach works on real text.
examples = [
    "I can't get out of bed and everything feels heavy.",
    "I'm okay, just a bit tired today.",
    "I'm so excited for my vacation next week!",
    "Nothing makes sense anymore. I feel worthless.",
    "Life is alright, but I'm feeling kind of down lately.",
    "The only thing is, the withdrawels are horrible, worse than herion according to many H addicts, wish my luck, as this will be panic, anxiety inducing hell with burning skin, cold sweats, so weak I can't walk, every bone on my body hurts, my hair hurts, breathing hurts",
]

# Classify each sample and print a short report for it.
for msg in examples:
    label, score, predictions = classify_depression(msg)
    print(
        f" Text: {msg}",
        f" → Classification: {label} (score={score:.4f})",
        # Raw candidates are shown for debugging/inspection.
        " → Top predictions: " + str([(p['token_str'], format(p['score'], '.3f')) for p in predictions]),
        "-" * 80,
        sep="\n",
    )

# This is not a comprehensive approach, but the heuristic should be good enough to check that the model is working. We have a lot of work to do.


Device set to use cuda:0


 Text: I can't get out of bed and everything feels heavy.
 → Classification: depressed (score=0.2891)
 → Top predictions: [(' hopeless', '0.121'), (' worthless', '0.086'), (' trapped', '0.077'), (' alone', '0.063'), (' empty', '0.049'), (' useless', '0.048'), (' numb', '0.034'), (' lost', '0.033'), (' stuck', '0.030'), (' horrible', '0.029')]
--------------------------------------------------------------------------------
 Text: I'm okay, just a bit tired today.
 → Classification: not_depressed (score=0.0000)
 → Top predictions: [(' okay', '0.221'), (' good', '0.152'), (' fine', '0.137'), (' alright', '0.046'), (' great', '0.045'), (' better', '0.035'), (' normal', '0.022'), (' alone', '0.018'), (' safe', '0.016'), (' nothing', '0.016')]
--------------------------------------------------------------------------------
 Text: I'm so excited for my vacation next week!
 → Classification: not_depressed (score=0.0000)
 → Top predictions: [(' great', '0.215'), (' amazing', '0.161'), (' ready'