In [None]:
!pip install transformers sentencepiece datasets

In [None]:
from datasets import load_dataset

# Fetch only the 'train' split of the dataset; the evaluation split is carved out below.
ds = load_dataset("hugginglearners/reddit-depression-cleaned", split='train')

# "text" is the column tok_func reads; "label" is presumably the column the
# Trainer picks up as the target — confirm against the dataset schema.
ds = ds.rename_column("clean_text", "text")
ds = ds.rename_column("is_depression", "label")

# Hold out 20% for evaluation. The fixed seed makes the shuffle (and therefore the
# train/test membership and reported metrics) identical on every Restart & Run All.
ds = ds.train_test_split(test_size=0.2, shuffle=True, seed=42)

ds

In [None]:
from transformers import AutoTokenizer

model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tok_func(x):
    """Tokenize the "text" field of a batch of examples, truncating long inputs.

    Padding is intentionally not applied here: with ``batched=True`` it would pad
    every example to the longest sequence in its map chunk and store that padding
    in the dataset. The Trainer below is given the tokenizer and pads each
    training/eval batch to its own longest sequence instead.
    """
    return tokenizer(x["text"], truncation=True)

tok_ds = ds.map(tok_func, batched=True)
tok_ds

In [None]:
import numpy as np
import torch
from datasets import load_metric
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Hyperparameters consumed by the TrainingArguments further down in this cell.
lr = 1e-5     # learning rate handed to the optimizer/scheduler
epochs = 3    # number of training epochs
bs = 32       # per-device train batch size (evaluation uses twice this)

# Accuracy metric object looked up by name.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Return classification accuracy for one evaluation pass.

    Args:
        eval_pred: a ``(logits, labels)`` pair; the class with the highest
            logit along the last axis is taken as the prediction.

    Returns:
        ``{"accuracy": float}`` — the fraction of predictions equal to the labels.

    Accuracy is computed directly with numpy so this function does not depend on
    a metric object loaded through the deprecated ``datasets.load_metric``.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = float(np.mean(predictions == np.asarray(labels)))
    return {"accuracy": accuracy}

# Training configuration: cosine schedule with 10% warmup, evaluation after every
# epoch, checkpoints written under 'outputs', no external experiment tracker.
args = TrainingArguments(
    output_dir='outputs',
    learning_rate=lr,
    warmup_ratio=0.1,
    lr_scheduler_type='cosine',
    # Mixed precision needs a CUDA device; hard-coding True makes this cell
    # raise on CPU-only machines, so enable it only when a GPU is present.
    fp16=torch.cuda.is_available(),
    evaluation_strategy="epoch",
    per_device_train_batch_size=bs,
    per_device_eval_batch_size=bs*2,
    num_train_epochs=epochs,
    weight_decay=0.01,
    report_to='none',
)

# Pretrained encoder with a fresh 2-class classification head.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tok_ds['train'],
    eval_dataset=tok_ds['test'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Trailing semicolon suppresses the repr of the returned training output.
trainer.train();

In [13]:
from transformers import TextClassificationPipeline

# Maps the pipeline's generic label names to the message shown to the user.
label_map = {
    'LABEL_0':"You're doing great!",
    'LABEL_1':"You need a hug? (っ◕‿◕)っ",
}

# Use the first GPU when one is available; otherwise fall back to CPU (-1)
# instead of failing on a hard-coded CUDA device index.
pipe = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

text = "I have depression of epic proportions."
# The pipeline returns one result dict per input; take the top label of the first.
print(label_map[pipe(text)[0]['label']])

You need a hug? (っ◕‿◕)っ
