In [12]:
# pip install transformers datasets
import torch
from datasets import load_dataset
from transformers import pipeline, AutoTokenizer

# Report the runtime environment so the captured output records which
# torch build and which accelerator (if any) produced the results below.
print("torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

# Single source of truth for the checkpoint: the pipeline and the standalone
# tokenizer must come from the same model so token counts agree.
MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"

classifier = pipeline(
    "text-classification",
    model=MODEL_NAME,
    top_k=None,  # return a score for every label, not just the best one
    # Use GPU 0 when present; -1 selects the CPU so the notebook still runs
    # on machines without CUDA instead of failing at pipeline construction.
    device=0 if torch.cuda.is_available() else -1,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


torch: 2.5.1+cu121
CUDA available: True
GPU: NVIDIA GeForce RTX 3060 Laptop GPU


Device set to use cuda:0


In [13]:
import json
import os
import warnings

# Folders containing the test files (JSON documents stored with a .txt extension)
folders = ["../data/low", "../data/med", "../data/high"]

# Maps each file's base name (no extension) to the dict parsed from that file,
# e.g. texts["test1"]["text"].
texts = {}

for folder in folders:
    # os.listdir returns entries in arbitrary order; sorting keeps the load
    # order (and therefore which file wins a name collision) reproducible.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".txt"):
            continue

        file_path = os.path.join(folder, filename)
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Use filename without extension as key
        key_base = os.path.splitext(filename)[0]

        # The key ignores the folder, so the same base name in two folders
        # collides. The later file still wins (as before), but say so.
        if key_base in texts:
            warnings.warn(
                f"Duplicate document name {key_base!r}: {file_path} replaces an earlier entry"
            )

        # Keep every top-level key from the JSON file (like 'text',
        # 'reference_text') in a sub-dictionary.
        texts[key_base] = dict(data)

# Example access
print(texts["test1"]["text"])
print(texts["DocumentForParsing2"]["reference_text"])

Reports of unidentified drones hovering over Temple, Texas have residents on edge. Residents demand transparency and urgent action from elected leaders.
Killeen Cyber Attack Disrupts Local Power Plant. KILLEEN, TX – On the morning of March 26th, the Killeen Power Plant experienced a cyber-attack that cut power to about 2,300 homes in Killeen. Operators detected unusual network activity at 4:00 a.m. and shut off four feeder circuits as a precaution. Workers are facilitating return of power and expect power to return at around 10:00am. What Happened Systems Hit: Main control server and backup communication lines. Impact: Widespread outages in four neighborhoods. Response: Plant staff followed emergency protocols and alerted the Killeen Police Department’s Cyber Crimes Unit. Investigation Underway The Cyber Crimes Unit is working with CISA and the FBI to trace the attack. So far, investigators have found: Malware in the plant’s network logs. IP addresses using anonymizing services. Signs 

In [14]:
from collections import defaultdict


def chunk_text(text, tokenizer, max_tokens=480):
    """Split ``text`` into consecutive pieces of at most ``max_tokens`` tokens.

    Args:
        text: The string to split.
        tokenizer: Object exposing ``encode(text, add_special_tokens=...)``
            returning a list of token ids, and ``decode(ids)`` returning a string.
        max_tokens: Maximum number of tokens per chunk; must be positive.

    Returns:
        A list of decoded text chunks in document order. Empty when ``text``
        encodes to no tokens.

    Raises:
        ValueError: If ``max_tokens`` is not positive.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")

    # Encode WITHOUT special tokens. With the default settings the id list is
    # wrapped in begin/end markers, which would then be decoded into the first
    # and last chunks as literal text and tokenized again by the classifier.
    token_ids = tokenizer.encode(text, add_special_tokens=False)

    return [
        tokenizer.decode(token_ids[start:start + max_tokens])
        for start in range(0, len(token_ids), max_tokens)
    ]


def avg_emotion_scores(text, classifier, tokenizer, max_tokens=480):
    """Return one score per emotion label for ``text``.

    Text that fits within ``max_tokens`` tokens is classified in one call.
    Longer text is split with ``chunk_text``, each chunk is classified
    separately, and the per-label scores are averaged with each chunk
    weighted by its length in characters.

    Args:
        text: The document to score.
        classifier: Callable that, given a string, returns a list whose first
            element is a list of ``{'label': ..., 'score': ...}`` dicts.
        tokenizer: Tokenizer used to count tokens and to split long text.
        max_tokens: Token budget per classifier call. The same value is used
            for the "fits in one call" check and for the chunk size, so the
            two can no longer drift apart.

    Returns:
        Dict mapping each label to its (possibly length-weighted) score.
    """
    # The full text is encoded only to count tokens. For long documents the
    # tokenizer may warn that the sequence exceeds the model maximum; that is
    # harmless here because these ids are never passed to the model.
    if len(tokenizer.encode(text)) <= max_tokens:
        single_result = classifier(text)[0]
        return {item['label']: item['score'] for item in single_result}

    weighted_totals = defaultdict(float)
    total_chars = 0

    for chunk in chunk_text(text, tokenizer, max_tokens):
        chunk_len = len(chunk)
        total_chars += chunk_len
        for item in classifier(chunk)[0]:
            # Weight by characters so a short trailing chunk does not count
            # as much as a full-size one.
            weighted_totals[item['label']] += item['score'] * chunk_len

    return {label: total / total_chars for label, total in weighted_totals.items()}

In [15]:
# (document key in `texts`, field of that document to score).
# test1..test12 are scored on their 'text' field; DocumentForParsing2 is
# scored on its 'reference_text' field.
documents_to_score = [(f"test{i}", "text") for i in range(1, 13)]
documents_to_score += [
    ("DocumentForParsing", "text"),
    ("DocumentForParsing2", "reference_text"),
]

# Score every document with the same call instead of one copy-pasted
# statement per document; results are keyed by document name.
emotion_results = {}
for doc_name, field in documents_to_score:
    emotion_results[doc_name] = avg_emotion_scores(texts[doc_name][field], classifier, tokenizer)
    print(emotion_results[doc_name])

# Per-document names are kept because other cells refer to them directly.
test1results = emotion_results["test1"]
test2results = emotion_results["test2"]
test3results = emotion_results["test3"]
test4results = emotion_results["test4"]
test5results = emotion_results["test5"]
test6results = emotion_results["test6"]
test7results = emotion_results["test7"]
test8results = emotion_results["test8"]
test9results = emotion_results["test9"]
test10results = emotion_results["test10"]
test11results = emotion_results["test11"]
test12results = emotion_results["test12"]
docForParsingResults = emotion_results["DocumentForParsing"]
docForParsing2Results = emotion_results["DocumentForParsing2"]


Token indices sequence length is longer than the specified maximum sequence length for this model (1129 > 512). Running this sequence through the model will result in indexing errors


{'fear': 0.7887980937957764, 'neutral': 0.09400472044944763, 'anger': 0.09049765020608902, 'surprise': 0.010985282249748707, 'disgust': 0.009524580091238022, 'sadness': 0.003922010771930218, 'joy': 0.0022676396183669567}
{'fear': 0.7549806833267212, 'anger': 0.14637158811092377, 'sadness': 0.049290768802165985, 'surprise': 0.02684193104505539, 'neutral': 0.011862237937748432, 'disgust': 0.008166802115738392, 'joy': 0.0024859264958649874}
{'neutral': 0.9374578595161438, 'surprise': 0.014258219860494137, 'fear': 0.013915886171162128, 'sadness': 0.012051484547555447, 'anger': 0.009648777544498444, 'disgust': 0.008449282497167587, 'joy': 0.004218461457639933}
{'neutral': 0.7806512117385864, 'disgust': 0.083732970058918, 'sadness': 0.05595644935965538, 'anger': 0.04798654094338417, 'fear': 0.017749501392245293, 'surprise': 0.010741572827100754, 'joy': 0.003181770443916321}
{'neutral': 0.8786172866821289, 'anger': 0.03598341718316078, 'disgust': 0.02693687193095684, 'sadness': 0.025738673284

In [None]:
from typing import Dict

def clamp(emotions: Dict[str, float]) -> Dict[str, float]:
    """Limit every value in ``emotions`` to the range [-1, 1].

    The dictionary is modified in place and the same object is returned.
    Values already inside the range are left untouched.
    """
    lower, upper = -1, 1
    for name, value in emotions.items():
        # Only existing keys are reassigned, so updating while iterating is safe.
        if value < lower:
            emotions[name] = lower
        elif value > upper:
            emotions[name] = upper
    return emotions

def _dominance_level(score: float, rest: float, threshold: float) -> int:
    """Return 3 if ``score`` exceeds ``rest``, else 2 if it exceeds ``threshold``, else 1."""
    if score > rest:
        return 3
    if score > threshold:
        return 2
    return 1


# Map emotion scores to fear and morale levels
def emotions_to_fsmt(emotions: Dict[str, float], level2_threshold: float = 0.5) -> Dict[str, int]:
    """Convert per-emotion scores into coarse "Fear Level" / "Morale Level" values.

    Each level is an int from 1 to 3:
      * 3 - the driving emotion outweighs the sum of the emotions it is compared with
      * 2 - otherwise, the driving emotion exceeds ``level2_threshold``
      * 1 - neither of the above

    "Fear Level" is driven by ``fear`` compared with
    anger + neutral + disgust + sadness + joy. "Morale Level" is driven by
    ``sadness`` compared with anger + neutral + disgust + joy. ``surprise``
    takes part in neither comparison. Only these two levels are produced.

    Args:
        emotions: Mapping from emotion label to score. Missing labels count
            as 0.0 rather than raising.
        level2_threshold: Cut-off for level 2, on the same scale as the
            scores. The classifier output used in this notebook lies in
            [0, 1], so the default is 0.5. When the scores sum to 1, a score
            above 0.5 already outweighs the rest, so level 2 is only reachable
            with a lower threshold or with scores that are not normalized.

    Returns:
        ``{"Fear Level": int, "Morale Level": int}``.
    """
    # NOTE(review): a higher "Morale Level" here means MORE sadness - confirm
    # that this is the intended direction of the scale.
    fear = emotions.get("fear", 0.0)
    anger = emotions.get("anger", 0.0)
    neutral = emotions.get("neutral", 0.0)
    disgust = emotions.get("disgust", 0.0)
    sadness = emotions.get("sadness", 0.0)
    joy = emotions.get("joy", 0.0)

    return {
        "Fear Level": _dominance_level(
            fear, anger + neutral + disgust + sadness + joy, level2_threshold
        ),
        "Morale Level": _dominance_level(
            sadness, anger + neutral + disgust + joy, level2_threshold
        ),
    }

In [17]:
# Label shown in the output paired with the emotion scores computed for that document.
labelled_scores = [
    ('test1', test1results),
    ('test2', test2results),
    ('test3', test3results),
    ('test4', test4results),
    ('test5', test5results),
    ('test6', test6results),
    ('test7', test7results),
    ('test8', test8results),
    ('test9', test9results),
    ('test10', test10results),
    ('test11', test11results),
    ('test12', test12results),
    ('docForParsing', docForParsingResults),
    ('docForParsing2', docForParsing2Results),
]

# One output line per document: its label followed by its level dictionary.
for label, scores in labelled_scores:
    print(label, emotions_to_fsmt(scores))

test1 {'Fear Level': 3, 'Morale Level': 1}
test2 {'Fear Level': 3, 'Morale Level': 1}
test3 {'Fear Level': 1, 'Morale Level': 1}
test4 {'Fear Level': 1, 'Morale Level': 1}
test5 {'Fear Level': 1, 'Morale Level': 1}
test6 {'Fear Level': 3, 'Morale Level': 1}
test7 {'Fear Level': 3, 'Morale Level': 1}
test8 {'Fear Level': 1, 'Morale Level': 1}
test9 {'Fear Level': 3, 'Morale Level': 1}
test10 {'Fear Level': 1, 'Morale Level': 1}
test11 {'Fear Level': 3, 'Morale Level': 1}
test12 {'Fear Level': 3, 'Morale Level': 1}
docForParsing {'Fear Level': 3, 'Morale Level': 1}
docForParsing2 {'Fear Level': 3, 'Morale Level': 1}
