In [6]:
# pip install transformers datasets
import torch
# Report the installed torch build and whether a CUDA device can be used.
cuda_ok = torch.cuda.is_available()
print("torch:", torch.__version__)
print("CUDA available:", cuda_ok)
if cuda_ok:
    # Only query the device name when a GPU is actually present.
    print("GPU:", torch.cuda.get_device_name(0))


from datasets import load_dataset
from transformers import pipeline, AutoTokenizer
# Single source of truth for the checkpoint so the pipeline and the tokenizer
# are always loaded from the same model.
MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"

# Pick the device from what is actually available instead of hard-coding GPU 0:
# Hugging Face pipelines take a CUDA device index, or -1 to run on the CPU.
device_index = 0 if torch.cuda.is_available() else -1

classifier = pipeline(
    "text-classification",
    model=MODEL_NAME,
    top_k=None,     # return a score for every label (replaces return_all_scores=True)
    device=device_index,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


torch: 2.8.0+cpu
CUDA available: False


Device set to use cpu


In [28]:
import os
import json
import warnings

# Folders containing your test files
folders = ["../data/low", "../data/med", "../data/high"]

# Maps each file's base name (filename without extension) to the dictionary
# parsed from that file (keys such as 'text' or 'reference_text').
texts = {}

for folder in folders:
    # os.listdir returns entries in arbitrary order; sort so that runs are
    # reproducible, including which file wins if two share a base name.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".txt"):
            continue

        file_path = os.path.join(folder, filename)
        # The .txt files are parsed as JSON documents.
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Use filename without extension as key
        key_base = os.path.splitext(filename)[0]

        # The same base name in two folders would silently replace the earlier
        # entry; keep the last-wins behaviour but make the collision visible.
        if key_base in texts:
            warnings.warn(
                f"Duplicate document name {key_base!r}: {file_path} overwrites an earlier entry"
            )

        # Store a shallow copy of everything the JSON file contained.
        texts[key_base] = dict(data)

# Example access
print(texts["test1"]["text"])
print(texts["DocumentForParsing2"]["reference_text"])

Reports of unidentified drones hovering over Temple, Texas have residents on edge. Residents demand transparency and urgent action from elected leaders.
Killeen Cyber Attack Disrupts Local Power Plant. KILLEEN, TX – On the morning of March 26th, the Killeen Power Plant experienced a cyber-attack that cut power to about 2,300 homes in Killeen. Operators detected unusual network activity at 4:00 a.m. and shut off four feeder circuits as a precaution. Workers are facilitating return of power and expect power to return at around 10:00am. What Happened Systems Hit: Main control server and backup communication lines. Impact: Widespread outages in four neighborhoods. Response: Plant staff followed emergency protocols and alerted the Killeen Police Department’s Cyber Crimes Unit. Investigation Underway The Cyber Crimes Unit is working with CISA and the FBI to trace the attack. So far, investigators have found: Malware in the plant’s network logs. IP addresses using anonymizing services. Signs 

In [8]:
from collections import defaultdict


def chunk_text(text, tokenizer, max_tokens=480):
    """Split ``text`` into consecutive pieces of at most ``max_tokens`` tokens.

    The text is tokenized once, the token ids are cut into fixed-size windows,
    and each window is decoded back into a string.

    Args:
        text: The string to split.
        tokenizer: Object providing ``encode(text, add_special_tokens=...)``
            and ``decode(token_ids)`` (e.g. a Hugging Face tokenizer).
        max_tokens: Maximum number of tokens per chunk; must be positive.

    Returns:
        A list of decoded chunk strings, in document order. The list is empty
        when the text produces no tokens.

    Raises:
        ValueError: If ``max_tokens`` is not positive.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")

    # Encode WITHOUT special tokens. By default encode() wraps the sequence in
    # begin/end markers, which would then be decoded into the first and last
    # chunk as literal text and passed on to whatever consumes the chunks.
    token_ids = tokenizer.encode(text, add_special_tokens=False)

    return [
        tokenizer.decode(token_ids[start:start + max_tokens])
        for start in range(0, len(token_ids), max_tokens)
    ]


def avg_emotion_scores(text, classifier, tokenizer, max_tokens=480):
    """Return one score per emotion label for ``text``, chunking long inputs.

    If the tokenized text has at most ``max_tokens`` tokens it is classified
    in a single call. Otherwise it is split with ``chunk_text`` and the
    per-chunk scores are averaged, each chunk weighted by its character length.

    Args:
        text: The document to score.
        classifier: Callable returning, for a string, a sequence whose first
            element is an iterable of ``{'label': ..., 'score': ...}`` dicts.
        tokenizer: Tokenizer used to measure the text and to chunk it.
        max_tokens: Token budget per classifier call. Used both as the
            threshold for chunking and as the chunk size, so the two cannot
            drift apart. Defaults to 480, the previously hard-coded value.

    Returns:
        A dict mapping each emotion label to its (weighted average) score.
    """
    # Short inputs fit in one call; no weighting is needed.
    if len(tokenizer.encode(text)) <= max_tokens:
        result = classifier(text)[0]
        return {item['label']: item['score'] for item in result}

    chunks = chunk_text(text, tokenizer, max_tokens=max_tokens)
    cumulative_scores = defaultdict(float)
    total_chars = 0

    for chunk in chunks:
        result = classifier(chunk)[0]
        # Weight each chunk by its length in characters so a short trailing
        # chunk does not count as much as a full one.
        chunk_len = len(chunk)
        total_chars += chunk_len
        for item in result:
            cumulative_scores[item['label']] += item['score'] * chunk_len

    return {label: score / total_chars for label, score in cumulative_scores.items()}

In [29]:
def _score_and_show(doc_name, field="text"):
    """Score one loaded document, print the scores, and return them.

    Looks up ``texts[doc_name][field]`` and runs it through
    ``avg_emotion_scores`` with the notebook's ``classifier`` and ``tokenizer``.
    """
    scores = avg_emotion_scores(texts[doc_name][field], classifier, tokenizer)
    print(scores)
    return scores


# One result variable per document; later cells refer to these names.
test1results = _score_and_show("test1")
test2results = _score_and_show("test2")
test3results = _score_and_show("test3")
test4results = _score_and_show("test4")
test5results = _score_and_show("test5")
test6results = _score_and_show("test6")
test7results = _score_and_show("test7")
test8results = _score_and_show("test8")
test9results = _score_and_show("test9")
test10results = _score_and_show("test10")
test11results = _score_and_show("test11")
test12results = _score_and_show("test12")

docForParsingResults = _score_and_show("DocumentForParsing")

# This document is scored on its 'reference_text' field rather than 'text'.
docForParsing2Results = _score_and_show("DocumentForParsing2", field="reference_text")


{'fear': 0.788798451423645, 'neutral': 0.09400440752506256, 'anger': 0.09049759805202484, 'surprise': 0.010985255241394043, 'disgust': 0.009524556808173656, 'sadness': 0.003922006580978632, 'joy': 0.002267639385536313}
{'fear': 0.7549809217453003, 'anger': 0.1463717371225357, 'sadness': 0.049290575087070465, 'surprise': 0.026841888204216957, 'neutral': 0.011862149462103844, 'disgust': 0.008166777901351452, 'joy': 0.0024859283585101366}
{'neutral': 0.9374577403068542, 'surprise': 0.014258286915719509, 'fear': 0.013915931805968285, 'sadness': 0.012051542289555073, 'anger': 0.009648788720369339, 'disgust': 0.008449293673038483, 'joy': 0.00421846704557538}
{'neutral': 0.7806509137153625, 'disgust': 0.08373308926820755, 'sadness': 0.05595659092068672, 'anger': 0.04798661544919014, 'fear': 0.017749501392245293, 'surprise': 0.010741558857262135, 'joy': 0.0031817734707146883}
{'neutral': 0.8786174654960632, 'anger': 0.03598334640264511, 'disgust': 0.02693680301308632, 'sadness': 0.025738630443

In [30]:
from typing import Dict

# Clamps emotions to [-1, 1]
def clamp(emotions: Dict[str, float]) -> Dict[str, float]:
    """Limit every value of ``emotions`` to the range [-1, 1], in place.

    Values above 1 are replaced by 1 and values below -1 by -1; values already
    inside the range are left untouched. The same dictionary object that was
    passed in is modified and returned.
    """
    # Collect only the entries that fall outside the range, already mapped to
    # the bound they violate, then write them back in one step.
    out_of_range = {
        name: (1 if value > 1 else -1)
        for name, value in emotions.items()
        if value > 1 or value < -1
    }
    emotions.update(out_of_range)
    return emotions

# Map emotions to fear, stress, trust and morale
def emotions_to_fsmt(emotions: Dict[str, float]) -> Dict[str, float]:
    """Combine per-emotion scores into fear, stress, trust and morale values.

    Args:
        emotions: Mapping from emotion label to score. The labels read are
            'fear', 'anger', 'neutral', 'disgust', 'sadness' and 'joy'; any
            other key is ignored. A label that is absent is treated as 0.0
            instead of failing on ``None`` arithmetic.

    Returns:
        A dict with the keys 'fear', 'stress', 'trust' and 'morale', each a
        fixed linear combination of the input scores, passed through
        ``clamp`` so every value lies in [-1, 1].
    """
    # Default to 0.0 so a missing label contributes nothing to the weighted sums.
    fear = emotions.get("fear", 0.0)
    anger = emotions.get("anger", 0.0)
    neutral = emotions.get("neutral", 0.0)
    disgust = emotions.get("disgust", 0.0)
    sadness = emotions.get("sadness", 0.0)
    joy = emotions.get("joy", 0.0)

    newEmotions = {
        "fear": fear,
        "stress": 0.75*fear + 0.4 * anger + 0.1 * disgust + 0.2 * sadness,
        "trust": 0.6*joy + 0.3*neutral - 0.3*fear - 0.1*anger,
        "morale": 0.5*joy + 0.3*neutral - 0.2*fear
    }

    return clamp(newEmotions)

In [32]:
# Pair each display label with the emotion scores computed for that document,
# then print the derived fear/stress/trust/morale values one line per document.
fsmt_inputs = [
    ('test1', test1results),
    ('test2', test2results),
    ('test3', test3results),
    ('test4', test4results),
    ('test5', test5results),
    ('test6', test6results),
    ('test7', test7results),
    ('test8', test8results),
    ('test9', test9results),
    ('test10', test10results),
    ('test11', test11results),
    ('test12', test12results),
    ('docForParsing', docForParsingResults),
    ('docForParsing2', docForParsing2Results),
]

for label, scores in fsmt_inputs:
    print(label, emotions_to_fsmt(scores))

test1 {'fear': 0.788798451423645, 'stress': 0.6295347347855568, 'trust': -0.2161273893434554, 'morale': -0.1284245483344421}
test2 {'fear': 0.7549809217453003, 'stress': 0.6354591789655387, 'trust': -0.23608124838210642, 'morale': -0.14619457533117383}
test3 {'fear': 0.013915931805968285, 'stress': 0.01755170216783881, 'trust': 0.27862874390557407, 'morale': 0.2805633692536503}
test4 {'fear': 0.017749501392245293, 'stress': 0.05207139933481813, 'trust': 0.22598082623444496, 'morale': 0.23223626057151703}
test5 {'fear': 0.019731668755412102, 'stress': 0.037033496517688044, 'trust': 0.25918361973017456, 'morale': 0.2639024186879396}
test6 {'fear': 0.9734414219856262, 'stress': 0.7350446255877614, 'trust': -0.28996999440714716, 'morale': -0.19171417260076853}
test7 {'fear': 0.8260776400566101, 'stress': 0.6365498924627899, 'trust': -0.21904654065147042, 'morale': -0.1341562173794955}
test8 {'fear': 0.11700055003166199, 'stress': 0.36539958529174327, 'trust': -0.09028621246106922, 'morale'