In [4]:
import zipfile, os

# Destination directory and the uploaded archive that is unpacked into it.
extract_path = "/content/conllu"
zip_path = "/content/conllu.zip"  # adjust if you used a different name

# Create the destination first; exist_ok keeps a re-run of this cell harmless.
os.makedirs(extract_path, exist_ok=True)

# The context manager closes the archive handle once all members are unpacked.
with zipfile.ZipFile(zip_path, mode="r") as zip_ref:
    zip_ref.extractall(path=extract_path)

print("✅ Files extracted to:", extract_path)


✅ Files extracted to: /content/conllu


In [35]:
import os
import csv
from collections import defaultdict, Counter

# ---------- CONFIG ----------
CONLLU_ROOT = "conllu/conllu_file"
OUT_CSV = "conllu/adjectives_appraisal_with_examples_by_class.csv"
KEEP_TOP_N = 20            # top-N adjectives per (lang, discipline) by total count
EXAMPLES_PER_CLASS = 5     # how many example sentences to keep per classification
# ---------- end CONFIG ----------

# Patterns / heuristics
# Every set below holds lower-cased lemmas; the classifier compares them with
# the (lower-cased) lemma of a single token.
FIRST_PERSON = {
    # Subject + Object
    "i", "me", "we", "us",
    # Possessive determiners
    "my", "our",
    # Possessive pronouns
    "mine", "ours",
    # Reflexives
    "myself", "ourselves"
}

# Second-person pronouns (for completeness — often neutral)
SECOND_PERSON = {
    "you",
    # Possessive determiners
    "your",
    # Possessive pronouns
    "yours",
    # Reflexives
    "yourself", "yourselves"
}

# Third-person pronouns that refer to people (subjects scored as Judgement).
# The classifier in this cell reads this set, so it must be defined here:
# relying on a later cell to define it breaks "Restart Kernel -> Run All".
# "it" is deliberately absent: it is listed in APPRECIATION_PRONOUNS, and the
# third-person check runs first, so including it here would make that
# appreciation entry unreachable.
THIRD_PERSON_PRONOUNS = {"he", "she", "they", "him", "her", "them", "his", "hers", "theirs"}

APPRECIATION_PRONOUNS = {
    # Core expletive / dummy subject
    "it", "there", "here",

    # Demonstratives (singular/plural)
    "this", "that", "these", "those",

    # Indefinites referring to things/objects
    "something", "anything", "nothing", "everything",
    "someone", "anyone", "no one", "everyone",
    "somebody", "anybody", "nobody", "everybody",

    # Quantified / abstract pronouns
    "all", "each", "both", "neither", "none", "one",

    # Wh-pronouns often referring to things
    "what", "whatever", "which", "whichever",

    # Event-like references (treated as abstract appreciation subjects)
    "this one", "that one", "such", "so"
}

FEEL_VERBS = {
    # Core copular / linking
    "be", "become", "get", "remain", "stay", "keep", "prove", "turn",

    # Seem / appear / perception-of-truth
    "seem", "appear", "sound", "look", "smell", "taste", "feel",

    # Evaluation / cognition (mental stance)
    "think", "consider", "believe", "find", "judge", "deem", "regard", "suppose",
    "reckon", "assume", "imagine", "guess", "suspect", "presume", "conclude",

    # Communication verbs that take complements
    "say", "claim", "argue", "maintain", "suggest", "assert", "contend", "report",

    # Desire / volition
    "want", "wish", "hope", "long", "yearn", "desire", "prefer", "intend", "plan",
    "aim", "aspire",

    # Liking / disliking / attitude
    "like", "love", "enjoy", "admire", "appreciate", "cherish", "favor", "fancy",
    "care", "mind",
    "dislike", "hate", "detest", "despise", "loathe", "resent", "scorn", "disdain",

    # Emotional / affective
    "fear", "dread", "regret", "worry", "doubt", "mourn", "lament", "envy", "pity",
    "sympathize", "empathize", "applaud", "praise", "criticize", "blame", "condemn",

    # Modal / necessity / obligation
    "must", "should", "ought", "need", "have_to", "require", "demand", "deserve",

    # Experiential / perception
    "watch", "observe", "notice", "perceive", "recognize", "detect", "spot",

    # Misc. evaluative
    "value", "assess", "evaluate", "measure", "rate", "score", "grade", "review",
    "endorse", "recommend", "reject", "approve", "disapprove",

    # Expressions of causation / change-of-state (leading to adj)
    "render", "make", "leave", "drive"
}


# ---------- IO: parse conllu ----------
def parse_conllu_file(path):
    """Parse a CoNLL-U file into a list of sentences.

    Args:
        path: Path of a UTF-8 encoded CoNLL-U file.

    Returns:
        A list of sentences; each sentence is a list of token dicts with the
        keys ``id`` (int), ``form``, ``lemma`` (lower-cased), ``upos``,
        ``head`` (int; 0 when the HEAD column is not a plain number) and
        ``deprel``.

    Comment lines (``#``), lines with fewer than eight tab-separated columns,
    multiword-token ranges (``1-2``) and empty nodes (``1.1``) are skipped.
    Blank and whitespace-only lines both end the current sentence: token ids
    restart at 1 in every sentence, so letting a line of stray spaces glue two
    sentences together would corrupt the id -> token and head -> children maps
    built by the classifier.
    """
    sentences = []
    sent = []
    with open(path, "r", encoding="utf-8") as fh:
        for raw_line in fh:
            line = raw_line.rstrip("\n")
            if not line.strip():
                # sentence boundary
                if sent:
                    sentences.append(sent)
                    sent = []
                continue
            if line.startswith("#"):
                continue
            parts = line.split("\t")
            if len(parts) < 8:
                continue
            # skip multiword-token ranges and empty nodes
            if "-" in parts[0] or "." in parts[0]:
                continue
            try:
                idx = int(parts[0])
            except ValueError:
                # non-numeric ID column: not a regular token line
                continue
            head_field = parts[6]
            sent.append({
                "id": idx,
                "form": parts[1],
                "lemma": parts[2].lower(),
                "upos": parts[3],
                # isdecimal() only accepts characters int() can convert
                # (isdigit() also accepts e.g. superscript digits, which int() rejects)
                "head": int(head_field) if head_field.isdecimal() else 0,
                "deprel": parts[7],
            })
    if sent:
        sentences.append(sent)
    return sentences

# ---------- classification & example collection ----------
def _append_example(adj_examples, lemma, category, sentence_text):
    """Remember sentence_text as an example of `lemma` used in `category`.

    adj_examples maps lemma -> {category: [sentences]}; the per-lemma dict is
    created on first use.  A sentence is stored at most once per category, and
    nothing is added once that category already holds EXAMPLES_PER_CLASS
    sentences.
    """
    per_class = adj_examples.setdefault(
        lemma, {"affect":[], "judgement":[], "appreciation":[], "ambiguous":[]}
    )
    bucket = per_class[category]
    # Duplicate or full bucket: leave the stored examples untouched.
    if sentence_text in bucket or len(bucket) >= EXAMPLES_PER_CLASS:
        return
    bucket.append(sentence_text)

def _appraisal_category_for_subject(subj):
    """Map a subject token to "affect", "judgement" or "appreciation"; None when no rule applies."""
    upos = subj["upos"]
    lemma = subj["lemma"]
    if upos == "PRON":
        if lemma in FIRST_PERSON:
            return "affect"
        if lemma in THIRD_PERSON_PRONOUNS:
            return "judgement"
        if lemma in APPRECIATION_PRONOUNS:
            return "appreciation"
        return None
    if upos == "PROPN":
        return "judgement"
    if upos == "NOUN":
        return "appreciation"
    return None


def classify_adjectives_in_sentence(sent, adj_stats, adj_examples):
    """Classify every ADJ token of one sentence and update the running tallies.

    Args:
        sent: list of token dicts (keys id, form, lemma, upos, head, deprel).
        adj_stats: mapping lemma -> counter; "total" and exactly one of
            "affect" / "judgement" / "appreciation" / "ambiguous" are
            incremented per adjective occurrence.
        adj_examples: mapping passed on to _append_example together with the
            space-joined sentence text.

    Rules, tried in order:
      1. The adjective has its own nsubj* child: the category follows from
         that subject (see _appraisal_category_for_subject).
      2. The adjective is the xcomp of a verb: the category follows from the
         verb's nsubj* child; when the verb has no subject at all, a verb
         lemma in FEEL_VERBS counts as affect.
      3. Otherwise the occurrence is "ambiguous".

    Rule 2 uses the same subject mapping as rule 1, so a subject from
    APPRECIATION_PRONOUNS ("this seems wrong") is scored as appreciation
    instead of dropping to ambiguous.  A former third pass for adjectives with
    a copula child repeated rule 1's checks on the same subject and could
    never classify anything, so it is not kept.
    """
    by_id = {t["id"]: t for t in sent}
    children = defaultdict(list)
    for t in sent:
        children[t["head"]].append(t)

    sentence_text = " ".join(t["form"] for t in sent)

    for t in sent:
        if t["upos"] != "ADJ":
            continue
        lemma = t["lemma"]
        adj_stats[lemma]["total"] += 1

        category = None

        # Rule 1: predicative adjective with an explicit subject of its own.
        own_subjects = [c for c in children.get(t["id"], []) if c["deprel"].startswith("nsubj")]
        if own_subjects:
            category = _appraisal_category_for_subject(own_subjects[0])

        # Rule 2: adjective is the xcomp of a verb (I feel happy / she seems interesting).
        if category is None and t["deprel"] == "xcomp":
            head = by_id.get(t["head"])
            if head and head["upos"].startswith("V"):
                verb_subjects = [
                    c for c in children.get(head["id"], []) if c["deprel"].startswith("nsubj")
                ]
                if verb_subjects:
                    category = _appraisal_category_for_subject(verb_subjects[0])
                elif head["lemma"] in FEEL_VERBS:
                    # no explicit subject; guess by verb lemma
                    category = "affect"

        # Rule 3: nothing matched.
        if category is None:
            category = "ambiguous"

        adj_stats[lemma][category] += 1
        _append_example(adj_examples, lemma, category, sentence_text)

# ---------- aggregation ----------
def compute_appraisal_with_examples_by_class(root, top_n=None, lang_filter=None):
    """Build one result row per adjective for every (language, discipline) folder.

    Expects the layout ``root/<language>/<discipline>/*.conllu``.  Entries that
    are not directories are ignored at both levels, as are files without the
    ``.conllu`` suffix.

    Args:
        root: directory that contains the language folders.
        top_n: when truthy, keep only the top_n rows per (language,
            discipline), ranked by total_count (descending).
        lang_filter: when truthy, only the language folder with exactly this
            name is processed.

    Returns:
        A list of dicts holding counts, normalised proportions over the three
        classified categories, and " || "-joined example sentences per class.
    """
    collected = []
    for lang in sorted(os.listdir(root)):
        lang_dir = os.path.join(root, lang)
        wanted = not lang_filter or lang == lang_filter
        if not (wanted and os.path.isdir(lang_dir)):
            continue

        for disc in sorted(os.listdir(lang_dir)):
            disc_dir = os.path.join(lang_dir, disc)
            if not os.path.isdir(disc_dir):
                continue

            # Fresh tallies for every (language, discipline) pair.
            adj_stats = defaultdict(lambda: Counter({"total":0, "affect":0, "judgement":0, "appreciation":0, "ambiguous":0}))
            adj_examples = {}  # lemma -> {class: [sentences]}

            # Feed every sentence of every .conllu file (in name order) to the classifier.
            conllu_names = [name for name in sorted(os.listdir(disc_dir)) if name.endswith(".conllu")]
            for name in conllu_names:
                for sentence in parse_conllu_file(os.path.join(disc_dir, name)):
                    classify_adjectives_in_sentence(sentence, adj_stats, adj_examples)

            # One row per adjective lemma.
            table = []
            for lemma, counts in adj_stats.items():
                n_aff = counts["affect"]
                n_jud = counts["judgement"]
                n_app = counts["appreciation"]
                classified = n_aff + n_jud + n_app
                # Proportions are taken over classified occurrences only;
                # ambiguous ones are reported but not part of the denominator.
                if classified > 0:
                    share_aff = n_aff / classified
                    share_jud = n_jud / classified
                    share_app = n_app / classified
                else:
                    share_aff = share_jud = share_app = 0.0

                per_class = adj_examples.get(lemma, {"affect":[], "judgement":[], "appreciation":[], "ambiguous":[]})

                table.append({
                    "language": lang,
                    "discipline": disc,
                    "adjective": lemma,
                    "total_count": counts["total"],
                    "affect_count": n_aff,
                    "judgement_count": n_jud,
                    "appreciation_count": n_app,
                    "ambiguous_count": counts["ambiguous"],
                    "p_affect": round(share_aff, 4),
                    "p_judgement": round(share_jud, 4),
                    "p_appreciation": round(share_app, 4),
                    # example sentences joined with a visible separator
                    "example_affect": " || ".join(per_class["affect"]),
                    "example_judgement": " || ".join(per_class["judgement"]),
                    "example_appreciation": " || ".join(per_class["appreciation"]),
                    "example_ambiguous": " || ".join(per_class["ambiguous"]),
                })

            # Most frequent adjectives first (stable for ties), then the optional cut.
            table.sort(key=lambda row: row["total_count"], reverse=True)
            collected.extend(table[:top_n] if top_n else table)
    return collected

# ---------- run ----------
def main():
    """Run the English-only analysis and write the rows to OUT_CSV.

    Uses CONLLU_ROOT as input and keeps KEEP_TOP_N adjectives per
    (language, discipline).  Prints a hint and returns without writing a file
    when no rows were produced.
    """
    out = compute_appraisal_with_examples_by_class(CONLLU_ROOT, top_n=KEEP_TOP_N, lang_filter="en")
    if not out:
        print("No results (check your CONLLU_ROOT path and folder structure).")
        return

    # dirname() is "" when OUT_CSV is a bare file name, and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    out_dir = os.path.dirname(OUT_CSV)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(OUT_CSV, "w", encoding="utf-8", newline="") as fh:
        # Column order follows the key order of the first row.
        writer = csv.DictWriter(fh, fieldnames=list(out[0].keys()))
        writer.writeheader()
        writer.writerows(out)
    print("Saved:", OUT_CSV)

# Entry-point guard: run the analysis only when this code is executed directly
# (as it is when the cell is run), not when it is imported as a module.
if __name__ == "__main__":
    main()


Saved: conllu/adjectives_appraisal_with_examples_by_class.csv


# Trying out convertaffect



In [32]:
import os
import csv
import re
from collections import defaultdict, Counter

# ---------- CONFIG ----------
CONLLU_ROOT = "conllu/conllu_file"
OUT_CSV = "conllu/adjectives_appraisal_with_seedboost.csv"
SEED_PSEUDOCOUNT = 2    # how much to boost Affect count when adjective is in seed list
KEEP_TOP_N = None       # not used; we pass top_n explicitly to compute_appraisal_with_seedboost

# Patterns / heuristics
# Every set below holds lower-cased lemmas; the classifier compares them with
# the (lower-cased) lemma of a single token.
FIRST_PERSON = {
    # Subject + Object
    "i", "me", "we", "us",
    # Possessive determiners
    "my", "our",
    # Possessive pronouns
    "mine", "ours",
    # Reflexives
    "myself", "ourselves"
}

# Second-person pronouns (for completeness — often neutral)
SECOND_PERSON = {
    "you",
    # Possessive determiners
    "your",
    # Possessive pronouns
    "yours",
    # Reflexives
    "yourself", "yourselves"
}

APPRECIATION_PRONOUNS = {
    # Core expletive / dummy subject
    "it", "there", "here",

    # Demonstratives (singular/plural)
    "this", "that", "these", "those",

    # Indefinites referring to things/objects
    "something", "anything", "nothing", "everything",
    "someone", "anyone", "no one", "everyone",
    "somebody", "anybody", "nobody", "everybody",

    # Quantified / abstract pronouns
    "all", "each", "both", "neither", "none", "one",

    # Wh-pronouns often referring to things
    "what", "whatever", "which", "whichever",

    # Event-like references (treated as abstract appreciation subjects)
    "this one", "that one", "such", "so"
}

FEEL_VERBS = {
    # Core copular / linking
    "be", "become", "get", "remain", "stay", "keep", "prove", "turn",

    # Seem / appear / perception-of-truth
    "seem", "appear", "sound", "look", "smell", "taste", "feel",

    # Evaluation / cognition (mental stance)
    "think", "consider", "believe", "find", "judge", "deem", "regard", "suppose",
    "reckon", "assume", "imagine", "guess", "suspect", "presume", "conclude",

    # Communication verbs that take complements
    "say", "claim", "argue", "maintain", "suggest", "assert", "contend", "report",

    # Desire / volition
    "want", "wish", "hope", "long", "yearn", "desire", "prefer", "intend", "plan",
    "aim", "aspire",

    # Liking / disliking / attitude
    "like", "love", "enjoy", "admire", "appreciate", "cherish", "favor", "fancy",
    "care", "mind",
    "dislike", "hate", "detest", "despise", "loathe", "resent", "scorn", "disdain",

    # Emotional / affective
    "fear", "dread", "regret", "worry", "doubt", "mourn", "lament", "envy", "pity",
    "sympathize", "empathize", "applaud", "praise", "criticize", "blame", "condemn",

    # Modal / necessity / obligation
    "must", "should", "ought", "need", "have_to", "require", "demand", "deserve",

    # Experiential / perception
    "watch", "observe", "notice", "perceive", "recognize", "detect", "spot",

    # Misc. evaluative
    "value", "assess", "evaluate", "measure", "rate", "score", "grade", "review",
    "endorse", "recommend", "reject", "approve", "disapprove",

    # Expressions of causation / change-of-state (leading to adj)
    "render", "make", "leave", "drive"
}

# Third-person pronouns that refer to people (subjects scored as Judgement).
# "it" is deliberately absent: it is listed in APPRECIATION_PRONOUNS, and the
# classifier tests this set first, so including it here would score every
# "it is <adj>" as Judgement and make the appreciation entry unreachable.
THIRD_PERSON_PRONOUNS = {"he", "she", "they", "him", "her", "them", "his", "hers", "theirs"}

# for loading conllu file
def parse_conllu_file(path):
    """Parse a CoNLL-U file into a list of sentences.

    Args:
        path: Path of a UTF-8 encoded CoNLL-U file.

    Returns:
        A list of sentences; each sentence is a list of token dicts with the
        keys ``id`` (int), ``form``, ``lemma`` (lower-cased), ``upos``,
        ``head`` (int; 0 when the HEAD column is not a plain number) and
        ``deprel``.  HEAD and DEPREL are read from columns 7 and 8.

    Comment lines (``#``), lines with fewer than eight tab-separated columns,
    multiword-token ranges (``1-2``) and empty nodes (``1.1``) are skipped.
    Blank and whitespace-only lines both end the current sentence: token ids
    restart at 1 in every sentence, so letting a line of stray spaces glue two
    sentences together would corrupt the id -> token and head -> children maps
    built by the classifier.
    """
    sentences = []
    sent = []
    with open(path, "r", encoding="utf-8") as fh:
        for raw_line in fh:
            line = raw_line.rstrip("\n")
            if not line.strip():
                # sentence boundary
                if sent:
                    sentences.append(sent)
                    sent = []
                continue
            if line.startswith("#"):
                continue
            parts = line.split("\t")
            if len(parts) < 8:
                continue
            # skip multiword or empty token lines
            if "-" in parts[0] or "." in parts[0]:
                continue
            try:
                idx = int(parts[0])
            except ValueError:
                # non-numeric ID column: not a regular token line
                continue
            head_field = parts[6]
            sent.append({
                "id": idx,
                "form": parts[1],
                "lemma": parts[2].lower(),
                "upos": parts[3],
                # isdecimal() only accepts characters int() can convert
                # (isdigit() also accepts e.g. superscript digits, which int() rejects)
                "head": int(head_field) if head_field.isdecimal() else 0,
                "deprel": parts[7],
            })
    if sent:
        sentences.append(sent)
    return sentences

# ---------- classification logic ----------
def _appraisal_category_for_subject(subj):
    """Map a subject token to "affect", "judgement" or "appreciation"; None when no rule applies."""
    upos = subj["upos"]
    lemma = subj["lemma"]
    if upos == "PRON":
        if lemma in FIRST_PERSON:
            return "affect"
        if lemma in THIRD_PERSON_PRONOUNS:
            return "judgement"
        if lemma in APPRECIATION_PRONOUNS:
            return "appreciation"
        return None
    if upos == "PROPN":
        return "judgement"
    if upos == "NOUN":
        return "appreciation"
    return None


def classify_adjectives_in_sentence(sent, adj_stats):
    """Classify every ADJ token of one sentence and update the running tallies.

    Args:
        sent: list of token dicts (keys id, form, lemma, upos, head, deprel).
        adj_stats: mapping lemma -> counter; "total" and exactly one of
            "affect" / "judgement" / "appreciation" / "ambiguous" are
            incremented per adjective occurrence.

    Rules, tried in order:
      1. The adjective has its own nsubj* child: the category follows from
         that subject (see _appraisal_category_for_subject).
      2. The adjective is the xcomp of a verb: the category follows from the
         verb's nsubj* child; when the verb has no subject at all, a verb
         lemma in FEEL_VERBS counts as affect.
      3. Otherwise the occurrence is "ambiguous".

    Rule 2 uses the same subject mapping as rule 1, so a subject from
    APPRECIATION_PRONOUNS ("this seems wrong") is scored as appreciation
    instead of dropping to ambiguous.  A former third pass for adjectives with
    a copula child repeated rule 1's checks on the same subject and could
    never classify anything, so it is not kept.
    """
    by_id = {t["id"]: t for t in sent}
    children = defaultdict(list)
    for t in sent:
        children[t["head"]].append(t)

    for t in sent:
        if t["upos"] != "ADJ":
            continue
        lemma = t["lemma"]
        adj_stats[lemma]["total"] += 1

        category = None

        # Rule 1: predicative adjective with an explicit subject of its own.
        own_subjects = [c for c in children.get(t["id"], []) if c["deprel"].startswith("nsubj")]
        if own_subjects:
            category = _appraisal_category_for_subject(own_subjects[0])

        # Rule 2: adjective is the xcomp of a verb (I feel happy / she seems interesting).
        if category is None and t["deprel"] == "xcomp":
            head = by_id.get(t["head"])
            if head and head["upos"].startswith("V"):
                verb_subjects = [
                    c for c in children.get(head["id"], []) if c["deprel"].startswith("nsubj")
                ]
                if verb_subjects:
                    category = _appraisal_category_for_subject(verb_subjects[0])
                elif head["lemma"] in FEEL_VERBS:
                    # no explicit subject; guess by verb lemma
                    category = "affect"

        # Rule 3: nothing matched.
        if category is None:
            category = "ambiguous"

        adj_stats[lemma][category] += 1

# ---------- seed logic ----------
def compute_appraisal_with_seedboost(root, seed_set, seed_pseudocount=2, top_n=None, lang_filter=None):
    """Build one result row per adjective, with raw and seed-boosted proportions.

    Expects the layout ``root/<language>/<discipline>/*.conllu``.  Entries that
    are not directories are ignored at both levels, as are files without the
    ``.conllu`` suffix.

    Args:
        root: path containing language subfolders (e.g., conllu/conllu_file).
        seed_set: iterable of adjective lemmas treated as affect seeds
            (None or empty means no seeds).
        seed_pseudocount: added to the Affect count of a seed lemma before the
            boosted proportions are computed.
        top_n: keep top-N adjectives per (lang, discipline) by total_count if
            provided.
        lang_filter: if set (e.g. "en") only that language folder is processed.

    Returns:
        A list of row dicts with raw counts, proportions over the three
        classified categories (p_*), the seed flag, and the boosted
        proportions (p_*_seed).
    """
    # Hash the seeds once: membership is tested for every lemma, and a plain
    # list would be scanned linearly each time.
    seeds = frozenset(seed_set or ())

    all_rows = []
    for lang in sorted(os.listdir(root)):
        if lang_filter and lang != lang_filter:
            continue
        lpath = os.path.join(root, lang)
        if not os.path.isdir(lpath):
            continue
        for disc in sorted(os.listdir(lpath)):
            dpath = os.path.join(lpath, disc)
            if not os.path.isdir(dpath):
                continue

            adj_stats = defaultdict(lambda: Counter({"total":0, "affect":0, "judgement":0, "appreciation":0, "ambiguous":0}))

            # parse all files in name order: os.listdir() returns entries in
            # arbitrary order, and lemma insertion order decides which of
            # several equally frequent adjectives survive the top_n cut.
            for fname in sorted(os.listdir(dpath)):
                if not fname.endswith(".conllu"):
                    continue
                path = os.path.join(dpath, fname)
                for s in parse_conllu_file(path):
                    classify_adjectives_in_sentence(s, adj_stats)

            # assemble rows
            rows = []
            for lemma, cnts in adj_stats.items():
                tot = cnts["total"]
                aff = cnts["affect"]
                jud = cnts["judgement"]
                app = cnts["appreciation"]
                amb = cnts["ambiguous"]
                co_total = aff + jud + app
                # raw normalized (corpus evidence only)
                if co_total > 0:
                    p_aff = aff / co_total
                    p_jud = jud / co_total
                    p_app = app / co_total
                else:
                    p_aff = p_jud = p_app = 0.0

                # seed boost: add pseudo-count to Affect if lemma in seed set
                is_seed = lemma in seeds
                aff_boost = aff + (seed_pseudocount if is_seed else 0)
                co_total_boost = aff_boost + jud + app
                if co_total_boost > 0:
                    p_aff_boost = aff_boost / co_total_boost
                    p_jud_boost = jud / co_total_boost
                    p_app_boost = app / co_total_boost
                else:
                    p_aff_boost = p_jud_boost = p_app_boost = 0.0

                rows.append({
                    "language": lang,
                    "discipline": disc,
                    "adjective": lemma,
                    "total_count": tot,
                    "affect_count": aff,
                    "judgement_count": jud,
                    "appreciation_count": app,
                    "ambiguous_count": amb,
                    "p_affect": round(p_aff, 4),
                    "p_judgement": round(p_jud, 4),
                    "p_appreciation": round(p_app, 4),
                    "seed_affect": is_seed,
                    "p_affect_seed": round(p_aff_boost, 4),
                    "p_judgement_seed": round(p_jud_boost, 4),
                    "p_appreciation_seed": round(p_app_boost, 4),
                })

            # optionally keep top_n by total_count (stable sort keeps ties in insertion order)
            rows = sorted(rows, key=lambda r: r["total_count"], reverse=True)
            if top_n:
                rows = rows[:top_n]
            all_rows.extend(rows)
    return all_rows

# ---------- run ----------
# Affect seed adjectives (the log message in main() attributes them to Bednarek).
# Multi-word items are single entries ("fed up", "ticked off"); a string literal
# must never be split across physical lines.  The list keeps its original order
# and its one repeated item ("stupefied"), so the reported size stays the same.
AFFECT_SEED_ADJECTIVES = (
    "angst-ridden", "amused", "buoyant", "carried away", "cheerful", "cheery", "chipper",
    "chirpy", "content", "contented", "delighted", "delirious", "ecstatic", "elated",
    "enraptured", "enthused", "euphoric", "exhilarated", "exultant", "feverish", "glad",
    "gladdened", "gleeful", "gratified", "happy", "honoured", "joyful", "joyous", "jubilant",
    "keyed-up", "light-hearted", "manic", "merry", "mirthful", "over-excited", "overjoyed",
    "pleased", "proud", "satisfied", "starry-eyed", "thankful", "thrilled", "tickled",
    "touched", "triumphant", "upbeat", "uplifted",
    "admiring", "adoring", "amorous", "appreciative", "approving", "bedazzled", "besotted",
    "bewitched", "broody", "charmed", "chuffed", "crazy", "doting", "dotty", "enamoured",
    "enchanted", "enthusiastic", "fanatical", "fervent", "fervid", "fulfilled", "grateful",
    "hung up", "indebted", "infatuated", "keen", "lovesick", "love-struck", "mad",
    "obligated", "partial", "rabid", "smitten", "sold", "taken with", "well-disposed",
    "worshipful",
    "agog", "anxious", "bursting", "desirous", "desperate", "edgy", "fevered", "heedful",
    "hungry", "itching", "passionate", "prepared", "psyched up", "stirred up", "uptight",
    "wholehearted", "willing",
    "affronted", "aggrieved", "agonised", "anguished", "blue", "broken-hearted",
    "browned-off", "bruised", "burdened", "chagrined", "cheesed off", "conscience-stricken",
    "crestfallen", "cut up", "deflated", "dejected", "demoralized", "depressed", "desolate",
    "despairing", "despondent", "devastated", "disappointed", "disconsolate", "discontented",
    "discouraged", "disenchanted", "disgusted", "disheartened", "disillusioned", "dismayed",
    "dispirited", "distressed", "doleful", "down", "downcast", "downhearted", "fed up",
    "forlorn", "gloomy", "glum", "gutted",
    "harassed",  # was misspelled "harrassed", which can never equal a lemma
    "heartbroken", "heavy-hearted", "heavy-laden", "homesick", "hopeless", "horrified",
    "huffy", "hurt", "hurting", "inconsolable", "low", "malcontent", "melancholic", "miffed",
    "miserable", "morose", "mournful", "offended", "overwhelmed", "pressured", "rotten",
    "sad", "saddened", "self-pitying", "shattered", "sick", "sickened", "sombre",
    "sorrowful", "sorrowing", "stung", "suicidal", "tired", "tormented", "traumatized",
    "unhappy", "unhopeful", "unsatisfied", "woeful", "wounded", "wretched",
    "addled", "addlepated", "agitated", "alarmed", "apprehensive", "baffled", "bewildered",
    "bothered", "concerned", "disconcerted", "distraught", "disturbed", "excited", "fearful",
    "flummoxed", "flustered", "frantic", "fraught", "het up", "jittery", "jumpy",
    "moonstruck", "mystified", "nervous", "nervy", "nonplussed", "overstrung", "overwrought",
    "perplexed", "perturbed", "punch-drunk", "puzzled", "shaken", "stressed", "stumped",
    "stupefied", "tense", "troubled", "twitchy", "uneasy", "unsettled", "wired", "worked-up",
    "worried", "wrought-up",
    "aggravated", "angry", "annoyed", "antagonistic", "antipathetic", "antsy", "apoplectic",
    "bad-tempered", "bitter", "choleric", "crabbed", "crabby", "cranky", "crazed", "cross",
    "crotchety", "disgruntled", "displeased", "dissatisfied", "embittered", "enraged",
    "exasperated", "fractious", "frenzied", "frustrated", "fuming", "furious", "grouchy",
    "grumpy", "hacked off", "ill-disposed", "impatient", "incensed", "indignant",
    "infuriated", "irate", "ireful", "irked", "irritable", "irritated", "livid", "maddened",
    "narked", "nettled", "outraged", "peeved", "peevish", "petulant", "piqued", "rancorous",
    "ratty", "riled", "seething", "sore", "strung up", "sulky", "teed off", "testy",
    "tetchy", "ticked off", "vengeful", "vexed", "vindictive", "waspish", "wrathful",
    "abashed", "apologetic", "ashamed", "contrite", "deprecatory", "embarrassed",
    "guilt-ridden", "guilty", "humbled", "humiliated", "mortified", "penitent", "regretful",
    "remorseful", "repentant", "rueful", "shamefaced", "sheepish",
    "afraid", "cowed", "frightened", "horror-stricken", "intimidated", "panicked", "panicky",
    "panic-stricken", "petrified", "scared", "terrified", "terrorised", "terror-stricken",
    "unnerved",
    "daunted", "discomfited", "disquieted", "mistrustful", "paranoid", "queasy", "squirmy",
    "unglued",
    "aghast", "amazed", "appalled", "astonished", "astounded", "awe-stricken", "awe-struck",
    "bowled over", "electrified", "flabbergasted", "gob-smacked", "horror-struck",
    "impressed", "incredulous", "knocked out", "overawed", "scandalised", "shocked",
    "staggered", "startled", "stunned", "stupefied", "surprised", "taken aback",
    "thunderstruck",
    "covetous", "envious", "green-eyed", "jealous", "territorial",
    "avid", "bent on", "dying", "eager", "gasping for", "intent",
)


def main():
    """Run the seed-boosted analysis for English and write the rows to OUT_CSV.

    Keeps the top 20 adjectives per (language, discipline) and boosts the
    Affect count of seed adjectives by SEED_PSEUDOCOUNT.  Prints a hint and
    returns without writing a file when no rows were produced.
    """
    seed_set = list(AFFECT_SEED_ADJECTIVES)
    print(f"Loaded {len(seed_set)} seed items from Bednarek (sample): {list(seed_set)[:10]}")

    # Run only for English ("en") and keep top 20 adjectives per (lang, discipline)
    out = compute_appraisal_with_seedboost(CONLLU_ROOT, seed_set, seed_pseudocount=SEED_PSEUDOCOUNT, top_n=20, lang_filter="en")
    if not out:
        print("No results (check your CONLLU_ROOT path and folder structure).")
        return

    # dirname() is "" when OUT_CSV is a bare file name, and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    out_dir = os.path.dirname(OUT_CSV)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(OUT_CSV, "w", encoding="utf-8", newline="") as fh:
        # Column order follows the key order of the first row.
        writer = csv.DictWriter(fh, fieldnames=list(out[0].keys()))
        writer.writeheader()
        writer.writerows(out)
    print("Saved:", OUT_CSV)

# Entry-point guard: run the seed-boost analysis only when this code is executed
# directly (as it is when the cell is run), not when it is imported as a module.
if __name__ == "__main__":
    main()


Loaded 371 seed items from Bednarek (sample): ['angst-ridden', 'amused', 'buoyant', 'carried away', 'cheerful', 'cheery', 'chipper', 'chirpy', 'content', 'contented']
Saved: conllu/adjectives_appraisal_with_seedboost.csv
