In [None]:
from google.colab import drive
import os
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
os.chdir("/content/drive/My Drive/School/CS5322 - NLP/Program 3")

In [None]:
os.listdir()

['result_sample_conviction_test.txt',
 'sample_conviction_test.txt',
 '5322s25prog3.pdf',
 'cs5322s25.py',
 'camper_test.txt',
 'result_camper_test.txt',
 'conviction_test.txt',
 'result_conviction_test.txt',
 'deed_test.txt',
 'result_deed_test.txt',
 'NLP_Program_3.html',
 'conviction_clf.joblib',
 'conviction_vec.joblib',
 'result_deed_YourName.txt',
 'result_conviction_YourName.txt',
 'result_camper_YourName.txt',
 'conviction_pipe.joblib',
 'camper_pipe.joblib',
 'deed_pipe.joblib',
 'conviction_ensemble_pipe.joblib',
 'camper_ensemble_pipe.joblib',
 'deed_ensemble_pipe.joblib',
 'conviction_stack_pipe.joblib',
 'camper_stack_pipe.joblib',
 'deed_stack_pipe_augmented.joblib',
 'camper_stack_pipe_augmented.joblib',
 'conviction_stack_pipe_augmented.joblib',
 'camper_mlp_sbert.joblib',
 'deed_mlp_sbert.joblib',
 'prog3.zip',
 'camper_new_test.txt',
 'result_camper_new_test.txt',
 'conviction_new_test.txt',
 'result_conviction_new_test.txt',
 'deed_new_test.txt',
 'result_deed_new_te

In [None]:
#!jupyter nbconvert --to html NLP_Program_3.ipynb

In [None]:
def load_data(path):
    """
    Reads a file where each line is:
       <sense> <sentence>
    Skips headers, blank lines, and definition lines.
    Returns: (sentences, labels)
    """
    sentences, labels = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # split into at most two parts
            parts = line.split(None, 1)
            if len(parts) != 2:
                continue
            sense, text = parts
            # only accept exactly '1' or '2'
            if sense not in ("1", "2"):
                continue
            sentences.append(text)
            labels.append(int(sense))
    return sentences, labels


In [None]:
# Example:
train_sents, train_labels = load_data("conviction.txt")
print(f"Loaded {len(train_sents)} examples (sense 1: {train_labels.count(1)}, sense 2: {train_labels.count(2)})")

Loaded 40 examples (sense 1: 20, sense 2: 20)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(
    lowercase=True,
    token_pattern=r"\b\w+\b",
    ngram_range=(1,3),
    min_df=1,
)


X_train = vectorizer.fit_transform(train_sents)


In [None]:
from sklearn.naive_bayes import MultinomialNB

clf = MultinomialNB()
clf.fit(X_train, train_labels)


In [None]:
def load_test_sentences(path):
    """
    Reads a file where each line is just a sentence (no sense label).
    Returns a list of those sentences.
    """
    sentences = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            text = line.strip()
            if text:
                sentences.append(text)
    return sentences


In [None]:
# ---- training remains the same ----
train_sents, train_labels = load_data("conviction.txt")
vectorizer.fit(train_sents)
clf.fit(vectorizer.transform(train_sents), train_labels)

# ---- for your sample test ----
test_sents = load_test_sentences("sample_conviction_test.txt")
X_test    = vectorizer.transform(test_sents)
preds     = clf.predict(X_test)

# load gold labels as before
with open("result_sample_conviction_test.txt") as f:
    gold = [int(line.strip()) for line in f if line.strip()]

print("Predictions:", preds.tolist())
print("Gold      :", gold)


Predictions: [2, 2, 1, 2]
Gold      : [2, 2, 1, 2]


In [None]:
probs = clf.predict_proba(X_test)
for sent, pred, p in zip(test_sents, preds, probs):
    print(f"Sentence: {sent!r}")
    print(f"  → Predicted sense: {pred}  (p₁={p[0]:.2f}, p₂={p[1]:.2f})\n")


Sentence: 'His bloody palm print on the bat eventually led to his conviction.'
  → Predicted sense: 2  (p₁=0.39, p₂=0.61)

Sentence: 'A Sioux City woman found guilty of over 50 voter fraud charges is seeking to appeal her conviction.'
  → Predicted sense: 2  (p₁=0.42, p₂=0.58)

Sentence: 'He spoke with conviction and sincerity.'
  → Predicted sense: 1  (p₁=0.51, p₂=0.49)

Sentence: 'What are the grounds for appealing a conviction?'
  → Predicted sense: 2  (p₁=0.47, p₂=0.53)



In [None]:
import joblib   # for saving/loading

# after training, save both vectorizer & classifier:
joblib.dump(vectorizer, "conviction_vec.joblib")
joblib.dump(clf,          "conviction_clf.joblib")

def WSD_Test_conviction(sentences):
    """
    sentences: list[str]
    returns:   list[int] (1 or 2)
    """
    vec = joblib.load("conviction_vec.joblib")
    model = joblib.load("conviction_clf.joblib")
    X   = vec.transform(sentences)
    return model.predict(X).tolist()


In [None]:
import spacy
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
import joblib

# 1. Lemmatizer transformer
nlp = spacy.load("en_core_web_sm", disable=["parser","ner"])
class LemmaTransformer(TransformerMixin):
    def fit(self, X, y=None): return self
    def transform(self, X):
        out = []
        for doc in nlp.pipe(X, batch_size=50):
            lemmas = [tok.lemma_ for tok in doc if tok.is_alpha]
            out.append(" ".join(lemmas))
        return out

# 2. Build the pipeline
pipe = Pipeline([
    ("lemma", LemmaTransformer()),
    ("tfidf", TfidfVectorizer(ngram_range=(1,3), min_df=1, token_pattern=r"\b\w+\b")),
    ("clf",  LinearSVC(max_iter=5000)),
])

# 3. Load conviction training data
train_sents, train_labels = load_data("conviction.txt")

# 4. 5-fold cross-validation
cv_scores = cross_val_score(pipe, train_sents, train_labels, cv=5)
print("Conviction CV scores:", cv_scores)
print(" mean accuracy: %.3f" % cv_scores.mean())

# 5. Fit on full conviction set & save
pipe.fit(train_sents, train_labels)
joblib.dump(pipe, "conviction_pipe.joblib")

# 6. Rerun sample test
def load_test(path):
    return [line.strip() for line in open(path, encoding="utf-8") if line.strip()]

test_sents = load_test("sample_conviction_test.txt")
preds = pipe.predict(test_sents)
print("New Predictions:", preds.tolist())

gold = [int(line.strip()) for line in open("result_sample_conviction_test.txt") if line.strip()]
print(" Gold labels   :", gold)
print("Accuracy:", (preds == gold).mean())


Conviction CV scores: [0.75  1.    0.875 0.625 0.875]
 mean accuracy: 0.825
New Predictions: [2, 2, 1, 2]
 Gold labels   : [2, 2, 1, 2]
Accuracy: 1.0


In [None]:
import joblib
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import cross_val_score

# (Reuse your LemmaTransformer and load_data from before)

def train_and_save(word):
    # 1) Load data
    sents, labels = load_data(f"{word}.txt")

    # 2) Build pipeline
    pipe = Pipeline([
        ("lemma", LemmaTransformer()),
        ("tfidf", TfidfVectorizer(ngram_range=(1,3), min_df=1, token_pattern=r"\b\w+\b")),
        ("clf",  LinearSVC(max_iter=5000)),
    ])

    # 3) Cross-validate
    scores = cross_val_score(pipe, sents, labels, cv=5)
    print(f"{word:>10} CV scores: {scores} | mean={scores.mean():.3f}")

    # 4) Retrain on full data & save
    pipe.fit(sents, labels)
    joblib.dump(pipe, f"{word}_pipe.joblib")
    print(f"→ Saved {word}_pipe.joblib\n")

# Run for each target word
for w in ["conviction", "camper", "deed"]:
    train_and_save(w)


conviction CV scores: [0.75  1.    0.875 0.625 0.875] | mean=0.825
→ Saved conviction_pipe.joblib

    camper CV scores: [0.63636364 0.72727273 0.6        0.8        0.9       ] | mean=0.733
→ Saved camper_pipe.joblib

      deed CV scores: [0.875 0.625 0.875 0.625 0.625] | mean=0.725
→ Saved deed_pipe.joblib



In [None]:
import joblib

def WSD_Test_camper(sentences):
    return joblib.load("camper_pipe.joblib").predict(sentences).tolist()

def WSD_Test_conviction(sentences):
    return joblib.load("conviction_pipe.joblib").predict(sentences).tolist()

def WSD_Test_deed(sentences):
    return joblib.load("deed_pipe.joblib").predict(sentences).tolist()


In [None]:
def load_test(path):
    return [line.strip() for line in open(path, encoding="utf-8") if line.strip()]

for w in ["camper","conviction","deed"]:
    sents = load_test(f"{w}_test.txt")
    preds = globals()[f"WSD_Test_{w}"](sents)
    with open(f"result_{w}_YourName.txt","w") as out:
        out.write("\n".join(map(str, preds)))


In [None]:
# ensure you’ve already defined load_data (the sense-aware loader from before)
# and that LemmaTransformer etc. are importable in this notebook.

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib

words = ["conviction", "camper", "deed"]

for word in words:
    # 1) Load the full labeled data
    sents, labels = load_data(f"{word}.txt")

    # 2) Split off an unseen test set (30% of the examples)
    X_train, X_test, y_train, y_test = train_test_split(
        sents, labels,
        test_size=0.30,
        stratify=labels,
        random_state=42
    )

    # 3) Load your saved pipeline
    pipe = joblib.load(f"{word}_pipe.joblib")

    # 4) Predict on the hold-out
    preds = pipe.predict(X_test)

    # 5) Report
    print(f"\n=== {word.upper()} ===")
    print(f" Test size: {len(X_test)} sentences")
    print(" Accuracy :", accuracy_score(y_test, preds))
    print("\nClassification Report:")
    print(classification_report(y_test, preds, digits=3))



=== CONVICTION ===
 Test size: 12 sentences
 Accuracy : 1.0

Classification Report:
              precision    recall  f1-score   support

           1      1.000     1.000     1.000         6
           2      1.000     1.000     1.000         6

    accuracy                          1.000        12
   macro avg      1.000     1.000     1.000        12
weighted avg      1.000     1.000     1.000        12


=== CAMPER ===
 Test size: 16 sentences
 Accuracy : 1.0

Classification Report:
              precision    recall  f1-score   support

           1      1.000     1.000     1.000         8
           2      1.000     1.000     1.000         8

    accuracy                          1.000        16
   macro avg      1.000     1.000     1.000        16
weighted avg      1.000     1.000     1.000        16


=== DEED ===
 Test size: 12 sentences
 Accuracy : 1.0

Classification Report:
              precision    recall  f1-score   support

           1      1.000     1.000     1.000   

In [None]:
for w in ["camper","conviction","deed"]:
    sents = load_test(f"{w}_test.txt")
    preds = globals()[f"WSD_Test_{w}"](sents)
    print(w, "accuracy:", sum(int(p)==g for p,g in zip(preds, open(f"result_{w}_test.txt"))) / len(preds))


camper accuracy: 0.0
conviction accuracy: 0.0
deed accuracy: 0.0


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import cross_val_score
import joblib

def train_and_save(word):
    # 1) load the sense‐labeled data
    sents, labels = load_data(f"{word}.txt")

    # 2) build the exact same pipeline you used for conviction
    pipe = Pipeline([
        ("lemma" , LemmaTransformer()),
        ("tfidf" , TfidfVectorizer(ngram_range=(1,3), min_df=1, token_pattern=r"\b\w+\b")),
        ("clf"   , LinearSVC(max_iter=5000)),
    ])

    # 3) quick CV check
    scores = cross_val_score(pipe, sents, labels, cv=5)
    print(f"{word:>10} CV acc: {scores.mean():.3f}  →  {scores}")

    # 4) retrain on full data & save
    pipe.fit(sents, labels)
    joblib.dump(pipe, f"{word}_pipe.joblib")
    print(f"Saved pipeline to {word}_pipe.joblib\n")

# Train all three
for w in ["conviction","camper","deed"]:
    train_and_save(w)


conviction CV acc: 0.825  →  [0.75  1.    0.875 0.625 0.875]
Saved pipeline to conviction_pipe.joblib

    camper CV acc: 0.733  →  [0.63636364 0.72727273 0.6        0.8        0.9       ]
Saved pipeline to camper_pipe.joblib

      deed CV acc: 0.725  →  [0.875 0.625 0.875 0.625 0.625]
Saved pipeline to deed_pipe.joblib



In [None]:
def load_gold(path):
    labels = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            tok = line.strip()
            # only accept lines that are exactly “1” or “2”
            if tok in ("1","2"):
                labels.append(int(tok))
    return labels

# then in your loop:
gold = load_gold(f"result_{word}_test.txt")


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer

pipe = Pipeline([
    ("lemma" , LemmaTransformer()),               # spaCy lemmatizer
    ("tfidf" , TfidfVectorizer(token_pattern=r"\b\w+\b")),
    ("clf"   , LinearSVC()),
])

param_grid = {
    "tfidf__ngram_range": [(1,2), (1,3), (2,3)],
    "tfidf__min_df": [1,2],
    "clf__C": [0.1, 1, 10],
}

search = GridSearchCV(pipe, param_grid, cv=5, n_jobs=1, scoring="accuracy")
search.fit(train_sents, train_labels)

print("Best params:", search.best_params_)
print("Best CV:", search.best_score_)
best_pipe = search.best_estimator_


Best params: {'clf__C': 0.1, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 3)}
Best CV: 0.825


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
import joblib

# reuse your LemmaTransformer, param_grid, and pipe template
for word in ["conviction", "camper", "deed"]:
    sents, labels = load_data(f"{word}.txt")
    pipe = Pipeline([
        ("lemma", LemmaTransformer()),
        ("tfidf", TfidfVectorizer(token_pattern=r"\b\w+\b")),
        ("clf"  , LinearSVC()),
    ])
    search = GridSearchCV(pipe, param_grid, cv=5, n_jobs=1, scoring="accuracy")
    search.fit(sents, labels)
    print(f"{word.upper()} best params: {search.best_params_}, CV: {search.best_score_:.3f}")
    joblib.dump(search.best_estimator_, f"{word}_pipe.joblib")


CONVICTION best params: {'clf__C': 0.1, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 3)}, CV: 0.825
CAMPER best params: {'clf__C': 1, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 3)}, CV: 0.733
DEED best params: {'clf__C': 0.1, 'tfidf__min_df': 1, 'tfidf__ngram_range': (2, 3)}, CV: 0.775


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import ComplementNB
from sklearn.ensemble import RandomForestClassifier

extended_param_grid = {
    # TF–IDF variants
    "tfidf__analyzer":       ["word", "char_wb"],
    "tfidf__ngram_range":    [(1,2), (1,3), (2,3), (3,5)],
    "tfidf__min_df":         [1,2],
    # Classifier choices
    "clf": [
      LinearSVC(),
      LogisticRegression(max_iter=5000),
      ComplementNB(),
      RandomForestClassifier(n_estimators=100)
    ],
    # If you pick a classifier with C
    "clf__C": [0.1, 1, 10],         # will be ignored by NB/RF
}


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import ComplementNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

pipe = Pipeline([
    ("lemma", LemmaTransformer()),
    ("tfidf", TfidfVectorizer()),      # configure analyzer, ngram_range, etc.
    ("clf",   LinearSVC()),             # placeholder — will be overridden by GridSearch
])

param_grids = [
    # 1) LinearSVC grid
    {
      "tfidf__analyzer":       ["word", "char_wb"],
      "tfidf__ngram_range":    [(1,2), (1,3), (2,3)],
      "tfidf__min_df":         [1,2],
      "clf":                   [LinearSVC()],
      "clf__C":                [0.1, 1, 10],
    },
    # 2) LogisticRegression grid
    {
      "tfidf__analyzer":       ["word", "char_wb"],
      "tfidf__ngram_range":    [(1,2), (1,3), (2,3)],
      "tfidf__min_df":         [1,2],
      "clf":                   [LogisticRegression(max_iter=5000)],
      "clf__C":                [0.1, 1, 10],
    },
    # 3) ComplementNB grid
    {
      "tfidf__analyzer":       ["word", "char_wb"],
      "tfidf__ngram_range":    [(1,2), (1,3), (2,3)],
      "tfidf__min_df":         [1,2],
      "clf":                   [ComplementNB()],
      "clf__alpha":            [0.1, 1, 10],
    },
    # 4) RandomForest grid
    {
      "tfidf__analyzer":       ["word", "char_wb"],
      "tfidf__ngram_range":    [(1,2), (1,3), (2,3)],
      "tfidf__min_df":         [1,2],
      "clf":                   [RandomForestClassifier()],
      "clf__n_estimators":     [100, 200],
    },
]

search = GridSearchCV(
    pipe,
    param_grids,    # notice: a *list* of dicts, not a single dict
    cv=5,
    n_jobs=1,       # or pre-lemmatize to safely use n_jobs=-1
    scoring="accuracy"
)
search.fit(train_sents, train_labels)

print("Best params:", search.best_params_)
print("Best CV   :", search.best_score_)
best_pipe = search.best_estimator_


Best params: {'clf': LinearSVC(), 'clf__C': 10, 'tfidf__analyzer': 'word', 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 3)}
Best CV   : 0.85


In [None]:
import joblib

for word in ["conviction","camper","deed"]:
    # 1) load the appropriate training data
    sents, labels = load_data(f"{word}.txt")

    # 2) attach it to the same pipeline & param_grids
    search = GridSearchCV(
        pipe,
        param_grids,    # list of dicts, as we defined
        cv=5,
        n_jobs=1,       # or -1 if you pre‐lemmatize
        scoring="accuracy"
    )

    # 3) fit & report
    search.fit(sents, labels)
    print(f"\n=== {word.upper()} ===")
    print(" Best params:", search.best_params_)
    print(" Best CV   :", search.best_score_)

    # 4) save the tuned pipeline
    joblib.dump(search.best_estimator_, f"{word}_pipe.joblib")



=== CONVICTION ===
 Best params: {'clf': LinearSVC(), 'clf__C': 10, 'tfidf__analyzer': 'word', 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 3)}
 Best CV   : 0.85

=== CAMPER ===
 Best params: {'clf': ComplementNB(), 'clf__alpha': 0.1, 'tfidf__analyzer': 'char_wb', 'tfidf__min_df': 1, 'tfidf__ngram_range': (2, 3)}
 Best CV   : 0.809090909090909

=== DEED ===
 Best params: {'clf': RandomForestClassifier(), 'clf__n_estimators': 100, 'tfidf__analyzer': 'word', 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 3)}
 Best CV   : 0.775


In [None]:
import nltk
nltk.download("wordnet")
nltk.download("omw-1.4")


#––– Imports –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
import re
import joblib
import numpy as np
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import cross_val_score
from sentence_transformers import SentenceTransformer
import spacy
from nltk.corpus import wordnet as wn

#––– Helpers –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
def load_data(path):
    """Load sense-labeled <sense> <sentence> files."""
    sents, labs = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(None,1)
            if len(parts)==2 and parts[0] in ("1","2"):
                labs.append(int(parts[0]))
                sents.append(parts[1])
    return sents, labs

class LemmaTransformer(TransformerMixin):
    """SpaCy lemmatizer."""
    def __init__(self):
        self.nlp = spacy.load("en_core_web_sm", disable=["parser","ner"])
    def fit(self, X, y=None): return self
    def transform(self, X):
        out = []
        for doc in self.nlp.pipe(X, batch_size=50):
            out.append(" ".join(tok.lemma_ for tok in doc if tok.is_alpha))
        return out

class SBERTEmbed(TransformerMixin):
    """Pre‐compute SBERT sentence embeddings."""
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.embedder = SentenceTransformer(model_name)
    def fit(self, X, y=None): return self
    def transform(self, X):
        return self.embedder.encode(X, convert_to_numpy=True)

class WordNetOverlap(TransformerMixin):
    """Count overlap with WordNet glosses of the two noun senses."""
    def __init__(self, word):
        syns = wn.synsets(word, pos=wn.NOUN)
        self.g1 = set(syns[0].definition().lower().split()) if len(syns)>0 else set()
        self.g2 = set(syns[1].definition().lower().split()) if len(syns)>1 else set()
    def fit(self, X, y=None): return self
    def transform(self, X):
        feats = []
        for sent in X:
            ws = set(w.lower() for w in re.findall(r"\b\w+\b", sent))
            feats.append([len(ws & self.g1), len(ws & self.g2)])
        return np.array(feats)

#––– Ensemble Training Loop –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
words = ["conviction","camper","deed"]
for word in words:
    # 1) Load data
    sents, labels = load_data(f"{word}.txt")

    # 2) Build Pipeline A: Lemma + TF–IDF (word+char) + SVM
    pipe_a = Pipeline([
        ("lemma", LemmaTransformer()),
        ("feats", FeatureUnion([
            ("tf_w", TfidfVectorizer(analyzer="word",    ngram_range=(1,3), min_df=1)),
            ("tf_c", TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), min_df=1)),
            ("wnov", WordNetOverlap(word)),
        ])),
        ("clf" , LinearSVC(C=10, max_iter=5000)),
    ])

    # 3) Build Pipeline B: SBERT embeddings → LogisticRegression
    pipe_b = Pipeline([
        ("embed", SBERTEmbed()),
        ("clf"  , LogisticRegression(C=1, max_iter=5000)),
    ])

    # 4) Ensemble via hard voting
    eclf = VotingClassifier(
        estimators=[("svm", pipe_a), ("lr", pipe_b)],
        voting="hard"
    )

    # 5) Cross‐validate the ensemble
    scores = cross_val_score(eclf, sents, labels, cv=5, n_jobs=1)
    print(f"{word.upper():>10} Ensemble CV mean: {scores.mean():.3f} | folds: {scores}")

    # 6) Fit on full data & save
    eclf.fit(sents, labels)
    joblib.dump(eclf, f"{word}_ensemble_pipe.joblib")
    print(f"→ Saved ensemble to {word}_ensemble_pipe.joblib\n")


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


CONVICTION Ensemble CV mean: 0.825 | folds: [0.5   1.    0.75  1.    0.875]
→ Saved ensemble to conviction_ensemble_pipe.joblib

    CAMPER Ensemble CV mean: 0.751 | folds: [0.72727273 0.72727273 0.7        0.7        0.9       ]
→ Saved ensemble to camper_ensemble_pipe.joblib

      DEED Ensemble CV mean: 0.700 | folds: [0.75  0.625 0.875 0.625 0.625]
→ Saved ensemble to deed_ensemble_pipe.joblib



In [None]:
from nltk import download
from nltk.corpus import wordnet as wn
download("wordnet"); download("omw-1.4")

class WordNetOverlap(TransformerMixin):
    def __init__(self, word):
        syns = wn.synsets(word, pos=wn.NOUN)
        self.g1 = set(syns[0].definition().lower().split()) if syns else set()
        self.g2 = set(syns[1].definition().lower().split()) if len(syns)>1 else set()
    def fit(self, X, y=None): return self
    def transform(self, X):
        feats = []
        for sent in X:
            ws = set(re.findall(r"\b\w+\b", sent.lower()))
            feats.append([len(ws & self.g1), len(ws & self.g2)])
        return np.array(feats)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
class WindowFeatures(TransformerMixin):
    def __init__(self, target):
        self.target = target.lower()
    def fit(self, X, y=None): return self
    def transform(self, X):
        feat_list = []
        for sent in X:
            toks = re.findall(r"\b\w+\b", sent.lower())
            if self.target in toks:
                i = toks.index(self.target)
                ctx = toks[max(i-2,0):i] + toks[i+1:i+3]
            else:
                ctx = []
            # Example vocabulary—tune on training data
            vocab = ["camp", "park", "kitchen", "drive", "lake", "bus",
                     "good", "volunteer", "property", "trust", "closing"]
            feat_list.append([int(v in ctx) for v in vocab])
        return np.array(feat_list)


In [None]:
# ─── Imports & Downloads ───────────────────────────────────────────────────────────────
import re
import joblib
import numpy as np
import nltk
nltk.download("wordnet")
nltk.download("omw-1.4")
from nltk.corpus import wordnet as wn

import spacy
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import ComplementNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import cross_val_score

# ─── Data Loader ───────────────────────────────────────────────────────────────────────
def load_data(path):
    """Load sense-labeled files where each line is: <sense> <sentence>."""
    sents, labs = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(None, 1)
            if len(parts) == 2 and parts[0] in ("1","2"):
                labs.append(int(parts[0]))
                sents.append(parts[1])
    return sents, labs

# ─── Transformers ─────────────────────────────────────────────────────────────────────
class LemmaTransformer(TransformerMixin):
    """SpaCy lemmatizer."""
    def __init__(self):
        self.nlp = spacy.load("en_core_web_sm", disable=["parser","ner"])
    def fit(self, X, y=None): return self
    def transform(self, X):
        out = []
        for doc in self.nlp.pipe(X, batch_size=50):
            out.append(" ".join(tok.lemma_ for tok in doc if tok.is_alpha))
        return out

class WordNetOverlap(TransformerMixin):
    """Counts overlap with the two noun glosses for a target word."""
    def __init__(self, word):
        syns = wn.synsets(word, pos=wn.NOUN)
        self.g1 = set(syns[0].definition().lower().split()) if len(syns)>0 else set()
        self.g2 = set(syns[1].definition().lower().split()) if len(syns)>1 else set()
    def fit(self, X, y=None): return self
    def transform(self, X):
        feats = []
        for sent in X:
            ws = set(re.findall(r"\b\w+\b", sent.lower()))
            feats.append([len(ws & self.g1), len(ws & self.g2)])
        return np.array(feats)

class WindowFeatures(TransformerMixin):
    """Binary flags for a small context window around the target word."""
    def __init__(self, target):
        self.target = target.lower()
        # you can tune this vocabulary based on your training data
        self.vocab = ["camp","park","kitchen","drive","lake","bus",
                      "good","volunteer","property","trust","closing"]
    def fit(self, X, y=None): return self
    def transform(self, X):
        rows = []
        for sent in X:
            toks = re.findall(r"\b\w+\b", sent.lower())
            if self.target in toks:
                i = toks.index(self.target)
                ctx = toks[max(i-2,0):i] + toks[i+1:i+3]
            else:
                ctx = []
            rows.append([int(v in ctx) for v in self.vocab])
        return np.array(rows)

# ─── Training & Saving Ensembles ───────────────────────────────────────────────────────
for word in ["conviction","camper","deed"]:
    # 1) load data
    sents, labels = load_data(f"{word}.txt")

    # 2) shared FeatureUnion
    union = FeatureUnion([
        ("tf_w",  TfidfVectorizer(analyzer="word",    ngram_range=(1,3), min_df=1)),
        ("tf_c",  TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), min_df=1)),
        ("wnov",  WordNetOverlap(word)),
        ("wind",  WindowFeatures(word)),
    ])

    # 3) base learner A: SVM
    pipe_svm = Pipeline([
        ("lemma", LemmaTransformer()),
        ("feat",  union),
        ("clf",   LinearSVC(C=1, max_iter=5000)),
    ])

    # 4) base learner B: Complement Naive Bayes
    pipe_nb = Pipeline([
        ("lemma", LemmaTransformer()),
        ("feat",  union),
        ("clf",   ComplementNB(alpha=0.1)),
    ])

    # 5) stacking classifier
    stack = StackingClassifier(
        estimators=[("svm", pipe_svm), ("nb", pipe_nb)],
        final_estimator=LogisticRegression(max_iter=5000),
        cv=5,
        n_jobs=1
    )

    # 6) cross-validate
    scores = cross_val_score(stack, sents, labels, cv=5)
    print(f"{word.upper():>10} stacking CV mean: {scores.mean():.3f} | folds: {scores}")

    # 7) retrain on all data & save
    stack.fit(sents, labels)
    joblib.dump(stack, f"{word}_stack_pipe.joblib")
    print(f"→ Saved ensemble to {word}_stack_pipe.joblib\n")


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


CONVICTION stacking CV mean: 0.875 | folds: [0.75  1.    0.875 0.75  1.   ]
→ Saved ensemble to conviction_stack_pipe.joblib

    CAMPER stacking CV mean: 0.809 | folds: [0.81818182 0.72727273 0.7        0.9        0.9       ]
→ Saved ensemble to camper_stack_pipe.joblib

      DEED stacking CV mean: 0.700 | folds: [0.875 0.75  0.75  0.5   0.625]
→ Saved ensemble to deed_stack_pipe.joblib



In [None]:
import nltk

# ─── Download needed NLTK resources ───────────────────────────────────────────────
nltk.download("punkt")                        # sentence / word tokenizer
nltk.download("punkt_tab")
nltk.download("averaged_perceptron_tagger")   # POS tagger
nltk.download("wordnet")                      # WordNet corpus
nltk.download("omw-1.4")                      # WordNet multilingual data
nltk.download('averaged_perceptron_tagger_eng')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [None]:
import random
from nltk import pos_tag, word_tokenize, punkt
from nltk.corpus import wordnet as wn

def synonym_augment(sentences, labels, n_aug=2):
    """Generate n_aug new sentences per original by swapping one noun with a synonym."""
    new_sents, new_labels = [], []
    for sent, lab in zip(sentences, labels):
        tokens = word_tokenize(sent)
        tagged  = pos_tag(tokens)
        nouns   = [i for i,(w,tag) in enumerate(tagged) if tag.startswith("NN")]
        if not nouns:
            continue
        for _ in range(n_aug):
            i = random.choice(nouns)
            synsets = wn.synsets(tokens[i], pos=wn.NOUN)
            # pick a random synonym lemma
            lemmas = [l.name().replace("_"," ") for syn in synsets for l in syn.lemmas() if l.name().lower()!=tokens[i].lower()]
            if not lemmas:
                continue
            new_tok = random.choice(lemmas)
            aug = tokens.copy()
            aug[i] = new_tok
            new_sents.append(" ".join(aug))
            new_labels.append(lab)
    return sentences + new_sents, labels + new_labels

# Usage before training deed:
train_sents, train_labels = load_data("deed.txt")
aug_sents, aug_labels = synonym_augment(train_sents, train_labels, n_aug=3)
print("Original:", len(train_sents), "Augmented:", len(aug_sents))


Original: 40 Augmented: 143


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

class GlossSimilarity(TransformerMixin):
    """Encodes the two glosses and returns their cosine sims to each sentence."""
    def __init__(self, word, model_name="all-MiniLM-L6-v2"):
        self.embedder = SentenceTransformer(model_name)
        syns = wn.synsets(word, pos=wn.NOUN)[:2]
        glosses = [syn.definition() for syn in syns]
        self.g_embs = self.embedder.encode(glosses, convert_to_numpy=True)
    def fit(self, X, y=None): return self
    def transform(self, X):
        s_embs = self.embedder.encode(X, convert_to_numpy=True)
        feats  = []
        for emb in s_embs:
            sims = cosine_similarity(emb.reshape(1,-1), self.g_embs).flatten().tolist()
            feats.append(sims)
        return np.array(feats)


In [None]:
union = FeatureUnion([
  ("tf_w", TfidfVectorizer(analyzer="word", ngram_range=(1,3), min_df=1)),
  ("tf_c", TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), min_df=1)),
  ("wnov", WordNetOverlap("deed")),
  ("wind", WindowFeatures("deed")),
  ("glos", GlossSimilarity("deed")),
])
pipe_deed = Pipeline([
  ("lemma", LemmaTransformer()),
  ("feat" , union),
  ("clf"  , RandomForestClassifier(n_estimators=200, random_state=42)),
])
scores = cross_val_score(pipe_deed, aug_sents, aug_labels, cv=5)
print("Deed w/ gloss & augment CV:", scores.mean(), scores)


Deed w/ gloss & augment CV: 0.993103448275862 [1.         0.96551724 1.         1.         1.        ]


In [None]:
# assuming `stack` is your final fitted StackingClassifier for "deed"
stack.fit(aug_sents, aug_labels)
joblib.dump(stack, "deed_stack_pipe.joblib")


['deed_stack_pipe.joblib']

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import cross_val_score
import joblib

words = ["deed", "camper", "conviction"]

for word in words:
    # 1) load and augment
    train_sents, train_labels = load_data(f"{word}.txt")
    aug_sents, aug_labels   = synonym_augment(train_sents, train_labels, n_aug=3)
    print(f"{word.upper():} Original: {len(train_sents)}, Augmented: {len(aug_sents)}")

    # 2) build FeatureUnion for this word
    union = FeatureUnion([
      ("tf_w",  TfidfVectorizer(analyzer="word",    ngram_range=(1,3), min_df=1)),
      ("tf_c",  TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), min_df=1)),
      ("wnov",  WordNetOverlap(word)),
      ("wind",  WindowFeatures(word)),
      ("glos",  GlossSimilarity(word)),
    ])

    # 3) pipeline
    pipe = Pipeline([
      ("lemma", LemmaTransformer()),
      ("feat" , union),
      ("clf"  , RandomForestClassifier(n_estimators=200, random_state=42)),
    ])

    # 4) CV evaluation
    scores = cross_val_score(pipe, aug_sents, aug_labels, cv=5)
    print(f"{word.upper()} w/ gloss & augment CV: {scores.mean():.3f}  folds: {scores}")

    # 5) train on full augmented data & save
    pipe.fit(aug_sents, aug_labels)
    joblib.dump(pipe, f"{word}_stack_pipe_augmented.joblib")
    print(f"→ Saved {word}_stack_pipe_augmented.joblib\n")


DEED Original: 40, Augmented: 153
DEED w/ gloss & augment CV: 1.000  folds: [1. 1. 1. 1. 1.]
→ Saved deed_stack_pipe_augmented.joblib

CAMPER Original: 52, Augmented: 199
CAMPER w/ gloss & augment CV: 0.995  folds: [1.    1.    1.    0.975 1.   ]
→ Saved camper_stack_pipe_augmented.joblib

CONVICTION Original: 40, Augmented: 149
CONVICTION w/ gloss & augment CV: 1.000  folds: [1. 1. 1. 1. 1.]
→ Saved conviction_stack_pipe_augmented.joblib



In [None]:
def load_test_sentences(path):
    with open(path, encoding="utf-8") as f:
        return [
            line.strip() for line in f
            if line.strip() and not line.lstrip().startswith("#")
        ]

def load_gold_labels(path):
    labs = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            tok = line.strip()
            if tok in ("1","2"):           # only accept numeric labels
                labs.append(int(tok))
    return labs

# Quick length‐check
for w in ["camper","conviction","deed"]:
    sents = load_test_sentences(f"{w}_test.txt")
    gold  = load_gold_labels(    f"result_{w}_test.txt")
    print(f"{w}: #sents={len(sents)}, #labels={len(gold)}")

# Then your evaluation:
from sklearn.metrics import accuracy_score, classification_report
import joblib

for w in ["camper","conviction","deed"]:
    sents = load_test_sentences(f"{w}_test.txt")
    gold  = load_gold_labels(    f"result_{w}_test.txt")
    pipe  = joblib.load(f"{w}_stack_pipe.joblib")
    preds = pipe.predict(sents)
    print(f"\n=== {w.upper()} TEST ===")
    print("Accuracy:", f"{accuracy_score(gold,preds):.2%}")
    print(classification_report(gold,preds, digits=3))


camper: #sents=10, #labels=10
conviction: #sents=10, #labels=10
deed: #sents=10, #labels=10

=== CAMPER TEST ===
Accuracy: 70.00%
              precision    recall  f1-score   support

           1      0.667     0.800     0.727         5
           2      0.750     0.600     0.667         5

    accuracy                          0.700        10
   macro avg      0.708     0.700     0.697        10
weighted avg      0.708     0.700     0.697        10


=== CONVICTION TEST ===
Accuracy: 70.00%
              precision    recall  f1-score   support

           1      0.750     0.600     0.667         5
           2      0.667     0.800     0.727         5

    accuracy                          0.700        10
   macro avg      0.708     0.700     0.697        10
weighted avg      0.708     0.700     0.697        10


=== DEED TEST ===
Accuracy: 80.00%
              precision    recall  f1-score   support

           1      0.714     1.000     0.833         5
           2      1.000     0

In [None]:
def load_test_sentences(path):
    with open(path, encoding="utf-8") as f:
        return [
            line.strip() for line in f
            if line.strip() and not line.lstrip().startswith("#")
        ]

def load_gold_labels(path):
    labs = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            tok = line.strip()
            if tok in ("1","2"):           # only accept numeric labels
                labs.append(int(tok))
    return labs

# Quick length‐check
for w in ["camper","conviction","deed"]:
    sents = load_test_sentences(f"{w}_test.txt")
    gold  = load_gold_labels(    f"result_{w}_test.txt")
    print(f"{w}: #sents={len(sents)}, #labels={len(gold)}")

# Then your evaluation:
from sklearn.metrics import accuracy_score, classification_report
import joblib

for w in ["camper","conviction","deed"]:
    sents = load_test_sentences(f"{w}_test.txt")
    gold  = load_gold_labels(    f"result_{w}_test.txt")
    pipe  = joblib.load(f"{w}_stack_pipe_augmented.joblib")
    preds = pipe.predict(sents)
    print(f"\n=== {w.upper()} TEST ===")
    print("Accuracy:", f"{accuracy_score(gold,preds):.2%}")
    print(classification_report(gold,preds, digits=3))


camper: #sents=10, #labels=10
conviction: #sents=10, #labels=10
deed: #sents=10, #labels=10

=== CAMPER TEST ===
Accuracy: 30.00%
              precision    recall  f1-score   support

           1      0.250     0.200     0.222         5
           2      0.333     0.400     0.364         5

    accuracy                          0.300        10
   macro avg      0.292     0.300     0.293        10
weighted avg      0.292     0.300     0.293        10


=== CONVICTION TEST ===
Accuracy: 70.00%
              precision    recall  f1-score   support

           1      1.000     0.400     0.571         5
           2      0.625     1.000     0.769         5

    accuracy                          0.700        10
   macro avg      0.812     0.700     0.670        10
weighted avg      0.812     0.700     0.670        10


=== DEED TEST ===
Accuracy: 90.00%
              precision    recall  f1-score   support

           1      0.833     1.000     0.909         5
           2      1.000     0

In [None]:
import joblib
from sklearn.metrics import accuracy_score, classification_report

def load_test_sentences(path):
    return [L.strip() for L in open(path, encoding="utf-8") if L.strip() and not L.lstrip().startswith("#")]

def load_gold_labels(path):
    return [int(L.strip()) for L in open(path, encoding="utf-8") if L.strip() in ("1","2")]

test_sents = load_test_sentences("camper_test.txt")
gold       = load_gold_labels(    "result_camper_test.txt")

# load original (non-augmented) stacked pipeline
pipe_camper = joblib.load("camper_stack_pipe.joblib")

preds = pipe_camper.predict(test_sents)
print("CAMPER non-augmented TEST Accuracy:", f"{accuracy_score(gold, preds):.2%}")
print(classification_report(gold, preds, digits=3))


CAMPER non-augmented TEST Accuracy: 70.00%
              precision    recall  f1-score   support

           1      0.667     0.800     0.727         5
           2      0.750     0.600     0.667         5

    accuracy                          0.700        10
   macro avg      0.708     0.700     0.697        10
weighted avg      0.708     0.700     0.697        10



In [None]:
# ─── Imports ─────────────────────────────────────────────────────────────────────────
from sentence_transformers import SentenceTransformer
import numpy as np
import joblib
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, classification_report

# ─── Load & Embed Helpers ──────────────────────────────────────────────────────────
embedder = SentenceTransformer("all-MiniLM-L6-v2")

def load_data(path):
    sents, labs = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(None,1)
            if len(parts)==2 and parts[0] in ("1","2"):
                labs.append(int(parts[0]))
                sents.append(parts[1])
    return sents, labs

def load_test_sents(path):
    return [L.strip() for L in open(path,encoding="utf-8") if L.strip() and not L.startswith("#")]

def load_gold(path):
    return [int(L.strip()) for L in open(path,encoding="utf-8") if L.strip() in ("1","2")]

# ─── Main Loop ─────────────────────────────────────────────────────────────────────
words = ["camper","conviction","deed"]
test_size = 10  # we know our synthetic tests each have 10 sentences

for word in words:
    print(f"\n=== {word.upper()} ===")
    # 1) Load (optionally augmented) data
    sents, labs = load_data(f"{word}.txt")
    # If you want augmentation, uncomment the next two lines:
    # sents, labs = synonym_augment(sents, labs, n_aug=2)
    print(f"Training on {len(sents)} examples")

    # 2) Embed all training sentences
    X = embedder.encode(sents, convert_to_numpy=True)
    y = np.array(labs)

    # 3) Hyperparameter search for MLP hidden layer size
    param_grid = {
        "hidden_layer_sizes": [(50,), (100,), (50,50)],
        "alpha": [1e-4, 1e-3],
    }
    grid = GridSearchCV(MLPClassifier(max_iter=1000, random_state=42),
                        param_grid, cv=5, n_jobs=1, scoring="accuracy")
    grid.fit(X, y)
    print(" Best MLP params:", grid.best_params_, "CV:", f"{grid.best_score_:.3f}")

    # 4) Final MLP on full data, save
    mlp = grid.best_estimator_
    mlp.fit(X, y)
    joblib.dump(mlp, f"{word}_mlp_sbert.joblib")

    # 5) Evaluation on synthetic test
    test_sents = load_test_sents(f"{word}_test.txt")
    test_gold  = load_gold(   f"result_{word}_test.txt")
    X_test     = embedder.encode(test_sents, convert_to_numpy=True)
    preds      = mlp.predict(X_test)
    acc        = accuracy_score(test_gold, preds)
    print(f" Synthetic TEST accuracy: {acc:.2%}")
    print(classification_report(test_gold, preds, digits=3))



=== CAMPER ===
Training on 52 examples
 Best MLP params: {'alpha': 0.0001, 'hidden_layer_sizes': (50,)} CV: 0.831
 Synthetic TEST accuracy: 100.00%
              precision    recall  f1-score   support

           1      1.000     1.000     1.000         5
           2      1.000     1.000     1.000         5

    accuracy                          1.000        10
   macro avg      1.000     1.000     1.000        10
weighted avg      1.000     1.000     1.000        10


=== CONVICTION ===
Training on 40 examples
 Best MLP params: {'alpha': 0.0001, 'hidden_layer_sizes': (50,)} CV: 0.975
 Synthetic TEST accuracy: 90.00%
              precision    recall  f1-score   support

           1      1.000     0.800     0.889         5
           2      0.833     1.000     0.909         5

    accuracy                          0.900        10
   macro avg      0.917     0.900     0.899        10
weighted avg      0.917     0.900     0.899        10


=== DEED ===
Training on 40 examples
 Best M

In [None]:
import joblib
from sklearn.metrics import accuracy_score, classification_report

def load_test_sentences(path):
    """Read unlabeled test sentences, skipping blank/comment lines."""
    with open(path, encoding="utf-8") as f:
        return [l.strip() for l in f if l.strip() and not l.lstrip().startswith("#")]

def load_gold_labels(path):
    """Read gold labels (1 or 2) from a file, skipping any non-numeric lines."""
    labs = []
    with open(path, encoding="utf-8") as f:
        for l in f:
            tok = l.strip()
            if tok in ("1","2"):
                labs.append(int(tok))
    return labs

def evaluate_model(model_path, test_txt, gold_txt):
    """
    Loads a model and test files, runs predict, and prints metrics.

    :param model_path: str path to your .joblib model
    :param test_txt:    str path to the test sentences file
    :param gold_txt:    str path to the gold labels file
    """
    # Load model
    model = joblib.load(model_path)

    # Load data
    sents = load_test_sentences(test_txt)
    gold  = load_gold_labels(gold_txt)

    # Predict
    preds = model.predict(sents)

    # Report
    print(f"\n=== Evaluating {model_path} on {test_txt} ===")
    print(f"Test size: {len(sents)} sentences")
    print(f"Accuracy : {accuracy_score(gold, preds):.2%}\n")
    print(classification_report(gold, preds, digits=3))


if __name__ == "__main__":
    # Example usage on your “new” synthetic tests:
    evaluate_model("camper_mlp_sbert.joblib",      "camper_new_test.txt",      "result_camper_new_test.txt")
    evaluate_model("conviction_mlp_sbert.joblib", "conviction_new_test.txt", "result_conviction_new_test.txt")
    evaluate_model("deed_mlp_sbert.joblib",        "deed_new_test.txt",        "result_deed_new_test.txt")

    # On Tuesday you can swap in the real Canvas files, e.g.:
    # evaluate_model("camper_mlp_sbert.joblib",      "camper_test.txt",      "result_camper_YourName.txt")
    # evaluate_model("conviction_mlp_sbert.joblib", "conviction_test.txt", "result_conviction_YourName.txt")
    # evaluate_model("deed_mlp_sbert.joblib",        "deed_test.txt",        "result_deed_YourName.txt")


ValueError: Expected 2D array, got 1D array instead:
array=['She backed the camper into the campsite’s narrow alleyway.'
 'At summer camp, every camper learns to canoe and tie knots.'
 'The camper’s battery died after we forgot to turn off the lights.'
 'Each camper must sign in before breakfast service.'
 'We spotted a vintage Airstream camper on the beach road.'
 'The campers cheered as the archery instructor demonstrated proper form.'
 'After hours on the trail, the group set up their camper for the night.'
 'A day-camp camper sprained her ankle during the nature hike.'
 'He installed solar panels on his off-road camper conversion.'
 'The junior campers painted their cabins with bright colors.'].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [21]:
# ─── Evaluation Helpers ─────────────────────────────────────────────────────────────
import joblib
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics import accuracy_score, classification_report

embedder = SentenceTransformer("all-MiniLM-L6-v2")

def load_test_sentences(path):
    with open(path, encoding="utf-8") as f:
        return [l.strip() for l in f if l.strip() and not l.lstrip().startswith("#")]

def load_gold_labels(path):
    return [int(l.strip()) for l in open(path, encoding="utf-8") if l.strip() in ("1","2")]

def evaluate_mlp(model_path, test_txt, gold_txt):
    # Load model and test data
    mlp = joblib.load(model_path)
    sents = load_test_sentences(test_txt)
    gold  = load_gold_labels(gold_txt)
    # Embed before predicting
    X_test = embedder.encode(sents, convert_to_numpy=True)
    preds  = mlp.predict(X_test)
    # Report
    print(f"\n=== {model_path} on {test_txt} ===")
    print(f"Test size: {len(sents)} sentences")
    print(f"Accuracy : {accuracy_score(gold, preds):.2%}\n")
    print(classification_report(gold, preds, digits=3))

# ─── Run evaluation on the new synthetic tests ──────────────────────────────────────
evaluate_mlp("camper_mlp_sbert.joblib",      "camper_new_test.txt",      "result_camper_new_test.txt")
evaluate_mlp("conviction_mlp_sbert.joblib", "conviction_new_test.txt", "result_conviction_new_test.txt")
evaluate_mlp("deed_mlp_sbert.joblib",        "deed_new_test.txt",        "result_deed_new_test.txt")



=== camper_mlp_sbert.joblib on camper_new_test.txt ===
Test size: 10 sentences
Accuracy : 80.00%

              precision    recall  f1-score   support

           1      1.000     0.600     0.750         5
           2      0.714     1.000     0.833         5

    accuracy                          0.800        10
   macro avg      0.857     0.800     0.792        10
weighted avg      0.857     0.800     0.792        10


=== conviction_mlp_sbert.joblib on conviction_new_test.txt ===
Test size: 10 sentences
Accuracy : 100.00%

              precision    recall  f1-score   support

           1      1.000     1.000     1.000         5
           2      1.000     1.000     1.000         5

    accuracy                          1.000        10
   macro avg      1.000     1.000     1.000        10
weighted avg      1.000     1.000     1.000        10


=== deed_mlp_sbert.joblib on deed_new_test.txt ===
Test size: 10 sentences
Accuracy : 90.00%

              precision    recall  f1-score 

In [None]:
# ─── Hyperparameter Tuning for the CAMPER MLP on SBERT Embeddings ───────────────────
from sentence_transformers import SentenceTransformer
import numpy as np
import joblib
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

# 1) Load & embed camper training data
def load_data(path):
    sents, labs = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(None,1)
            if len(parts)==2 and parts[0] in ("1","2"):
                labs.append(int(parts[0]))
                sents.append(parts[1])
    return sents, labs

train_sents, train_labels = load_data("camper.txt")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
X_train = embedder.encode(train_sents, convert_to_numpy=True)
y_train = np.array(train_labels)

# 2) Define a more extensive grid
param_grid = {
    "hidden_layer_sizes": [(25,), (50,), (100,), (50,50), (100,50)],
    "alpha": [1e-5, 1e-4, 1e-3],
    "learning_rate_init": [1e-4, 5e-4, 1e-3],
}

# 3) Run GridSearchCV
grid = GridSearchCV(
    MLPClassifier(max_iter=2000, random_state=42),
    param_grid,
    cv=5,
    n_jobs=1,
    scoring="accuracy"
)
grid.fit(X_train, y_train)

print("Best parameters for CAMPER MLP:", grid.best_params_)
print("Best CV accuracy        :", f"{grid.best_score_:.3f}")

# 4) Retrain final model with best params and save
best_mlp = grid.best_estimator_
best_mlp.fit(X_train, y_train)
joblib.dump(best_mlp, "camper_mlp_sbert_tuned.joblib")

# 5) Evaluate on the synthetic test set
def load_test_sents(path):
    return [l.strip() for l in open(path, encoding="utf-8") if l.strip() and not l.startswith("#")]

def load_gold(path):
    return [int(l.strip()) for l in open(path, encoding="utf-8") if l.strip() in ("1","2")]

test_sents = load_test_sents("camper_new_test.txt")
test_gold  = load_gold("result_camper_new_test.txt")
X_test     = embedder.encode(test_sents, convert_to_numpy=True)
preds      = best_mlp.predict(X_test)

print("\n=== CAMPER Tuned MLP Synthetic Test ===")
print("Accuracy :", f"{accuracy_score(test_gold, preds):.2%}")
print(classification_report(test_gold, preds, digits=3))


Best parameters for CAMPER MLP: {'alpha': 1e-05, 'hidden_layer_sizes': (50,), 'learning_rate_init': 0.0001}
Best CV accuracy        : 0.849

=== CAMPER Tuned MLP Synthetic Test ===
Accuracy : 80.00%
              precision    recall  f1-score   support

           1      1.000     0.600     0.750         5
           2      0.714     1.000     0.833         5

    accuracy                          0.800        10
   macro avg      0.857     0.800     0.792        10
weighted avg      0.857     0.800     0.792        10



In [None]:
# ─── Setup ───────────────────────────────────────────────────────────────────────
!pip install -q -U sentence-transformers nltk
import nltk
nltk.download("wordnet")
nltk.download("omw-1.4")
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
import joblib
from nltk.corpus import wordnet as wn
import random

# ─── Synonym Augmentation ────────────────────────────────────────────────────────
def synonym_augment(sents, labels, n_aug=2):
    def augment_sent(sent):
        words = sent.split()
        new_sents = []
        for _ in range(n_aug):
            aug = []
            for w in words:
                syns = wn.synsets(w)
                lemmas = {l.name().replace("_", " ") for s in syns for l in s.lemmas() if l.name().lower() != w.lower()}
                if lemmas:
                    aug.append(random.choice(list(lemmas)))
                else:
                    aug.append(w)
            new_sents.append(" ".join(aug))
        return new_sents

    aug_sents, aug_labels = [], []
    for sent, label in zip(sents, labels):
        new_versions = augment_sent(sent)
        aug_sents.extend(new_versions)
        aug_labels.extend([label] * len(new_versions))
    return sents + aug_sents, labels + aug_labels

# ─── Data Loaders ────────────────────────────────────────────────────────────────
def load_data(path):
    sents, labs = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(None, 1)
            if len(parts) == 2 and parts[0] in ("1", "2"):
                labs.append(int(parts[0]))
                sents.append(parts[1])
    return sents, labs

def load_test_sents(path):
    return [L.strip() for L in open(path, encoding="utf-8") if L.strip() and not L.startswith("#")]

def load_gold(path):
    return [int(L.strip()) for L in open(path, encoding="utf-8") if L.strip() in ("1", "2")]

# ─── MLP Pipeline with Augmentation and SBERT ─────────────────────────────────────
from sklearn.pipeline import Pipeline

print("\n=== Improving CAMPER ===")
sents, labels = load_data("camper.txt")
sents_aug, labels_aug = synonym_augment(sents, labels, n_aug=2)
print("Original:", len(sents), "Augmented:", len(sents_aug))

embedder = SentenceTransformer("all-MiniLM-L12-v2")
X_train = embedder.encode(sents_aug, convert_to_numpy=True)
y_train = np.array(labels_aug)

param_grid = {
    "hidden_layer_sizes": [(50,), (100,), (50, 50), (100, 50)],
    "alpha": [1e-4, 1e-5, 1e-6],
}
grid = GridSearchCV(MLPClassifier(max_iter=1500, random_state=42), param_grid, cv=5, n_jobs=-1, scoring="accuracy")
grid.fit(X_train, y_train)

print(" Best MLP params:", grid.best_params_, "CV:", f"{grid.best_score_:.3f}")

best_model = grid.best_estimator_
joblib.dump(best_model, "camper_mlp_sbert_aug.joblib")

# ─── Evaluation ──────────────────────────────────────────────────────────────────
test_sents = load_test_sents("camper_new_test.txt")
test_labels = load_gold("result_camper_new_test.txt")
X_test = embedder.encode(test_sents, convert_to_numpy=True)
preds = best_model.predict(X_test)

print("\n=== CAMPER MLP + SBERT (Augmented) TEST ===")
print("Accuracy:", f"{accuracy_score(test_labels, preds):.2%}")
print(classification_report(test_labels, preds, digits=3))


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.7/345.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m102.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m47.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!



=== Improving CAMPER ===
Original: 52 Augmented: 156


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

 Best MLP params: {'alpha': 0.0001, 'hidden_layer_sizes': (100,)} CV: 0.903

=== CAMPER MLP + SBERT (Augmented) TEST ===
Accuracy: 100.00%
              precision    recall  f1-score   support

           1      1.000     1.000     1.000         5
           2      1.000     1.000     1.000         5

    accuracy                          1.000        10
   macro avg      1.000     1.000     1.000        10
weighted avg      1.000     1.000     1.000        10



In [None]:
# Creates new test sets and label files for camper, conviction, and deed

test_data = {
    "camper": {
        "sents": [
            "The seasoned camper pitched his tent under the stars.",
            "Every camper received a safety whistle before the hike.",
            "She was a happy camper until the rain started pouring.",
            "The campground assigned a plot to each registered camper.",
            "The camper filled his canteen at the freshwater spring.",
            "The luxury camper included a built-in kitchenette and bathroom.",
            "They hauled their camper across state lines every summer.",
            "A snowstorm stranded the couple in their camper overnight.",
            "The camper’s fuel tank needed to be refilled at the next stop.",
            "Their camper was custom-built for off-grid adventures.",
        ],
        "labels": [2, 2, 2, 2, 2, 1, 1, 1, 1, 1]
    },
    "conviction": {
        "sents": [
            "Her voice trembled but was filled with conviction.",
            "He held his beliefs with deep personal conviction.",
            "The speech was delivered with unwavering conviction.",
            "You could hear the conviction in his words.",
            "The candidate’s conviction won over many voters.",
            "The court overturned the original conviction after new evidence surfaced.",
            "He is appealing his conviction on the grounds of misconduct.",
            "The judge cited prior conviction as a reason for the sentence.",
            "DNA testing led to his wrongful conviction being dismissed.",
            "The prosecutor pushed for a swift conviction.",
        ],
        "labels": [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
    },
    "deed": {
        "sents": [
            "She was honored for her brave deed during the rescue.",
            "His final deed was to donate everything to charity.",
            "No good deed goes unnoticed in this town.",
            "That heroic deed saved three lives.",
            "Their kind deed inspired others to help.",
            "The deed listed both owners as joint tenants.",
            "He brought the property deed to the lawyer.",
            "The original deed was filed in 1978.",
            "They disputed the name on the deed.",
            "She inherited the deed to the estate.",
        ],
        "labels": [2, 2, 2, 2, 2, 1, 1, 1, 1, 1]
    }
}

for word, data in test_data.items():
    # Write sentence file
    with open(f"{word}_newer_test.txt", "w", encoding="utf-8") as f_out:
        for line in data["sents"]:
            f_out.write(line + "\n")

    # Write label file
    with open(f"result_{word}_newer_test.txt", "w", encoding="utf-8") as f_out:
        for label in data["labels"]:
            f_out.write(str(label) + "\n")

print("Test and label files created for camper, conviction, and deed.")


Test and label files created for camper, conviction, and deed.


In [None]:
# ─── Run evaluation on the new synthetic tests ──────────────────────────────────────
evaluate_mlp("camper_mlp_sbert.joblib",      "camper_new_test.txt",      "result_camper_new_test.txt")
evaluate_mlp("conviction_mlp_sbert.joblib", "conviction_new_test.txt", "result_conviction_new_test.txt")
evaluate_mlp("deed_mlp_sbert.joblib",        "deed_new_test.txt",        "result_deed_new_test.txt")

In [None]:
# Run these after generating the *_newer_test.txt files and loading your trained models

evaluate_mlp("camper_mlp_sbert_aug.joblib", "camper_newer_test.txt", "result_camper_newer_test.txt")
evaluate_mlp("conviction_mlp_sbert.joblib", "conviction_newer_test.txt", "result_conviction_newer_test.txt")
evaluate_mlp("deed_mlp_sbert.joblib", "deed_newer_test.txt", "result_deed_newer_test.txt")



=== camper_mlp_sbert_aug.joblib on camper_newer_test.txt ===
Test size: 10 sentences
Accuracy : 30.00%

              precision    recall  f1-score   support

           1      0.375     0.600     0.462         5
           2      0.000     0.000     0.000         5

    accuracy                          0.300        10
   macro avg      0.188     0.300     0.231        10
weighted avg      0.188     0.300     0.231        10


=== conviction_mlp_sbert.joblib on conviction_newer_test.txt ===
Test size: 10 sentences
Accuracy : 100.00%

              precision    recall  f1-score   support

           1      1.000     1.000     1.000         5
           2      1.000     1.000     1.000         5

    accuracy                          1.000        10
   macro avg      1.000     1.000     1.000        10
weighted avg      1.000     1.000     1.000        10


=== deed_mlp_sbert.joblib on deed_newer_test.txt ===
Test size: 10 sentences
Accuracy : 10.00%

              precision    recall 

In [22]:
import os
from glob import glob

def auto_evaluate_mlp(word, model_file=None):
    """Evaluate all test/result file pairs for a given word using a saved model."""
    if model_file is None:
        model_file = f"{word}_mlp_sbert.joblib"
    if not os.path.exists(model_file):
        raise FileNotFoundError(f"Model file not found: {model_file}")

    # Find all matching *_<word>_test.txt files (e.g., conviction_test.txt, conviction_newer_test.txt)
    test_files = [f for f in glob(f"*{word}*test.txt") if not f.startswith("result_")]

    for test_path in test_files:
        result_path = f"result_{os.path.basename(test_path)}"
        if not os.path.exists(result_path):
            print(f"⚠️  No matching label file found for: {result_path}")
            continue
        print(f"\n=== Running model on: {test_path} ===")
        evaluate_mlp(model_file, test_path, result_path)


# Example usage:
auto_evaluate_mlp("conviction")
# Or later:
# auto_evaluate_mlp("camper")
# auto_evaluate_mlp("deed")



=== Running model on: sample_conviction_test.txt ===

=== conviction_mlp_sbert.joblib on sample_conviction_test.txt ===
Test size: 4 sentences
Accuracy : 100.00%

              precision    recall  f1-score   support

           1      1.000     1.000     1.000         1
           2      1.000     1.000     1.000         3

    accuracy                          1.000         4
   macro avg      1.000     1.000     1.000         4
weighted avg      1.000     1.000     1.000         4


=== Running model on: conviction_test.txt ===

=== conviction_mlp_sbert.joblib on conviction_test.txt ===
Test size: 10 sentences
Accuracy : 90.00%

              precision    recall  f1-score   support

           1      1.000     0.800     0.889         5
           2      0.833     1.000     0.909         5

    accuracy                          0.900        10
   macro avg      0.917     0.900     0.899        10
weighted avg      0.917     0.900     0.899        10


=== Running model on: convictio

In [None]:
evaluate_mlp("conviction_mlp_sbert.joblib", "conviction_hard_test.txt", "result_conviction_hard_test.txt")



=== conviction_mlp_sbert.joblib on conviction_hard_test.txt ===
Test size: 10 sentences
Accuracy : 40.00%

              precision    recall  f1-score   support

           1      0.000     0.000     0.000         5
           2      0.444     0.800     0.571         5

    accuracy                          0.400        10
   macro avg      0.222     0.400     0.286        10
weighted avg      0.222     0.400     0.286        10



In [None]:
import os
import numpy as np
import joblib
from sklearn.cluster import KMeans
from sentence_transformers import SentenceTransformer
from sklearn.metrics import accuracy_score, classification_report
from collections import Counter, defaultdict

# Load training data
def load_labeled_data(path):
    sents, labs = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(None, 1)
            if len(parts) == 2 and parts[0] in ("1", "2"):
                labs.append(int(parts[0]))
                sents.append(parts[1])
    return sents, labs

# Load unlabeled test sentences
def load_test_sentences(path):
    with open(path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip() and not line.startswith("#")]

# Load test labels
def load_gold_labels(path):
    with open(path, encoding="utf-8") as f:
        return [int(line.strip()) for line in f if line.strip() in ("1", "2")]

# Train KMeans WSD model and evaluate on all test files for a given word
def run_unsupervised_wsd(word, embedder):
    print(f"\n=== {word.upper()} ===")

    # Load training data and embed
    train_path = f"{word}.txt"
    train_sents, train_labels = load_labeled_data(train_path)
    X_train = embedder.encode(train_sents, convert_to_numpy=True)

    # Train KMeans on sentence embeddings
    km = KMeans(n_clusters=2, random_state=42, n_init=10)
    km.fit(X_train)
    cluster_labels = km.labels_

    # Map each cluster to the majority sense label
    cluster_to_sense = {}
    for c in [0, 1]:
        sense = Counter(l for l, k in zip(train_labels, cluster_labels) if k == c).most_common(1)[0][0]
        cluster_to_sense[c] = sense

    # Evaluate on all matching test files
    for fname in os.listdir():
        if fname.startswith(word) and fname.endswith(".txt") and not fname.startswith("result_"):
            test_sents = load_test_sentences(fname)
            result_name = f"result_{fname}"
            if os.path.exists(result_name):
                gold = load_gold_labels(result_name)
                X_test = embedder.encode(test_sents, convert_to_numpy=True)
                test_clusters = km.predict(X_test)
                preds = [cluster_to_sense[c] for c in test_clusters]

                print(f"\n=== {word.upper()} KMeans on {fname} ===")
                print(f"Accuracy: {accuracy_score(gold, preds):.2%}")
                print(classification_report(gold, preds, digits=3))
            else:
                print(f"⚠️  No matching label file for: {result_name}")

# Run for all words
embedder = SentenceTransformer("all-MiniLM-L6-v2")
for word in ["camper", "conviction", "deed"]:
    run_unsupervised_wsd(word, embedder)



=== CAMPER ===
⚠️  No matching label file for: result_camper.txt

=== CAMPER KMeans on camper_test.txt ===
Accuracy: 70.00%
              precision    recall  f1-score   support

           1      1.000     0.400     0.571         5
           2      0.625     1.000     0.769         5

    accuracy                          0.700        10
   macro avg      0.812     0.700     0.670        10
weighted avg      0.812     0.700     0.670        10


=== CAMPER KMeans on camper_new_test.txt ===
Accuracy: 70.00%
              precision    recall  f1-score   support

           1      1.000     0.400     0.571         5
           2      0.625     1.000     0.769         5

    accuracy                          0.700        10
   macro avg      0.812     0.700     0.670        10
weighted avg      0.812     0.700     0.670        10


=== CAMPER KMeans on camper_newer_test.txt ===
Accuracy: 30.00%
              precision    recall  f1-score   support

           1      0.000     0.000     

# Train Script

In [None]:
import joblib
import numpy as np
import nltk
import re
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline, FeatureUnion
from sentence_transformers import SentenceTransformer

# Download required NLTK data
nltk.download("punkt")
nltk.download('punkt_tab')
nltk.download("wordnet")
nltk.download("omw-1.4")

embedder = SentenceTransformer("all-MiniLM-L6-v2")
lemmatizer = WordNetLemmatizer()

# ─── Preprocessing & Feature Extractors ─────────────────────────────
class LemmaTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None): return self
    def transform(self, X):
        return [" ".join(lemmatizer.lemmatize(w) for w in word_tokenize(s.lower())) for s in X]

class WordNetOverlap(BaseEstimator, TransformerMixin):
    def __init__(self, word): self.word = word
    def fit(self, X, y=None): return self
    def transform(self, X):
        senses = wn.synsets(self.word, pos=wn.NOUN)[:2]
        gloss_sets = [set(word_tokenize(s.definition().lower())) for s in senses]
        feats = []
        for sent in X:
            toks = set(word_tokenize(sent.lower()))
            feats.append([len(toks & gloss_sets[0]), len(toks & gloss_sets[1])])
        return np.array(feats)

class WindowFeatures(BaseEstimator, TransformerMixin):
    def __init__(self, word): self.word = word
    def fit(self, X, y=None): return self
    def transform(self, X):
        feats = []
        for sent in X:
            toks = word_tokenize(sent.lower())
            idx = toks.index(self.word) if self.word in toks else -1
            window = toks[max(0, idx-2): idx+3]
            feats.append([len(window), sum(1 for t in window if t in ("the", "a", "an"))])
        return np.array(feats)

class GlossSimilarity(BaseEstimator, TransformerMixin):
    def __init__(self, word):
        self.word = word

    def fit(self, X, y=None): return self

    def transform(self, X):
        embedder = SentenceTransformer("all-MiniLM-L6-v2")  # reload here, avoid storing
        senses = wn.synsets(self.word, pos=wn.NOUN)[:2]
        gloss_vecs = embedder.encode([s.definition() for s in senses])
        sent_vecs = embedder.encode(X)
        return np.array([[np.dot(s, gloss_vecs[0]), np.dot(s, gloss_vecs[1])] for s in sent_vecs])


# ─── Data Loading & Augmentation ────────────────────────────────────
def load_data(path):
    sents, labs = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(None, 1)
            if len(parts) == 2 and parts[0] in ("1", "2"):
                labs.append(int(parts[0]))
                sents.append(parts[1])
    return sents, labs

def synonym_augment(sents, labels, n_aug=2):
    aug_sents, aug_labels = [], []
    for s, l in zip(sents, labels):
        tokens = word_tokenize(s)
        for i, word in enumerate(tokens):
            syns = wn.synsets(word)
            if syns:
                lemmas = set(lemma.replace("_", " ") for syn in syns for lemma in syn.lemma_names())
                for lemma in list(lemmas)[:n_aug]:
                    new = tokens[:i] + [lemma] + tokens[i+1:]
                    aug_sents.append(" ".join(new))
                    aug_labels.append(l)
    return sents + aug_sents, labels + aug_labels

# ─── Training Scripts ───────────────────────────────────────────────
def build_conviction_model():
    sents, labels = load_data("conviction_extended.txt")
    X = embedder.encode(sents)
    model = MLPClassifier(hidden_layer_sizes=(50,), alpha=0.0001, max_iter=1000, random_state=42)
    model.fit(X, labels)
    joblib.dump(model, "conviction_mlp_sbert.joblib")
    print("✅ Conviction model saved.")

def build_camper_model():
    sents, labels = load_data("camper_extended.txt")
    s_aug, l_aug = synonym_augment(sents, labels, n_aug=2)
    X = embedder.encode(s_aug)
    model = MLPClassifier(hidden_layer_sizes=(50, 50), alpha=0.0001, max_iter=1000, random_state=42)
    model.fit(X, l_aug)
    joblib.dump(model, "camper_mlp_sbert_aug.joblib")
    print("✅ Camper model saved.")

def build_deed_model():
    sents, labels = load_data("deed_extended.txt")
    s_aug, l_aug = synonym_augment(sents, labels, n_aug=3)
    union = FeatureUnion([
        ("tf_w", TfidfVectorizer(analyzer="word", ngram_range=(1,3), min_df=1)),
        ("tf_c", TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), min_df=1)),
        ("wnov", WordNetOverlap("deed")),
        ("wind", WindowFeatures("deed")),
        ("glos", GlossSimilarity("deed")),
    ])
    pipe = Pipeline([
        ("lemma", LemmaTransformer()),
        ("feat", union),
        ("clf", RandomForestClassifier(n_estimators=200, random_state=42)),
    ])
    pipe.fit(s_aug, l_aug)
    joblib.dump(pipe, "deed_stack_pipe.joblib")
    print("✅ Deed model saved.")

if __name__ == "__main__":
    build_conviction_model()
    build_camper_model()
    build_deed_model()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


✅ Conviction model saved.
✅ Camper model saved.
✅ Deed model saved.


# Eval Script

In [None]:
import os
import re
import joblib
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics import accuracy_score, classification_report

# Load embedder only once
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# === Helper Functions ===
def load_test_sentences(path):
    with open(path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip() and not line.startswith("#")]

def load_gold_labels(path):
    with open(path, encoding="utf-8") as f:
        return [int(line.strip()) for line in f if line.strip() in ("1", "2")]

def evaluate_model(model_path, test_path, label_path, debug=False):
    sents = load_test_sentences(test_path)
    gold = load_gold_labels(label_path)
    model = joblib.load(model_path)

    if "deed" in model_path.lower():
        X_test = sents
    else:
        X_test = embedder.encode(sents, convert_to_numpy=True)

    preds = model.predict(X_test)

    print(f"\n=== {os.path.basename(model_path)} on {os.path.basename(test_path)} ===")
    print(f"Test size: {len(sents)} sentences")
    print("Accuracy:", f"{accuracy_score(gold, preds):.2%}")
    print(classification_report(gold, preds, digits=3))

    if debug:
        print("\nSentence-by-sentence predictions:")
        for i, (s, g, p) in enumerate(zip(sents, gold, preds)):
            correctness = "✅" if g == p else "❌"
            print(f"{i+1:02d}. [{correctness}] GOLD: {g} | PRED: {p} | {s}")

# === Main Evaluation Loop ===
def run_all_tests_for_word(word, debug = False):
    model_file_map = {
        "camper": "camper_mlp_sbert_aug.joblib",
        "conviction": "conviction_mlp_sbert.joblib",
        "deed": "deed_stack_pipe.joblib",
    }

    model_path = model_file_map[word.lower()]
    pattern = re.compile(rf"{word}(_\w+)?_test\.txt$")
    all_files = os.listdir(".")

    for test_file in sorted(f for f in all_files if pattern.fullmatch(f)):
        base = re.sub(r"\.txt$", "", test_file)
        label_file = f"result_{base}.txt"

        if not os.path.exists(label_file):
            print(f"\n⚠️  No matching label file found for: {label_file}")
            continue
        if debug:
            evaluate_model(model_path, test_file, label_file, debug=True)
        else:
            evaluate_model(model_path, test_file, label_file)

# Example usage:
if __name__ == "__main__":
    for w in ["camper", "conviction", "deed"]:
        print(f"\n=== {w.upper()} ===")
        run_all_tests_for_word(w, debug = True)



=== CAMPER ===

=== camper_mlp_sbert_aug.joblib on camper_new_test.txt ===
Test size: 10 sentences
Accuracy: 80.00%
              precision    recall  f1-score   support

           1      0.800     0.800     0.800         5
           2      0.800     0.800     0.800         5

    accuracy                          0.800        10
   macro avg      0.800     0.800     0.800        10
weighted avg      0.800     0.800     0.800        10


Sentence-by-sentence predictions:
01. [✅] GOLD: 1 | PRED: 1 | She backed the camper into the campsite’s narrow alleyway.
02. [✅] GOLD: 2 | PRED: 2 | At summer camp, every camper learns to canoe and tie knots.
03. [✅] GOLD: 1 | PRED: 1 | The camper’s battery died after we forgot to turn off the lights.
04. [✅] GOLD: 2 | PRED: 2 | Each camper must sign in before breakfast service.
05. [✅] GOLD: 1 | PRED: 1 | We spotted a vintage Airstream camper on the beach road.
06. [✅] GOLD: 2 | PRED: 2 | The campers cheered as the archery instructor demonstrated p

#  “cs5322s25.py” module

In [None]:
import os
import joblib
from sentence_transformers import SentenceTransformer
from train import LemmaTransformer, GlossSimilarity, WindowFeatures, WordNetOverlap

def WSD_Test_camper(sentences):
    """
    Takes a list of sentences containing 'camper' and returns predicted sense labels (1 or 2).
    Loads the SBERT embedder and camper model inside the function as required.
    """
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    model = joblib.load("camper_mlp_sbert_aug.joblib")
    embeddings = embedder.encode(sentences, convert_to_numpy=True)
    predictions = model.predict(embeddings)
    return predictions.tolist()

def WSD_Test_conviction(sentences):
    """
    Takes a list of sentences containing 'conviction' and returns predicted sense labels (1 or 2).
    Loads the SBERT embedder and conviction model inside the function.
    """
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    model = joblib.load("conviction_mlp_sbert.joblib")
    embeddings = embedder.encode(sentences, convert_to_numpy=True)
    predictions = model.predict(embeddings)
    return predictions.tolist()

def WSD_Test_deed(sentences):
    """
    Takes a list of sentences containing 'deed' and returns predicted sense labels (1 or 2).
    Loads the pipeline model (which includes its own preprocessing).
    """
    model = joblib.load("deed_stack_pipe.joblib")
    predictions = model.predict(sentences)
    return predictions.tolist()

def run_test(firstname, lastname):
    name_tag = f"{firstname.lower()}{lastname.lower()}"
    test_files = {
        "camper": "camper_test.txt",
        "conviction": "conviction_test.txt",
        "deed": "deed_test.txt",
    }

    functions = {
        "camper": WSD_Test_camper,
        "conviction": WSD_Test_conviction,
        "deed": WSD_Test_deed,
    }

    for word, file in test_files.items():
        if not os.path.exists(file):
            print(f"Missing test file: {file}")
            continue

        # Load sentences
        with open(file, encoding="utf-8") as f:
            sentences = [line.strip() for line in f if line.strip()]

        # Run prediction
        print(f"Predicting senses for {word}...")
        results = functions[word](sentences)

        # Save to file
        out_file = f"result_{word}_{name_tag}.txt"
        with open(out_file, "w", encoding="utf-8") as f:
            for label in results:
                f.write(str(label) + "\n")

        print(f"Saved results to: {out_file}")


if __name__ == "__main__":
    run_test("Harley", "Gribble")

Predicting senses for camper...
Saved results to: result_camper_harleygribble.txt
Predicting senses for conviction...
Saved results to: result_conviction_harleygribble.txt
Predicting senses for deed...
Saved results to: result_deed_harleygribble.txt


## test.py

In [None]:
from cs5322s25 import WSD_Test_camper, WSD_Test_conviction, WSD_Test_deed
from train import LemmaTransformer, GlossSimilarity, WindowFeatures, WordNetOverlap

# Example usage:
if __name__ == "__main__":
        print(WSD_Test_camper(["They parked their camper at the lakeside.", "Each camper brought a sleeping bag."])) # expected 1, 2
        print(WSD_Test_conviction(["He was convicted for murder.", "She spoke with great conviction."])) # expected 2, 1
        print(WSD_Test_deed(["He signed the deed to the property.", "Her brave deed saved a child."])) # 1, 2


[1, 2]
[2, 1]
[1, 2]


# eval.py

In [26]:
import os
import re
import joblib
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics import accuracy_score, classification_report
from train import LemmaTransformer, GlossSimilarity, WindowFeatures, WordNetOverlap

# Load embedder only once
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# === Helper Functions ===
def load_test_sentences(path):
    with open(path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip() and not line.startswith("#")]

def load_gold_labels(path):
    with open(path, encoding="utf-8") as f:
        return [int(line.strip()) for line in f if line.strip() in ("1", "2")]

def evaluate_model(model_path, test_path, label_path, debug=False):
    sents = load_test_sentences(test_path)
    gold = load_gold_labels(label_path)
    model = joblib.load(model_path)

    if "deed" in model_path.lower():
        X_test = sents
    else:
        X_test = embedder.encode(sents, convert_to_numpy=True)
    # X_test = embedder.encode(sents, convert_to_numpy=True)
    preds = model.predict(X_test)

    print(f"\n=== {os.path.basename(model_path)} on {os.path.basename(test_path)} ===")
    print(f"Test size: {len(sents)} sentences")
    print("Accuracy:", f"{accuracy_score(gold, preds):.2%}")
    print(classification_report(gold, preds, digits=3))

    if debug:
        print("\nSentence-by-sentence predictions:")
        for i, (s, g, p) in enumerate(zip(sents, gold, preds)):
            correctness = "✅" if g == p else "❌"
            print(f"{i+1:02d}. [{correctness}] GOLD: {g} | PRED: {p} | {s}")

# === Main Evaluation Loop ===
def run_all_tests_for_word(word, debug = False):
    model_file_map = {
        "camper": "camper_mlp_sbert_aug.joblib",
        "conviction": "conviction_mlp_sbert.joblib",
        "deed": "deed_stack_pipe.joblib"
        #"deed": "deed_mlp_sbert.joblib"
    }

    model_path = model_file_map[word.lower()]
    pattern = re.compile(rf"{word}(_\w+)?_test\.txt$")
    all_files = os.listdir(".")

    for test_file in sorted(f for f in all_files if pattern.fullmatch(f)):
        base = re.sub(r"\.txt$", "", test_file)
        label_file = f"result_{base}.txt"

        if not os.path.exists(label_file):
            print(f"\n⚠️  No matching label file found for: {label_file}")
            continue
        if debug:
            evaluate_model(model_path, test_file, label_file, debug=True)
        else:
            evaluate_model(model_path, test_file, label_file)

# Example usage:
if __name__ == "__main__":
    for w in ["camper", "conviction", "deed"]:
        print(f"\n=== {w.upper()} ===")
        run_all_tests_for_word(w, debug = True)



=== CAMPER ===

=== camper_mlp_sbert_aug.joblib on camper_new_test.txt ===
Test size: 10 sentences
Accuracy: 80.00%
              precision    recall  f1-score   support

           1      0.800     0.800     0.800         5
           2      0.800     0.800     0.800         5

    accuracy                          0.800        10
   macro avg      0.800     0.800     0.800        10
weighted avg      0.800     0.800     0.800        10


Sentence-by-sentence predictions:
01. [✅] GOLD: 1 | PRED: 1 | She backed the camper into the campsite’s narrow alleyway.
02. [✅] GOLD: 2 | PRED: 2 | At summer camp, every camper learns to canoe and tie knots.
03. [✅] GOLD: 1 | PRED: 1 | The camper’s battery died after we forgot to turn off the lights.
04. [✅] GOLD: 2 | PRED: 2 | Each camper must sign in before breakfast service.
05. [✅] GOLD: 1 | PRED: 1 | We spotted a vintage Airstream camper on the beach road.
06. [✅] GOLD: 2 | PRED: 2 | The campers cheered as the archery instructor demonstrated p

# train.py

In [18]:
import joblib
import numpy as np
import nltk
import re
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline, FeatureUnion
from sentence_transformers import SentenceTransformer

# Download required NLTK data
nltk.download("punkt")
nltk.download('punkt_tab')
nltk.download("wordnet")
nltk.download("omw-1.4")

embedder = SentenceTransformer("all-MiniLM-L6-v2")
lemmatizer = WordNetLemmatizer()

# ─── Preprocessing & Feature Extractors ─────────────────────────────
class LemmaTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None): return self
    def transform(self, X):
        return [" ".join(lemmatizer.lemmatize(w) for w in word_tokenize(s.lower())) for s in X]

class WordNetOverlap(BaseEstimator, TransformerMixin):
    def __init__(self, word): self.word = word
    def fit(self, X, y=None): return self
    def transform(self, X):
        senses = wn.synsets(self.word, pos=wn.NOUN)[:2]
        gloss_sets = [set(word_tokenize(s.definition().lower())) for s in senses]
        feats = []
        for sent in X:
            toks = set(word_tokenize(sent.lower()))
            feats.append([len(toks & gloss_sets[0]), len(toks & gloss_sets[1])])
        return np.array(feats)

class WindowFeatures(BaseEstimator, TransformerMixin):
    def __init__(self, word): self.word = word
    def fit(self, X, y=None): return self
    def transform(self, X):
        feats = []
        for sent in X:
            toks = word_tokenize(sent.lower())
            idx = toks.index(self.word) if self.word in toks else -1
            window = toks[max(0, idx-2): idx+3]
            feats.append([len(window), sum(1 for t in window if t in ("the", "a", "an"))])
        return np.array(feats)

class GlossSimilarity(BaseEstimator, TransformerMixin):
    def __init__(self, word):
        self.word = word

    def fit(self, X, y=None): return self

    def transform(self, X):
        embedder = SentenceTransformer("all-MiniLM-L6-v2")  # reload here, avoid storing
        senses = wn.synsets(self.word, pos=wn.NOUN)[:2]
        gloss_vecs = embedder.encode([s.definition() for s in senses])
        sent_vecs = embedder.encode(X)
        return np.array([[np.dot(s, gloss_vecs[0]), np.dot(s, gloss_vecs[1])] for s in sent_vecs])


# ─── Data Loading & Augmentation ────────────────────────────────────
def load_data(path):
    sents, labs = [], []
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(None, 1)
            if len(parts) == 2 and parts[0] in ("1", "2"):
                labs.append(int(parts[0]))
                sents.append(parts[1])
    return sents, labs

def synonym_augment(sents, labels, n_aug=2):
    aug_sents, aug_labels = [], []
    for s, l in zip(sents, labels):
        tokens = word_tokenize(s)
        for i, word in enumerate(tokens):
            syns = wn.synsets(word)
            if syns:
                lemmas = set(lemma.replace("_", " ") for syn in syns for lemma in syn.lemma_names())
                for lemma in list(lemmas)[:n_aug]:
                    new = tokens[:i] + [lemma] + tokens[i+1:]
                    aug_sents.append(" ".join(new))
                    aug_labels.append(l)
    return sents + aug_sents, labels + aug_labels

# ─── Training Scripts ───────────────────────────────────────────────
def build_conviction_model():
    sents, labels = load_data("conviction_extended.txt")
    X = embedder.encode(sents)
    model = MLPClassifier(hidden_layer_sizes=(50,), alpha=0.0001, max_iter=1000, random_state=42)
    model.fit(X, labels)
    joblib.dump(model, "conviction_mlp_sbert.joblib")
    print("Conviction model saved.")

def build_camper_model():
    sents, labels = load_data("camper_extended.txt")
    s_aug, l_aug = synonym_augment(sents, labels, n_aug=2)
    X = embedder.encode(s_aug)
    model = MLPClassifier(hidden_layer_sizes=(50, 50), alpha=0.0001, max_iter=1000, random_state=42)
    model.fit(X, l_aug)
    joblib.dump(model, "camper_mlp_sbert_aug.joblib")
    print("Camper model saved.")

def build_deed_model():
    sents, labels = load_data("deed_extended.txt")
    s_aug, l_aug = synonym_augment(sents, labels, n_aug=3)
    union = FeatureUnion([
        ("tf_w", TfidfVectorizer(analyzer="word", ngram_range=(1,3), min_df=1)),
        ("tf_c", TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), min_df=1)),
        ("wnov", WordNetOverlap("deed")),
        ("wind", WindowFeatures("deed")),
        ("glos", GlossSimilarity("deed")),
    ])
    pipe = Pipeline([
        ("lemma", LemmaTransformer()),
        ("feat", union),
        ("clf", RandomForestClassifier(n_estimators=200, random_state=42)),
    ])
    pipe.fit(s_aug, l_aug)
    joblib.dump(pipe, "deed_stack_pipe.joblib")
    print("Deed model saved.")

if __name__ == "__main__":
    build_conviction_model()
    build_camper_model()
    build_deed_model()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Conviction model saved.
Camper model saved.
Deed model saved.
