In [1]:
import wsd_code as wc

## Explore instances and word meanings

In [30]:
def show_instance(word, max_examples=2):
    """Print corpus statistics and a few sample instances for a target word.

    Prints how many instances of ``word`` the corpus holds, how many senses
    the word has, and then up to ``max_examples`` instances, followed by a
    blank line.

    Args:
        word: Target word without the ``.pos`` suffix, e.g. ``"hard"``.
        max_examples: Maximum number of instances to print. Defaults to 2.
    """
    # Build the corpus file id once; both lookups below need it.
    fileid = '{}.pos'.format(word)

    # All instances in the corpus for the given target word.
    instances = wc.senseval.instances(fileid)

    print("'{}' appears {} times".format(word, len(instances)))
    print("The word has {} different meanings.".format(
        len(wc.senses(fileid))))

    # Show the context and sense info of the first instances. The range is
    # bounded by the corpus size so a word with fewer than `max_examples`
    # instances does not raise an IndexError.
    for index in range(min(max_examples, len(instances))):
        print(instances[index])
    print()
    
# Print the corpus statistics and sample instances for each target word.
for w in "hard interest serve".split():
    show_instance(w)

'hard' appears 4333 times
The word has 3 different meanings.
SensevalInstance(word='hard-a', position=20, context=[('``', '``'), ('he', 'PRP'), ('may', 'MD'), ('lose', 'VB'), ('all', 'DT'), ('popular', 'JJ'), ('support', 'NN'), (',', ','), ('but', 'CC'), ('someone', 'NN'), ('has', 'VBZ'), ('to', 'TO'), ('kill', 'VB'), ('him', 'PRP'), ('to', 'TO'), ('defeat', 'VB'), ('him', 'PRP'), ('and', 'CC'), ('that', 'DT'), ("'s", 'VBZ'), ('hard', 'JJ'), ('to', 'TO'), ('do', 'VB'), ('.', '.'), ("''", "''")], senses=('HARD1',))
SensevalInstance(word='hard-a', position=10, context=[('clever', 'NNP'), ('white', 'NNP'), ('house', 'NNP'), ('``', '``'), ('spin', 'VB'), ('doctors', 'NNS'), ("''", "''"), ('are', 'VBP'), ('having', 'VBG'), ('a', 'DT'), ('hard', 'JJ'), ('time', 'NN'), ('helping', 'VBG'), ('president', 'NNP'), ('bush', 'NNP'), ('explain', 'VB'), ('away', 'RB'), ('the', 'DT'), ('economic', 'JJ'), ('bashing', 'NN'), ('that', 'IN'), ('low-and', 'JJ'), ('middle-income', 'JJ'), ('workers', 'NNS'),

## Baseline classifiers

In [10]:
import nltk

What is the **random baseline** for the words 'hard', 'interest' and 'serve'?

In [26]:
# Random baseline: the expected accuracy when a sense is picked uniformly at
# random, i.e. 1 / (number of senses of the word).
hard_bl = 1.0 / len(wc.senses('hard.pos'))
interest_bl = 1.0 / len(wc.senses('interest.pos'))
serve_bl = 1.0 / len(wc.senses('serve.pos'))

# The last two labels used to read "Random interest for ..."; every line
# reports a random baseline.
print("Random baseline for hard: %s" % hard_bl)
print("Random baseline for interest: %s" % interest_bl)
print("Random baseline for serve: %s" % serve_bl)

Random baseline for hard: 0.3333333333333333
Random interest for interest: 0.16666666666666666
Random interest for serve: 0.25


The **majority baseline** is the accuracy we get if we always guess the most frequent sense.

In [29]:
def get_majority_baseline(word):
    """Return the relative frequency of the most frequent sense of ``word``.

    This is the accuracy of a classifier that always predicts the sense that
    occurs most often. Only the first sense label of each instance is
    counted.

    Args:
        word: Target word without the ``.pos`` suffix, e.g. ``"hard"``.

    Returns:
        The share of instances labelled with the most frequent sense.
    """
    first_senses = [
        instance.senses[0]
        for instance in wc.senseval.instances('%s.pos' % word)
    ]
    sense_counts = nltk.FreqDist(first_senses)

    # The sense with the highest count is the majority sense.
    majority_sense = max(sense_counts, key=sense_counts.get)

    return sense_counts.freq(majority_sense)
    
# Report the majority baseline of each target word, one line per word.
for message, target in [
    ("Majority baseline for 'hard': %s", "hard"),
    ("Majority baseline for 'interest': %s", "interest"),
    ("Majority baseline for 'serve': %s", "serve"),
]:
    print(message % get_majority_baseline(target))

Majority baseline for 'hard': 0.797369028386799
Majority baseline for 'interest': 0.5287162162162162
Majority baseline for 'serve': 0.4143444495203289


## Naive Bayes classifiers

In [28]:
target_words = "hard interest serve".split()

# One pass per feature extractor: train and evaluate a Naive Bayes classifier
# for every target word, then print a separator between the two passes.
for features in (wc.wsd_word_features, wc.wsd_context_features):
    for word in target_words:
        print("Training classifier for %s..." % word)
        corpus_file = "%s.pos" % word
        # Judging by the recorded output, wsd_classifier prints its own
        # progress messages and the resulting accuracy.
        clf = wc.wsd_classifier(
            nltk.NaiveBayesClassifier.train, corpus_file, features)
        print()
    print("----------------------------")

Training classifier for hard...
Reading data...
 Senses: HARD2 HARD1 HARD3
Training classifier...
Testing classifier...
Accuracy: 0.8178

Training classifier for interest...
Reading data...
 Senses: interest_5 interest_6 interest_1 interest_3 interest_4 interest_2
Training classifier...
Testing classifier...
Accuracy: 0.5549

Training classifier for serve...
Reading data...
 Senses: SERVE6 SERVE12 SERVE2 SERVE10
Training classifier...
Testing classifier...
Accuracy: 0.4760

----------------------------
Training classifier for hard...
Reading data...
 Senses: HARD2 HARD1 HARD3
Training classifier...
Testing classifier...
Accuracy: 0.8950

Training classifier for interest...
Reading data...
 Senses: interest_5 interest_6 interest_1 interest_3 interest_4 interest_2
Training classifier...
Testing classifier...
Accuracy: 0.4283

Training classifier for serve...
Reading data...
 Senses: SERVE6 SERVE12 SERVE2 SERVE10
Training classifier...
Testing classifier...
Accuracy: 0.8345

-------------

### Results

|Features | hard | interest | serve |
|--|--|--|--|
| random baseline  | 0.3333 | 0.1667 | 0.25 |
| majority baseline  | 0.7974 | 0.5287 | 0.4143 |
|word_features | 0.8178 |  **0.5549** | 0.4760|
|context | **0.8950** |  0.4283 | **0.8345**|


Why is 'interest' worse for the context features?

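The senses of 'interest' occur in very similar local contexts, so the context features do little to separate them; with 0.4283 they even fall below the majority baseline of 0.5287. The word features seem to draw on a wider context, which works better for this word (0.5549).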

# Assignment 4: Semantics

## 1.1) 'hard'

In [48]:
target_words = "hard interest serve".split()

# Train the context-feature classifier for 'hard' again, this time asking for
# a confusion matrix and an error log (the recorded output shows the errors
# being written to errors.txt).
word = "hard"
corpus_file = "%s.pos" % word
clf = wc.wsd_classifier(
    nltk.NaiveBayesClassifier.train,
    corpus_file,
    wc.wsd_context_features,
    confusion_matrix=True,
    log=True,
)

Reading data...
 Senses: HARD2 HARD1 HARD3
Training classifier...
Testing classifier...
Accuracy: 0.8950
Writing errors to errors.txt
      |   H   H   H |
      |   A   A   A |
      |   R   R   R |
      |   D   D   D |
      |   1   2   3 |
------+-------------+
HARD1 |<643> 39  20 |
HARD2 |   6 <73>  9 |
HARD3 |   5  12 <60>|
------+-------------+
(row = reference; col = test)



In [47]:
from pprint import pprint

def normalize_rows(matrix, ndigits=5):
    """Convert each row of a count matrix into relative frequencies.

    Args:
        matrix: Sequence of rows, each a sequence of non-negative counts.
        ndigits: Number of decimal places to round each ratio to.

    Returns:
        A list of lists of floats with the same layout as ``matrix``; each
        non-empty row sums to 1 up to rounding. A row whose counts sum to
        zero is returned as all zeros instead of raising ZeroDivisionError.
    """
    normalized = []
    for row in matrix:
        # Compute the row total once instead of once per element.
        total = sum(row)
        if total == 0:
            normalized.append([0.0 for _ in row])
        else:
            normalized.append([round(count / total, ndigits) for count in row])
    return normalized


# Confusion matrix for 'hard' with context features, copied from the
# classifier output (row = reference sense, column = predicted sense).
m = [
        [643, 39, 20],
        [  6, 73,  9],
        [  5, 12, 60]
]

# Row-normalised matrix: the share of each reference sense that was assigned
# to each predicted sense.
m_r = normalize_rows(m)
print("\n".join(str(row) for row in m_r))

[0.91595, 0.05556, 0.02849]
[0.06818, 0.82955, 0.10227]
[0.06494, 0.15584, 0.77922]


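Most difficult: HARD3 has the lowest per-sense accuracy (77.9%) and is most often confused with HARD2 (15.6% of the HARD3 instances are classified as HARD2).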

Sense for HARD3:
`resisting weight or pressure: “a hard rock”`

Sense for HARD2:
`dispassionate: “a hard bargainer”`

### Examples

