# Word Disambiguator Using Wordnet Synsets
https://github.com/kevincobain2000/sentiment_classifier/blob/master/scripts/senti_classifier

In [None]:
from __future__ import unicode_literals, print_function
import os
import re
import sys
import codecs
import nltk
import argparse
import operator
from nltk.corpus import wordnet as wn
from collections import defaultdict
import cPickle as pickle
from pkg_resources import resource_string, resource_stream
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews

In [None]:
"""
Word Disambiguator using nltk
Sentiment Classifier as a combination of
  -Bag of Words (nltk movie review corpus, words as features)
  -Heuristics
  
--KATHURIA Pulkit
"""


def word_similarity(word1, word2):
    """Return the highest WordNet path similarity between two words.

    Every synset of ``word1`` is compared with every synset of ``word2``
    via ``wn.path_similarity`` and the largest score is kept.

    Args:
        word1: First word to look up in WordNet.
        word2: Second word to look up in WordNet.

    Returns:
        The largest similarity found, or ``0`` when no synset pair yields
        a positive score (including when either word has no synsets).
    """
    w1synsets = wn.synsets(word1)
    w2synsets = wn.synsets(word2)
    maxsim = 0
    for w1s in w1synsets:
        for w2s in w2synsets:
            current = wn.path_similarity(w1s, w2s)
            # path_similarity can return None; skip it explicitly because
            # comparing None with a number raises TypeError on Python 3.
            if current is None:
                continue
            # maxsim never drops below 0, so beating it implies current > 0.
            if current > maxsim:
                maxsim = current
    return maxsim


def disambiguateWordSenses(sentence, word):
    """Pick the synset of ``word`` that best matches a token of ``sentence``.

    For every candidate synset of ``word`` and every token of ``sentence``,
    the path similarities between the candidate and each synset of that
    token are summed. The candidate that achieves the highest such
    per-token sum is returned.

    Args:
        sentence: Text whose tokens (from ``nltk.word_tokenize``) provide
            the context.
        word: The word to disambiguate.

    Returns:
        The best-scoring synset, or ``None`` when ``word`` has no synsets
        or no candidate reaches a score above ``0.0``.
    """
    wordsynsets = wn.synsets(word)
    if not wordsynsets:
        # Nothing to choose from; also avoids tokenizing for no reason.
        return None

    # Tokenization and the context words' synsets do not depend on the
    # candidate synset, so compute them once instead of once per candidate.
    context_synsets = [wn.synsets(w) for w in nltk.word_tokenize(sentence)]

    bestScore = 0.0
    result = None
    for synset in wordsynsets:
        for token_synsets in context_synsets:
            score = 0.0
            for wsynset in token_synsets:
                sim = wn.path_similarity(wsynset, synset)
                if sim is not None:
                    score += sim
            # Strict comparison keeps the first candidate on ties.
            if score > bestScore:
                bestScore = score
                result = synset
    return result


def SentiWordNet_to_pickle(swn):
    """Accumulate positive/negative scores per synset name.

    Args:
        swn: Object exposing ``all_senti_synsets()``; each yielded item
            must provide ``synset.name``, ``pos_score`` and ``neg_score``.

    Returns:
        A ``defaultdict`` keyed by synset name whose values are
        ``defaultdict(float)`` mappings holding the summed ``'pos'`` and
        ``'neg'`` scores.
    """
    totals = defaultdict(list)
    for entry in swn.all_senti_synsets():
        key = entry.synset.name
        # .get() does not trigger the default factory, so a missing key
        # is detected without inserting an empty list.
        bucket = totals.get(key)
        if bucket is None:
            bucket = totals[key] = defaultdict(float)
        bucket['pos'] += entry.pos_score
        bucket['neg'] += entry.neg_score
    return totals


def classify(text, synsets_scores, bag_of_words):
    """Score ``text`` for positive and negative sentiment.

    Each line is split into sentences on ``'.'`` and each sentence into
    whitespace-separated words. A word contributes only when it can be
    disambiguated to a synset whose name is a key of ``synsets_scores``
    and its lowercase form appears in the matching polarity entry of
    ``bag_of_words``.

    Args:
        text: Iterable of strings. Blank lines and lines starting with
            ``'#'`` are skipped.
        synsets_scores: Mapping from synset name to a mapping with
            ``'pos'`` and ``'neg'`` scores (the pickled object in
            data/SentiWN.p).
        bag_of_words: Mapping with ``'pos'`` and ``'neg'`` entries that
            support membership tests on lowercase words.

    Returns:
        A ``(pos, neg)`` tuple with the accumulated scores.
    """
    pos = neg = 0
    for line in text:
        if not line.strip() or line.startswith('#'):
            continue
        for sentence in line.split('.'):
            sentence = sentence.strip()
            # Per-sentence subtotals are kept so the summation order (and
            # therefore the floating-point result) stays the same.
            sent_score_pos = sent_score_neg = 0
            for word in sentence.split():
                # Disambiguation is expensive; do it once per word.
                synset = disambiguateWordSenses(sentence, word)
                if not synset:
                    continue
                # NOTE(review): `.name` is read as an attribute here; newer
                # NLTK releases expose it as a method -- confirm which NLTK
                # version this runs against.
                disamb_syn = synset.name
                if disamb_syn not in synsets_scores:
                    continue
                lowered = word.lower()
                if lowered in bag_of_words['neg']:
                    sent_score_neg += synsets_scores[disamb_syn]['neg']
                if lowered in bag_of_words['pos']:
                    sent_score_pos += synsets_scores[disamb_syn]['pos']
            pos += sent_score_pos
            neg += sent_score_neg
    return pos, neg


# Load the packaged sentiment resources once at import time. Each stream is
# closed as soon as its pickle has been read so no file handle is leaked.
senti_pickle = resource_stream('senti_classifier', 'data/SentiWn.p')
try:
    synsets_scores = pickle.load(senti_pickle)
finally:
    senti_pickle.close()

bag_of_words_pickle = resource_stream('senti_classifier', 'data/bag_of_words.p')
try:
    bag_of_words = pickle.load(bag_of_words_pickle)
finally:
    bag_of_words_pickle.close()

# NOTE(review): classify_polarity is not defined in the visible part of this
# file -- confirm it is provided elsewhere before this line runs.
bag_of_words = classify_polarity(bag_of_words)


def polarity_scores(lines_list):
    """Return ``(pos, neg)`` sentiment scores for a list of text lines.

    Thin wrapper around ``classify`` that supplies the module-level
    ``synsets_scores`` and ``bag_of_words`` resources.

    Args:
        lines_list: Iterable of strings to score.

    Returns:
        A ``(pos, neg)`` tuple as produced by ``classify``.
    """
    pos, neg = classify(lines_list, synsets_scores, bag_of_words)
    return pos, neg

In [None]:
if __name__ == "__main__":
    # Command-line entry point: score every review file passed with
    # -c/--classify line by line, then print a per-file verdict.
    parser = argparse.ArgumentParser(description='Sentiment classification')
    parser.add_argument('-c', '--classify', action="store",
                        nargs='*', dest="files", type=argparse.FileType('rt'),
                        help='-c reviews')
    myarguments = parser.parse_args()
    if not myarguments.files:
        parser.print_help()
        # __documentation__ is not defined in the visible part of this file;
        # look it up defensively so this path cannot die with a NameError.
        documentation = globals().get('__documentation__')
        if documentation:
            sys.exit("Documentation: %s" % documentation)
        sys.exit(1)

    # Compiled once: splits a line on runs of punctuation/digits plus any
    # trailing spaces.
    splitter = re.compile("[,.?()\\d]+ *")
    row_format = '{0:<40}... pos = {1:<5} \tneg = {2:<5}'

    for review_file in myarguments.files:
        tpos = 0
        tneg = 0
        for lineno, line in enumerate(review_file):
            line = line.strip()
            if not line:
                continue
            lines_list = splitter.split(line)
            pos, neg = polarity_scores(lines_list)
            print(row_format.format(str(lineno) + '. ' + line[:20], pos, neg))
            tpos += pos
            tneg += neg
        print('-' * 75)
        # A tie is reported as Negative, as in the original logic.
        if tpos > tneg:
            verdict = review_file.name + ' ' + 'is Positive'
        else:
            verdict = review_file.name + ' ' + 'is Negative'
        print(row_format.format(verdict, tpos, tneg))
        print('Overall score of document\nTotal Pos = %s\nTotal Neg = %s' % (tpos, tneg))
        print('-' * 75)

## SEE FAVORITES FOR CURATED LINKS

## PYWSD
https://github.com/alvations/pywsd

## Short Simple Lesk Example + Wordnet Interface
https://stackoverflow.com/questions/20896278/word-sense-disambiguation-algorithm-in-python

## WSD using online encyclopedia
https://github.com/liuhuanyong/WordMultiSenseDisambiguation/blob/master/wordsense_detect.py

## Bidirectional LSTM
https://github.com/Jeff09/Word-Sense-Disambiguation-using-Bidirectional-LSTM

## WSD Based On RDF Graph with Python Wrapper
https://github.com/wastl/disambiguation

## Well-Written Simplified Lesk Algorithm (Wordnet)
https://github.com/dropofwill/word-sense-disambiguation/blob/master/wordnet_lesk.py  
COMPARE WITH PyWSD

## Cross-Language Experiments
https://github.com/alexrudnick/chipa