In [8]:
import nltk
from nltk.corpus import wordnet
from nltk.stem import PorterStemmer
from nltk import word_tokenize, pos_tag
from nltk.wsd import lesk

def get_wordnet_pos(treebank_tag):
    """Translate a Treebank-style POS tag into a WordNet POS letter.

    Tags starting with ``J``, ``V``, ``N`` or ``R`` map to ``'a'``, ``'v'``,
    ``'n'`` and ``'r'`` respectively. Any other tag yields ``None``.
    """
    # Ordered (tag prefix, WordNet POS) pairs; the first matching prefix wins.
    prefix_to_pos = (('J', 'a'), ('V', 'v'), ('N', 'n'), ('R', 'r'))
    for prefix, wordnet_pos in prefix_to_pos:
        if treebank_tag.startswith(prefix):
            return wordnet_pos
    return None

def query_expansion_wordnet(query):
    """Expand a query with WordNet lemma names for each of its tokens.

    The query is tokenized and POS-tagged. For every token that has WordNet
    synsets under its mapped part of speech, Lesk disambiguation (with the
    whole tokenized query as context) selects a sense; the candidate synset
    with the highest path similarity to that sense then contributes all of
    its lemma names. A token is kept unchanged when it has no synsets, when
    Lesk returns no sense, or when no candidate synset can be selected.

    Args:
        query: The raw query string.

    Returns:
        The expanded terms joined by single spaces. Lemma names are inserted
        exactly as WordNet returns them, so multi-word lemmas keep their
        underscores (e.g. ``political_machine``).
    """
    tokens = word_tokenize(query)

    expanded_terms = []
    # pos_tag already yields (token, tag) pairs, so no zip with tokens is needed.
    for token, treebank_tag in pos_tag(tokens):
        wn_pos = get_wordnet_pos(treebank_tag)
        candidates = wordnet.synsets(token, pos=wn_pos)

        # Only attempt disambiguation when WordNet knows the token at all.
        chosen_sense = lesk(tokens, token, pos=wn_pos) if candidates else None
        if chosen_sense is None:
            expanded_terms.append(token)
            continue

        # Pick the candidate closest to the disambiguated sense. The strict
        # ">" keeps the first candidate when several tie for the best score.
        best_synset = None
        best_similarity = 0
        for candidate in candidates:
            similarity = chosen_sense.path_similarity(candidate)
            if similarity is not None and similarity > best_similarity:
                best_similarity = similarity
                best_synset = candidate

        if best_synset is None:
            # No comparable candidate: keep the original token rather than
            # silently dropping it from the refined query.
            expanded_terms.append(token)
        else:
            expanded_terms.extend(best_synset.lemma_names())

    # Combine expanded terms into a refined query.
    return ' '.join(expanded_terms)

# Example usage: expand a two-word query and print the refined result.
# NOTE(review): presumably needs the NLTK tokenizer, tagger and WordNet data
# to be downloaded beforehand — confirm for the target environment.
input_query = "machine learning"
refined_query = query_expansion_wordnet(input_query)
print("Refined query:", refined_query)


Refined query: machine political_machine learning acquisition
