In [12]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.collocations import BigramCollocationFinder, BigramAssocMeasures

In [13]:
# Sample text corpus containing sentences with the word "innovate"
corpus = [
    "Companies innovate to stay ahead in the market.",
    "Innovation is crucial for economic growth.",
    "We need to innovate our business processes to improve efficiency."
]

In [14]:
# Tokenization
tokens = [word_tokenize(sentence.lower()) for sentence in corpus]

In [15]:
# Filtering
stop_words = set(stopwords.words('english'))
filtered_tokens = [[word for word in sentence if word.isalnum() and word not in stop_words] for sentence in tokens]

In [8]:
# Collocation identification
finder = BigramCollocationFinder.from_documents(filtered_tokens)
bigram_measures = BigramAssocMeasures()
collocations = finder.nbest(bigram_measures.pmi, 5)

print("Collocations with 'innovate':", collocations)

Collocations with 'innovate': [('ahead', 'market'), ('business', 'processes'), ('crucial', 'economic'), ('economic', 'growth'), ('improve', 'efficiency')]


In [9]:
# Frequency analysis
word_freq = nltk.FreqDist()
for sentence in tokens:
    word_freq.update(sentence)

print("Word frequencies:", word_freq.most_common())

Word frequencies: [('to', 3), ('.', 3), ('innovate', 2), ('companies', 1), ('stay', 1), ('ahead', 1), ('in', 1), ('the', 1), ('market', 1), ('innovation', 1), ('is', 1), ('crucial', 1), ('for', 1), ('economic', 1), ('growth', 1), ('we', 1), ('need', 1), ('our', 1), ('business', 1), ('processes', 1), ('improve', 1), ('efficiency', 1)]


In [10]:
# Contextual analysis
context_window = 3  # Number of words before and after the target word for context analysis

for sentence in tokens:
    for i, word in enumerate(sentence):
        if word == 'innovate':  # Target word
            start_index = max(0, i - context_window)
            end_index = min(len(sentence), i + context_window + 1)
            context = sentence[start_index:i] + sentence[i+1:end_index]
            print("Context for 'innovate':", context)

Context for 'innovate': ['companies', 'to', 'stay', 'ahead']
Context for 'innovate': ['we', 'need', 'to', 'our', 'business', 'processes']
