In [None]:
import nltk
import pandas as pd
import spacy
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Fetch the NLTK data packages (tokenizer models, stop-word lists, WordNet,
# POS tagger), one request per resource.
for resource in ('punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger'):
    nltk.download(resource)

In [None]:
# Sample sentence that the tokenization and spaCy POS-tagging cells read.
document = (
    "Natural Language Processing (NLP) is a subfield of Artificial "
    "Intelligence (AI) that enables machines to understand and process "
    "human language."
)

In [None]:
# Tokenize with NLTK's word_tokenize rather than str.split(): splitting on
# whitespace leaves punctuation glued to words ("(NLP)", "language."), so
# those tokens would be stemmed and lemmatized with the punctuation attached.
try:
    tokens = word_tokenize(document)
except LookupError:
    # Newer NLTK releases look up the 'punkt_tab' resource instead of 'punkt';
    # fetch it on demand so the cell works with either version.
    nltk.download('punkt_tab')
    tokens = word_tokenize(document)
print("Tokens:", tokens)

In [None]:
# Part-of-speech tagging with spaCy's small English pipeline. The model is
# installed separately (python -m spacy download en_core_web_sm); spacy.load
# raises OSError when it is missing.
nlp = spacy.load("en_core_web_sm")
doc = nlp(document)
# Pair every token's surface text with its coarse-grained POS label.
pos_tags_spacy = [(token.text, token.pos_) for token in doc]
print("POS Tags using spaCy:", pos_tags_spacy)

In [None]:
# Drop English stop words. Each token is lower-cased before the lookup, so
# the match is case-insensitive while the kept tokens retain their casing.
stop_words = set(stopwords.words('english'))
filtered_tokens = list(
    filter(lambda tok: tok.lower() not in stop_words, tokens)
)
print("Tokens after Stop Words Removal:", filtered_tokens)

In [None]:
# Reduce every token that survived stop-word removal to its Porter stem.
stemmer = PorterStemmer()
stemmed_tokens = list(map(stemmer.stem, filtered_tokens))
print("Tokens after Stemming:", stemmed_tokens)

In [None]:
# Lemmatize with WordNet using each token's part of speech. Called without a
# POS argument, WordNetLemmatizer treats every word as a noun, so verb forms
# such as "enables" are generally returned unchanged.
lemmatizer = WordNetLemmatizer()
try:
    tagged_tokens = pos_tag(filtered_tokens)
except LookupError:
    # Newer NLTK releases look up the tagger under the name
    # 'averaged_perceptron_tagger_eng'; fetch it on demand.
    nltk.download('averaged_perceptron_tagger_eng')
    tagged_tokens = pos_tag(filtered_tokens)
# First letter of the Penn Treebank tag -> WordNet POS code. Anything else
# falls back to noun, which is the lemmatizer's own default.
penn_to_wordnet = {'J': 'a', 'V': 'v', 'N': 'n', 'R': 'r'}
lemmatized_tokens = [
    lemmatizer.lemmatize(word, pos=penn_to_wordnet.get(tag[:1], 'n'))
    for word, tag in tagged_tokens
]
print("Tokens after Lemmatization:", lemmatized_tokens)

In [None]:
# Two-document corpus fed to the TF-IDF vectorizer.
corpus = [
    (
        "Natural Language Processing (NLP) is a subfield of Artificial "
        "Intelligence (AI) that enables machines to understand and process "
        "human language."
    ),
    (
        "It includes tasks like text analysis, sentiment analysis, "
        "and machine translation."
    ),
]

In [None]:
# Build a TF-IDF matrix over the corpus, dropping the words in scikit-learn's
# built-in English stop list, and show it as a table with one row per
# document and one column per vocabulary term.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(corpus)
feature_names = vectorizer.get_feature_names_out()
# fit_transform returns a sparse matrix; densify it for tabular display.
df_tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)
print(df_tfidf)