In [1]:
# Single-sentence sample that the rest of the script tokenizes, filters,
# stems and lemmatizes. Adjacent literals are concatenated at compile time,
# so the value is one uninterrupted string.
sample_text = (
    "Both Talmuds are arranged according to the six orders of the Mishnah, "
    "but the discussion of the Mishnic text often wanders off into widely "
    "different topics."
)

# Importing Libraries
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Fetch every NLTK data package requested by this script, one after the
# other, in a fixed order.
for _package in ('punkt', 'stopwords', 'wordnet', 'punkt_tab'):
    nltk.download(_package)

# Tokenization
# Break the sample into word-level tokens and into sentences with a single
# parallel assignment (word tokens are computed first, then sentences).
words, sentences = (
    word_tokenize(sample_text),
    sent_tokenize(sample_text),
)

# Stopword Removal
# Keep only the tokens whose lower-cased form is absent from the English
# stopword set; the surviving tokens keep their original casing.
stop_words = set(stopwords.words('english'))
filtered_words = list(
    filter(lambda token: token.lower() not in stop_words, words)
)

# Stemming
# Run each token that survived stopword filtering through the Porter
# stemmer, preserving token order.
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, filtered_words))

# Lemmatization
# NOTE(review): lemmatize() receives no part-of-speech argument here, so
# every token is looked up under the lemmatizer's default POS -- confirm
# that is what is wanted for the verbs and adverbs in the sample.
lemmatizer = WordNetLemmatizer()
lemmatized_words = list(map(lemmatizer.lemmatize, filtered_words))

# Output Results
# Each entry pairs a label with the value to show; labels after the first
# start with a newline so the sections are separated by a blank line.
_report = (
    ("Original Text:", sample_text),
    ("\nTokenized Words:", words),
    ("\nTokenized Sentences:", sentences),
    ("\nFiltered Words (Stopword Removal):", filtered_words),
    ("\nStemmed Words:", stemmed_words),
    ("\nLemmatized Words:", lemmatized_words),
)
for _label, _value in _report:
    print(_label, _value)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Original Text: Both Talmuds are arranged according to the six orders of the Mishnah, but the discussion of the Mishnic text often wanders off into widely different topics.

Tokenized Words: ['Both', 'Talmuds', 'are', 'arranged', 'according', 'to', 'the', 'six', 'orders', 'of', 'the', 'Mishnah', ',', 'but', 'the', 'discussion', 'of', 'the', 'Mishnic', 'text', 'often', 'wanders', 'off', 'into', 'widely', 'different', 'topics', '.']

Tokenized Sentences: ['Both Talmuds are arranged according to the six orders of the Mishnah, but the discussion of the Mishnic text often wanders off into widely different topics.']

Filtered Words (Stopword Removal): ['Talmuds', 'arranged', 'according', 'six', 'orders', 'Mishnah', ',', 'discussion', 'Mishnic', 'text', 'often', 'wanders', 'widely', 'different', 'topics', '.']

Stemmed Words: ['talmud', 'arrang', 'accord', 'six', 'order', 'mishnah', ',', 'discuss', 'mishnic', 'text', 'often', 'wander', 'wide', 'differ', 'topic', '.']

Lemmatized Words: ['Talmu