In [1]:
 # Import necessary libraries
import nltk
# Download the 'punkt_tab' resource
nltk.download('punkt_tab')
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from collections import Counter
from nltk.sentiment import SentimentIntensityAnalyzer

# Download necessary resources of NLTK
nltk.download('punkt') # Tokenization
nltk.download('stopwords') # Stopwords
nltk.download('vader_lexicon') # Sentiment Analysis

 # Sample text
text = """
O Romeo, Romeo! wherefore art thou Romeo?
Deny thy father and refuse thy name;
Or, if thou wilt not, be but sworn my love,
And I'll no longer be a Capulet.
"""

 # 1. Tokenization: Divide text in words and sentences
words = word_tokenize(text)
sentences = sent_tokenize(text)

print("Tokenizacion:")
print("Palabras:", words)
print("Sentencias:", sentences)
print()

 # 2. Remove empty words (stopwords)
stop_words = set(stopwords.words('english'))
filtered_words = [word for word in words if word.lower() not in stop_words and word.isalnum()]

print("Palabras sin stopwords:", filtered_words)
print()

 # 3. Frequency of words
word_freq = Counter(filtered_words)
print("Frecuencia de palabras:")
print(word_freq)
print()

# 4. Sentiment Analysis
sia =  SentimentIntensityAnalyzer()
sentiment = sia.polarity_scores(text)
print("Analisis de sentimiento:")
print(sentiment)
print()

# 5. Language complexity: Average number of words per sentence
avg_words_per_sentence = sum(len(word_tokenize(sent)) for sent in sentences) / len(sentences)
print(f"Complejidad del lenguaje: {avg_words_per_sentence:.2f} palabras por oración")

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


Tokenizacion:
Palabras: ['O', 'Romeo', ',', 'Romeo', '!', 'wherefore', 'art', 'thou', 'Romeo', '?', 'Deny', 'thy', 'father', 'and', 'refuse', 'thy', 'name', ';', 'Or', ',', 'if', 'thou', 'wilt', 'not', ',', 'be', 'but', 'sworn', 'my', 'love', ',', 'And', 'I', "'ll", 'no', 'longer', 'be', 'a', 'Capulet', '.']
Sentencias: ['\nO Romeo, Romeo!', 'wherefore art thou Romeo?', "Deny thy father and refuse thy name;\nOr, if thou wilt not, be but sworn my love,\nAnd I'll no longer be a Capulet."]

Palabras sin stopwords: ['Romeo', 'Romeo', 'wherefore', 'art', 'thou', 'Romeo', 'Deny', 'thy', 'father', 'refuse', 'thy', 'name', 'thou', 'wilt', 'sworn', 'love', 'longer', 'Capulet']

Frecuencia de palabras:
Counter({'Romeo': 3, 'thou': 2, 'thy': 2, 'wherefore': 1, 'art': 1, 'Deny': 1, 'father': 1, 'refuse': 1, 'name': 1, 'wilt': 1, 'sworn': 1, 'love': 1, 'longer': 1, 'Capulet': 1})

Analisis de sentimiento:
{'neg': 0.172, 'neu': 0.672, 'pos': 0.156, 'compound': 0.4574}

Complejidad del lenguaje: 13.3