In [None]:
import nltk
from nltk.corpus import gutenberg
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import matplotlib.pyplot as plt

# Download necessary NLTK data.
# Both the classic and the newer resource names are requested so the script
# works across NLTK releases: word_tokenize needs the Punkt models
# ('punkt' / 'punkt_tab') and nltk.pos_tag needs the perceptron tagger
# ('averaged_perceptron_tagger' / 'averaged_perceptron_tagger_eng').
for resource in (
    'gutenberg',
    'punkt',
    'punkt_tab',
    'stopwords',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
):
    nltk.download(resource)

# Read Moby Dick file from Gutenberg dataset
moby_dick = gutenberg.raw('melville-moby_dick.txt')

# Tokenization
tokens = word_tokenize(moby_dick)

# Stop-words filtering (case-insensitive; punctuation tokens are kept)
stop_words = set(stopwords.words('english'))
filtered_tokens = [token for token in tokens if token.lower() not in stop_words]

# Parts-of-Speech (POS) tagging
# NOTE(review): tagging runs on the stop-word-filtered tokens, so the tagger
# does not see the original sentence context — confirm this order is intended.
pos_tags = nltk.pos_tag(filtered_tokens)

# POS frequency: count each tag and keep the five most common
pos_counts = FreqDist(tag for (word, tag) in pos_tags)
top_pos = pos_counts.most_common(5)

# Lemmatization
# The tagger emits Penn Treebank tags (NN, VBD, JJ, RB, CD, ...), while the
# WordNet lemmatizer expects a WordNet POS ('n', 'v', 'a', 'r'). Passing the
# lower-cased first letter of the tag is wrong for adjectives ('j' instead
# of 'a') and is not a valid WordNet POS for most other tags (e.g. 'c' for
# CD), so map the tag family explicitly and fall back to noun, which is the
# lemmatizer's own default.
PENN_TO_WORDNET_POS = {'J': 'a', 'V': 'v', 'N': 'n', 'R': 'r'}

lemmatizer = WordNetLemmatizer()
# Only the first 20 tagged tokens are lemmatized.
lemmatized_tokens = [
    lemmatizer.lemmatize(token, pos=PENN_TO_WORDNET_POS.get(tag[:1], 'n'))
    for token, tag in pos_tags[:20]
]

# Bar chart of the most common POS tags.
# top_pos is a sequence of (tag, count) pairs; split it into the x-axis
# labels and the matching bar heights.
pos_labels = [entry[0] for entry in top_pos]
pos_values = [entry[1] for entry in top_pos]

plt.bar(pos_labels, pos_values)
plt.title('POS Frequency Distribution')
plt.xlabel('Parts of Speech')
plt.ylabel('Frequency')
plt.show()

# Optional: Sentiment Analysis
# Lexicon-based sentiment over the stop-word-filtered Moby Dick tokens:
# every positive word adds 1, every negative word subtracts 1, and the total
# is normalised by the number of tokens.

# Define positive and negative words
positive_words = ['good', 'great', 'excellent']
negative_words = ['bad', 'poor', 'terrible']

# Sets give constant-time membership tests inside the token loop.
positive_lookup = set(positive_words)
negative_lookup = set(negative_words)

# Average score beyond which the text is called positive (or, mirrored,
# negative); anything in between is neutral.
SENTIMENT_THRESHOLD = 0.05

# Calculate sentiment score
sentiment_score = 0
for word in filtered_tokens:
    lowered = word.lower()
    if lowered in positive_lookup:
        sentiment_score += 1
    elif lowered in negative_lookup:
        sentiment_score -= 1

# Calculate average sentiment score (0.0 for an empty token list instead of
# raising ZeroDivisionError)
if filtered_tokens:
    average_sentiment_score = sentiment_score / len(filtered_tokens)
else:
    average_sentiment_score = 0.0

# Determine overall text sentiment.
# A score inside [-threshold, +threshold] is reported as neutral rather than
# being lumped in with negative.
if average_sentiment_score > SENTIMENT_THRESHOLD:
    overall_sentiment = 'positive'
elif average_sentiment_score < -SENTIMENT_THRESHOLD:
    overall_sentiment = 'negative'
else:
    overall_sentiment = 'neutral'

# Display sentiment analysis results
print(f"Average Sentiment Score: {average_sentiment_score}")
print(f"Overall Text Sentiment: {overall_sentiment}")

# Commit your exercise solution to your GitHub account and provide the URL in the submission box

: 