# NLTK sample Model creation 
- Preprocessing Text
- Extract Features 
- Train Classification
- Evolution the classification


referencer : https://github.com/LinkedInLearning/ai-projects-with-python-tensorflow-and-nltk-4512163/tree/main

In [1]:
# Importing necessary libraries
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Download the vader_lexicon package
nltk.download('vader_lexicon')

def analyze_sentiment(text):
    # Initialize the VADER sentiment intensity analyzer
    sia = SentimentIntensityAnalyzer()

    # Compute and print the sentiment scores
    sentiment = sia.polarity_scores(text)
    print(sentiment)

# Test the function with a sample text
analyze_sentiment("NLTK is a great library for Natural Language Processing!")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Allur\AppData\Roaming\nltk_data...


{'neg': 0.0, 'neu': 0.465, 'pos': 0.535, 'compound': 0.784}


#  Custom processing  Movie review 

In [2]:
import nltk
from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
import random
nltk.download('movie_reviews')

# Prepare the dataset
documents = [(list(movie_reviews.words(fileid)), category)
              for category in movie_reviews.categories()
              for fileid in movie_reviews.fileids(category)]

# Shuffle the documents
random.shuffle(documents)

# Define the feature extractor
all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())
word_features = list(all_words)[:2000]

def document_features(document):
    document_words = set(document)
    features = {}
    for word in word_features:
        features['contains({})'.format(word)] = (word in document_words)
    return features

# Train the classifier
featuresets = [(document_features(d), c) for (d,c) in documents]
train_set, test_set = featuresets[100:], featuresets[:100]
classifier = NaiveBayesClassifier.train(train_set)

# Test the classifier
print(accuracy(classifier, test_set))

# Show the most important features
classifier.show_most_informative_features(5)

[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\Allur\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\movie_reviews.zip.


0.82
Most Informative Features
   contains(outstanding) = True              pos : neg    =     11.2 : 1.0
         contains(mulan) = True              pos : neg    =      8.2 : 1.0
        contains(seagal) = True              neg : pos    =      7.9 : 1.0
   contains(wonderfully) = True              pos : neg    =      7.4 : 1.0
         contains(damon) = True              pos : neg    =      6.3 : 1.0
