<a href="https://colab.research.google.com/github/arjunanpawin/m_l/blob/main/movie_review_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from textblob import TextBlob  # For spelling correction


# Fetch the NLTK resources used below.
#   punkt / punkt_tab - tokenizer models behind word_tokenize. Newer NLTK
#                       releases load 'punkt_tab' instead of the pickled
#                       'punkt' models, so both are requested to work across
#                       versions.
#   stopwords         - English stop-word list used by analyze_sentiment.
#   brown             - presumably needed by TextBlob's noun-phrase
#                       extraction; TODO confirm.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'brown'):
    nltk.download(_resource)


# Sentiment lexicons: each occurrence of one of these words in a review
# counts as one vote for the corresponding polarity.
positive_words = [
    'good',
    'great',
    'excellent',
    'wonderful',
    'awesome',
]
negative_words = [
    'bad',
    'poor',
    'terrible',
    'awful',
    'horrible',
]
def analyze_sentiment(review):
    """Classify a review as 'Positive', 'Negative' or 'Neutral'.

    The review is lower-cased, tokenized, stripped of English stop words and
    Porter-stemmed. Each stem that matches a stemmed entry of the module-level
    ``positive_words`` / ``negative_words`` lexicons counts as one vote.

    Args:
        review: The review text to classify.

    Returns:
        'Positive' if positive votes outnumber negative votes, 'Negative' in
        the opposite case, and 'Neutral' on a tie (including no votes at all).

    Note:
        This is a plain bag-of-words count: negation ("wasn't terrible") is
        not taken into account.
    """
    stemmer = PorterStemmer()
    stop_words = set(stopwords.words('english'))

    # The tokens are stemmed, so the lexicon must be stemmed with the same
    # stemmer. Otherwise words such as 'terrible' (stem 'terribl') can never
    # match their raw lexicon entry and are silently ignored.
    positive_stems = {stemmer.stem(word) for word in positive_words}
    negative_stems = {stemmer.stem(word) for word in negative_words}

    stems = [
        stemmer.stem(token)
        for token in word_tokenize(review.lower())
        if token not in stop_words
    ]

    # Count positive and negative words
    positive_count = sum(1 for stem in stems if stem in positive_stems)
    negative_count = sum(1 for stem in stems if stem in negative_stems)

    if positive_count > negative_count:
        return 'Positive'
    if negative_count > positive_count:
        return 'Negative'
    return 'Neutral'

# Function to extract keywords from a single review
def extract_keywords(review):
    """Return the noun phrases TextBlob finds in ``review`` as its keywords."""
    return TextBlob(review).noun_phrases

# Sample movie reviews. The first two contain misspellings on purpose so the
# spelling-correction step in the processing loop has something to fix.
reviews = [
    "I absolutly loved this movie! The actng was superb and the storyline was captivating.",
    "The movie was terible. The plot was weak and the acting was mediocre at best.",
    "It was an okay movie. Nothing special, but it wasn't terrible either.",
    "I was pleasantly surprised by how good this movie was. Definitely worth watching."
]

# Accumulators filled in while the reviews are processed:
# the per-review labels, and how many reviews were positive / negative.
sentiments = []
positive_counts = negative_counts = 0

# Process every review: fix spelling, classify it, update the tallies and
# print a short per-review report.
for i, review in enumerate(reviews):
    # correct() returns a new blob; convert it back to a plain string so the
    # helpers below receive ordinary text.
    review_blob = TextBlob(review)
    review_corrected = str(review_blob.correct())

    sentiment = analyze_sentiment(review_corrected)
    keywords = extract_keywords(review_corrected)
    sentiments.append(sentiment)

    # A comparison is True/False, i.e. 1/0 once converted, so each tally only
    # grows when the label matches; 'Neutral' reviews touch neither counter.
    positive_counts += int(sentiment == 'Positive')
    negative_counts += int(sentiment == 'Negative')

    # The trailing empty string yields the blank line between reports.
    print(
        f"Review {i+1}:",
        f"Corrected Review: {review_corrected}",
        f"Sentiment: {sentiment}",
        f"Keywords: {keywords}",
        "",
        sep="\n",
    )


# Calculate overall insights
total_reviews = len(reviews)
# Guard the division: with an empty review list the proportions are reported
# as 0 instead of raising ZeroDivisionError.
positive_proportion = positive_counts / total_reviews if total_reviews else 0.0
negative_proportion = negative_counts / total_reviews if total_reviews else 0.0

# Generate insights paragraph (adjacent f-strings are concatenated into one
# string, producing the same text as building it piece by piece).
insights_paragraph = (
    f"In the sample of {total_reviews} movie reviews, the sentiment analysis revealed that "
    f"{positive_counts} reviews were classified as positive ({positive_proportion*100:.2f}%), "
    f"while {negative_counts} reviews were classified as negative ({negative_proportion*100:.2f}%). "
    "The keywords extracted from the reviews provide insights into the main themes and topics discussed."
)

print("Overall Insights:\n")
print(insights_paragraph)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.


Review 1:
Corrected Review: I absolutely loved this movie! The acting was superb and the storyline was captivating.
Sentiment: Neutral
Keywords: []

Review 2:
Corrected Review: The movie was terrible. The plot was weak and the acting was mediocre at best.
Sentiment: Neutral
Keywords: []

Review 3:
Corrected Review: It was an okay movie. Nothing special, but it wasn't terrible either.
Sentiment: Neutral
Keywords: ['okay movie', "n't terrible"]

Review 4:
Corrected Review: I was pleasantly surprised by how good this movie was. Definitely worth watching.
Sentiment: Positive
Keywords: ['definitely']

Overall Insights:

In the sample of 4 movie reviews, the sentiment analysis revealed that 1 reviews were classified as positive (25.00%), while 0 reviews were classified as negative (0.00%). The keywords extracted from the reviews provide insights into the main themes and topics discussed.
