In [None]:
!pip install nltk pandas spacy  gensim

In [None]:
!pip install sklearn

In [2]:
import nltk

# Download necessary NLTK data
# 'vader_lexicon' is the lexicon behind the SentimentIntensityAnalyzer that this
# notebook instantiates for sentiment scoring.
# 'punkt' and 'stopwords' are not referenced by any code visible in this notebook.
# NOTE(review): confirm they are still needed before keeping these two downloads.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('vader_lexicon')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [5]:
import pandas as pd
import spacy
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import re


# Load Spacy English model
# Shared pipeline used by clean_article() and aspect_sentiment_analysis().
# No cell visible here installs the "en_core_web_sm" model package.
# NOTE(review): confirm the runtime environment already provides it.
nlp = spacy.load("en_core_web_sm")

# Initialize VADER sentiment analyzer
# Shared analyzer used by analyze_mood() and aspect_sentiment_analysis();
# it relies on the 'vader_lexicon' resource downloaded via nltk.download().
sid = SentimentIntensityAnalyzer()

def clean_text(text):
    """
    Normalize raw article text for downstream tokenization.

    Steps, in order: lowercase, normalize typographic apostrophes, drop the
    "(ap)" wire tag, expand common English contractions, remove every
    character that is not a letter, '?', '!' or whitespace, then collapse
    whitespace runs into single spaces and trim both ends.

    Args:
        text (str): Raw text to clean.

    Returns:
        str: Lowercased text made only of a-z, '?', '!' and single spaces,
        with no leading or trailing whitespace.

    Note:
        "'s" is always expanded to " is ", so possessives are expanded too
        ("the chain's menu" -> "the chain is menu").
    """
    # Convert text to lowercase
    text = text.lower()
    # Curly apostrophes (U+2018 / U+2019) are common in pasted articles; map
    # them to "'" so the contraction rules below also apply to them.
    text = text.replace("\u2018", "'").replace("\u2019", "'")
    # Drop the Associated Press wire tag
    text = text.replace('(ap)', '')
    # Replace common contractions. Order matters: the specific forms
    # ("what's", "won't", "can't") must run before the generic "'s" / "n't".
    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"'s", " is ", text)
    text = re.sub(r"'ve", " have ", text)
    text = re.sub(r"won't", "will not ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"'re", " are ", text)
    text = re.sub(r"'d", " would ", text)
    text = re.sub(r"'ll", " will ", text)
    # Remove everything except letters, '?', '!' and whitespace. Whitespace
    # (including newlines/tabs) is kept here so that words separated only by
    # a line break are not glued together; this also removes all non-ASCII.
    text = re.sub(r"[^a-z?!\s]+", "", text)
    # Collapse runs of spaces/newlines/tabs into one space and trim the ends
    text = re.sub(r"\s+", " ", text).strip()
    return text





def clean_article(article):
    """
    Reduce an article to a space-separated string of content-word lemmas.

    The text is first normalized with clean_text(), then parsed with the
    shared spaCy pipeline `nlp`. Stop words, punctuation tokens and
    whitespace-only tokens are dropped; the lemma of every remaining token
    is kept.

    Args:
        article (str): Raw article text.

    Returns:
        str: Lemmas of the retained tokens joined by single spaces.
    """
    doc = nlp(clean_text(article))
    lemmatized_tokens = [
        token.lemma_
        for token in doc
        # Whitespace tokens (e.g. from leading or doubled spaces) are neither
        # stop words nor punctuation, so they must be filtered explicitly or
        # they end up as blank "words" in the output.
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return ' '.join(lemmatized_tokens)

def analyze_mood(article):
    """
    Label the overall sentiment of a text from its VADER compound score.

    Returns "Positive" when the compound score is at least 0.05, "Negative"
    when it is at most -0.05, and "Neutral" for anything in between.
    """
    compound = sid.polarity_scores(article)['compound']
    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"




def aspect_sentiment_analysis(article):
    """
    Extract aspects (noun chunks) from the article and label their sentiment.

    Every spaCy noun chunk is an aspect, keyed by the space-joined lemmas of
    all tokens in the chunk. Each occurrence is scored with VADER on the
    chunk's original text. When the same aspect occurs more than once, the
    compound scores of all occurrences are averaged, so an earlier positive
    or negative mention is not silently overwritten by a later one.

    Args:
        article (str): Text to analyse.

    Returns:
        dict[str, str]: Aspect -> "Positive" / "Negative" / "Neutral", in
        order of first appearance. The label uses the +/-0.05 compound
        thresholds also used by analyze_mood().
    """
    doc = nlp(article)

    # Collect every compound score per aspect instead of keeping the last one.
    scores_by_aspect = {}
    for chunk in doc.noun_chunks:
        # The aspect key is the lemmatized form of the whole chunk
        aspect_text = ' '.join(token.lemma_ for token in chunk)
        # Use VADER to determine sentiment of the whole chunk
        sentiment_score = sid.polarity_scores(chunk.text)['compound']
        scores_by_aspect.setdefault(aspect_text, []).append(sentiment_score)

    aspects = {}
    for aspect_text, scores in scores_by_aspect.items():
        mean_score = sum(scores) / len(scores)
        if mean_score >= 0.05:
            sentiment = "Positive"
        elif mean_score <= -0.05:
            sentiment = "Negative"
        else:
            sentiment = "Neutral"
        aspects[aspect_text] = sentiment
    return aspects






def find_connections(article, n_keywords=3):
    """
    Identify key themes within an article using TF-IDF for keyword extraction.

    The vectorizer is fitted on this single article only, so the ranking is
    driven by term frequency within the article (English stop words excluded).

    Args:
        article (str): Text to extract keywords from.
        n_keywords (int): Maximum number of keywords to return.

    Returns:
        str: Up to `n_keywords` highest-weighted terms joined by ", ".
        An empty string is returned when `n_keywords` is not positive or
        when the article has no usable terms (empty or stop words only).
    """
    if n_keywords <= 0:
        # A negative slice bound would silently drop terms from the tail
        # instead of limiting the count.
        return ''

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform([article])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when the text
        # contains no tokens after stop-word removal: there are no themes.
        return ''

    feature_names = np.array(vectorizer.get_feature_names_out())
    # Only one document was fitted, so row 0 holds every term weight.
    term_weights = tfidf_matrix.toarray()[0]
    sorted_indices = np.argsort(term_weights)[::-1]
    top_keywords = feature_names[sorted_indices][:n_keywords]
    return ', '.join(top_keywords)


def main():
    """
    Perform analysis on the input article provided by the user and display the results.

    Reads one article from standard input, then prints:
      * the cleaned (lemmatized, stop-word-free) article,
      * the overall mood,
      * the sentiment of each noun-chunk aspect,
      * the top keywords ("themes").

    Mood and aspect analysis run on the article as typed. VADER uses
    negations, punctuation and capitalization, and spaCy's noun chunks need
    intact sentences; both are lost in the cleaned text. Keyword extraction
    runs on the cleaned text.
    """
    # Take article input from the user
    article = input("Please input the article text: ")
    if not article.strip():
        # Nothing to analyse; keyword extraction cannot run on empty text.
        print("No article text provided.")
        return

    # Perform analysis
    # clean_article() already applies clean_text(), so it is called only once.
    cleaned_article = clean_article(article)
    mood = analyze_mood(article)
    aspect_analysis = aspect_sentiment_analysis(article)
    themes = find_connections(cleaned_article)

    # Display results
    print("Cleaned Article:")
    print(cleaned_article)
    print("\nMood Rating:", mood)
    print("\nAspect Analysis:")
    for aspect, sentiment in aspect_analysis.items():
        print(f"{aspect}: {sentiment}")
    print("\nThemes:", themes)

# Call the main function
# This is interactive: main() blocks on input() until the article text is entered.
main()




Please input the article text: Whether you dip it, drizzle it, or put it on chicken nuggets or french fries, McDonald’s sauce assortment is expanding this fall.  The fast food chain announced Tuesday that it's adding two new limited-edition sauces to its U.S. menu in two weeks.  The new sauces — McDonald’s Sweet & Spicy Jam and McDonald’s MamboSauce — will hit the menu on Oct. 9, according to a news release from the company.  Are whoppers really too small?Burger King must face whopper of a lawsuit alleging burgers are too small, says judge  What are McDonald's two new sauces?  Here's more about the new limited-edition duo: • Sweet & Spicy Jam: The chain describes it as a breakfast-inspired "jammy red pepper dipping sauce with a tongue-numbing Szechuan peppercorn kick." It also includes apple cider vinegar and cayenne pepper. • MamboSauce: This one is described by McD's as "a tomato-based, sweet, spicy and vinegary sauce, inspired by an Washington, D.C. area sauce staple."  The company 