In [None]:
import nltk

# Fetch the NLTK data packages this notebook relies on.
# NOTE(review): 'vader_lexicon' matches the SentimentMethod.VADER scoring used further down;
# 'punkt', 'stopwords' and 'wordnet' are presumably consumed by modules.preprocessing — confirm.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('vader_lexicon')

In [None]:
from modules.article_retriever import get_articles_for_topic

def get_article_urls(source, topic, number_of_pages):
    """Collect the article listing results for a topic across several pages.

    The first page is requested with an empty suffix; each following page
    ``i`` (i = 1, 2, ...) is requested with the query suffix ``?page=i``.

    Args:
        source: News source, passed through unchanged to
            ``get_articles_for_topic``.
        topic: Topic, passed through unchanged to ``get_articles_for_topic``.
        number_of_pages: How many listing pages to request. Zero or a
            negative value requests nothing.

    Returns:
        list: Everything returned by ``get_articles_for_topic`` for the
        requested pages (presumably article URLs), concatenated in page order.
    """
    url_list = []
    for page in range(number_of_pages):
        # Page 0 is the bare listing; later pages carry an explicit page query.
        suffix = f'?page={page}' if page else ''
        # extend() appends in place; rebuilding the list with `+` on every
        # iteration re-copied all previously collected entries.
        url_list.extend(get_articles_for_topic(source, topic, suffix))
    return url_list


In [None]:
from common.enum import NewsSource, Topic

# Run parameters: which outlet to query, which topic, and how many listing pages to walk.
source = NewsSource.PINK
topic = Topic.UKRAINE
number_of_pages = 1

# Resolve the article list once and report how many entries came back.
url_list = get_article_urls(source, topic, number_of_pages)
article_count = len(url_list)
print(article_count, 'article(s) found')

In [None]:
from modules.article_retriever import extract_article_info
from modules.preprocessing import preprocess_text
from modules.sentiment_analysis import get_polarity, SentimentMethod

# Score each listed article. Articles that come back without text are reported and left
# out of article_sentiments; every scored article is stored as a header/polarity/url record.
article_sentiments = []
for i, url in enumerate(url_list):
    header, article_text = extract_article_info(url, source)
    if article_text:
        # Preprocess the raw text, then score it with the VADER method.
        article_polarity = get_polarity(preprocess_text(article_text), SentimentMethod.VADER)
        article_sentiments.append({'header': header, 'polarity': article_polarity, 'url': url})
        print(f"Article {i+1}/{article_count} processed")
    else:
        print(f"No text in article {i+1} ({url})")


In [None]:
from modules.plot_methods import create_dataframe

# Turn the per-article records (header / polarity / url) into a table.
sentiment_df = create_dataframe(article_sentiments)
# Attach a readable label derived from the topic value: underscores become spaces and
# each word is title-cased (assumes topic.value is a str — TODO confirm in common.enum).
# NOTE(review): presumably the plotting helpers read `.name` for titles — confirm in modules.plot_methods.
sentiment_df.name = topic.value.replace('_', ' ').title()
# Print only the 'header' and 'polarity' columns.
print(sentiment_df[['header', 'polarity']])

In [None]:
from modules.plot_methods import display_histogram

# Render the histogram view of sentiment_df.
# NOTE(review): presumably plots the 'polarity' column — confirm in modules.plot_methods.
display_histogram(sentiment_df)


In [None]:
from modules.plot_methods import display_heatmap

# Render the heatmap view of sentiment_df.
# NOTE(review): which columns are visualised is decided inside modules.plot_methods — confirm there.
display_heatmap(sentiment_df)