In [None]:
import nltk
import string
import nltk
from nltk.corpus import stopwords
from nltk import tokenize
from nltk.stem import WordNetLemmatizer
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

from nltk.sentiment import SentimentIntensityAnalyzer
from article_retriever import *

# NLTK data used further down: sentence/word tokenizer models, the English
# stop-word list, WordNet (for lemmatisation) and the VADER lexicon.
# 'punkt_tab' is the tokenizer resource that newer NLTK releases load in place
# of the pickled 'punkt' models; requesting both keeps a fresh kernel working
# on old and new installs (a resource the installed version does not need is
# simply left unused).
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'vader_lexicon'):
    nltk.download(resource)

# One shared analyzer instance, reused by every scoring call in the notebook.
sia = SentimentIntensityAnalyzer()

def preprocess_text(text):
    """Split text into sentences and clean each one with preprocess_sentence.

    Args:
        text: Raw text (str) in its original casing.

    Returns:
        list[str]: one cleaned string per detected sentence, in document
        order; an empty list when the tokenizer finds no sentence.
    """
    # Sentence-split BEFORE any lower-casing: the Punkt tokenizer looks at
    # whether the word after a period is capitalised when deciding if an
    # abbreviation/initial ends a sentence, so lower-casing first hides
    # boundaries. preprocess_sentence lower-cases each sentence itself, so the
    # returned strings are still lower-case.
    sentences = tokenize.sent_tokenize(text)
    return [preprocess_sentence(sentence) for sentence in sentences]


# Built on first use and then shared by every preprocess_sentence call, so the
# stop-word corpus is not re-read (and the lemmatizer not re-created) once per
# sentence.
_STOP_WORDS = None
_LEMMATIZER = None


def _sentence_resources():
    """Return the shared (stop-word set, lemmatizer) pair, creating each on first use."""
    global _STOP_WORDS, _LEMMATIZER
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    if _LEMMATIZER is None:
        _LEMMATIZER = WordNetLemmatizer()
    return _STOP_WORDS, _LEMMATIZER


def preprocess_sentence(text):
    """Normalise one sentence into a space-separated string of lemmas.

    Steps, in order: delete ASCII punctuation, lower-case, word-tokenize,
    drop English stop words, lemmatize the remaining words.

    Args:
        text: The sentence to clean (str).

    Returns:
        str: the surviving lemmas joined by single spaces; '' when no word
        survives the filtering.
    """
    # Punctuation characters are deleted, not replaced by a space, so a
    # hyphenated word such as "well-known" collapses to "wellknown".
    text = text.translate(str.maketrans('', '', string.punctuation))
    words = tokenize.word_tokenize(text.lower())

    stop_words, lemmatizer = _sentence_resources()

    # Filter first, then lemmatize only the words that are kept.
    return ' '.join(
        lemmatizer.lemmatize(word) for word in words if word not in stop_words
    )

def get_vader_polarity(raw_text, preprocess=False):
    """Return VADER's compound polarity score for a piece of text.

    By default the text is scored exactly as given. VADER's rules rely on
    negations ("not", "no"), contrastive "but", intensifiers ("very"),
    punctuation and capitalisation; the cleaning done by preprocess_text
    (lower-casing, punctuation and stop-word removal, lemmatisation) strips
    those cues, so e.g. "not good" would be scored as "good".

    Args:
        raw_text: The text to score (str).
        preprocess: When True, reproduce the earlier pipeline: clean the text
            with preprocess_text, re-join the sentences with '. ' and score
            that string instead of the raw text.

    Returns:
        The 'compound' entry of sia.polarity_scores(...) for the scored text.
    """
    if preprocess:
        text_to_score = '. '.join(preprocess_text(raw_text))
    else:
        text_to_score = raw_text
    return sia.polarity_scores(text_to_score)['compound']


In [None]:
# Topic pages tried so far. Pick one explicitly below instead of relying on
# "the last assignment wins".
TOPIC_URLS = {
    "bbc_transgender": "https://www.bbc.co.uk/news/topics/cwlw3xz01lxt",  # transgender people
    "bbc_uplifting": "https://www.bbc.co.uk/news/topics/cx2pk70323et",  # uplifting
    "pinknews_trans": "https://www.thepinknews.com/identity/trans",
}
topic_url = TOPIC_URLS["pinknews_trans"]

url_list = get_articles_for_topic(topic_url)
print(url_list)

# One compound polarity score per article, in url_list order; the plotting
# cells below read this list.
full_polarity_list = []
for url in url_list:
    header, my_text = get_info_from_article(url)
    my_polarity = get_vader_polarity(my_text)
    full_polarity_list.append(my_polarity)
    print(header)
    print(my_polarity)

print("Average polarity: ")
if full_polarity_list:
    print(np.mean(full_polarity_list))
else:
    # np.mean([]) would emit a RuntimeWarning and print nan.
    print("n/a (no articles were retrieved)")
# TODO: consider wordclouds

In [None]:
%matplotlib inline
plt.rcParams.update({'figure.figsize':(7,5), 'figure.dpi':100})

# Histogram of the per-article compound scores collected by the topic loop.
x = full_polarity_list
plt.hist(x, bins=50)
# Label both axes so the figure can be read without the surrounding code;
# the trailing ';' suppresses the return value in the cell output.
plt.gca().set(title='Frequency Histogram', xlabel='VADER compound polarity', ylabel='Frequency');

In [None]:
my_list = full_polarity_list
if my_list:
    # Lay the scores out row by row on a roughly square grid.
    n = int(len(my_list) ** 0.5)  # row width; at least 1 because the list is non-empty
    # Pad with NaN so the last row is as wide as the others: a ragged list of
    # rows cannot be turned into a 2-D array. seaborn leaves NaN cells blank.
    padded = list(my_list) + [float('nan')] * (-len(my_list) % n)
    my_matrix = [padded[i:i+n] for i in range(0, len(padded), n)]
    # create the heatmap using seaborn; center=0 pins the neutral score to the
    # midpoint of the diverging colormap (blue = negative, red = positive)
    ax = sns.heatmap(my_matrix, cmap='coolwarm', center=0)
    ax.set_title('Compound polarity per article (row-major order)')
    plt.show()
else:
    # An empty list would give a row width of 0 and make range() raise.
    print("No polarity scores to plot.")

In [None]:
pink_url = "https://www.thepinknews.com/2023/03/10/coming-out-trans-non-binary-later-life-suzie-eddie-izzard/"
# get_info_from_article is unpacked into (header, text) by the topic loop
# above; unpack it the same way here so `text` is the article body alone and
# not the whole pair, which the following cell hands to preprocess_text.
header, text = get_info_from_article(pink_url)
print(header)
print(text)

In [None]:
# Clean the text fetched in the previous cell. preprocess_text returns one
# cleaned string per sentence; the scoring cell below reads `processed_text`.
# NOTE(review): preprocess_text expects a str — confirm `text` holds only the
# article body at this point.
processed_text = preprocess_text(text)
print(processed_text)

In [None]:
# Re-join the cleaned sentences with '. ' and score the result with the shared
# VADER analyzer, keeping only the 'compound' value.
score = sia.polarity_scores('. '.join(processed_text))
polarity = score['compound']
print(polarity)