In [None]:
# ################################################# DATA VISUALIZATION ################################################# 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from textblob import TextBlob

# Read the cleaned responses from data/cleaned_responses.csv into a dataframe.
df = pd.read_csv("data/cleaned_responses.csv")

# Combine the text of every row's "content" column into a single string.
# read_csv turns empty cells into NaN (a float), which would make str.join
# raise TypeError, so missing rows are skipped and the rest coerced to str.
all_content = " ".join(df["content"].dropna().astype(str))

# Save the combined text to disk. The encoding is given explicitly because the
# platform default (e.g. cp1252 on Windows) can raise UnicodeEncodeError on
# non-ASCII characters in the content.
with open("data/all_web_content.txt", "w", encoding="utf-8") as file:
    file.write(all_content)

## Word Mapping

In [None]:

def _show_wordcloud(source_text):
    """Generate a word cloud from ``source_text``, display it, and return it.

    Parameters
    ----------
    source_text : str
        Text passed to ``WordCloud().generate``.

    Returns
    -------
    WordCloud
        The generated cloud, so the caller can keep a reference to it.
    """
    cloud = WordCloud().generate(source_text)
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
    return cloud


# Word cloud built from just the keywords column.
# Missing cells (NaN after read_csv) are skipped so str.join does not fail.
text = " ".join(df["keywords"].dropna().astype(str))
wordcloud = _show_wordcloud(text)

# Word cloud built from all of the article content. This previously reused
# df["keywords"], which made the second figure a duplicate of the first.
text = " ".join(df["content"].dropna().astype(str))
wordcloud = _show_wordcloud(text)

## Word Plotting

In [None]:
# Count how often each individual keyword appears across all rows.
# The column comes from read_csv, so every cell is one string and a bare
# .explode() is a no-op: value_counts would count whole per-row strings.
# Each cell is therefore split into separate keywords before counting.
# NOTE(review): assumes keywords are comma-separated, optionally stored as a
# stringified Python list such as "['a', 'b']" -- TODO confirm against the CSV.
keyword_tokens = (
    df['keywords']
    .dropna()
    .astype(str)
    .str.strip("[]")      # drop the brackets of a stringified list, if any
    .str.split(",")
    .explode()
    .str.strip(" '\"")    # trim whitespace and quote characters around each keyword
)
keyword_freq = keyword_tokens[keyword_tokens != ""].value_counts()

# Plot the top 20 keywords as a bar chart (value_counts is already sorted
# from most to least frequent).
top_keywords = keyword_freq.head(20)
plt.figure(figsize=(10, 6))
sns.barplot(x=top_keywords.values, y=top_keywords.index)
plt.xlabel('Frequency')
plt.ylabel('Keyword')
plt.title('Most Frequent Keywords in Articles')
plt.show()


## Sentiment Analysis

In [None]:
# A sentiment analysis of the content of each article
def get_sentiment(text):
    """Return the TextBlob sentiment of ``text`` as ``(polarity, subjectivity)``.

    Parameters
    ----------
    text : str
        The text to analyse.

    Returns
    -------
    tuple of (float, float)
        ``blob.sentiment.polarity`` and ``blob.sentiment.subjectivity`` for a
        string input. Both values are NaN when ``text`` is not a string (for
        example a missing cell read from CSV).
    """
    # Anything that is not a string (e.g. NaN from an empty CSV cell) is
    # reported as missing rather than handed to TextBlob.
    if not isinstance(text, str):
        return (float("nan"), float("nan"))

    # Create a TextBlob object from the text
    blob = TextBlob(text)
    # Return the polarity and subjectivity as a tuple
    return (blob.sentiment.polarity, blob.sentiment.subjectivity)
    
# Apply get_sentiment to each row's content and store the two components in
# new 'polarity' and 'subjectivity' columns. The (polarity, subjectivity)
# tuples are expanded with one DataFrame construction instead of
# .apply(pd.Series), which creates a Series object per row and is much slower.
sentiment_scores = df['content'].apply(get_sentiment)
df[['polarity', 'subjectivity']] = pd.DataFrame(
    sentiment_scores.tolist(),
    index=df.index,  # keep row alignment with df
    columns=['polarity', 'subjectivity'],
)

# Plot the polarity and subjectivity of each article as a scatter plot
plt.figure(figsize=(10, 6))
sns.scatterplot(x='polarity', y='subjectivity', data=df)
plt.xlabel('Polarity')
plt.ylabel('Subjectivity')
plt.title('Sentiment Analysis of Articles')
# Render the figure explicitly, as the other plotting cells do.
plt.show()