# Evaluating Sentiment Changes in Historical Speeches

This notebook analyzes a corpus of historical speeches end to end: it cleans the text, explores how the speeches are distributed over time, tracks sentiment over time with VADER, and extracts topics with both LDA and BERTopic.

In [None]:
!pip install pandas matplotlib seaborn nltk vaderSentiment wordcloud plotly sklearn gensim streamlit bertopic --quiet


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from bertopic import BERTopic
import plotly.express as px

# Download the NLTK 'punkt' resource.
nltk.download('punkt')
# Apply the "whitegrid" style to all subsequent plots. `sns.set` is only an alias of
# `set_theme`, which seaborn documents as the preferred interface.
sns.set_theme(style="whitegrid")


In [None]:
# Read the speech corpus, convert the `date` column to datetimes, and order the
# rows by date, all in one chained expression.
df = (
    pd.read_csv('historical_speeches.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(by='date')
)
# Preview the first rows of the sorted frame.
df.head()


## Data Cleaning

In [None]:
# Drop only the rows that are missing a field this notebook actually reads
# (`date` and `text`). A bare dropna() would also discard speeches whose only gap
# is in some other column. Re-binding to a fresh copy (instead of `inplace=True`)
# leaves the loaded frame's object untouched and avoids chained-assignment warnings
# on the column write below.
df = df.dropna(subset=['date', 'text']).copy()
# Remove every character that is not an ASCII letter, a digit, whitespace,
# or one of the punctuation marks . , ! ?
df['text'] = df['text'].str.replace(r"[^a-zA-Z0-9.,!?\s]", "", regex=True)
# Preview the cleaned frame.
df.head()


## Exploratory Data Analysis

In [None]:
# Derive the calendar year of each speech; it is the grouping key for the bar chart.
df['year'] = df['date'].dt.year

# One bar per year, drawn on an explicitly created 12x6 inch figure.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=df, x='year', ax=ax)
ax.tick_params(axis='x', labelrotation=45)  # slant the x tick labels by 45 degrees
ax.set_title('Distribution of Speeches Over Time')
plt.show()


In [None]:
# Concatenate every speech into a single space-separated string.
text = " ".join(df['text'])

# Configure the cloud first, then generate it from the combined text.
cloud_builder = WordCloud(width=800, height=400, background_color='white')
wordcloud = cloud_builder.generate(text)

# Show the cloud as an image with the axes hidden.
fig, ax = plt.subplots(figsize=(15, 6))
ax.imshow(wordcloud, interpolation='bilinear')
ax.set_axis_off()
ax.set_title('Word Cloud of Speech Texts')
plt.show()


## Sentiment Analysis

In [None]:
analyzer = SentimentIntensityAnalyzer()


def _compound_score(speech):
    """Return the `compound` entry of VADER's polarity scores for one speech text."""
    return analyzer.polarity_scores(speech)['compound']


# Each speech is scored as a single document, giving one value per row.
df['sentiment'] = df['text'].map(_compound_score)
# Interactive line chart of the per-speech score against the speech date.
px.line(df, x='date', y='sentiment', title='Sentiment Trend Over Time')


## Topic Modeling - LDA

In [None]:
# Build the document-term count matrix (English stop words removed, terms filtered
# by the max_df / min_df document-frequency bounds) and fit a 5-topic LDA model
# with a fixed random_state.
vectorizer = CountVectorizer(stop_words='english', max_df=0.95, min_df=2)
dtm = vectorizer.fit_transform(df['text'])
lda = LatentDirichletAllocation(n_components=5, random_state=42)
lda.fit(dtm)

# Look the vocabulary up once; the original re-evaluated get_feature_names_out()
# for every printed word.
feature_names = vectorizer.get_feature_names_out()

for index, topic in enumerate(lda.components_):
    print(f"TOPIC #{index}")
    # argsort() is ascending, so the last 10 indices are the 10 highest-weighted
    # terms of the topic, listed from lowest to highest weight.
    print([feature_names[i] for i in topic.argsort()[-10:]])


## Topic Modeling - BERTopic

In [None]:
# BERTopic is given the speeches as a plain Python list of strings.
documents = df['text'].tolist()

topic_model = BERTopic(verbose=False)
# fit_transform returns a pair; only the first element (`topics`) is kept.
topics, _ = topic_model.fit_transform(documents)

# Bar charts for the top 5 topics.
topic_model.visualize_barchart(top_n_topics=5)


## Deployment Instructions with Streamlit

To deploy this project as a Streamlit app, create a `streamlit_app.py` file and copy the relevant visualization code from this notebook into it, rendering Matplotlib figures with `st.pyplot` and Plotly figures with `st.plotly_chart`. Run it with:
```bash
streamlit run streamlit_app.py
```