In [4]:
pip install fpdf2

Collecting fpdf2
  Downloading fpdf2-2.8.3-py2.py3-none-any.whl.metadata (69 kB)
Downloading fpdf2-2.8.3-py2.py3-none-any.whl (245 kB)
Installing collected packages: fpdf2
Successfully installed fpdf2-2.8.3
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
from datetime import datetime
from docx import Document
from docx.shared import Inches
from fpdf import FPDF
import os

# Load the tweets
df = pd.read_csv("tweets_output.csv")
df['created_at'] = pd.to_datetime(df['created_at']).dt.tz_localize(None)

# Sentiment analysis
def get_sentiment(text):
    blob = TextBlob(str(text))
    return blob.sentiment.polarity

df['sentiment_score'] = df['text'].apply(get_sentiment)

def categorize(score):
    if score > 0.1:
        return 'positive'
    elif score < -0.1:
        return 'negative'
    else:
        return 'neutral'

df['sentiment_category'] = df['sentiment_score'].apply(categorize)

# Save analyzed data
df.to_csv("analyzed_tweets.csv", index=False)
df.to_excel("analyzed_tweets.xlsx", index=False)

# Plot 1: Sentiment distribution
plt.figure(figsize=(6, 4))
sns.countplot(data=df, x='sentiment_category', hue='sentiment_category', palette='Set2', legend=False)
plt.title("Tweet Sentiment Distribution")
plt.xlabel("Sentiment")
plt.ylabel("Count")
plt.tight_layout()
plt.savefig("fig_sentiment_distribution.png")
plt.close()

# Plot 2: Average sentiment over time
df['date'] = pd.to_datetime(df['created_at']).dt.date
daily_sentiment = df.groupby('date')['sentiment_score'].mean().reset_index()

plt.figure(figsize=(8, 4))
sns.lineplot(data=daily_sentiment, x='date', y='sentiment_score', marker='o')
plt.title("Average Sentiment Score Over Time")
plt.xlabel("Date")
plt.ylabel("Average Sentiment")
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("fig_sentiment_over_time.png")
plt.close()

# Create Word report
doc = Document()
doc.add_heading('Polio Vaccine Tweet Analysis Report', 0)

doc.add_heading('1. Overview', level=1)
doc.add_paragraph('This report analyzes sentiment in tweets related to polio vaccines in Kenya.')

doc.add_heading('2. Sentiment Distribution', level=1)
doc.add_picture('fig_sentiment_distribution.png', width=Inches(5.5))
doc.add_paragraph('This chart shows the distribution of positive, neutral, and negative tweets.')

doc.add_heading('3. Sentiment Over Time', level=1)
doc.add_picture('fig_sentiment_over_time.png', width=Inches(5.5))
doc.add_paragraph('This line chart illustrates how average sentiment changes over time.')

doc.add_heading('4. Sample Tweets', level=1)
for i, row in df.head(5).iterrows():
    doc.add_paragraph(f"- {row['text'][:150]}... ({row['sentiment_category']})")

doc.save("Tweet_Analysis_Report.docx")

# Create PDF version using FPDF
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=12)
pdf.cell(200, 10, txt="Polio Vaccine Tweet Analysis Report", ln=True, align='C')

pdf.ln(10)
pdf.set_font("Arial", size=10)
pdf.multi_cell(0, 10, txt="This report analyzes sentiment in tweets related to polio vaccines in Kenya.")

pdf.ln(10)
pdf.cell(0, 10, "Sentiment Distribution", ln=True)
pdf.image("fig_sentiment_distribution.png", w=180)

pdf.ln(10)
pdf.cell(0, 10, "Average Sentiment Over Time", ln=True)
pdf.image("fig_sentiment_over_time.png", w=180)

pdf.ln(10)
pdf.cell(0, 10, "Sample Tweets", ln=True)
for i, row in df.head(5).iterrows():
    pdf.multi_cell(0, 8, txt=f"- {row['text'][:150]}... ({row['sentiment_category']})")

pdf.output("Tweet_Analysis_Report.pdf")

print("🎉 All exports completed:")
print("- analyzed_tweets.csv / .xlsx")
print("- sentiment charts (.png)")
print("- Word: Tweet_Analysis_Report.docx")
print("- PDF: Tweet_Analysis_Report.pdf")


UnicodeEncodeError: 'latin-1' codec can't encode character '\u2026' in position 674: ordinal not in range(256)

In [5]:
pip install pandas matplotlib seaborn textblob openpyxl python-docx fpdf
python -m textblob.download_corpora


SyntaxError: invalid syntax (1633507366.py, line 1)

In [6]:
!pip install pandas matplotlib seaborn textblob openpyxl python-docx




In [7]:
!python -m textblob.download_corpora


Finished.


[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data]   Package conll2000 is already up-to-date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_da

In [9]:
# Imports
import pandas as pd
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns
from docx import Document
from docx.shared import Inches
from fpdf import FPDF
import os

# Load tweets
df = pd.read_csv("tweets.csv")  # Change this if your file is named differently

# Ensure datetime is timezone-naive if needed
if 'date' in df.columns and pd.api.types.is_datetime64_any_dtype(df['date']):
    df['date'] = pd.to_datetime(df['date']).dt.tz_localize(None)

# Clean and analyze text
def clean_text(text):
    return str(text).strip().replace('\n', ' ')

def get_sentiment(text):
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    return polarity

df['text'] = df['text'].apply(clean_text)
df['sentiment_score'] = df['text'].apply(get_sentiment)
df['sentiment_category'] = df['sentiment_score'].apply(lambda x: 'positive' if x > 0.1 else 'negative' if x < -0.1 else 'neutral')

# Export as CSV and Excel
df.to_csv("analyzed_tweets.csv", index=False)
df.to_excel("analyzed_tweets.xlsx", index=False)

# Sentiment distribution plot
plt.figure(figsize=(6, 4))
sns.countplot(data=df, x='sentiment_category', palette='Set2')
plt.title("Sentiment Distribution")
plt.xlabel("Sentiment")
plt.ylabel("Tweet Count")
plt.tight_layout()
plt.savefig("sentiment_distribution.png")
plt.close()

# Sentiment score histogram
plt.figure(figsize=(6, 4))
sns.histplot(df['sentiment_score'], bins=30, kde=True)
plt.title("Sentiment Score Histogram")
plt.xlabel("Sentiment Score")
plt.ylabel("Density")
plt.tight_layout()
plt.savefig("sentiment_histogram.png")
plt.close()

# Export Word report
doc = Document()
doc.add_heading("Tweet Sentiment Analysis Report", 0)
doc.add_paragraph("This report summarizes the sentiment analysis of tweets.")

# Summary stats
doc.add_heading("Summary", level=1)
sentiment_counts = df['sentiment_category'].value_counts().to_dict()
for k, v in sentiment_counts.items():
    doc.add_paragraph(f"{k.title()}: {v} tweets")

# Insert plots
doc.add_heading("Visualizations", level=1)
doc.add_picture("sentiment_distribution.png", width=Inches(5))
doc.add_picture("sentiment_histogram.png", width=Inches(5))

# Add sample tweets
doc.add_heading("Sample Tweets", level=1)
for i, row in df.head(5).iterrows():
    doc.add_paragraph(f"- {row['text'][:150]}... ({row['sentiment_category']})")

doc.save("Tweet_Analysis_Report.docx")

# Export PDF report
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=12)
pdf.multi_cell(0, 10, txt="Tweet Sentiment Analysis Report", align='C')

pdf.set_font("Arial", size=10)
pdf.ln(10)
pdf.multi_cell(0, 8, txt="Summary:\n", align='L')
for k, v in sentiment_counts.items():
    pdf.multi_cell(0, 8, txt=f"{k.title()}: {v} tweets")

pdf.ln(5)
pdf.multi_cell(0, 8, txt="Sample Tweets:\n", align='L')
for i, row in df.head(5).iterrows():
    tweet_text = row['text'][:150].encode('latin-1', 'replace').decode('latin-1')
    pdf.multi_cell(0, 8, txt=f"- {tweet_text}... ({row['sentiment_category']})")

pdf.output("Tweet_Analysis_Report.pdf")

print("🎉 Export complete:")
print("- analyzed_tweets.csv")
print("- analyzed_tweets.xlsx")
print("- sentiment_distribution.png")
print("- sentiment_histogram.png")
print("- Tweet_Analysis_Report.docx")
print("- Tweet_Analysis_Report.pdf")


FileNotFoundError: [Errno 2] No such file or directory: 'tweets.csv'