# In [None]:  (Jupyter notebook cell marker left over from the export)
import re
import csv
import nltk
import pandas as panda
from bs4 import BeautifulSoup
from mastodon import Mastodon
from dotenv import dotenv_values
from nltk.sentiment import SentimentIntensityAnalyzer

# Fetch the VADER lexicon used by SentimentIntensityAnalyzer below
nltk.download('vader_lexicon')

# Read the configuration values through python-dotenv
env = dotenv_values()

# Fail early, with an actionable message, when the token is absent or empty
# instead of surfacing a bare KeyError (missing key) or a late API error
# (key present but without a value).
access_token = env.get('ACCESS_TOKEN')
if not access_token:
    raise RuntimeError(
        "ACCESS_TOKEN is not set; define it in the .env file before running."
    )

# Create an instance of the Mastodon class and authenticate
mastodon = Mastodon(
    access_token=access_token,
    api_base_url='https://mastodon.uno'
)

# Get the home timeline posts
timeline = mastodon.timeline_home()

# Create an instance of the SentimentIntensityAnalyzer class
analyzer = SentimentIntensityAnalyzer()

# Rows collected for the CSV file; each row is [content, sentiment label]
csv_data = []

# Matches links so they can be stripped from the post text; compiled once
# here rather than looked up again on every iteration.
URL_PATTERN = re.compile(r'http\S+|www\S+')

# Loop through the timeline statuses: clean each post's text, score it with
# VADER, and collect [content, label] rows in csv_data.
for status in timeline:
    # Use BeautifulSoup to remove HTML tags from content
    soup = BeautifulSoup(status['content'], 'html.parser')

    # get_text() concatenates text nodes with nothing in between, so the last
    # word of one paragraph/line would fuse with the first word of the next
    # (and a trailing URL would swallow the following word when the URL
    # pattern is applied). Insert a space at every <br> and after every <p>.
    # A global separator is deliberately not used: it would also split text
    # that is spread over several inline tags inside a single word or link.
    for line_break in soup.find_all('br'):
        line_break.replace_with(' ')
    for paragraph in soup.find_all('p'):
        paragraph.append(' ')
    filtered_content = soup.get_text()

    # Remove URLs from filtered_content
    filtered_content = URL_PATTERN.sub('', filtered_content)

    # Remove only the '#' so the hashtag word itself is kept, then lowercase.
    # NOTE(review): VADER is understood to treat ALL-CAPS words as emphasis;
    # lowercasing before scoring drops that cue - confirm this is intended.
    filtered_content = filtered_content.replace('#', '').lower()

    # Collapse the whitespace left behind by removed tags and URLs
    filtered_content = ' '.join(filtered_content.split())

    # Nothing left to analyse (e.g. the post had no text): skip it instead of
    # storing an empty row labelled 'Neutral'.
    if not filtered_content:
        continue

    # Perform sentiment analysis on the filtered content
    sentiment_scores = analyzer.polarity_scores(filtered_content)
    sentiment_score = sentiment_scores['compound']

    # Classify sentiment based on the sentiment score
    if sentiment_score >= 0.05:
        sentiment_label = 'Positive'
    elif sentiment_score <= -0.05:
        sentiment_label = 'Negative'
    else:
        sentiment_label = 'Neutral'

    # Add the data to csv_data as a list
    csv_data.append([filtered_content, sentiment_label])

    # Print the post information and sentiment analysis result
    print(f"Content: {filtered_content}")
    print(f"Sentiment: {sentiment_label} ({sentiment_score})")
    print("---")

# TODO: skip rows that are already present in dataset.csv, so that repeated
# runs over an overlapping timeline do not store the same post twice.

# Generate csv
# The file is opened in append mode so rows from earlier runs are kept.
# UTF-8 is requested explicitly because posts can contain emoji and accented
# characters that a platform's default encoding may be unable to represent.
with open('dataset.csv', 'a', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # In append mode the stream is positioned at the end of the file, so a
    # position of 0 means the file is new or empty and still needs a header.
    # Writing the header unconditionally would insert a stray header row in
    # the middle of the data on every run.
    if file.tell() == 0:
        writer.writerow(['Content', 'Sentiment'])  # Write the column headers
    writer.writerows(csv_data)