In [1]:
from collections import deque
import pickle
import os
from datetime import datetime, timedelta
from supabase import create_client
from dotenv import load_dotenv

# Pull settings from a local .env file (if any) into the process environment,
# then read the Supabase endpoint and key from it.
load_dotenv()
SUPABASE_URL = os.environ.get('SUPABASE_URL')
SUPABASE_KEY = os.environ.get('API_KEY')  # the key is stored under API_KEY

# Shared Supabase client used by every cell below.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

In [15]:
# Directory (relative path) that holds the pickled "<domain>_queue.pkl" files.
DATA_DIR = "../data"

# Domains whose queues are processed, one queue file per entry.
domain_files = [
    "sports",
    "lifestyle",
    "technology",
    "business",
    "bollywood",
]

In [16]:
# Load the queue
def load_queue(domain, data_dir=None):
    """Load the persisted queue for ``domain``.

    Args:
        domain: Domain name; the queue is read from ``<domain>_queue.pkl``.
        data_dir: Directory containing the queue files. When omitted, the
            module-level ``DATA_DIR`` is used, so existing callers are
            unaffected.

    Returns:
        A ``deque`` holding the stored items in their saved order, or an
        empty ``deque`` when no queue file exists for the domain.
    """
    if data_dir is None:
        data_dir = DATA_DIR
    QUEUE_FILE = os.path.join(data_dir, f"{domain}_queue.pkl")

    # A missing file simply means nothing has been queued yet.
    if not os.path.exists(QUEUE_FILE):
        return deque()

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load queue files that were written by this project.
    with open(QUEUE_FILE, 'rb') as f:
        return deque(pickle.load(f))  # Stored as a list; convert back to deque

# Save the updated queue
def save_queue(queue, domain, data_dir=None):
    """Persist ``queue`` for ``domain`` as ``<domain>_queue.pkl``.

    The queue is written to a temporary file first and then moved into place
    with ``os.replace``, so an interrupted write cannot leave a truncated
    pickle behind. The target directory is created when it does not exist.

    Args:
        queue: Iterable of items to store (saved as a plain list).
        domain: Domain name used to build the file name.
        data_dir: Directory to write into. When omitted, the module-level
            ``DATA_DIR`` is used, so existing callers are unaffected.
    """
    if data_dir is None:
        data_dir = DATA_DIR
    if data_dir:
        # os.makedirs("") raises, so only create a directory when one is named.
        os.makedirs(data_dir, exist_ok=True)

    QUEUE_FILE = os.path.join(data_dir, f"{domain}_queue.pkl")
    tmp_file = f"{QUEUE_FILE}.tmp"
    try:
        with open(tmp_file, 'wb') as f:
            pickle.dump(list(queue), f)  # Convert deque to list for saving
        # Replace the previous queue file only once the new one is complete.
        os.replace(tmp_file, QUEUE_FILE)
    except BaseException:
        # Do not leave a partial temp file behind; the old queue file is intact.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
        raise
    print(f"Queue saved to {QUEUE_FILE}.")

In [26]:
# Delete outdated records
def _parse_record_time(raw_timestamp):
    """Parse a ``date_time`` string from the database into a ``datetime``."""
    try:
        # Format the notebook originally expected (no fraction, no offset).
        return datetime.strptime(raw_timestamp, "%Y-%m-%dT%H:%M:%S")
    except ValueError:
        # Fall back to general ISO 8601 so values carrying fractional seconds
        # or a UTC offset are still understood. A trailing "Z" is rewritten
        # because older Python versions do not accept it in fromisoformat().
        if raw_timestamp.endswith("Z"):
            raw_timestamp = raw_timestamp[:-1] + "+00:00"
        return datetime.fromisoformat(raw_timestamp)


def delete_outdated_records(max_age=timedelta(hours=24)):
    """Delete queued records older than ``max_age`` from ``NewsSentiment``.

    For every domain in ``domain_files`` the stored queue of UUIDs is loaded
    and each UUID is looked up in the ``NewsSentiment`` table:

    * not found in the table -> dropped from the queue;
    * older than ``max_age``  -> deleted from the table and dropped;
    * still fresh            -> kept in the queue;
    * lookup/parse/delete raised -> kept in the queue so it is retried on the
      next run (previously such UUIDs were silently lost).

    The rebuilt queue is saved back once per domain.

    Args:
        max_age: Maximum allowed record age. Defaults to 24 hours.
    """
    # Naive local time; used for timestamps that carry no UTC offset.
    # NOTE(review): this assumes stored naive timestamps use the same clock
    # as this machine - confirm against how records are written.
    current_time = datetime.now()

    for domain in domain_files:
        queue = load_queue(domain)
        print(len(queue))
        updated_queue = deque()

        while queue:
            record_uuid = queue.popleft()  # Oldest queued UUID first

            try:
                # Retrieve the record's timestamp from Supabase
                response = supabase.table("NewsSentiment").select("date_time").eq("uuid", record_uuid).execute()
                if not response.data:
                    print(f"Record UUID {record_uuid} not found in database, skipping.")
                    continue

                record_time = _parse_record_time(response.data[0]['date_time'])

                # Aware and naive datetimes cannot be subtracted from each
                # other, so compare offset-carrying timestamps with an aware "now".
                if record_time.tzinfo is not None:
                    reference_time = datetime.now(record_time.tzinfo)
                else:
                    reference_time = current_time

                if reference_time - record_time > max_age:
                    # Outdated: delete the record from the database
                    supabase.table("NewsSentiment").delete().eq("uuid", record_uuid).execute()
                    print(f"Record UUID {record_uuid} deleted from database.")
                else:
                    # Still fresh: keep it queued for a later run
                    updated_queue.append(record_uuid)

            except Exception as e:
                print(f"Error processing record UUID {record_uuid}: {e}")
                # Keep the UUID so a transient failure does not orphan the
                # record; it will be re-checked on the next run.
                updated_queue.append(record_uuid)

        # Save the updated queue
        save_queue(updated_queue, domain)

    print("Outdated record deletion process completed.")

In [27]:
# Run the cleanup. This deletes rows from the NewsSentiment table and
# rewrites each domain's queue file, so re-running it is not a dry run.
delete_outdated_records()

22
Queue saved to ../data\sports_queue.pkl.
21
Record UUID 41aa1cfe-fd5d-4383-a9d8-f033c715378d deleted from database.
Record UUID ddd578cb-b0ca-4ede-bdda-94bb703254c5 deleted from database.
Record UUID b0caa698-d3e0-4776-b38f-21397ea350e9 deleted from database.
Record UUID e48109b9-f1c5-44da-8caa-48ce8758425b deleted from database.
Record UUID 20a9f1a6-d8d9-4c2b-9d70-e1cd462e7bad deleted from database.
Record UUID 874b9574-01f7-4220-b40c-9378cffafc98 deleted from database.
Record UUID 6e88181a-a4f1-489b-b784-5bab78310d6c deleted from database.
Record UUID c73c572e-6b66-4ae0-80b4-0e0e6dce34f0 deleted from database.
Record UUID d3e90dbf-5388-4975-a1d7-a82da2007642 deleted from database.
Record UUID 3f1f1c3d-7381-41b3-88c9-3babf366adf2 deleted from database.
Record UUID b76d1c02-6571-4d6c-aeb7-581bca05cfc7 deleted from database.
Record UUID 2a50a477-acf0-47df-8b8d-374d6ae6c5ff deleted from database.
Record UUID b624c5bb-ff43-42a3-8a6a-65c8bc07322d deleted from database.
Record UUID 4efb2