In [4]:
from dotenv import load_dotenv
import os
from supabase import create_client

# Copy the key/value pairs from the local .env file into the process environment
load_dotenv()

# Sanity check: read the Supabase project URL back out and display it
my_var = os.environ.get('SUPABASE_URL')
print(my_var)


https://ealrqqefbhliasbxhghh.supabase.co


In [5]:
# Build the Supabase client from the URL and API key held in the environment
url, key = os.environ.get('SUPABASE_URL'), os.environ.get('API_KEY')
supabase = create_client(url, key)


In [9]:
# Smoke test: insert one hand-written row into the "NewsSentiment" table
data = dict(
    uuid=0,
    headlines="Taman",
    article_url="india_today",
    category="bollywood",
    date_time="2024-12-18 20:55:00",
)

response = (
    supabase.table("NewsSentiment")
    .insert(data)
    .execute()
)




AttributeError: 'APIResponse[~_ReturnT]' object has no attribute 'status_code'

APIResponse[~_ReturnT](data=[{'uuid': 0, 'headlines': 'Taman', 'article_url': 'india_today', 'category': 'bollywood', 'date_time': '2024-12-18T20:55:00'}], count=None)

## Pipeline test: load per-domain article CSVs into Supabase

In [1]:
import os
import pandas as pd
import pickle
from datetime import datetime, timedelta
from queue import Queue
from supabase import create_client
from dotenv import load_dotenv

# Make the values from .env available through the process environment
load_dotenv()

# Connect to Supabase with the URL and API key read from the environment
SUPABASE_URL, API_KEY = os.environ.get('SUPABASE_URL'), os.environ.get('API_KEY')
supabase = create_client(SUPABASE_URL, API_KEY)

In [2]:
# Folder (relative to the notebook) holding the article CSVs and queue pickles
DATA_DIR = "../data"


# Map each news domain to its CSV; every file follows the "<domain>_articles.csv" pattern
domain_files = {
    domain: f"{domain}_articles.csv"
    for domain in ("sports", "technology", "lifestyle", "bollywood", "business")
}

In [3]:
from collections import deque
import pickle
import os

# Save the queue
def save_queue(queue, domain):
    """Persist a domain's queue to ``<DATA_DIR>/<domain>_queue.pkl``.

    Args:
        queue: Iterable of queued items (a deque in this notebook). It is
            pickled as a plain list.
        domain: Domain name used to build the pickle file name.

    The target directory is created if it does not exist yet, so the first
    save on a fresh checkout does not fail with ``FileNotFoundError``.
    """
    queue_path = os.path.join(DATA_DIR, f"{domain}_queue.pkl")

    # os.makedirs("") raises, so only create a parent when the path has one.
    parent_dir = os.path.dirname(queue_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(queue_path, 'wb') as f:
        pickle.dump(list(queue), f)  # Convert deque to list for saving
    print(f"Queue saved to {queue_path}.")

# Load the queue
def load_queue(domain):
    """Return the deque stored in ``<DATA_DIR>/<domain>_queue.pkl``.

    An empty deque is returned when no pickle file exists for the domain.
    """
    queue_path = os.path.join(DATA_DIR, f"{domain}_queue.pkl")

    # Nothing saved for this domain yet: start from an empty queue.
    if not os.path.exists(queue_path):
        return deque()

    with open(queue_path, 'rb') as f:
        # NOTE: unpickling can execute arbitrary code; only load queue files
        # that were written by save_queue.
        stored_items = pickle.load(f)

    # The queue is stored as a list, so wrap it back into a deque.
    return deque(stored_items)


In [6]:
def add_to_database():
    """Insert every row of each domain CSV into the "NewsSentiment" table.

    For each ``domain -> file_name`` pair in ``domain_files`` the CSV under
    ``DATA_DIR`` is read and its rows are inserted one at a time through the
    module-level ``supabase`` client. The UUID of every row whose insert did
    not raise is appended to that domain's queue, which is loaded with
    ``load_queue`` and written back with ``save_queue``.

    A domain whose CSV is missing, or lacks one of the required columns, is
    reported and skipped. A failed insert is reported and does not stop the
    run. Returns nothing; progress is printed.
    """
    required_columns = ("UUID", "Headline", "Link", "DateTime", "Positive", "Negative")

    for domain, file_name in domain_files.items():
        queue = load_queue(domain)
        file_path = os.path.join(DATA_DIR, file_name)

        if not os.path.exists(file_path):
            print(f"File not found for domain '{domain}': {file_name}")
            continue

        # Read CSV file
        df = pd.read_csv(file_path)

        # Check the schema once up front: a missing column would otherwise
        # raise KeyError on the first row and abort all remaining domains.
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"Skipping domain '{domain}': {file_name} is missing columns {missing_columns}")
            continue

        try:
            for _, row in df.iterrows():
                record_uuid = row['UUID']
                data = _build_record(row, domain)

                # Insert into Supabase; only successfully inserted rows are queued
                try:
                    supabase.table("NewsSentiment").insert(data).execute()
                    queue.append(record_uuid)  # Add record UUID to the queue
                except Exception as e:
                    print(f"Error inserting record UUID {record_uuid}: {e}")
        finally:
            # Save even when the loop is interrupted, so UUIDs that were
            # already inserted are not lost from the queue.
            save_queue(queue, domain)

    print("Database update and queue management completed.")


def _build_record(row, domain):
    """Map one CSV row to the "NewsSentiment" payload, sending empty cells as None."""

    def clean(value):
        # pandas reads empty CSV cells as NaN, which has no valid JSON
        # representation; None is serialised as JSON null instead.
        return None if pd.isna(value) else value

    return {
        "uuid": clean(row['UUID']),
        "headlines": clean(row['Headline']),
        "article_url": clean(row['Link']),
        "category": domain,
        "date_time": clean(row['DateTime']),
        "positive": clean(row['Positive']),
        "negative": clean(row['Negative']),
    }


In [8]:
# Run the upload: insert each domain's CSV rows into Supabase and save the per-domain queues
add_to_database()

Queue saved to ../data\sports_queue.pkl.
Queue saved to ../data\technology_queue.pkl.
Queue saved to ../data\lifestyle_queue.pkl.
Queue saved to ../data\bollywood_queue.pkl.
Queue saved to ../data\business_queue.pkl.
Database update and queue management completed.
