In [None]:
import os

import pandas as pd
import psycopg
from dotenv import load_dotenv
from psycopg import sql

# Load environment variables through python-dotenv before reading the DB_* settings.
load_dotenv()

# Connection settings are read from the environment so that no credentials
# are written in the notebook itself.
connection_params = {
    'dbname': os.getenv('DB_NAME'),
    'user': os.getenv('DB_USER'),
    'password': os.getenv('DB_PASSWORD'),
    'host': os.getenv('DB_HOST'),
    # DB_PORT is optional: fall back to 5432 when it is unset or empty.
    'port': os.getenv('DB_PORT') or 5432,
}

# A single connection and cursor, shared by the cells below.
connection = psycopg.connect(**connection_params)
cursor = connection.cursor()

In [None]:
### Movies ###
# Upsert one row into `movies`, keyed on movie_id.
# Relies on movie_id, movie_title, release_date, total_reviews and
# movie_scrap_time having been defined earlier in the session.

# Store the data as a list of tuples (a single row) so it can go through executemany
movie_data = [(movie_id, movie_title, release_date, total_reviews, movie_scrap_time)]

# Build the query. The table name is injected as a quoted identifier and the
# VALUES list gets one placeholder per element of the row tuple.
# IS DISTINCT FROM is used instead of <> so that a stored NULL nb_reviews is
# treated as "different" and the row is refreshed (with <>, the comparison
# yields NULL and the update is skipped).
upsert_query = sql.SQL("""
    INSERT INTO {table} (movie_id, title, release_date, nb_reviews, scrapping_timestamp)
    VALUES ({values})
    ON CONFLICT (movie_id)
    DO UPDATE SET
        nb_reviews = EXCLUDED.nb_reviews,
        scrapping_timestamp = EXCLUDED.scrapping_timestamp
    WHERE {table}.nb_reviews IS DISTINCT FROM EXCLUDED.nb_reviews
""").format(
    table=sql.Identifier('movies'),
    values=sql.SQL(', ').join(sql.Placeholder() * len(movie_data[0])),
)

# Execute and commit. On failure, roll back so the shared connection is not
# left in an aborted transaction, then re-raise the original error.
try:
    cursor.executemany(upsert_query, movie_data)
    connection.commit()
except Exception:
    connection.rollback()
    raise
print(f"[INFO] Upserted movie #{movie_id} into movies")

In [None]:
### Reviews ###
# Upsert every row of reviews_df into `reviews_raw`, keyed on review_id.
# Relies on reviews_df and movie_id having been defined earlier in the session.

# Create a variable to identify reviews needing sentiment analysis
reviews_df['to_process'] = 1

# Store the data as a list of tuples built from the DataFrame (including the
# to_process column just added), replacing NaN with None to avoid errors with
# PostgreSQL.
# NOTE(review): assumes reviews_df columns are in the same order as the INSERT
# column list below, with to_process last — confirm against the scraping code.
reviews_list = [
    tuple(None if pd.isna(value) else value for value in row)
    for row in reviews_df.itertuples(index=False, name=None)
]

# Build the query. On conflict the mutable fields are refreshed, and
# to_process is reset to 1 only when the title or text actually changed;
# otherwise the stored flag is kept.
query = """
INSERT INTO reviews_raw (movie_id, review_id, author, title, text, rating, date, upvotes, downvotes, scrapping_timestamp, to_process)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (review_id) DO UPDATE
SET 
    title = EXCLUDED.title,
    text = EXCLUDED.text,
    upvotes = EXCLUDED.upvotes,
    downvotes = EXCLUDED.downvotes,
    to_process = CASE 
        WHEN reviews_raw.title IS DISTINCT FROM EXCLUDED.title OR reviews_raw.text IS DISTINCT FROM EXCLUDED.text 
        THEN 1 
        ELSE reviews_raw.to_process 
    END;
"""

# Execute and commit. On failure, roll back so the shared connection is not
# left in an aborted transaction, then re-raise the original error.
try:
    cursor.executemany(query, reviews_list)
    connection.commit()
except Exception:
    connection.rollback()
    raise
print(f"[INFO] Upserted reviews for #{movie_id} into reviews_raw")
print(f"Rows updated: {cursor.rowcount}")

In [None]:
# Preview up to 10 rows of reviews_raw as a DataFrame (displayed as the cell output).
# The query has no ORDER BY, so which rows come back is not fixed.
# NOTE(review): the raw psycopg connection is passed to pandas here; pandas
# documents SQLAlchemy connectables / sqlite3 connections for `con` — confirm
# this runs without warnings in the target environment.
pd.read_sql("SELECT * FROM reviews_raw LIMIT 10;", connection)

In [None]:
# Close the database connection opened in the setup cell.
connection.close()

In [None]:
# Manual recovery cell: discard the current (e.g. failed) transaction.
# Guarded so that running it after the connection has been closed is a no-op
# instead of raising on a closed connection.
if not connection.closed:
    connection.rollback()