In [1]:
import s3fs
import os

from datetime import datetime
from dotenv import load_dotenv
from src.utils.db import PostgreSQLDatabase

In [2]:
# Load variables from a .env file (if any) before reading the settings below.
load_dotenv()

# Connection settings; os.getenv yields None for any variable that is not set.
db_name, db_user, db_password, db_host = (
    os.getenv(var) for var in ('DB_NAME', 'DB_USER', 'DB_PASSWORD', 'DB_HOST')
)

# Build the project's PostgreSQL wrapper and open the connection.
db = PostgreSQLDatabase(dbname=db_name, user=db_user, password=db_password, host=db_host)
db.connect()

Successfully connected to the database


In [3]:
# The S3 endpoint host is read from the environment; a missing variable raises KeyError.
S3_ENDPOINT_URL = f"https://{os.environ['AWS_S3_ENDPOINT']}"
fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))

# Remote location (bucket + prefix) that receives the uploaded backups.
bucket_name = 'maeldieudonne'
destination_directory = f"{bucket_name}/MLOps/"

# Back up the tables

In [4]:
# Back up the `reviews_raw` table. Per the recorded output below, the backup is
# written as a timestamped parquet file under data/backups/.
db.backup_table('reviews_raw')

Table reviews_raw backed up to data/backups/reviews_raw_20250328_141909.parquet


# Get the latest backup and transfer to S3

In [5]:
def get_latest_backup(table_name, backup_dir="data/backups"):
    """Return the path of the most recent backup file for ``table_name``.

    Backup files are expected to be named
    ``<table_name>_<YYYYMMDD>_<HHMMSS>.<ext>``, the format shown in this
    notebook's ``backup_table`` output (for example
    ``reviews_raw_20250328_141909.parquet``).

    The newest backup is selected from the timestamp embedded in the file
    name rather than from filesystem metadata: ``os.path.getctime`` is the
    inode-change time on Unix, so copying or touching the backup directory
    could make an old backup look like the latest one. Requiring a valid
    timestamp right after the table name also prevents a table such as
    ``reviews`` from matching the backups of ``reviews_raw``.

    Args:
        table_name: Name of the table whose backup is wanted.
        backup_dir: Directory holding the backup files. Defaults to
            ``"data/backups"``.

    Returns:
        The path (``backup_dir`` joined with the file name) of the backup
        carrying the most recent timestamp.

    Raises:
        FileNotFoundError: If ``backup_dir`` does not exist or contains no
            backup for ``table_name``.
    """
    prefix = f"{table_name}_"
    candidates = []
    for name in os.listdir(backup_dir):
        if not name.startswith(prefix):
            continue
        # Whatever sits between the table prefix and the extension must be a
        # timestamp; anything else belongs to another table or is unrelated.
        stamp = os.path.splitext(name)[0][len(prefix):]
        try:
            taken_at = datetime.strptime(stamp, "%Y%m%d_%H%M%S")
        except ValueError:
            continue
        candidates.append((taken_at, name))

    if not candidates:
        raise FileNotFoundError(f"No backup found for {table_name}.")

    # Tuples compare by timestamp first; the name only breaks exact ties.
    _, latest_file = max(candidates)
    return os.path.join(backup_dir, latest_file)

In [6]:
# Push the newest local `reviews_raw` backup to the S3 destination. Any failure
# (no backup found, upload error) is reported on stdout instead of being raised.
try:
    file_path = get_latest_backup("reviews_raw")
    fs.put(file_path, destination_directory, content_type="parquet", encoding="utf-8")
except Exception as exc:
    print(f"Error uploading file: {exc}")
else:
    print(f"Successfully uploaded {file_path} to {destination_directory}")

Successfully uploaded data/backups/reviews_raw_20250328_141909.parquet to maeldieudonne/MLOps/


# Get the latest backup from S3 and restore