In [None]:
import s3fs
import os
import pandas as pd
import re

from datetime import datetime
from dotenv import load_dotenv
from src.utils.db import PostgreSQLDatabase

In [None]:
# Open the PostgreSQL connection; `db` is reused by the table-backup cell below
# and closed in the last cell of the notebook.
db = PostgreSQLDatabase()
db.connect()

In [None]:
# The S3 endpoint host comes from the environment; a missing AWS_S3_ENDPOINT
# variable raises KeyError here.
S3_ENDPOINT_URL = f"https://{os.environ['AWS_S3_ENDPOINT']}"
fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))

# Remote location (bucket + prefix) that the backup files are uploaded to.
bucket_name = 'maeldieudonne'
destination = f"{bucket_name}/diffusion/"

In [None]:
# Ask the database helper to back up each table of interest.
# NOTE(review): presumably backup_table writes a file named after the table
# under data/backups (where the following cells look for backups) - confirm
# against src.utils.db.
for table in ['movies', 'reviews_raw', 'reviews_sentiments']:
    db.backup_table(table)

In [None]:
def get_latest_local_backup(table_name, backup_dir="data/backups"):
    """Return the path of the most recently modified local backup of a table.

    A backup is any entry of ``backup_dir`` whose name starts with
    ``table_name``. The match is a plain prefix test, so a table name that is
    itself a prefix of another table's name would also pick up that other
    table's files.

    Args:
        table_name: Table name, used as the file-name prefix to look for.
        backup_dir: Directory to search. Defaults to ``"data/backups"``.

    Returns:
        ``backup_dir`` joined with the name of the newest matching entry, or
        ``None`` when the directory does not exist or contains no matching
        entry. A message is printed in the ``None`` case.
    """
    try:
        candidates = [
            os.path.join(backup_dir, name)
            for name in os.listdir(backup_dir)
            if name.startswith(table_name)
        ]
    except FileNotFoundError:
        # A missing backup directory just means nothing has been backed up yet.
        candidates = []

    if not candidates:
        print(f"No local backup found for {table_name}")
        return None

    # Rank by modification time. On Unix, ctime is the inode-change time, so a
    # chmod or rename of an old backup would wrongly make it look the newest.
    return max(candidates, key=os.path.getmtime)

In [None]:
# Upload the most recent local backup of each table to S3, then delete the
# local copy once the upload has gone through.
for table in ['movies', 'reviews_raw', 'reviews_sentiments']:
    file_path = get_latest_local_backup(table)

    # None means there is nothing to upload for this table.
    if file_path is None:
        continue

    try:
        # NOTE(review): `content_type` and `encoding` are forwarded to s3fs as
        # extra keyword arguments; confirm they have the intended effect (the
        # S3 API spells these `ContentType` / `ContentEncoding`).
        fs.put(file_path, destination, content_type="parquet", encoding="utf-8")
    except Exception as e:
        # Best effort: report the failure and carry on with the next table.
        print(f"Error uploading {file_path} to {destination}: {e}")
        continue

    print(f"Successfully uploaded {file_path} to {destination}")

    # Cleanup is handled separately so that a failure to delete the local file
    # is not misreported as an upload error.
    try:
        os.remove(file_path)
    except OSError as e:
        print(f"Uploaded {file_path} but could not delete the local copy: {e}")

In [None]:
# Release the database connection opened at the top of the notebook.
db.close_connection()