# In [23]:  (Jupyter notebook cell marker, kept as a comment so the file parses as Python)
##### imports: standard library, Elasticsearch / MinIO clients and project helpers
import json
from pprint import pprint
import os
import time
import pandas as pd 
from datetime import datetime
import pathlib

from dotenv import load_dotenv
from elasticsearch import Elasticsearch

import minio
from minio.error import S3Error
from minio.commonconfig import ENABLED
from minio.versioningconfig import VersioningConfig


from DataProfile_JSON_db import *
from minio_utils import *

##### input args
path_to_main_input = "./examples/dummy_data"
minio_credential = "credentials.macstudio.json"

file_type = "bam"
file_suffix = ".{}".format(file_type)

# Collect every "<path_to_main_input>/<file_type>/*.<file_type>" file.
# Sorted so the row order does not depend on filesystem enumeration order.
input_files = sorted(pathlib.Path(os.path.join(path_to_main_input, file_type)).glob("*" + file_suffix))

# One metadata row per input file.
input_metadata = pd.DataFrame(data = [item.name for item in input_files], columns = ["FileName"])
input_metadata["FileType"] = file_type
# Strip only the trailing suffix; str.replace would also remove ".<file_type>"
# occurrences in the middle of a file name.
input_metadata["Labcode"] = [name[:-len(file_suffix)] for name in input_metadata["FileName"]]
# Build the path from the configured input directory and the real file name, so
# it always points at the file that was actually found on disk.
input_metadata["path"] = ["{}/{}/{}".format(path_to_main_input, file_type, name) for name in input_metadata["FileName"]]
input_metadata["project"] = "ECD"
input_metadata["sub_project"] = "ECD_read_based"
input_metadata["Date"] = datetime.now()
input_metadata["pipeline"] = "bismark_wgbs"

# Key the records by file path: {path: {column: value, ...}, ...}
input_metadata = input_metadata.set_index("path")
input_metadata_dict = input_metadata.to_dict(orient = "index") # the input metadata is ready to be added to the database elasticsearch

##### MAIN
class RDS:
    """Bundle an Elasticsearch client, a MinIO client and the data profiles.

    Attributes set by ``__init__``:
        username, password: Elasticsearch basic-auth credentials as given.
        es: the ``Elasticsearch`` client.
        all_indices: index names not starting with "." (snapshot taken at
            construction time).
        minio_credential: path of the MinIO credential JSON file.
        minio_client: the ``minio.Minio`` client.
        buckets: result of ``minio_client.list_buckets()`` at construction time.
        dataProfile_json: value passed to ``DataProfileDB``.
        dataProfileObj: the ``DataProfileDB`` instance.
        dataProfile: ``dataProfileObj.data``.
        verbose: whether informational messages are printed.
    """

    def __init__(self, username, password, minio_credential, dataProfile_json, verbose = False,
                 es_url = "http://localhost:9200", minio_endpoint = "localhost:9000", minio_secure = False):
        """
        Connect to Elasticsearch and MinIO and load the data profiles.

        Parameters:
        - username (str): Elasticsearch basic-auth user name.
        - password (str): Elasticsearch basic-auth password.
        - minio_credential (str): Path to a JSON file holding the keys
          "accessKey" and "secretKey".
        - dataProfile_json: Passed unchanged to ``DataProfileDB``
          (presumably a path to the profiles JSON file; confirm against
          DataProfile_JSON_db).
        - verbose (bool): Print connection info and upload messages.
        - es_url (str): Elasticsearch URL. Defaults to the local deployment.
        - minio_endpoint (str): MinIO "host:port". Defaults to the local server.
        - minio_secure (bool): Use TLS for MinIO. Defaults to False.

        Raises:
        - OSError / json.JSONDecodeError / KeyError if the credential file is
          missing, malformed or lacks the expected keys.
        - Whatever the Elasticsearch or MinIO clients raise on connection errors.
        """
        ##### connect to elastic search database
        self.username = username
        self.password = password
        self.dataProfile_json = dataProfile_json
        self.es = Elasticsearch(es_url, basic_auth=(username, password))
        client_info = self.es.info()
        index_rows = self.es.cat.indices(index='*', h='index', s='index:asc', format='json')
        # Names starting with "." are hidden/system indices: leave them out.
        self.all_indices = [row['index'] for row in index_rows if not row['index'].startswith(".")]

        ##### load minio server information
        self.minio_credential = minio_credential
        with open(self.minio_credential, 'r', encoding='utf-8') as file:
            keys = json.load(file)
        self.verbose = verbose
        self.minio_client = minio.Minio(
            endpoint=minio_endpoint,
            access_key=keys["accessKey"],
            secret_key=keys["secretKey"],
            secure=minio_secure
        )
        self.buckets = self.minio_client.list_buckets()

        ##### load all available pre-defined data profiles
        self.dataProfileObj = DataProfileDB(self.dataProfile_json)
        self.dataProfile = self.dataProfileObj.data

        if self.verbose:
            print('Connected to Elasticsearch!')
            pprint(client_info.body)

    def es_create_index(self, index_name, metadata_profile):
        """
        Create an Elasticsearch index.

        Parameters:
        - index_name (str): Name of the index to create.
        - metadata_profile (dict): Passed as the ``mappings`` of the new index.

        Returns:
        - None. Errors raised by the Elasticsearch client propagate.
        """
        self.es.indices.create(index = index_name, mappings = metadata_profile)

    def es_insert_document(self, document, index):
        """
        Index one document into Elasticsearch.

        Parameters:
        - document (dict): The document to store.
        - index (str): Name of the target index.

        Returns:
        - The response returned by ``Elasticsearch.index``.
        """
        # ``document=`` is the keyword the 8.x client expects; ``body=`` is the
        # legacy spelling.
        return self.es.index(index = index, document = document)

    def create_bucket(self, bucket_name, data_profile, versioning = True):
        """
        Create a bucket in MinIO if it does not exist yet.

        Parameters:
        - bucket_name (str): The name of the bucket to create.
        - data_profile: Currently unused; kept for interface compatibility.
        - versioning (bool): Enable object versioning on a newly created bucket.

        Returns:
        - bool: True if the bucket was created or already exists (an existing
          bucket is left untouched), False if MinIO reported an S3Error.
        """
        minio_client = self.minio_client
        try:
            if not minio_client.bucket_exists(bucket_name):
                # Make a new bucket
                minio_client.make_bucket(bucket_name)
                print(f"Bucket '{bucket_name}' created successfully.")
                if versioning:
                    minio_client.set_bucket_versioning(bucket_name, VersioningConfig(ENABLED))
            else:
                print(f"Bucket '{bucket_name}' already exists. Cannot create bucket with the same name. Please choose another name")
            return True
        except S3Error as e:
            print(f"Error creating bucket: {e}")
            return False

    def upload_file(self,
                    bucket_name,
                    object_name,
                    file_to_upload,
                    metadat,
                    data_profile):
        """
        Upload a file to a MinIO bucket with specified metadata.

        Parameters:
        - bucket_name (str): The name of the target bucket.
        - object_name (str): The object name in the bucket.
        - file_to_upload (str): The local path to the file to be uploaded.
        - metadat (dict): A dictionary of metadata to attach to the object.
          (The parameter name is kept as-is for existing callers.)
        - data_profile: Currently unused; kept for interface compatibility.

        Returns:
        - bool: True if the file was uploaded successfully, False if MinIO
          reported an S3Error.

        Raises:
        - OSError if ``file_to_upload`` cannot be opened or stat'ed.
        """
        try:
            with open(file_to_upload, 'rb') as file_data:
                file_stat = os.stat(file_to_upload)
                self.minio_client.put_object(
                    bucket_name=bucket_name,
                    object_name=object_name,
                    data=file_data,
                    length=file_stat.st_size,
                    # The original referenced an undefined name ``metadata``
                    # here, which raised NameError on every call.
                    metadata=metadat
                )
            if self.verbose:
                print(f"File '{object_name}' uploaded successfully with metadata.")
            return True
        except S3Error as e:
            print(f"Error uploading file: {e}")
            return False
        

# NOTE(security): credentials should not live in source control. They are read
# from the environment (ELASTIC_USERNAME / ELASTIC_PASSWORD) when set; the
# literals below are kept only as a backward-compatible fallback and should be
# removed once the environment is configured.
my_es = RDS(username = os.environ.get("ELASTIC_USERNAME", "elastic"),
            password = os.environ.get("ELASTIC_PASSWORD", "genov4"),
            minio_credential = minio_credential,
            verbose = False,
            dataProfile_json = "ALL_DATA_PROFILES.json")