# Test MinIO Integration

This notebook requires MLflow to be deployed. Follow [these instructions](https://documentation.ubuntu.com/charmed-mlflow/en/latest/tutorial/mlflow-kubeflow/) to deploy MLflow with Kubeflow.

The notebook exercises the following MinIO operations:
- list buckets
- upload object to bucket
- download object
- retrieve data with pandas
- remove objects
- remove bucket

## Setup

In [None]:
# Please check the requirements.in file for more details
!pip install -r requirements.txt

In [None]:
import filecmp
import os
from urllib.parse import urlparse

import pandas as pd

from minio import Minio
from minio.error import S3Error

## Configure MinIO Client

In [None]:
# The MinIO client takes a bare "host[:port]" endpoint plus a separate TLS flag,
# so split the configured URL into those two pieces instead of assuming http://.
raw_endpoint_url = os.environ["MINIO_ENDPOINT_URL"]
parsed_endpoint = urlparse(raw_endpoint_url)
if parsed_endpoint.scheme not in ("http", "https") or not parsed_endpoint.netloc:
    raise ValueError(
        f"MINIO_ENDPOINT_URL must look like http(s)://host[:port], got {raw_endpoint_url!r}"
    )
MINIO_HOST = parsed_endpoint.netloc

# Initialize a MinIO client
mc = Minio(
    endpoint=MINIO_HOST,
    access_key=os.environ["AWS_ACCESS_KEY_ID"],
    secret_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    # enable TLS only when the endpoint URL uses the https scheme
    secure=parsed_endpoint.scheme == "https",
)

## List Existing Buckets

In [None]:
# Print every bucket name, each followed by the tab-indented names of its objects
buckets = mc.list_buckets()
for bucket in buckets:
    print(bucket.name)
    object_names = (obj.object_name for obj in mc.list_objects(bucket.name))
    for object_name in object_names:
        print("\t", object_name)

## Create Bucket

In [None]:
# Name of the bucket that the following cells create, fill, empty and remove
BUCKET = "kf-testing-minio"

In [None]:
# Make sure the test bucket is present, creating it only when it is missing
bucket_present = mc.bucket_exists(BUCKET)
if bucket_present:
    print(f"Bucket {BUCKET} already exists!")
else:
    mc.make_bucket(BUCKET)
    print(f"Created bucket {BUCKET}")

In [None]:
# the bucket must exist at this point
bucket_found = mc.bucket_exists(BUCKET)
assert bucket_found, f"Bucket {BUCKET} does not exist!"
# and it must not hold any objects yet
bucket_contents = list(mc.list_objects(BUCKET))
assert bucket_contents == [], f"Bucket {BUCKET} is not empty!"

## Upload Data to Bucket

In [None]:
# File names for the text-file round trip: the local source file, the key it is
# stored under in the bucket, and the local path used for the download
LOCAL_OBJECT = "sample.txt"
UPLOADED_OBJECT = "uploaded-sample.txt"
DOWNLOADED_OBJECT = "downloaded-sample.txt"

# Upload the local file to the bucket under the UPLOADED_OBJECT key
mc.fput_object(bucket_name=BUCKET, object_name=UPLOADED_OBJECT, file_path=LOCAL_OBJECT)

In [None]:
# the bucket must hold exactly one object, stored under the expected key
objects = list(mc.list_objects(BUCKET))
assert len(objects) == 1, f"Expected only 1 object in bucket {BUCKET}!"
uploaded_entry = objects[0]
assert (
    uploaded_entry.object_name == UPLOADED_OBJECT
), "The uploaded and local object names do not match!"

# the stored object must have the same size in bytes as the local file
local_size = os.path.getsize(LOCAL_OBJECT)
assert (
    uploaded_entry.size == local_size
), "The uploaded and local objects are not of the same size!"

### Download Object

In [None]:
# Download the uploaded object from the bucket to the DOWNLOADED_OBJECT local path
mc.fget_object(bucket_name=BUCKET, object_name=UPLOADED_OBJECT, file_path=DOWNLOADED_OBJECT)

In [None]:
# the download must have produced a local file
download_present = os.path.exists(DOWNLOADED_OBJECT)
assert download_present, f"Failed to download object {UPLOADED_OBJECT}!"

# shallow=False compares the file contents, not just their os.stat() signatures
contents_match = filecmp.cmp(LOCAL_OBJECT, DOWNLOADED_OBJECT, shallow=False)
assert contents_match, f"Downloaded object {DOWNLOADED_OBJECT} does not match the original!"

### Download Data with Pandas

In [None]:
# File names for the CSV round trip: the local source file and the key it is
# stored under in the bucket
LOCAL_CSV = "sample.csv"
UPLOADED_CSV = "uploaded-sample.csv"
DOWNLOADED_CSV = "downloaded-sample.csv"

# Upload the local CSV to the bucket under the UPLOADED_CSV key
mc.fput_object(bucket_name=BUCKET, object_name=UPLOADED_CSV, file_path=LOCAL_CSV)

In [None]:
# Parse the CSV from local disk first ...
local = pd.read_csv(LOCAL_CSV, delimiter=";")

# ... then parse the uploaded copy through an s3:// URL, passing pandas the
# credentials and the endpoint URL taken from the environment
uploaded_csv_url = f"s3://{BUCKET}/{UPLOADED_CSV}"
s3_storage_options = {
    "key": os.environ["AWS_ACCESS_KEY_ID"],
    "secret": os.environ["AWS_SECRET_ACCESS_KEY"],
    "client_kwargs": {"endpoint_url": os.environ["MINIO_ENDPOINT_URL"]},
}
uploaded = pd.read_csv(uploaded_csv_url, delimiter=";", storage_options=s3_storage_options)

In [None]:
# inspect contents of uploaded CSV (the bare last expression is rendered as the cell output)
uploaded

In [None]:
# check that the DataFrame read from the bucket equals the one read from local disk
assert local.equals(uploaded), "Uploaded and local CSV contents do not match!"

## Clean Up

In [None]:
# Delete both uploaded objects from the bucket
for uploaded_name in (UPLOADED_OBJECT, UPLOADED_CSV):
    mc.remove_object(BUCKET, uploaded_name)

In [None]:
# after the deletions the bucket listing must come back empty
remaining_objects = list(mc.list_objects(BUCKET))
assert remaining_objects == [], f"Bucket {BUCKET} is not empty!"

In [None]:
# check that attempting to retrieve a deleted object raises an error
res = None  # stays None unless the object can unexpectedly still be fetched
try:
    res = mc.get_object(BUCKET, UPLOADED_OBJECT)
except S3Error as e:
    # "NoSuchKey" is the expected outcome; any other S3 error is a real failure
    if e.code != "NoSuchKey":
        raise
finally:
    if res is not None:
        # the MinIO SDK docs ask callers to close the response and release its
        # connection; do so before the assertion below reports the failure
        res.close()
        res.release_conn()

assert res is None, f"Failed to delete {UPLOADED_OBJECT}!"

In [None]:
# Remove the test bucket itself
mc.remove_bucket(bucket_name=BUCKET)

In [None]:
# the removed bucket must no longer show up in the bucket listing
remaining_bucket_names = [listed.name for listed in mc.list_buckets()]
assert BUCKET not in remaining_bucket_names, f"Failed to delete bucket {BUCKET}!"

In [None]:
# Delete the downloaded copy from local disk; a missing file is only reported
try:
    os.unlink(DOWNLOADED_OBJECT)
except FileNotFoundError:
    print(f"File {DOWNLOADED_OBJECT} already deleted!")