# Test MinIO Integration

- list buckets
- create bucket
- upload object to bucket
- download object
- retrieve data with pandas
- remove objects
- remove bucket

## Setup

In [1]:
!pip install minio pandas s3fs -q

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
boto3 1.27.0 requires botocore<1.31.0,>=1.30.0, but you have botocore 1.29.161 which is incompatible.
awscli 1.28.0 requires botocore==1.30.0, but you have botocore 1.29.161 which is incompatible.[0m[31m
[0m

In [2]:
import filecmp
import os

import pandas as pd

from minio import Minio
from minio.error import BucketAlreadyOwnedByYou, NoSuchKey

## Configure MinIO Client

In [3]:
# Accept "http://host:port", "https://host:port" or a bare "host:port" endpoint.
# The client is given the host without a scheme; TLS is selected through `secure`.
_scheme, _, _host = os.environ["MINIO_ENDPOINT_URL"].rpartition("://")
# A trailing slash is not part of the host, so drop it
MINIO_HOST = _host.rstrip("/")

# Initialize a MinIO client
mc = Minio(
    endpoint=MINIO_HOST,
    access_key=os.environ["AWS_ACCESS_KEY_ID"],
    secret_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    # use TLS only when the endpoint URL explicitly asks for https
    secure=_scheme.lower() == "https",
)

## List Existing Buckets

In [4]:
# Walk every bucket visible to the client and show what it holds
for bucket in mc.list_buckets():
    print(bucket.name)
    # Print each object name tab-indented beneath its bucket
    for obj in mc.list_objects(bucket.name):
        print("\t", obj.object_name)

mlflow
	 624471807852528081/
	 872778786834962304/


## Create Bucket

In [5]:
# Name of the scratch bucket that the following cells create, fill, empty and remove
BUCKET = "kf-testing-minio"

In [6]:
# Create the test bucket; a bucket we already own is only reported, not treated as a failure
try:
    mc.make_bucket(bucket_name=BUCKET)
except BucketAlreadyOwnedByYou:
    print(f"Bucket {BUCKET} already exists!")

In [7]:
# the bucket has to exist after the creation step ...
bucket_present = mc.bucket_exists(BUCKET)
assert bucket_present, f"Bucket {BUCKET} does not exist!"
# ... and it must not hold any objects yet
bucket_contents = list(mc.list_objects(BUCKET))
assert bucket_contents == [], f"Bucket {BUCKET} is not empty!"

## Upload Data to Bucket

In [8]:
# File names for the text round trip: download target, key inside the bucket, local source
DOWNLOADED_OBJECT = "downloaded-sample.txt"
UPLOADED_OBJECT = "uploaded-sample.txt"
LOCAL_OBJECT = "sample.txt"
# Upload the local file under its bucket key (left as the last expression so the result is displayed)
mc.fput_object(bucket_name=BUCKET, object_name=UPLOADED_OBJECT, file_path=LOCAL_OBJECT)

('4a507473e499735a94edc9ad9704a545', None)

In [9]:
# the listing must contain exactly one entry, and it must be the object just uploaded
objects = list(mc.list_objects(BUCKET))
assert len(objects) == 1, f"Expected only 1 object in bucket {BUCKET}!"
uploaded_entry = objects[0]
assert uploaded_entry.object_name == UPLOADED_OBJECT, "The uploaded and local object names do not match!"

# the size reported by the listing must equal the on-disk size of the source file
file_stat = os.stat(LOCAL_OBJECT)
assert uploaded_entry.size == file_stat.st_size, "The uploaded and local objects are not of the same size!"

### Download Object

In [10]:
# Fetch the uploaded object from the bucket and write it to a local file
mc.fget_object(bucket_name=BUCKET, object_name=UPLOADED_OBJECT, file_path=DOWNLOADED_OBJECT)

<minio.definitions.Object at 0x7fc00f2e5430>

In [11]:
# the download step must have produced a file on disk
download_present = os.path.exists(DOWNLOADED_OBJECT)
assert download_present, f"Failed to download object {UPLOADED_OBJECT}!"

# compare actual file contents (shallow=False) with the original that was uploaded
contents_match = filecmp.cmp(LOCAL_OBJECT, DOWNLOADED_OBJECT, shallow=False)
assert contents_match, f"Downloaded object {DOWNLOADED_OBJECT} does not match the original!"

### Download Data with Pandas

In [12]:
# File names for the CSV round trip: download target, key inside the bucket, local source
DOWNLOADED_CSV = "downloaded-sample.csv"
UPLOADED_CSV = "uploaded-sample.csv"
LOCAL_CSV = "sample.csv"
# Upload the local CSV under its bucket key (left as the last expression so the result is displayed)
mc.fput_object(bucket_name=BUCKET, object_name=UPLOADED_CSV, file_path=LOCAL_CSV)

('c886b0a6971427fc0faf293423e7a320', None)

In [13]:
# Parse the semicolon-delimited CSV from local disk
local = pd.read_csv(LOCAL_CSV, delimiter=";")

# Credentials and endpoint handed to pandas for resolving the s3:// URL
s3_options = {
    "key": os.environ["AWS_ACCESS_KEY_ID"],
    "secret": os.environ["AWS_SECRET_ACCESS_KEY"],
    "client_kwargs": {"endpoint_url": os.environ["MINIO_ENDPOINT_URL"]},
}
# Parse the uploaded copy of the same CSV straight from the bucket
uploaded = pd.read_csv(
    f"s3://{BUCKET}/{UPLOADED_CSV}",
    delimiter=";",
    storage_options=s3_options,
)

In [14]:
# inspect contents of uploaded CSV
# (bare last expression, so the notebook displays the frame read from the bucket)
uploaded

Unnamed: 0,col1,col2,col3
0,1,2,3
1,3,4,5
2,3,4,5


In [15]:
# the frame read from the bucket must equal the frame read from the local file
assert local.equals(uploaded), "Uploaded and local CSV contents do not match!"

## Clean Up

In [16]:
# Delete both uploaded objects, in upload order, so the bucket can be removed afterwards
for object_key in (UPLOADED_OBJECT, UPLOADED_CSV):
    mc.remove_object(BUCKET, object_key)

In [17]:
# after the deletions the bucket listing must come back empty
remaining_objects = list(mc.list_objects(BUCKET))
assert remaining_objects == [], f"Bucket {BUCKET} is not empty!"

In [18]:
# check that attempting to retrieve a deleted object raises an error
res = None
try:
    res = mc.get_object(BUCKET, UPLOADED_OBJECT)
except NoSuchKey:
    # expected outcome: the object is gone; any other exception propagates unchanged
    pass
else:
    # unexpected success: give the HTTP response back before failing the check below
    res.close()
    res.release_conn()

# compare against None explicitly instead of relying on the response object's truthiness
assert res is None, f"Failed to delete {UPLOADED_OBJECT}!"

In [19]:
# remove the test bucket itself (its objects were deleted in the preceding cells)
mc.remove_bucket(BUCKET)

In [20]:
# the removed bucket must no longer show up among the server's bucket names
remaining_buckets = {entry.name for entry in mc.list_buckets()}
assert BUCKET not in remaining_buckets, f"Failed to delete bucket {BUCKET}!"

In [21]:
# Delete the local download; a file that is already missing is only reported
try:
    os.unlink(DOWNLOADED_OBJECT)
except FileNotFoundError:
    print(f"File {DOWNLOADED_OBJECT} already deleted!")