# Data Import From MINIO server

## A. Accessing MINIO Console

Step 1: Open http://10.1.32.31:9001 in your browser

Step 2: Log in using your username and password

## B. Create a test bucket named after yourself and upload a few files

<img style="width: 60%" src="im/cap1.PNG">

<img style="width: 60%"  src='im/cap2.PNG'>

## C. Download data from MINIO to workspace

### 1. Import libraries

In [24]:
from minio import Minio
from dotenv import load_dotenv # add this line
import os

In [25]:
# Display the current working directory of the notebook kernel.
os.getcwd()

'/workspace'

### 2. Create .env file with credentials 

#### Run these in the terminal, replacing `USERID` and `PWD` with your MinIO username and password

`echo "MINIO_ACCESS"=USERID >> .env`

`echo "MINIO_KEY"=PWD >> .env`

### 3. Load credentials securely

In [26]:
# Pull the variables defined in the .env file into the process environment.
load_dotenv()

# Each value is None when the corresponding variable is not set.
user = os.environ.get('MINIO_ACCESS')
key = os.environ.get('MINIO_KEY')

In [27]:
# Echo the access ID to confirm the .env values were picked up.
user

'elan'

### 4. Initialize s3 client

In [28]:
import boto3

# Build the S3 service resource against the custom endpoint, authenticating
# with the `user` / `key` values loaded from the environment.
s3 = boto3.resource(
    's3',
    endpoint_url="http://minio-0:9000",
    aws_access_key_id=user,
    aws_secret_access_key=key,
)

### 5. Load data into workspace

In [29]:
# Print every bucket on the data server with its index, name and creation time.
# The loop variable is deliberately NOT called `bucket`: that module-level name
# is the handle download_file() and upload_file() read, and reusing it here
# would silently repoint it at the last bucket listed.
buckets = s3.buckets.all()
for b, listed_bucket in enumerate(buckets):
    print(b, "Bucket: ", listed_bucket.name, listed_bucket.creation_date)

0 Bucket:  aish 2022-03-29 16:20:21.187000+00:00
1 Bucket:  dvcremote 2022-03-29 20:06:57.020000+00:00
2 Bucket:  elan 2022-03-22 20:04:49.117000+00:00
3 Bucket:  heart-segmentation 2022-03-30 14:14:40.987000+00:00
4 Bucket:  ike 2022-03-14 15:12:48.441000+00:00
5 Bucket:  lda-object-detection 2022-03-30 00:50:23.137000+00:00
6 Bucket:  madi 2022-03-29 18:52:05.070000+00:00
7 Bucket:  monailabel 2022-03-22 20:34:59.637000+00:00
8 Bucket:  neeraja 2022-03-11 19:20:49.062000+00:00
9 Bucket:  neerajanew 2022-03-11 19:29:13.226000+00:00
10 Bucket:  pathology 2022-03-28 18:26:09.536000+00:00
11 Bucket:  pathologytest 2022-03-24 19:28:11.765000+00:00
12 Bucket:  utilities 2022-03-25 19:53:29.863000+00:00
13 Bucket:  zach 2022-03-11 19:22:45.234000+00:00


In [33]:
from multiprocessing.pool import ThreadPool 

In [39]:
def download_file(objkey):
    """Download one object from the current bucket to a mirrored local path.

    The object is saved as ``<bucketname>/<objkey>`` relative to the current
    working directory; intermediate directories are created as needed.

    Relies on two module-level names that must be set before the call:
    ``bucketname`` (used as the local root directory) and ``bucket`` (the
    bucket handle whose ``download_file`` method performs the transfer).

    Args:
        objkey: Key of the object inside the bucket.
    """
    dir_path = os.path.join(bucketname, os.path.dirname(objkey))
    # exist_ok=True keeps this safe when several pool threads create the same
    # directory at once, so no separate existence check is needed.
    os.makedirs(dir_path, exist_ok=True)

    file_name = os.path.basename(objkey)
    if not file_name:
        # A key ending in "/" has no file component (folder placeholder);
        # the directory now exists locally and there is nothing to write.
        return

    bucket.download_file(objkey, os.path.join(dir_path, file_name))  # save to same path

def downloadDirectoryFroms3(remoteDirectoryName,objectcount=None,suffix=None):
    """Download all objects under a key prefix using a pool of threads.

    Reads the module-level ``bucket`` (handle to list from) and ``bucketname``
    (used in the progress message); each object is fetched by
    ``download_file``.

    Args:
        remoteDirectoryName: Key prefix to download; ``''`` selects the whole
            bucket.
        objectcount: If truthy, keep only the first ``objectcount`` listed
            keys. This limit is applied BEFORE the suffix filter, so fewer
            than ``objectcount`` files may be downloaded.
        suffix: If truthy, keep only keys ending with this string.
    """
    print('Attempting to download bucket, ', bucketname, ' folder, ', remoteDirectoryName)
    objkeys = [obj.key for obj in bucket.objects.filter(Prefix=remoteDirectoryName)]

    if objectcount:
        objkeys = objkeys[:objectcount]

    if suffix:
        objkeys = [k for k in objkeys if k.endswith(suffix)]

    if not objkeys:
        # ThreadPool(processes=0) raises ValueError, so stop early.
        print('No matching objects found; nothing to download')
        return

    # Two threads per object as before, but capped so a large bucket does not
    # try to start thousands of threads.
    max_threads = 32
    worker_count = min(2 * len(objkeys), max_threads)
    with ThreadPool(processes=worker_count) as pool:
        # map() blocks until every download has finished.
        pool.map(download_file, objkeys)

### Download your bucket

In [41]:
# download_file() writes to paths relative to the working directory, so
# switch to the target data folder before downloading.
os.chdir('/workspace/LDA_Object_Detection/data/csv')

In [43]:
%%time
# `bucketname` and `bucket` are the module-level names that download_file()
# and downloadDirectoryFroms3() read, so they must be set before the call.
bucketname = input('Enter bucket name')
bucket = s3.Bucket(bucketname)
# Local root directory for the download, named after the bucket.
os.makedirs(bucketname,exist_ok=True)
# Empty prefix selects the whole bucket; only keys ending in '.csv' are kept.
downloadDirectoryFroms3('',suffix='.csv')
#downloadDirectoryFroms3_single(s3,bucketname,'')

Enter bucket name lda-object-detection


Attempting to download bucket,  lda-object-detection  folder,  
CPU times: user 1.29 s, sys: 71.1 ms, total: 1.36 s
Wall time: 20.7 s


## D. Upload data from the workspace to a MINIO bucket using s3

### 1. Define a function to delete the bucket if it already exists

In [16]:
from botocore.client import ClientError

In [75]:
def delete_bucket(s3, bucketname):
    """Delete a bucket and every object in it, if the bucket exists.

    Prints 'Bucket does not exist' and returns when the existence check
    reports the bucket as missing.

    Args:
        s3: boto3 S3 service resource used for all calls.
        bucketname: Name of the bucket to remove.

    Raises:
        ClientError: If the existence check fails for any reason other than
            the bucket being absent (e.g. access denied), or if emptying or
            deleting the bucket fails.
    """
    # Only the existence probe is guarded, so a failed delete is never
    # misreported as "does not exist".
    try:
        s3.meta.client.head_bucket(Bucket=bucketname)
    except ClientError as err:
        error_code = str(err.response.get('Error', {}).get('Code', ''))
        if error_code in ('404', 'NoSuchBucket', 'NotFound'):
            print('Bucket does not exist')
            return
        raise

    bucket = s3.Bucket(bucketname)
    # A bucket must be empty before it can be deleted.
    bucket.objects.all().delete()
    # No ExpectedBucketOwner: the previous value 'string' was the placeholder
    # from the API request syntax, not a real account ID.
    bucket.delete()
    print('Deleted bucket: \n',bucketname)

### 2. Set the new bucket name and the folder to upload - bucket names should use only lowercase letters, digits and hyphens (no uppercase letters, underscores or other symbols)

In [113]:
# Name of the destination bucket on the server.
new_bucket = "monailabel"
# Local folder (relative to the working directory) whose files are uploaded.
upload_folder = "monailabel"

### 3. Delete bucket if it exists and create empty bucket - Only for test purposes - don't delete project data buckets!

In [114]:
# Destructive: removes `new_bucket` together with all of its objects.
delete_bucket(s3,new_bucket)

Deleted bucket: 
 monailabel


### 4. Upload Data to bucket

In [115]:
from multiprocessing.pool import ThreadPool 
# Module-level bucket handle that upload_file() writes objects to.
bucket = s3.Bucket(new_bucket)

In [116]:
# Root folder that object keys are computed relative to.
folderpath = upload_folder


def upload_file(full_path):
    """Upload one local file to the current bucket.

    The object key is the file's path relative to the module-level
    ``folderpath``, written with "/" separators. The upload goes to the
    module-level ``bucket`` handle.

    Args:
        full_path: Path of the local file, located under ``folderpath``.
    """
    # relpath stays correct when folderpath has a trailing separator, where
    # slicing by len(folderpath) + 1 would drop the key's first character.
    relative_path = os.path.relpath(full_path, folderpath)
    # Object keys use "/" regardless of the local OS path separator.
    object_key = relative_path.replace(os.sep, '/')
    with open(full_path, 'rb') as data:
        bucket.put_object(Key=object_key, Body=data)

In [117]:
def upload_files(s3,bucketname,folderpath):
    """Upload every file under a local folder using a pool of threads.

    Creates the bucket first when it does not exist yet.

    NOTE: each file is sent by ``upload_file``, which reads the module-level
    ``bucket`` and ``folderpath``; those must refer to the same bucket and
    folder as the arguments passed here.

    Args:
        s3: boto3 S3 service resource.
        bucketname: Name of the destination bucket.
        folderpath: Local folder that is walked recursively for files.
    """
    # Check the bucket named by the argument rather than whichever bucket the
    # module-level handle currently points to. creation_date is falsy for a
    # bucket that does not exist.
    if not s3.Bucket(bucketname).creation_date:
        s3.create_bucket(Bucket=bucketname)

    all_paths = [
        os.path.join(subdir, file_name)
        for subdir, _dirs, files in os.walk(folderpath)
        for file_name in files
    ]

    if not all_paths:
        # ThreadPool(processes=0) raises ValueError, so stop early.
        print('No files found under', folderpath)
        return

    # Two threads per file as before, but capped so a large folder does not
    # try to start thousands of threads.
    max_threads = 32
    worker_count = min(2 * len(all_paths), max_threads)
    with ThreadPool(processes=worker_count) as pool:
        # map() blocks until every upload has finished.
        pool.map(upload_file, all_paths)

In [118]:
%%time

# Upload the files found under `upload_folder` into `new_bucket`.
upload_files(s3,new_bucket,upload_folder)

print("Upload Done")

Upload Done
CPU times: user 17.3 s, sys: 20.4 s, total: 37.7 s
Wall time: 16 s


### 5. Go to MINIO Console and check if data is uploaded

<img style="width: 40%" src="im/cap3.PNG">