In [25]:
import s3fs
import os
from minio import Minio
from minio import ResponseError

## Some useful utility functions
<br>

#### print_n_buckets(client [, N=10])
Prints first N (default=10) buckets in client...


#### find_csv_files(client, bucket [, prefix='' [, recursive=True]])
Returns a list of the object names which end in .csv (essentially the objects matching /client/bucket/[prefix]/*.csv).
<br>By default:
* prefix is '' (matching all prefixes)
* recursive is True (find_csv_files also traverses sub-folders under /client/bucket/prefix; pass False to stay at the top level)



In [3]:
def print_n_buckets(minio_client, N=10):
    """Print the total bucket count followed by the names of the first N buckets.

    Args:
        minio_client: object exposing ``list_buckets()``, which returns a
            sequence of items that each have a ``name`` attribute.
        N: maximum number of bucket names to print (default 10). Values
            below zero are treated as zero.

    Returns:
        None. Everything is written to stdout. If listing the buckets fails,
        the error is printed and the function returns without raising.
    """
    # get list of buckets...
    try:
        buckets = list(minio_client.list_buckets())
    except Exception as msg:
        print("Problem with client...")
        print(msg)
        return

    # print N bucket names
    how_many = len(buckets)
    print("Found {} buckets ...".format(how_many))
    shown = buckets[:max(N, 0)]
    for b in shown:
        print(b.name)

    # say how many names were left out, if any
    remaining = how_many - len(shown)
    if remaining > 0:
        print("... (+{} more)".format(remaining))
        
        
def find_csv_files(minio_client, bucket, prefix='', recursive=True):
    """Return the names of the objects in ``bucket`` that end in ``.csv``.

    Args:
        minio_client: object exposing ``bucket_exists(bucket)`` and
            ``list_objects(bucket, prefix, recursive)``; the listed items
            must have an ``object_name`` attribute.
        bucket: name of the bucket to search.
        prefix: object-name prefix handed to ``list_objects`` (default '').
        recursive: flag handed to ``list_objects`` (default True).

    Returns:
        A list of object names ending in '.csv'. The list is empty when the
        bucket does not exist or when the client raises; in both cases an
        "ERROR: ..." message is printed instead of raising.
    """
    try:
        if not minio_client.bucket_exists(bucket):
            print("ERROR: Bucket '{}' does not exist...".format(bucket))
            return []
    except Exception as msg:
        print("ERROR: Problem with client...")
        print(msg)
        return []

    # get list of objects and look for .csv 'files'...
    # The listing may be a lazy iterator that only talks to the server while
    # it is being consumed, so the filtering has to happen inside the try.
    try:
        objects = minio_client.list_objects(bucket, prefix, recursive)
        return [o.object_name for o in objects if o.object_name.endswith('.csv')]
    except Exception as msg:
        print("ERROR: Problem with client...")
        print(msg)
        return []
                
    

In [6]:
# Connection settings for the private MinIO server.
# Each credential can be overridden through an environment variable so real
# secrets do not have to be saved in the notebook; the fallbacks are the
# values that were previously hard-coded here.
myminio_endpoint = os.environ.get("MYMINIO_ENDPOINT", "10.1.80.0:9000")
myminio_access_key = os.environ.get("MYMINIO_ACCESS_KEY", "minioadmin")
myminio_secret_key = os.environ.get("MYMINIO_SECRET_KEY", "minioadmin")
# Intended for the client's `secure` flag.
myminio_secure = False

In [8]:
# Build the client from the myminio_* settings. Keyword arguments keep the two
# credentials from being swapped, and `secure` follows myminio_secure rather
# than repeating a literal that could drift from the configuration.
myminio_client = Minio(
    myminio_endpoint,
    access_key=myminio_access_key,
    secret_key=myminio_secret_key,
    secure=myminio_secure,
)
print_n_buckets(myminio_client)


Found 1 buckets ...
celeba-dataset


In [111]:
# For every bucket on the private server, print each .csv object found by a
# non-recursive search as "<bucket>/<object>".
for b in myminio_client.list_buckets():
    csv_files = find_csv_files(myminio_client, b.name, prefix='', recursive=False)
    for csv_file in csv_files:
        print("{}/{}".format(b.name, csv_file))


celeba-dataset/list_attr_celeba.csv
celeba-dataset/list_bbox_celeba.csv
celeba-dataset/list_eval_partition.csv
celeba-dataset/list_landmarks_align_celeba.csv


In [9]:
# Connection settings for the "play" endpoint.
# NOTE(review): these look like the publicly documented MinIO demo-server
# credentials - confirm before treating them as non-secret.
# Each value can be overridden through an environment variable; the fallbacks
# are the values that were previously hard-coded here.
play_endpoint = os.environ.get("PLAY_ENDPOINT", "play.minio.io")
play_access_key = os.environ.get("PLAY_ACCESS_KEY", "Q3AM3UQ867SPQQA43P2F")
play_secret_key = os.environ.get("PLAY_SECRET_KEY", "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG")
# Passed to the client's `secure` flag when the play client is created.
play_secure = False

In [10]:
# Client for the play endpoint, built entirely from the play_* settings.
play_client = Minio(
    play_endpoint,
    access_key=play_access_key,
    secret_key=play_secret_key,
    secure=play_secure,
)

In [11]:
# Show the bucket count and the first ten bucket names on the play endpoint.
print_n_buckets(play_client, N=10)

Found 185 buckets ...
00test
123-123-123-123-123-123-123
2063b651-92a3-4a20-a4a5-03a96e7c5a89
5433590004825c02f3d9c8678682ec2a
58a1a091-df26-43ea-9909-c659617d2034
5ad99bcb-13c2-40ff-926d-53d29fe16fff
5bvn4zo6h31c74926dxe4kxbx5dvg5in
7c0d0243-73d3-4bb0-9485-f454e8df17df
a-test-1
aicg7t2
... (+175 more)


In [12]:
# Only buckets whose name starts with this prefix are scanned.
# The empty string matches every bucket; set it to narrow the search.
bucket_name_prefix = ""

for b in play_client.list_buckets():
    if not b.name.startswith(bucket_name_prefix):
        continue

    # Non-recursive search; buckets without any .csv object print nothing.
    csv_files = find_csv_files(play_client, b.name, '', False)
    if csv_files:
        print("bucket {}...".format(b.name))
        for csv_file in csv_files:
            print("{}/{}".format(b.name, csv_file))

bucket heihei...
heihei/Parking_Citations.csv
bucket testbucket3...
testbucket3/customer.csv


In [13]:
# Bucket and object that the following cells work with.
bucket_name, object_name = 'testbucket3', 'customer.csv'

# Stat the object and report its size.
o = play_client.stat_object(bucket_name, object_name)
print("{}/{} is {} bytes".format(bucket_name, object_name, o.size))


testbucket3/customer.csv is 779474 bytes


In [18]:
# Preview the first 1000 bytes of the object.
data = None
try:
    data = play_client.get_partial_object(bucket_name, object_name, 0, 1000)
    # Decode the whole range in one go: the byte range can end in the middle
    # of a multi-byte character, so undecodable bytes are replaced rather than
    # raising, and lines are not printed with a doubled newline.
    print(data.read().decode('utf-8', errors='replace'))
except ResponseError as err:
    print(err)
finally:
    # Close the response and hand the connection back to the pool.
    if data is not None:
        data.close()
        data.release_conn()

1;Van Buren;753 Monroe Street;Boston;33347;WI;05-137-1962;634-608-1829

2;Lincoln;492 3/4 North Preisker Lane;Helena;72262;CT;09-257-1976;277-955-3440

3;Taylor;140 Via Real;Topeka;80777;FL;08-238-1995;324-301-2565

4;Adams;920 Jean de la Fontaine UNIT 11;Harrisburg;42893;OR;09-273-1997;673-454-4359

5;Arthur;989 Bowles Avenue;Oklahoma City;70403;HI;05-136-1967;689-239-6423

6;Garfield;926 East Main Street;Phoenix;57263;NM;02-54-1986;212-557-4930

7;Garfield;571 Hutchinson Rd;Madison;81626;AZ;01-25-1959;509-938-9472

8;Monroe;529 Pacific Hwy S;Salt Lake City;39036;SD;07-200-1977;912-743-5804

9;Johnson;109 Camelback Rd;Juneau;22856;TX;01-31-1982;218-216-2876

10;Polk;105 Apalachee Parkway;Harrisburg;53363;OK;01-03-1993;687-882-6953

11;Truman;150 Monroe Street;Des Moines;79646;NE;05-144-1985;652-415-3417

12;Fillmore;339 N Kentwood;Tallahassee;71707;TN;12-341-1988;588-300-4881

13;Pierce;71 W. Russell St.;Hartford;65894;CA;03-69-1956;920-757-4843

14;Tyler;916 San Luis Obispo North;Bat

In [41]:
# Get a full object and put it in a file '/tmp/<object_name>'
tmp_file_name = os.path.join("/tmp", object_name)
# Name used in the messages only; computed up front so that it exists even
# when the download fails.
logical_object_name = os.path.join("/minio", bucket_name, object_name)
print("Temporary file name: {}".format(tmp_file_name))
if os.path.exists(tmp_file_name):
    print('{} already exists... removing it... '.format(tmp_file_name))
    os.remove(tmp_file_name)
    print('{} is {}'.format(tmp_file_name, "still there" if os.path.exists(tmp_file_name) else "gone"))

try:
    obj = play_client.fget_object(bucket_name, object_name, tmp_file_name)
except ResponseError as err:
    print(err)
else:
    # The size check only makes sense after a successful download; running it
    # after a failure would hit undefined names or a missing file.
    print("Object {} successfully copied from minio to {}".format(logical_object_name, tmp_file_name))
    print('checking to see if {} was copied correctly...'.format(logical_object_name))
    if os.path.getsize(tmp_file_name) == obj.size:
        print('looks good... file same size as object')
    else:
        print('ooops... problme... file is NOT the same size as object!!')
    
    


Temporary file name: /tmp/customer.csv
{} already exists... removing it... 
/tmp/customer.csv is gone
Object /minio/testbucket3/customer.csv successfully copied from minio to /tmp/customer.csv
checking to see if /minio/testbucket3/customer.csv was copied correctly...
looks good... file same size as object


In [42]:
import inspect
import sys

# Collect the (name, class) pairs for every class found on the loaded s3fs module.
clsmembers = inspect.getmembers(sys.modules['s3fs'], predicate=inspect.isclass)

In [43]:
# Print the (name, class) pairs collected in clsmembers by the previous cell,
# instead of repeating the same inspect.getmembers call.
for m in clsmembers:
    print(m)

('S3File', <class 's3fs.core.S3File'>)
('S3FileSystem', <class 's3fs.core.S3FileSystem'>)


In [51]:
# NOTE(review): `s` is not defined in any cell visible here, and the KeyError
# recorded as this cell's output does not correspond to this expression.
# Looks like leftover scratch work - TODO confirm, then remove or replace.
s

KeyError: 's3fs.S3File'