In [None]:
!pip install boto3

## S3 demo notebook

- I have shared credentials with you; they are in a password-protected zip file, and the password was announced during the lesson  
- Before you run the notebook, make sure the credentials are in place  
- Credentials folder: `~/.aws` (files `credentials` and `config`)  
- Documentation https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html  
  - python library: `boto3`  
  - CLI: `aws`  


---
It should look like this:

`~/.aws/credentials`
```
[ydata-demo]
  aws_access_key_id = XXXXXXXXXXXXXXXXXXXXXXXXX
  aws_secret_access_key = YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
```

`~/.aws/config`
```
[profile ydata-demo]
  region=ru-central1
```

## Binary files

Download the remote object if it exists; otherwise create the file locally and upload it.

In [10]:
import boto3
from botocore.exceptions import ClientError
import pickle

# Open a session bound to the named profile from the AWS credentials files.
session = boto3.session.Session(profile_name='ydata-demo')

# S3 client whose endpoint is overridden to the Yandex Cloud storage URL.
s3 = session.client(
    service_name='s3',
    endpoint_url='https://storage.yandexcloud.net',
)

cloud_fname = 'data/requirements.pkl'  # object key inside the bucket
BUCKET_NAME = 'evgerher-ydata-demo'

try:
    # Try to load from remote first.
    # The key must be `cloud_fname` (the same key used by `upload_file` below);
    # no other file-name variable is defined in this notebook, so the cell
    # also works after Restart Kernel -> Run All.
    cloud_obj = s3.get_object(Bucket=BUCKET_NAME, Key=cloud_fname)['Body'].read()
    # NOTE(security): pickle.loads can execute arbitrary code; only unpickle
    # objects from a bucket whose writers you trust.
    dependencies = pickle.loads(cloud_obj)['dependencies_str']
    print('Loaded from remote')
except ClientError as ex:
    if ex.response['Error']['Code'] != 'NoSuchKey':
        # Any other client error (auth, missing bucket, ...) is not handled
        # here; re-raise it with the original traceback intact.
        raise
    # The object does not exist yet: compute locally, persist, and upload.
    dependencies = "\n".join(['pandas', 'numpy', 'matplotlib'])
    with open('requirements.pkl', "wb") as fOut:
        data = {
            'dependencies_str': dependencies
        }
        pickle.dump(data, fOut, protocol=pickle.HIGHEST_PROTOCOL)
    s3.upload_file('requirements.pkl', BUCKET_NAME, cloud_fname)
    print('Made locally, uploaded to remote')

Loaded from remote


## Main methods

- `upload_file`: upload a local file to a bucket
- `download_file`: download an object and save it on the filesystem
- `get_object`: fetch an object (binary or text) and return it in memory
- `list_objects`: list the objects in a bucket

In [11]:
# Upload the local requirements.txt to the bucket under the data/ prefix.
s3.upload_file(
    Filename='requirements.txt',
    Bucket=BUCKET_NAME,
    Key='data/requirements.txt',
)

In [19]:
# Download the archive from the bucket and save it as data/dataset.zip.
s3.download_file(
    Bucket=BUCKET_NAME,
    Key='data/bike-sharing-demand.zip',
    Filename='data/dataset.zip',
)
!ls -la data/

total 2576
drwx------   6 evgerher  LD\Domain Users     192 19 дек 01:17 .
drwxr-xr-x  15 evgerher  LD\Domain Users     480 19 дек 01:17 ..
-rw-r--r--   1 evgerher  LD\Domain Users  193828 19 дек 01:17 dataset.zip
-rw-rw-r--@  1 evgerher  LD\Domain Users  142861 11 дек  2019 sampleSubmission.csv
-rw-rw-r--@  1 evgerher  LD\Domain Users  323856 11 дек  2019 test.csv
-rw-rw-r--@  1 evgerher  LD\Domain Users  648353 11 дек  2019 train.csv


In [12]:
# Fetch the object into memory (nothing is written to disk) and print the
# raw bytes read from the response body.
get_object_response = s3.get_object(
    Key='data/requirements.txt',
    Bucket=BUCKET_NAME,
)
requirements_bytes = get_object_response['Body'].read()
print(requirements_bytes)

b'numpy\npandas\nmatplotlib\nxgboost\ncatboost\nseaborn\nscikit-learn'


In [20]:
# List the objects stored in the bucket. The call is the last expression in
# the cell, so the response dict is displayed as the cell output.
s3.list_objects(Bucket=BUCKET_NAME)

{'ResponseMetadata': {'RequestId': 'af71d1de88a8235d',
  'HostId': '',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'nginx',
   'date': 'Mon, 19 Dec 2022 00:19:17 GMT',
   'content-type': 'application/xml; charset=UTF-8',
   'content-length': '1783',
   'connection': 'keep-alive',
   'keep-alive': 'timeout=60',
   'x-amz-request-id': 'af71d1de88a8235d'},
  'RetryAttempts': 0},
 'IsTruncated': False,
 'Marker': '',
 'Contents': [{'Key': 'data/',
   'LastModified': datetime.datetime(2022, 12, 16, 11, 29, 43, 31000, tzinfo=tzutc()),
   'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
   'Size': 0,
   'StorageClass': 'STANDARD',
   'Owner': {'DisplayName': 'aje5epp0g909pf22lvb0',
    'ID': 'aje5epp0g909pf22lvb0'}},
  {'Key': 'data/bike-sharing-demand.zip',
   'LastModified': datetime.datetime(2022, 12, 16, 12, 21, 38, 561000, tzinfo=tzutc()),
   'ETag': '"0e5eac483e2db79c852cc8ce941cfeb1"',
   'Size': 193828,
   'StorageClass': 'STANDARD',
   'Owner': {'DisplayName': 'aje5epp0g909pf22