# Connecting to S3 storage

This notebook uses environment settings and Pydantic to avoid hard-coding blob-storage connection details.

In [None]:
from pathlib import Path
from typing import Dict, Any
import json
from pydantic import BaseSettings, SecretStr, HttpUrl
import s3fs

In [None]:
# There's a JSON file available with your credentials in it
def json_config_settings_source(settings: BaseSettings) -> Dict[str, Any]:
    """Load settings values from the JSON file named in the settings Config.

    The file location is read from ``settings.__config__.json_settings_path``.

    Args:
        settings: The settings instance being populated; only its
            ``__config__.json_settings_path`` attribute is used.

    Returns:
        The parsed JSON document, or an empty dict when the file does not
        exist so that the remaining settings sources can still supply values.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    json_path = settings.__config__.json_settings_path
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        raw_text = json_path.read_text(encoding='utf-8')
    except FileNotFoundError:
        # No credentials file on this machine: contribute nothing instead of
        # aborting before the other sources are consulted.
        return {}
    return json.loads(raw_text)

class Settings(BaseSettings):
    """Connection settings for the MinIO/S3 store, resolved by pydantic.

    Values come from the sources returned by ``Config.customise_sources``
    instead of being hard-coded in the notebook.
    """

    # Required connection details: no defaults, so a source must supply them.
    MINIO_URL: HttpUrl
    MINIO_ACCESS_KEY: str
    # Held as SecretStr; callers unwrap it with get_secret_value() when needed.
    MINIO_SECRET_KEY: SecretStr

    # Root location passed to s3.ls(); defaults to the 'nrcan-btap' bucket.
    NAMESPACE: Path = Path('nrcan-btap')

    class Config:
        # Location of the JSON credentials file read by json_config_settings_source.
        json_settings_path: Path = Path('/vault/secrets/minio-standard-tenant-1.json')
        # Ignore extra values present in the JSON data
        extra = 'ignore'

        @classmethod
        def customise_sources(cls, init_settings, env_settings, file_secret_settings):
            """Return the settings sources, inserting the JSON file source.

            The JSON source is placed after the init arguments and before the
            environment and secret-file sources.
            NOTE(review): pydantic is documented to give earlier entries
            priority; confirm against the installed pydantic version.
            """
            return (init_settings, json_config_settings_source, env_settings, file_secret_settings)

# Build the settings object; pydantic resolves each field from the configured sources.
settings = Settings()
# Confirm the settings loaded without writing credentials into the saved
# notebook output: MINIO_ACCESS_KEY is a plain str and would print in clear text.
print(settings.dict(exclude={'MINIO_ACCESS_KEY', 'MINIO_SECRET_KEY'}))

In [None]:
# Establish S3 connection using the values loaded into `settings`.
s3 = s3fs.S3FileSystem(
    anon=False,
    key=settings.MINIO_ACCESS_KEY,
    # The secret is stored as SecretStr; unwrap it only at the point of use.
    secret=settings.MINIO_SECRET_KEY.get_secret_value(),
    use_ssl=False,  # Used if Minio is getting SSL verification errors.
    client_kwargs={
        'endpoint_url': settings.MINIO_URL,
        # NOTE(review): certificate verification is switched off here;
        # confirm this is acceptable for the target environment.
        'verify': False,
    },
)

# Get a list of files to prove it works.
# Both listings are derived from settings.NAMESPACE so that overriding the
# namespace redirects them together instead of leaving one path hard-coded.
namespace_root = settings.NAMESPACE.as_posix()
# A notebook cell only echoes its final expression, so print this listing
# explicitly rather than discarding it.
print(s3.ls(namespace_root))
s3.ls(f'{namespace_root}/input_data')

# Reading/writing data

Use `s3.open()` wherever you would otherwise read or write files with the standard Python `open()`.

For more info on s3fs, see https://s3fs.readthedocs.io/en/latest/

In [None]:
# Read the whole object as raw bytes, then display it.
predictions_key = 'nrcan-btap/output_data/predict_out'
with s3.open(predictions_key, 'rb') as remote_file:
    payload = remote_file.read()
print(payload)
# Alternative path noted by the author: nrcan-btap/input_data/output.xlsx

In [None]:
import io
# Destination object: the bucket name plus the key inside that bucket.
bucket = 'nrcan-btap'
path = 'output_data/test_out'
data = {'features': ['teast', 'etsd']}

# Serialise before opening the remote file so the object is only created once
# a payload is ready. The file is opened in binary mode, so write bytes.
data_json = json.dumps(data).encode('utf-8')

# Build the key from `bucket` and `path` instead of repeating the literal,
# so changing either variable actually changes where the data is written.
with s3.open(f'{bucket}/{path}', 'wb') as f:
    f.write(data_json)