# Minio - Object Storage - Practice 01

In [None]:
import minio
import os
import re
import pandas as pd
import json

In [None]:
# MINIO_CONFIG must hold a JSON document; the upload cell below reads its
# "url", "accessKey" and "secretKey" entries. If the variable is unset,
# json.loads receives None and raises TypeError.
access_creds = json.loads(os.getenv("MINIO_CONFIG"))

In [None]:
from minio import Minio
from minio.error import S3Error


def main():
    """Ensure the 'emails' bucket exists on the MinIO server and upload a file.

    Connection settings come from the module-level ``access_creds`` mapping
    (keys ``url``, ``accessKey`` and ``secretKey``). Errors raised by the
    client calls are not handled here.
    """
    # Strip any "http://" / "https://" so only the remainder of the URL is
    # passed to the client as its endpoint.
    endpoint = re.sub(r"https?://", "", access_creds["url"])
    storage = Minio(
        endpoint,
        access_key=access_creds["accessKey"],
        secret_key=access_creds["secretKey"],
        secure=False,  # TLS is disabled regardless of the scheme in the URL
    )

    bucket = "emails"

    # Create the bucket on first use; otherwise just report that it is there.
    if storage.bucket_exists(bucket):
        print("Bucket 'emails' already exists")
    else:
        storage.make_bucket(bucket)

    # Per the MinIO SDK, fput_object takes (bucket_name, object_name,
    # file_path): the second argument is the key inside the bucket and the
    # third is the local file that gets uploaded.
    storage.fput_object(
        bucket,
        "/datto_rmm/devices/2023/04/17/datto_rmm_2023_04_17_202808_UTC_devices.parquet",
        "[REDACTED]/.json",
    )
    print("successful upload")


if __name__ == "__main__":
    # Run the upload; storage errors are reported on stdout instead of
    # surfacing as a traceback.
    try:
        main()
    except S3Error as err:
        print("error occurred.", err)

In [None]:
import json
import os
import datetime as dt
import pandas as pd
from api_datto_rmm_devices import ApiExtraction
from s3_loader import S3Loader

# Naming and storage schema for this extraction job.
# DO NOT CHANGE THESE VALUES WITHOUT DISCUSSION WITH A DATA EXPERT:
# CHANGING THEM CAN HAVE OVERREACHING CONSEQUENCES.
TARGET_TYPE = "Minio"
TARGET_NAME = "data-ingestion"
SOURCE_PRODUCT = "datto_rmm"
SOURCE_TYPE = "api"
SOURCE_SUBJECT = "devices"
AWS_REGION = "localhost"

# ------------------------------------------------------------------------#
# ONLY ADJUST THE VALUES DIRECTLY ABOVE THIS LINE


# Publish the values defined above as environment variables, which is where
# create_config() reads them from. If this section is missing, the same
# variables can instead be set on the function itself.
os.environ.update(
    TARGET_TYPE=TARGET_TYPE,
    TARGET_NAME=TARGET_NAME,
    SOURCE_PRODUCT=SOURCE_PRODUCT,
    SOURCE_SUBJECT=SOURCE_SUBJECT,
    SOURCE_TYPE=SOURCE_TYPE,
    AWS_REGION=AWS_REGION,
)


def create_config():
    """Build the run configuration from environment variables.

    Each setting is read from the environment variable of the same name and
    falls back to a built-in default when that variable is unset. The
    extraction timestamp is generated at call time.

    Returns:
        dict: the keys ``TARGET_TYPE``, ``TARGET_NAME``, ``SOURCE_PRODUCT``,
        ``SOURCE_SUBJECT``, ``SOURCE_TYPE``, ``AWS_REGION`` and
        ``_UTC_EXTRACTION_DATETIME`` (current UTC time formatted as
        ``YYYY-MM-DD HH:MM:SS``), all with string values.
    """
    # datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
    # formats to exactly the same string because the pattern has no offset.
    extracted_at = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    config = {
        # Storage type the file will be stored in.
        # NOTE(review): the fallback is spelled "mino" while the value set
        # elsewhere in this file is "Minio" - confirm which spelling the
        # loader expects before changing it.
        "TARGET_TYPE": os.environ.get("TARGET_TYPE", "mino"),
        # Bucket, database, etc. name used as the root path.
        "TARGET_NAME": os.environ.get("TARGET_NAME", "data-ingestion"),
        # Platform or source definition, kept for future data validation.
        "SOURCE_PRODUCT": os.environ.get("SOURCE_PRODUCT", "unidentified"),
        # API route or other descriptor that further defines the source's
        # origin, intended purpose or classification.
        "SOURCE_SUBJECT": os.environ.get("SOURCE_SUBJECT", "unidentified"),
        # Source type, for reference of where the data came from.
        "SOURCE_TYPE": os.environ.get("SOURCE_TYPE", "unidentified"),
        # Resource region; defaults to "localhost" when unset.
        "AWS_REGION": os.environ.get("AWS_REGION", "localhost"),
        # UTC timestamp of this run, used for cataloguing.
        "_UTC_EXTRACTION_DATETIME": extracted_at,
    }

    return config


def lambda_handler(event, context):
    """Extract the configured subject into a DataFrame and upload it.

    Args:
        event: accepted for the handler signature; not read by this function.
        context: accepted for the handler signature; not read by this function.

    Returns:
        str: the upload result serialized as JSON, with the run configuration
        added under the ``"config"`` key.
    """
    # Runtime settings for this invocation.
    run_config = create_config()

    # Pull the subject's data from the API into a DataFrame.
    extractor = ApiExtraction(secret_key="DATTO_RMM_API")
    frame = extractor.create_dataframe(run_config["SOURCE_SUBJECT"])

    # Stamp every row with marker columns (column name -> config key).
    marker_columns = (
        ('_SOURCE_PRODUCT', "SOURCE_PRODUCT"),
        ('_SOURCE_SUBJECT', "SOURCE_SUBJECT"),
        ('_SOURCE_TYPE', "SOURCE_TYPE"),
        ('_UTC_EXTRACTION_DATETIME', "_UTC_EXTRACTION_DATETIME"),
    )
    for column, config_key in marker_columns:
        frame[column] = run_config[config_key]

    # Hand the frame to the loader, which uploads it using the run config.
    uploader = S3Loader(df_input=frame, run_config=run_config, secret_key="MINIO_CONFIG")
    outcome = uploader.upload_to_s3()

    # Include the configuration alongside the upload result.
    outcome.update({"config": run_config})

    return json.dumps(outcome)


if __name__ == "__main__":
    # Local run: invoke the handler with placeholder arguments (it reads
    # neither) and print the JSON result.
    print(lambda_handler(event="", context=""))