# Data Lake Example 4 - Object Versioning

## 0. Load libraries and common configuration
Object versioning allows a bucket to store multiple versions of the same object, which is useful for restoring previous versions or maintaining a history of changes.

In [2]:
# Install necessary packages
!pip install minio certifi 



## 1. Enable Versioning on the Bucket
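To work with versioning, we need to enable it on the bucket using the MinIO client. The account used must be allowed to change the bucket's versioning configuration; otherwise the request is rejected with an `AccessDenied` error.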

In [14]:
import os
import ssl
import warnings

from minio import Minio
from minio.error import S3Error
from minio.versioningconfig import VersioningConfig
from urllib3 import make_headers, PoolManager
# NOTE: this silences *every* Python warning for the rest of the session, not
# only the insecure-request warnings that unverified HTTPS calls usually emit.
warnings.filterwarnings('ignore')

# SSL verification errors may appear with the client if Python's CA
# certificates are not properly configured. The line below works around them
# by making the standard library's default HTTPS context skip certificate
# verification.
# WARNING: it relies on private `ssl` attributes and applies process-wide, so
# only use it against endpoints you trust.
ssl._create_default_https_context = ssl._create_unverified_context

# Custom HTTP client with SSL verification disabled
http_client = PoolManager(cert_reqs='CERT_NONE')

# Minio client configuration.
# Each value can be overridden through an environment variable so that real
# credentials never have to be written into the notebook; the fallbacks are
# the values this example originally shipped with.
minio_url = os.environ.get('MINIO_ENDPOINT', 's3api.scene.local')
access_key = os.environ.get('MINIO_ACCESS_KEY', 'testuser')
secret_key = os.environ.get('MINIO_SECRET_KEY', 'testscene')

# Initialize Minio client
minio_client = Minio(
    minio_url,
    access_key=access_key,
    secret_key=secret_key,
    secure=True,
    http_client=http_client  # Disables SSL verification
)

bucket_name = "testbucket"

# Enable versioning on the bucket using VersioningConfig.
# The server rejects the request with an S3Error (e.g. code AccessDenied) when
# the account is not allowed to change the bucket's versioning configuration;
# the error is reported instead of aborting the notebook.
try:
    versioning_config = VersioningConfig(status="Enabled")
    minio_client.set_bucket_versioning(bucket_name, versioning_config)
    print(f"Versioning enabled on bucket '{bucket_name}'.")
except S3Error as e:
    print(f"Error enabling versioning: {e}")


Error enabling versioning: S3 operation failed; code: AccessDenied, message: Access Denied., resource: /testbucket, request_id: 1806FBE724B50E1D, host_id: dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8, bucket_name: testbucket


## 2. List all files from a bucket within a Data Lake

In [10]:
# List objects in the bucket with the Minio client created in section 1.
# recursive=True also descends into key prefixes ("folders") so that nested
# objects such as 'images/...' are listed with their full key.
try:
    object_names = [
        obj.object_name
        for obj in minio_client.list_objects(bucket_name=bucket_name, recursive=True)
    ]
except S3Error as e:
    print(f"Error listing objects: {e}")
else:
    # Print each file name (key)
    if object_names:
        for object_key in object_names:
            print(object_key)
    else:
        print("No files found in the bucket.")

athens.png
athens.webm
images/athens2.png


## 3. Download a file from the Data Lake

In [11]:
# File details.
# `bucket_name` comes from the configuration cell in section 1, so it is not
# re-assigned here.
download_path = 'athens_download.png'
object_name = 'images/athens2.png'

# Download the object to a local file with the Minio client from section 1
try:
    minio_client.fget_object(
        bucket_name=bucket_name,
        object_name=object_name,
        file_path=download_path,
    )
except S3Error as e:
    print(f"Error downloading {object_name}: {e}")
else:
    print(f"Downloaded {object_name} to {download_path}")

Downloaded images/athens2.png to athens_download.png


## 4. Delete a file from the Data Lake

In [7]:
# Delete the file with the Minio client from section 1.
# `object_name` is the key set in the download cell above.
# NOTE(review): on a bucket with versioning enabled, a delete without a
# version id is expected to add a delete marker rather than remove the stored
# versions — confirm against the server documentation.
try:
    minio_client.remove_object(bucket_name=bucket_name, object_name=object_name)
except S3Error as e:
    print(f"Error deleting {object_name}: {e}")
else:
    print("Delete successful")

Delete successful
