# Importing multiple buckets into a lakeFS repository

## Prerequisites

###### This Notebook requires connecting to a lakeFS Server. 
###### To spin up lakeFS quickly, use lakeFS Cloud (https://lakefs.cloud), which provides a lakeFS server on demand with a single click.
###### Alternatively, refer to the lakeFS Quickstart doc (https://docs.lakefs.io/quickstart/installing.html).

## Change your lakeFS credentials

In [None]:
# URL of the lakeFS server, e.g. 'https://username.aws_region_name.lakefscloud.io'
lakefsEndPoint = '<lakeFS Endpoint URL>'

# Access key / secret key pair used to authenticate against that server
lakefsAccessKey = '<lakeFS Access Key>'
lakefsSecretKey = '<lakeFS Secret Key>'

In [1]:
import os

# Connection settings for a locally running lakeFS server.
# Credentials must never be committed with the notebook, so they are read from
# the environment (the variable names follow the lakectl convention). When a
# variable is not set, the placeholder is used and has to be edited by hand.
lakefsEndPoint = os.environ.get(
    'LAKECTL_SERVER_ENDPOINT_URL',
    'http://host.docker.internal:8000')  # e.g. 'https://username.aws_region_name.lakefscloud.io'
lakefsAccessKey = os.environ.get('LAKECTL_CREDENTIALS_ACCESS_KEY_ID', '<lakeFS Access Key>')
lakefsSecretKey = os.environ.get('LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY', '<lakeFS Secret Key>')

## Storage Information
#### Change the Storage Namespace to a location in the bucket you’ve configured. The storage namespace is a location in the underlying storage where data for this repository will be stored.

In [None]:
# Location in the underlying object store where this repository's data will
# be kept, e.g. "s3://username-lakefs-cloud/"
storageNamespace = 's3://<S3 Bucket Name>/'

In [6]:
# Concrete storage namespace used for the recorded run of this notebook
# (format example: "s3://username-lakefs-cloud/")
storageNamespace = 's3://iddos3/importtest/ramdom/1g8hsd80h/'

## Environment Variables

In [7]:
# Repository to work in and the branch that the import targets
repo = "multi-bucket-import-repo"
sourceBranch = "main"
#ingestBranch = "ingest"

# Each import source (an object-store prefix) is paired with the path prefix
# it should appear under inside the repository.
importSource1, importDestination1 = (
    "s3://sample-dog-images/Images/n02085620-Chihuahua/",
    "Images/",
)
importSource2, importDestination2 = (
    "s3://sample-dog-images/Annotation/n02085620-Chihuahua/",
    "Annotations/",
)

## Working with the lakeFS Python client API

In [8]:
%xmode Minimal
import lakefs_client
from lakefs_client import models
from lakefs_client.client import LakeFSClient

# lakeFS credentials and endpoint
configuration = lakefs_client.Configuration()
configuration.username = lakefsAccessKey
configuration.password = lakefsSecretKey
configuration.host = lakefsEndPoint

client = LakeFSClient(configuration)

Exception reporting mode: Minimal


## If the repo named above already exists on your lakeFS server, skip the following step; otherwise, create a new repo:

In [9]:
# Describe the repository to create: its name, the storage namespace that
# backs it, and the branch it starts with.
new_repository = models.RepositoryCreation(
    name=repo,
    storage_namespace=storageNamespace,
    default_branch=sourceBranch,
)

# Left as the last expression so the notebook displays the server's response.
client.repositories.create_repository(repository_creation=new_repository)

{'creation_date': 1685037520,
 'default_branch': 'main',
 'id': 'multi-bucket-import-repo',
 'storage_namespace': 's3://iddos3/importtest/ramdom/1g8hsd80h/'}

## Import from Several directories

In [11]:
import time

# Start Import
# `import` is a reserved word in Python, so the client's import API cannot be
# reached with dot syntax; look the attribute up by name instead.
import_api = getattr(client, "import")
commit = models.CommitCreation(message="import objects", metadata={"key": "value"})
# One entry per source prefix, each mapped to its destination prefix in the repo
paths = [
    models.ImportLocation(type="common_prefix", path=importSource1, destination=importDestination1),
    models.ImportLocation(type="common_prefix", path=importSource2, destination=importDestination2),
]
import_creation = models.ImportCreation(paths=paths, commit=commit)
create_resp = import_api.import_start(repo, sourceBranch, import_creation)

# Wait for import to finish, polling the status every 2 seconds
while True:
    status_resp = import_api.import_status(repo, sourceBranch, create_resp.id)
    print(status_resp)
    # The status carries an `error` field only when the import failed; stop
    # polling in that case instead of waiting for a completion that never comes.
    import_error = getattr(status_resp, "error", None)
    if import_error is not None:
        raise Exception(f"Error in import: {import_error}")
    if status_resp.completed:
        print("Import completed Successfully. Data imported into branch:", status_resp.import_branch)
        break
    time.sleep(2)

{'completed': False,
 'import_branch': '_main_imported',
 'ingested_objects': 0,
 'update_time': datetime.datetime(2023, 5, 25, 17, 58, 53, 436178, tzinfo=tzlocal())}
{'commit': {'committer': 'admin',
            'creation_date': 1685037534,
            'id': '1a1eea46203d9ec50aa9a37d7aa8dea3231878bfb81dff01543bbcc4f069e11e',
            'message': 'import objects',
            'meta_range_id': '',
            'metadata': {'key': 'value'},
            'parents': ['19d457f658fc9c9fb6a2e1a4c6d9345fde89d53aaf23fb25b1c19268190ef389']},
 'completed': True,
 'import_branch': '_main_imported',
 'ingested_objects': 304,
 'metarange_id': 'f09fe8fbe2a0c231fc5e92b73815a98653f43dcb3252ed2bb1539bdb20dab697',
 'update_time': datetime.datetime(2023, 5, 25, 17, 58, 54, 487001, tzinfo=tzlocal())}
Import completed Successfully. Data imported into branch: _main_imported


## Merge Changes into main

In [None]:
# Merge the branch reported by the import status into the source branch.
client.refs.merge_into_branch(
    destination_branch=sourceBranch,
    source_ref=status_resp.import_branch,
    repository=repo)

## More Questions?

###### Join the lakeFS Slack group - https://lakefs.io/slack