# Import into a lakeFS repository from multiple paths

## Prerequisites

###### This notebook requires a connection to a lakeFS server.
###### To spin up lakeFS quickly, use lakeFS Cloud (https://lakefs.cloud), which provides a lakeFS server on demand with a single click;
###### or, alternatively, refer to the lakeFS Quickstart doc (https://docs.lakefs.io/quickstart/installing.html).

## Change your lakeFS credentials

In [None]:
# Connection settings for the lakeFS server. Replace each placeholder before
# running the notebook: the endpoint is used as the client host and the
# access/secret key pair as the client username/password.
lakefsEndPoint = '<lakeFS Endpoint URL>' # e.g. 'https://username.aws_region_name.lakefscloud.io'
lakefsAccessKey = '<lakeFS Access Key>'
lakefsSecretKey = '<lakeFS Secret Key>'

## Storage Information
#### Change the Storage Namespace to a location in the bucket you’ve configured. The storage namespace is a location in the underlying storage where data for this repository will be stored.

In [None]:
# Passed as the storage namespace when the repository is created; replace the
# placeholder with a location in your own bucket.
storageNamespace = 's3://<S3 Bucket Name>/' # e.g. "s3://username-lakefs-cloud/"

## Environment Variables

In [None]:
# Branch used as the repository's default branch, as the branch the import is
# started on, and as the destination of the final merge.
sourceBranch = "main"
# Name of the lakeFS repository that this notebook creates and imports into.
repo = "multi-bucket-import-repo"

# Import Sources and Destinations
# Each source path is paired with the destination of the same number:
# objects under importSource1 are imported to importDestination1, and
# objects under importSource2 to importDestination2.
importSource1 = "s3://" # e.g. "s3://sample-dog-images/Images/n02085620-Chihuahua/"
importSource2 = "s3://" # e.g. "s3://sample-dog-images/Annotation/n02085620-Chihuahua/"
importDestination1 = "" # e.g. "Images/"
importDestination2 = "" # e.g. "Annotations/"

## Configuring the lakeFS Python client

In [None]:
%xmode Minimal
import lakefs_client
from lakefs_client import models
from lakefs_client.client import LakeFSClient

# Build the client configuration in one step from the endpoint and key pair:
# the access key is the username and the secret key is the password.
configuration = lakefs_client.Configuration(
    host=lakefsEndPoint,
    username=lakefsAccessKey,
    password=lakefsSecretKey,
)

# Client handle used by every lakeFS call in the rest of the notebook.
client = LakeFSClient(configuration)

## Creating a new lakeFS repository

In [None]:
# Describe the repository first, then submit the creation request.
# The call stays the last expression so the notebook still shows its result.
repository_creation = models.RepositoryCreation(
    name=repo,
    storage_namespace=storageNamespace,
    default_branch=sourceBranch,
)
client.repositories.create_repository(repository_creation=repository_creation)

## Import to a single repository from multiple paths

In [None]:
import time

# Start Import
# "import" is a Python keyword, so the import API cannot be written as
# `client.import`; fetch the attribute by name instead.
import_api = getattr(client, "import")
commit = models.CommitCreation(message="import objects", metadata={"key": "value"})
# One ImportLocation per source prefix; both are imported in a single operation.
paths = [
    models.ImportLocation(type="common_prefix", path=importSource1, destination=importDestination1),
    models.ImportLocation(type="common_prefix", path=importSource2, destination=importDestination2),
]
import_creation = models.ImportCreation(paths=paths, commit=commit)
create_resp = import_api.import_start(repo, sourceBranch, import_creation)

# Wait for import to finish
# Poll the import status every 2 seconds until it either fails or completes.
while True:
    status_resp = import_api.import_status(repo, sourceBranch, create_resp.id)
    print(status_resp)
    # The previous check, hasattr(status_resp, "Error in import"), tested for an
    # attribute name that cannot exist, so a failed import was never detected
    # and this loop would poll forever.
    # NOTE(review): the failure field is assumed to be `error` (an object with
    # a `message`) - confirm against the installed lakefs_client version.
    import_error = getattr(status_resp, "error", None)
    if import_error is not None:
        raise RuntimeError(getattr(import_error, "message", import_error))
    if status_resp.completed:
        print("Import completed Successfully. Data imported into branch:", status_resp.import_branch)
        break
    time.sleep(2)

## Merge import branch into main

In [None]:
# Merge the branch reported by the finished import into the destination branch.
# The call stays the last expression so the notebook still shows its result.
import_branch = status_resp.import_branch
client.refs.merge_into_branch(
    repository=repo,
    source_ref=import_branch,
    destination_branch=sourceBranch,
)

## More Questions?

###### Join the lakeFS Slack group - https://lakefs.io/slack