# Integration of lakeFS with Prefect

## Versioning Information

In [None]:
# Source branch name; sent to Prefect later in this notebook as the
# `source_branch` variable.
sourceBranch = "main"

## Import Python packages

In [None]:
import lakefs_client
import requests
import lakefs_demo

## Working with the lakeFS Python client API

###### Note: To learn more about the lakeFS Python integration, visit https://docs.lakefs.io/integrations/python.html

In [None]:
%xmode Minimal
if not 'lakefs' in locals():
    import lakefs_client
    from lakefs_client.models import *
    from lakefs_client.client import LakeFSClient

    # lakeFS credentials and endpoint
    configuration = lakefs_client.Configuration()
    configuration.username = lakefsAccessKey
    configuration.password = lakefsSecretKey
    configuration.host = lakefsEndPoint

    lakefs = LakeFSClient(configuration)
    print("Created lakeFS client.")

## Verify lakeFS credentials by getting lakeFS version

In [None]:
# Check the credentials by asking the lakeFS server for its version.
print("Verifying lakeFS credentials…")
try:
    v = lakefs.config.get_lake_fs_version()
except Exception as e:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate; the cause is printed rather than hidden.
    print(f"🛑 failed to get lakeFS version: {e}")
else:
    print(f"…✅lakeFS credentials verified\n\nℹ️lakeFS version {v.version}")

## Create Repository

In [None]:
# Imported here so the error paths below cannot fail with a NameError on `os`.
import os

from lakefs_client.exceptions import NotFoundException

# Reuse the repository named `repo_name` when it exists; otherwise create it
# in `storageNamespace`. Any other API error stops the process.
try:
    repo = lakefs.repositories.get_repository(repo_name)
    print(f"Found existing repo {repo.id} using storage namespace {repo.storage_namespace}")
except NotFoundException:
    # Handled before the generic ApiException clause so that "not found"
    # leads to creation instead of an abort.
    print(f"Repository {repo_name} does not exist, so going to try and create it now.")
    try:
        repo = lakefs.repositories.create_repository(
            repository_creation=RepositoryCreation(
                name=repo_name,
                storage_namespace=f"{storageNamespace}",
            )
        )
        print(f"Created new repo {repo.id} using storage namespace {repo.storage_namespace}")
    except lakefs_client.ApiException as e:
        # os._exit() skips stdio flushing, so flush the message explicitly,
        # and exit non-zero because this is a failure.
        print(f"Error creating repo {repo_name}. Error is {e}", flush=True)
        os._exit(1)
except lakefs_client.ApiException as e:
    print(f"Error getting repo {repo_name}: {e}", flush=True)
    os._exit(1)

## S3A Gateway configuration

##### Note: lakeFS can be configured to work with Spark in two ways:
###### * Access lakeFS using the S3A gateway https://docs.lakefs.io/integrations/spark.html#access-lakefs-using-the-s3a-gateway.
###### * Access lakeFS using the lakeFS-specific Hadoop FileSystem https://docs.lakefs.io/integrations/spark.html#access-lakefs-using-the-lakefs-specific-hadoop-filesystem.

In [None]:
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession

# Reuse the active SparkContext (or start one) and wrap it in a session.
sc = SparkContext.getOrCreate()
spark = SparkSession(sc)

# Configure the S3A filesystem with the lakeFS credentials and endpoint,
# using path-style access.
_s3a_conf = sc._jsc.hadoopConfiguration()
_s3a_conf.set("fs.s3a.access.key", lakefsAccessKey)
_s3a_conf.set("fs.s3a.secret.key", lakefsSecretKey)
_s3a_conf.set("fs.s3a.endpoint", lakefsEndPoint)
_s3a_conf.set("fs.s3a.path.style.access", "true")

## Set Prefect variables which are used by the demo workflow

In [None]:
prefectAPIEndPoint = 'http://host.docker.internal:4200/api'
prefectUIEndPoint = 'http://127.0.0.1:4200'

# Upper bound (seconds) on each Prefect API call, so an unreachable server
# raises instead of blocking the cell indefinitely.
_PREFECT_REQUEST_TIMEOUT = 30

# Names of the variables whose creation request did not succeed.
_failed_prefect_variables = []


def _reset_prefect_variable(name, value):
    """Delete the Prefect variable `name`, then create it again with `value`.

    The delete response is returned unchecked: it is only there so that the
    following create does not collide with a value left by an earlier run.
    A create response with an error status is reported and recorded in
    `_failed_prefect_variables`.

    Returns:
        A `(deletion_response, creation_response)` tuple.
    """
    deletion = requests.delete(
        f"{prefectAPIEndPoint}/variables/name/{name}",
        timeout=_PREFECT_REQUEST_TIMEOUT,
    )
    creation = requests.post(
        f"{prefectAPIEndPoint}/variables",
        json={"name": name, "value": value},
        timeout=_PREFECT_REQUEST_TIMEOUT,
    )
    if not creation.ok:
        _failed_prefect_variables.append(name)
        print(f"Failed to create Prefect variable {name}: HTTP {creation.status_code} {creation.text}")
    return deletion, creation


# The response objects keep their original global names so that any later
# cell inspecting them continues to work.
repo_variable_deletion_request, repo_variable_creation_request = _reset_prefect_variable(
    "repo", repo_name)

sourceBranch_variable_deletion_request, sourceBranch_variable_creation_request = _reset_prefect_variable(
    "source_branch", sourceBranch)

newBranch_variable_deletion_request, newBranch_variable_creation_request = _reset_prefect_variable(
    "new_branch", newBranch)

lakefs_credential_variable_deletion_request, lakefs_credential_variable_creation_request = _reset_prefect_variable(
    "lakefs_credential_name", lakefs_credential_name)

prefect_ui_endpoint_variable_deletion_request, prefect_ui_endpoint_variable_creation_request = _reset_prefect_variable(
    "prefect_ui_endpoint", prefectUIEndPoint)

# Choose the lakeFS UI address from the API endpoint: the two known
# container-internal prefixes map to fixed 127.0.0.1 ports, anything else is
# used unchanged.
if lakefsEndPoint.startswith('http://host.docker.internal'):
    lakefsUIEndPoint = 'http://127.0.0.1:8000'
elif lakefsEndPoint.startswith('http://lakefs'):
    lakefsUIEndPoint = 'http://127.0.0.1:58000'
else:
    lakefsUIEndPoint = lakefsEndPoint

lakefs_ui_endpoint_variable_deletion_request, lakefs_ui_endpoint_variable_creation_request = _reset_prefect_variable(
    "lakefs_ui_endpoint", lakefsUIEndPoint)

# Report success only when every create request succeeded.
if _failed_prefect_variables:
    print(f"Failed to create Prefect variables: {', '.join(_failed_prefect_variables)}")
else:
    print("Created Prefect variables")