# Using lakeFS-spec to interact with lakeFS

### lakeFS-spec makes versioned data available via a filesystem interface

In [None]:
import pandas as pd

# lakeFS URIs are passed to pandas like any other path; here the repository is
# "pydata-hn", the ref is "main", and the object is lakes.parquet.
lakes_uri = "lakefs://pydata-hn/main/lakes.parquet"
df = pd.read_parquet(lakes_uri)
df.head()












### In the same way, we can write files

In [None]:
# Keep only the rows whose Country column equals "Germany".
is_german = df["Country"] == "Germany"
german_lakes = df[is_german]
german_lakes.head()

In [None]:
from lakefs_spec import LakeFSFileSystem

# Filesystem handle used by the transaction and open() calls in the cells below.
# No host or credentials are passed explicitly here.
fs = LakeFSFileSystem()

In [None]:
# Write the filtered table inside a transaction on "main" and commit it.
with fs.transaction("pydata-hn", "main") as tx:
    # Address the file through the transaction's own repository and branch.
    target_uri = f"lakefs://{tx.repository}/{tx.branch.id}/german_lakes.parquet"
    german_lakes.to_parquet(target_uri)
    tx.commit(message="Extract German lakes")

### We can access arbitrary files with `open()`

In [None]:
import json
from pathlib import Path

# Parse the local file up front so a missing or malformed experiment.json
# fails before a transaction is opened on the repository.
data = json.loads(Path("experiment.json").read_text(encoding="utf-8"))

with fs.transaction("pydata-hn", "main") as tx:
    with fs.open(f"lakefs://{tx.repository}/{tx.branch.id}/experiment.json", "w") as f:
        # Dump the parsed object rather than the raw text: json.dump() on a str
        # would store the whole document as one quoted, escaped JSON string.
        json.dump(data, f)
    tx.commit(message="Add experiment json")

### With the transaction API, we can perform complex versioning operations

In [None]:
from sklearn.model_selection import train_test_split

# Split the lakes table into a training and a test subset. No split size or
# random seed is passed, so the library defaults decide both.
train, test = train_test_split(df)

In [None]:
# Write both splits on the "demo-experiment" branch (based on "main") and keep
# the resulting commit for later cells. automerge is off and delete is "never".
with fs.transaction(
    "pydata-hn",
    base_branch="main",
    branch_name="demo-experiment",
    automerge=False,
    delete="never",
) as tx:
    train_uri = f"lakefs://{tx.repository}/{tx.branch.id}/train.csv"
    train.to_csv(train_uri)

    test_uri = f"lakefs://{tx.repository}/{tx.branch.id}/test.csv"
    test.to_csv(test_uri)

    # Both files end up in a single commit.
    commit = tx.commit(message="Create train test split")
print(commit)

### We can also merge branches and reference repository states using tags

In [None]:
# Bind this transaction as `tx` so that merge and tag run on it rather than on
# a `tx` left over from an earlier cell.
with fs.transaction("pydata-hn", "main") as tx:
    # NOTE(review): this merges "main" into "demo-experiment". If the intent is
    # to bring the experiment results back into main, the arguments are
    # reversed - confirm before relying on it.
    tx.merge(source_ref="main", into="demo-experiment")
    # Tag the train/test split commit so it can be addressed by name.
    tag = tx.tag(ref=commit.id, name="PyDataDemo")
print(tag)

In [None]:
# The tag name "PyDataDemo" is used as the ref part of the URI.
tagged_test_uri = "lakefs://pydata-hn/PyDataDemo/test.csv"
test_df = pd.read_csv(tagged_test_uri, index_col=0)
test_df

### We can use unique identifiers for automated versioning

In [None]:
# Show the commit object returned by tx.commit() for the train/test split;
# its id is used as the ref in the next cell.
print(commit)

In [None]:
# The commit id is used as the ref part of the URI instead of a branch name.
pinned_uri = f"lakefs://pydata-hn/{commit.id}/lakes.parquet"
df = pd.read_parquet(pinned_uri)
df

### Summary

lakeFS & lakeFS-spec
- Easy read and write operations by adding lakeFS URIs to your filesystem
- Git-style versioning and collaboration features
- Transactions as a safeguarded way to programmatically conduct versioning operations

Niceties
- Automatic authentication discovery
- Caching for uploads and downloads

# Questions?
![Our GitHub Repo](lakefs-spec-github-qrcode.png)

`pip install lakefs-spec`