# Install pachctl

In [None]:
# Download the pinned pachctl v2.7.5 linux/amd64 release and unpack it directly into /usr/local/bin.
# --strip-components=1 drops the leading path component of every archive entry;
# writing to /usr/local/bin may require elevated permissions in your environment.
!curl -L https://github.com/pachyderm/pachyderm/releases/download/v2.7.5/pachctl_2.7.5_linux_amd64.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin

# Connect to the cluster

In [None]:
# Address of the cluster to connect to. It is left empty here: fill it in before running this cell.
cluster_address = ""

# IPython substitutes $cluster_address into the shell command before it runs.
!pachctl connect $cluster_address
# Start the login flow for the cluster configured by the previous command.
!pachctl auth login

Open the link printed by the login command in your browser and enter your credentials.

# Create your MLDM project

In [None]:
project_name = "my_project" # change your project name

In [None]:
!pachctl create project $project_name

In [None]:
!pachctl list projects # default should be the active one

# Set the context to your project

In [None]:
!pachctl config update context --project $project_name

In [None]:
!pachctl list projects # your project should be the active one

In [None]:
!pachctl list repos # empty

# Create the first repo and upload data

In [None]:
!pachctl create repo raw_videos_and_images

In [None]:
!pachctl list repos # raw_videos_and_images repo should be listed here

In [None]:
# Upload four local files from files/ to the master branch of raw_videos_and_images.
# Note: the two .jpg sources are stored in the repo under .png names; the .MOV files keep their names.
!pachctl put file raw_videos_and_images@master:liberty.png -f files/liberty.jpg
!pachctl put file raw_videos_and_images@master:cat-sleeping.MOV -f files/cat-sleeping.MOV
!pachctl put file raw_videos_and_images@master:robot.png -f files/robot.jpg
!pachctl put file raw_videos_and_images@master:highway.MOV -f files/highway.MOV

In [None]:
!pachctl list files raw_videos_and_images@master

# Creating the video mp4 converter pipeline

In [None]:
!cat video_mp4_converter.yaml 

In [None]:
!pachctl create pipeline -f video_mp4_converter.yaml

# Datums and glob patterns

In [None]:
# how many datums does the video mp4 converter pipeline see?
!pachctl list datum -f video_mp4_converter.yaml

In [None]:
# what if its glob pattern was "/"?
!pachctl glob file raw_videos_and_images@master:/

In [None]:
# what if its glob pattern was "/*"?
!pachctl glob file raw_videos_and_images@master:/*

In [None]:
# what if its glob pattern was "/*.png"?
!pachctl glob file raw_videos_and_images@master:/*.png

In [None]:
# what if its glob pattern was "/*.mp4"?
!pachctl glob file raw_videos_and_images@master:/*.mp4

# Updating the video mp4 converter pipeline

Edit __video_mp4_converter.yaml__ so that it only processes .MOV files, and then update the pipeline with:

In [None]:
!pachctl update pipeline -f video_mp4_converter.yaml

In [None]:
!pachctl list job

In [None]:
!pachctl list commit # notice that some commit IDs will be identical to job IDs

# Inspecting jobs, logs and pipeline spec

In [None]:
!pachctl list pipeline --spec --output yaml # gives the entire list of pipelines, corresponds to the latest commit by default

In [None]:
!pachctl inspect job video_mp4_converter@  # add a job id to inspect the job

In [None]:
!pachctl logs --pipeline video_mp4_converter # should be empty as datums were skipped when updating the pipeline spec

In [None]:
!pachctl logs --job video_mp4_converter@  # add a job id to get logs from the job

# Updating the video mp4 converter pipeline and reprocessing your datums

In [None]:
!pachctl update pipeline -f video_mp4_converter.yaml --reprocess

# Creating the image flattener pipeline

In [None]:
!cat image_flattener.yaml 

In [None]:
!pachctl create pipeline -f image_flattener.yaml

In [None]:
!pachctl list pipeline --spec --output yaml

# Create the image tracer pipeline

In [None]:
!cat image_tracer.yaml

In [None]:
!pachctl create pipeline -f image_tracer.yaml

# Create the movie gifer pipeline

In [None]:
!cat movie_gifer.yaml

In [None]:
!pachctl create pipeline -f movie_gifer.yaml # will take a few minutes to run

# Create the content shuffler pipeline

In [None]:
!cat content_shuffler.yaml

In [None]:
!pachctl create pipeline -f content_shuffler.yaml

# Create the content collager pipeline

In [None]:
!cat content_collager.yaml

In [None]:
!pachctl create pipeline -f content_collager.yaml

# Create the apache server pipeline

In [None]:
!cat apache_server.yaml

In [None]:
!pachctl create pipeline -f apache_server.yaml

# Branches

## Create a new branch, and have it identical to master

In [None]:
!pachctl list branch raw_videos_and_images

In [None]:
!pachctl create branch raw_videos_and_images@new-branch # empty branch

In [None]:
!pachctl list branch raw_videos_and_images

In [None]:
!pachctl create branch raw_videos_and_images@new-branch --head raw_videos_and_images@master # new-branch will be identical to master

In [None]:
!pachctl list branch raw_videos_and_images

## Update the new branch

In [None]:
!pachctl put file raw_videos_and_images@new-branch:robot1.png -f "files/robot(1).jpg"

## Create a pipeline independent from master

In [None]:
!cat new-branch_image_tracer.yaml

In [None]:
!pachctl create pipeline -f new-branch_image_tracer.yaml

## Update the master branch using the new branch

In [None]:
!pachctl create branch raw_videos_and_images@master --head raw_videos_and_images@new-branch

## Roll back the master branch

In [None]:
!pachctl create branch raw_videos_and_images@master --head raw_videos_and_images@master^

In [None]:
# Clean up the branch experiment: delete the new-branch_image_tracer pipeline
# (presumably the one created from new-branch_image_tracer.yaml above — confirm the name in the spec),
# then delete the new-branch branch of raw_videos_and_images.
!pachctl delete pipeline new-branch_image_tracer
!pachctl delete branch raw_videos_and_images@new-branch

# Triggers

## Create a trigger

In [None]:
!pachctl create branch raw_videos_and_images@staging --head master

In [None]:
!pachctl list branch raw_videos_and_images

In [None]:
# Re-create master with a trigger on the staging branch.
# NOTE(review): --trigger-commits=3 presumably means master only advances once three new
# commits have landed on staging (the three uploads in "Test the trigger" exercise this) — confirm in the pachctl docs.
!pachctl create branch raw_videos_and_images@master --trigger staging --trigger-commits=3

In [None]:
!pachctl list branch raw_videos_and_images

## Test the trigger

In [None]:
!pachctl put file raw_videos_and_images@staging:robot1.png -f "files/robot(1).jpg"

In [None]:
!pachctl put file raw_videos_and_images@staging:robot2.png -f "files/robot(2).jpg"

In [None]:
!pachctl put file raw_videos_and_images@staging:robot3.png -f "files/robot(3).jpg"

# Pachyderm SDK

## Install the Pachyderm SDK

In [None]:
!pip install pachyderm_sdk

## Connect to the cluster

In [None]:
import pachyderm_sdk
from pachyderm_sdk.api import pfs, pps
from pachyderm_sdk.api.pfs import File, FileType

In [None]:
# Build the SDK client used by every cell below.
# NOTE(review): from_config() presumably reads the local pachctl configuration written by
# `pachctl connect` / `pachctl auth login` earlier in this notebook — confirm in the SDK docs.
client = pachyderm_sdk.Client.from_config()

In [None]:
# Ask the client for the version and print it; a successful call also confirms the client can reach the cluster.
version = client.get_version()
print(version)

## List files from a repo

In [None]:
# Repo and branch whose contents are listed; project_name comes from the project cell near the top.
repo = "raw_videos_and_images"
branch = "master"

# Address the branch as "<project>/<repo>@<branch>" and print the path of every entry walk_file yields.
source_uri = f"{project_name}/{repo}@{branch}"
for entry in client.pfs.walk_file(file=File.from_uri(source_uri)):
    print(entry.file.path)

## Create a repo

In [None]:
# Describe the target repo inside the tutorial project and its master branch.
# `repo` and `branch` are reused by the commit, pipeline and clean-up cells below.
project = pfs.Project(name=project_name)
repo = pfs.Repo(name="sdk_repo", project=project)
branch = pfs.Branch.from_uri(f"{repo}@master")

try:
    client.pfs.create_repo(repo=repo, description="my first sdk-created repo")
except Exception as e:
    print("Error creating project or repo:", e)
    # Re-raise instead of calling exit(1): inside a notebook exit() acts on the kernel
    # rather than returning a status code, and re-raising keeps the full traceback visible
    # while still stopping a "Run All".
    raise
else:
    print("Repo creation successful.")

## Create a commit

In [None]:
# Open a commit on `branch` (defined in the repo-creation cell) and upload one local file into it.
# The local files/liberty.jpg is stored in the repo as /liberty.png.
try:
    with client.pfs.commit(branch=branch) as commit:
        with open("files/liberty.jpg", "rb") as source:
            commit.put_file_from_file(path="/liberty.png", file=source)
except Exception as e:
    print("Error loading data into the repo:", e)
    # Re-raise instead of calling exit(1): inside a notebook exit() acts on the kernel
    # rather than returning a status code, and re-raising keeps the full traceback visible.
    raise
else:
    print("Data loaded into the repo as a commit.")

## Create a pipeline

In [None]:
# Pipeline input: every top-level entry ("/*") on the master branch of the SDK-created repo.
# Named pipeline_input so the built-in input() is not shadowed for the rest of the notebook session.
pipeline_input = pps.Input(
    pfs=pps.PfsInput(project=project.name, branch="master", repo=repo.name, glob="/*")
)

# Container image and command the pipeline runs; it reads from /pfs/sdk_repo and writes to /pfs/out/.
transform = pps.Transform(
    image="lbliii/image_tracer:1.0.8",
    cmd=[
        "python3", "/image_tracer.py",
        "--input", "/pfs/sdk_repo",
        "--output", "/pfs/out/",
    ],
)

# Defined before the try so the clean-up cell below can always refer to `pipeline`.
pipeline = pps.Pipeline(name="sdk_image_tracer", project=project)

try:
    client.pps.create_pipeline(pipeline=pipeline, input=pipeline_input, transform=transform)
except Exception as e:
    print("Error creating the pipeline:", e)
    # Re-raise instead of calling exit(1): inside a notebook exit() acts on the kernel
    # rather than returning a status code, and re-raising keeps the full traceback visible.
    raise
else:
    print("Pipeline created successfully.")

## Delete pipeline and repo

In [None]:
# Delete the sdk_image_tracer pipeline object built in the "Create a pipeline" cell.
# It is removed before the repo because the pipeline takes that repo as its input.
client.pps.delete_pipeline(pipeline=pipeline)

In [None]:
# Delete the sdk_repo repo object built in the "Create a repo" cell.
# NOTE(review): presumably this must run after the pipeline reading from the repo is deleted — confirm.
client.pfs.delete_repo(repo=repo)