# End-to-end tutorial for using the new Argilla SDK

This tutorial encompasses the full workflow of using the Argilla SDK to manage human feedback tasks. The three main areas are:

1. **Setting up a project**:
   - **Connecting to an Argilla server**:
   - **Creating a workspace**:
   - **Creating users**:
2. **Creating a dataset**:
   - **Defining the dataset's feedback task**:
   - **Adding records to the dataset**:
3. **Labelling the dataset**:
   - **Assigning records to users**:
   - **Adding suggestions and responses**:

We will work through each of these areas and show three depths of control: A line, B line, and C line. The A line is the simplest and most abstracted, the B line is a bit more detailed, and the C line is the most detailed and flexible.


In [1]:
import argilla_sdk as rg

# 1. Setting up a project

- Connecting to an Argilla server
- Creating a workspace
- Creating users


## A line: _Setting up a project_ 🪫


In [None]:
# Connecting to an Argilla server
# Creating a workspace
# Creating users

# One call covers all three steps: `api_url` is the server address and
# `api_key` is the credential used to authenticate against it.
client = rg.Client(api_url="http://localhost:6900", api_key="admin.apikey")
# For now, the client creates a default workspace, user and role

## B line: _Setting up a project_ 🔋


In [None]:
# Connecting to an Argilla server
client = rg.Client(api_url="http://localhost:6900", api_key="admin.apikey")

# Creating a workspace
# The workspace object is built locally, then created through the client.
workspace = rg.Workspace(name="MyWorkspace")
client.workspace.create(workspace)

# Creating users
# The user references the workspace by its name.
user = rg.User(name="John Doe", role="admin", workspace=workspace.name)
client.user.create(user)

## C line: _Setting up a project_ 🚀


In [None]:
# Connecting to an Argilla server
# One client per server; each client is used independently below.
local_client = rg.Client(api_url="http://localhost:6900", api_key="admin.apikey")
production_client = rg.Client(api_url="http://argilla.production.net", api_key="admin.apikey")

# Creating a workspace
# The same workspace definition is created through both clients.
workspace = rg.Workspace(name="MyWorkspace")
local_client.workspace.create(workspace)
production_client.workspace.create(workspace)

# Creating users
# The user is created through the local client first.
user = rg.User(name="John Doe", role="admin", workspace=workspace.name)
local_client.user.create(user)

# We can take a user from one server and create it on another server
user_from_local = local_client.user.get(name="John Doe", workspace=workspace.name)
production_client.user.create(user_from_local)

# 2. Creating a dataset

- Defining the dataset's feedback task
- Adding records to the dataset


In [4]:
import pandas as pd


# The same two sentences in three input shapes: a dict of columns,
# a list of row dicts, and a pandas DataFrame.
dataset_dict = {
    "text": [
        "Hello, world!",
        "I'm a sentence.",
    ],
}

dataset_list = [
    {"text": "Hello, world!"},
    {"text": "I'm a sentence."},
]

dataset_df = pd.DataFrame(data=dataset_dict)

## A line: _Creating a dataset_ 🪫


In [None]:
# Defining the dataset's feedback task
# Adding records to the dataset
# Each constructor below builds a dataset from a different input shape, using
# the "text" key as the only field; the client then creates the dataset.

# From a list of dicts (one dict per record).
dataset = rg.Dataset.from_list(dataset_list, name="MyListDataset", field_keys=["text"])
client.dataset.create(dataset)
# An error is raised if the dicts contain different keys.

# From a dict of lists (one list per field).
dataset = rg.Dataset.from_dict(dataset_dict, name="MyDictDataset", field_keys=["text"])
client.dataset.create(dataset)
# An error is raised if the lists have different lengths.

# From a pandas DataFrame.
dataset = rg.Dataset.from_df(dataset_df, name="MyDfDataset", field_keys=["text"])
client.dataset.create(dataset)

## B line: _Creating a dataset_ 🔋


In [None]:
# Defining the dataset's feedback task
# The template bundles the annotator guidelines, the record fields and the
# questions. The guidelines text matches the two label options defined below.

template = rg.Template(
    guidelines="Classify the articles into one of the two categories.",
    fields=[
        rg.Field(name="text", title="Text from the article"),
    ],
    questions=[
        rg.Question(
            name="label",
            title="In which category does this article fit?",
            required=True,
            settings=rg.QuestionSettings.MultiLabel(options=rg.LabelOption.from_labels(["greeting", "statement"])),
        )
    ],
)

# Adding records to the dataset
# The records come from `dataset_list`; the template replaces `field_keys`.
dataset = rg.Dataset.from_list(dataset_list, name="MyListDataset", template=template)
client.dataset.create(dataset)

## C line: _Creating a dataset_ 🚀


In [None]:
# Defining the dataset's feedback task
# The template bundles the annotator guidelines, the record fields and the
# questions. The guidelines text matches the two label options defined below.

template = rg.Template(
    guidelines="Classify the articles into one of the two categories.",
    fields=[
        rg.Field(name="text", title="Text from the article"),
    ],
    questions=[
        rg.Question(
            name="label",
            title="In which category does this article fit?",
            required=True,
            settings=rg.QuestionSettings.MultiLabel(options=rg.LabelOption.from_labels(["greeting", "statement"])),
        )
    ],
)

# Start from an empty dataset that only carries the template.
dataset = rg.Dataset(name="MyListDataset", template=template)

# Adding records to the dataset
# Records are built one at a time from `dataset_list`, each with an explicit
# external id derived from its position in the list.
for i, item in enumerate(dataset_list):
    dataset.records.add(
        rg.Record(
            fields={
                "text": item["text"],
            },
            external_id=f"record-{i}",
        )
    )

client.dataset.create(dataset)

# 3. Labelling the dataset

- Assigning records to users
- Adding suggestions and responses


In [None]:
# Labelled versions of the sample sentences: one list to load as suggestions
# and one to load as responses.
suggestion_list = [
    {"text": "Hello, world!", "label": "greeting"},
    {"text": "I'm a sentence.", "label": "statement"},
]
response_list = [
    {"text": "Hello, world!", "label": "greeting"},
    {"text": "I'm a sentence.", "label": "statement"},
]

## A line: _Labelling the dataset_ 🪫


In [None]:
# Adding suggestions and responses
# Suggestions and responses are loaded from their own labelled lists, then the
# dataset is updated through the client.
dataset = dataset.suggestions.from_list(suggestion_list)
dataset = dataset.responses.from_list(response_list)
client.dataset.update(dataset)
# we raise an error if the dicts contain different keys

In [None]:
# Assigning records to user/users
# No users are named here; `groups="even_split"` presumably spreads the records
# evenly across the existing users — TODO confirm against the SDK.
# NOTE(review): the B and C lines follow `assign` with an update call on the
# client; confirm whether one is needed here as well.
dataset = dataset.assign(groups="even_split", overlap=1, shuffle=True)

## B line: _Labelling the dataset_ 🔋


In [None]:
# Assigning records to user/users
# Two users are built locally and passed to `assign` as the list of splits.
# NOTE(review): unlike the C line, these users are not passed to a create call
# on the client before being used — confirm whether that is required.
james = rg.User(username="james", password="jamesfish")
rashida = rg.User(username="rashida", password="rashidaslizard")
dataset = dataset.assign(splits=[james, rashida], overlap=1, shuffle=True)
# The updated dataset is sent back through the client.
client.dataset.update(dataset)

In [None]:
# Adding suggestions
# `user` must already be defined by an earlier cell. Every record receives the
# same "greeting" value for the "label" question.
for record in dataset.records:
    record.suggestions.add(
        rg.Suggestion(
            user=user,
            fields={
                "label": "greeting",
            },
        )
    )

# Adding responses
# Same pattern as the suggestions, using rg.Response instead.
for record in dataset.records:
    record.responses.add(
        rg.Response(
            user=user,
            fields={
                "label": "greeting",
            },
        )
    )

# Same `client.dataset` accessor as the create/update calls in earlier cells.
client.dataset.update(dataset)

## C line: _Labelling the dataset_ 🚀


In [None]:
# Assigning records to user/users
# Each variable is named after the username it holds.
tania = rg.User(username="tania", password="taniascat")
jeroen = rg.User(username="jeroen", password="jeroensdog")
armita = rg.User(username="armita", password="amitasbird")
tim = rg.User(username="tim", password="taniascat")
james = rg.User(username="james", password="jamesfish")
rashida = rg.User(username="rashida", password="rashidaslizard")

# Create every user through the client before assigning records. The loop
# variable is deliberately not called `user`, so it does not overwrite a
# `user` defined by an earlier cell.
for annotator in [tania, jeroen, armita, tim, james, rashida]:
    client.user.create(annotator)

users_group_1 = [tania, jeroen, armita]
users_group_2 = [tim, james, rashida]

# Each split is a group of users rather than a single user.
dataset = dataset.assign(splits=[users_group_1, users_group_2], overlap=1, shuffle=True)
client.dataset.update(dataset)

In [None]:
# Adding suggestions
# `user` must already be defined by an earlier cell. Every record receives the
# same "greeting" value for the "label" question.
for record in dataset.records:
    record.suggestions.add(
        rg.Suggestion(
            user=user,
            fields={
                "label": "greeting",
            },
        )
    )

# Adding responses
# Same pattern as the suggestions, using rg.Response instead.
for record in dataset.records:
    record.responses.add(
        rg.Response(
            user=user,
            fields={
                "label": "greeting",
            },
        )
    )

# Same `client.dataset` accessor as the create/update calls in earlier cells.
client.dataset.update(dataset)