# Project

> Use this class to represent the AI project that we are working on and to interact with datasets and experiments in it.

In [None]:
# | default_exp project.core

In [None]:
# | hide
from nbdev.showdoc import *

In [None]:

from ragas_experimental.model.notion_model import NotionModel

In [None]:
# | export
import typing as t
import os
import asyncio

from fastcore.utils import patch
from pydantic import BaseModel

from ragas_experimental.backends.factory import RagasApiClientFactory
from ragas_experimental.backends.ragas_api_client import RagasApiClient
import ragas_experimental.typing as rt
from ragas_experimental.utils import async_to_sync, create_nano_id
from ragas_experimental.dataset import Dataset
from ragas_experimental.experiment import Experiment

In [None]:
# | export
class Project:
    """Handle to a project stored in the Ragas app backend.

    Instantiating the class looks the project up through the API client and
    caches its id, name and description on the instance.

    Attributes are set in `__init__`:
        project_id: Id of the project as returned by the API.
        name: Value of the project's ``title`` field.
        description: Value of the project's ``description`` field.
    """

    def __init__(
        self,
        project_id: str,
        ragas_app_client: t.Optional[RagasApiClient] = None,
    ):
        """Load an existing project by id.

        Args:
            project_id: Id of the project to load.
            ragas_app_client: API client to use. When omitted, one is built
                with `RagasApiClientFactory.create()`.

        Raises:
            Exception: Anything raised by the API client while fetching the
                project propagates unchanged to the caller.
        """
        self.project_id = project_id
        if ragas_app_client is None:
            self._ragas_api_client = RagasApiClientFactory.create()
        else:
            self._ragas_api_client = ragas_app_client

        # Fetch (not create) the project so the instance mirrors what the
        # backend has stored. Errors are deliberately left to propagate.
        sync_version = async_to_sync(self._ragas_api_client.get_project)
        existing_project = sync_version(project_id=self.project_id)
        self.project_id = existing_project["id"]
        self.name = existing_project["title"]
        self.description = existing_project["description"]

    @classmethod
    def create(
        cls,
        name: str,
        description: str = "",
        ragas_app_client: t.Optional[RagasApiClient] = None,
    ):
        """Create a new project in the backend and return a handle to it.

        Args:
            name: Title of the new project.
            description: Optional description of the new project.
            ragas_app_client: API client to use. When omitted, one is built
                with `RagasApiClientFactory.create()`.

        Returns:
            An instance of ``cls`` bound to the newly created project.
        """
        # Only fall back to the factory when the caller did not supply a
        # client; previously a supplied client was silently discarded.
        if ragas_app_client is None:
            ragas_app_client = RagasApiClientFactory.create()
        sync_version = async_to_sync(ragas_app_client.create_project)
        new_project = sync_version(title=name, description=description)
        return cls(new_project["id"], ragas_app_client)

    def delete(self):
        """Delete this project in the backend and print a confirmation."""
        sync_version = async_to_sync(self._ragas_api_client.delete_project)
        sync_version(project_id=self.project_id)
        print("Project deleted!")

    def __repr__(self):
        """Return a short representation showing the project name."""
        return f"Project(name='{self.name}')"

In [None]:
# Point the API client at the dev environment for the demo cells below.
# NOTE(review): the API token is hard-coded in source and this cell overwrites
# any value already present in the process environment -- consider loading it
# from the environment or a secrets store instead, and rotating this token.
RAGAS_APP_TOKEN = "apt.47bd-c55e4a45b27c-02f8-8446-1441f09b-651a8"
RAGAS_API_BASE_URL = "https://api.dev.app.ragas.io"

os.environ.update(
    RAGAS_APP_TOKEN=RAGAS_APP_TOKEN,
    RAGAS_API_BASE_URL=RAGAS_API_BASE_URL,
)

In [None]:
# Alternative: create a brand-new project instead of loading an existing one.
#project = Project.create("Demo Project")
# Load an existing project by its id; the bare `project` expression on the
# last line displays its repr as the cell output.
project = Project(project_id="1ef0843b-231f-4a2c-b64d-d39bcee9d830")
project

Project(name='yann-lecun-wisdom')

In [None]:
# | export
@patch(cls_method=True)
def get(cls: Project, name: str, ragas_app_client: t.Optional[RagasApiClient] = None) -> Project:
    """Look up an existing project by its name.

    Args:
        name: Name of the project to find.
        ragas_app_client: API client to use. When omitted, one is built with
            `RagasApiClientFactory.create()`.

    Returns:
        Project: Instance constructed from the id of the matching project.
    """
    client = (
        RagasApiClientFactory.create()
        if ragas_app_client is None
        else ragas_app_client
    )

    # The API resolves the name; only the returned id is needed, because the
    # Project constructor fetches the remaining details itself.
    lookup_by_name = async_to_sync(client.get_project_by_name)
    found = lookup_by_name(project_name=name)

    return Project(project_id=found["id"], ragas_app_client=client)

In [None]:
# Fetch a project by its name rather than by id; the repr is the cell output.
Project.get("SuperMe")

Project(name='SuperMe')

In [None]:
#project.delete()

## Manage datasets



In [None]:
#| export
async def create_dataset_columns(project_id, dataset_id, columns, create_dataset_column_func):
    """Create every column of a dataset and wait for all of them to finish.

    Args:
        project_id: Id of the project that owns the dataset.
        dataset_id: Id of the dataset the columns are added to.
        columns: Iterable of mappings, each providing the keys ``"name"``,
            ``"type"`` and ``"settings"``.
        create_dataset_column_func: Callable invoked once per column with
            keyword arguments; its results are awaited together.

    Returns:
        The list of results from `asyncio.gather`, in the order of `columns`.
    """
    # Build all awaitables up front (each column gets a fresh nano id), then
    # hand them to gather so they are awaited as one batch.
    pending = [
        create_dataset_column_func(
            project_id=project_id,
            dataset_id=dataset_id,
            id=create_nano_id(),
            name=spec["name"],
            type=spec["type"],
            settings=spec["settings"],
        )
        for spec in columns
    ]
    return await asyncio.gather(*pending)


In [None]:
# | export
@patch
def create_dataset(
    self: Project, model: t.Type[BaseModel], name: t.Optional[str] = None
) -> Dataset:
    """Create a dataset in this project with columns derived from a model.

    Args:
        model: Pydantic model class describing the dataset's rows; it is
            converted to column definitions with
            `rt.ModelConverter.model_to_columns`.
        name: Name of the dataset. Defaults to the model's class name.

    Returns:
        Dataset: Object bound to the newly created dataset.
    """
    dataset_name = model.__name__ if name is None else name

    # Register the dataset first: its id is needed to attach the columns.
    register_dataset = async_to_sync(self._ragas_api_client.create_dataset)
    created = register_dataset(
        project_id=self.project_id,
        name=dataset_name,
    )
    dataset_id = created["id"]

    # Translate the model into column definitions and create them all via
    # the async helper, run synchronously here.
    columns = rt.ModelConverter.model_to_columns(model)
    add_columns = async_to_sync(create_dataset_columns)
    add_columns(
        project_id=self.project_id,
        dataset_id=dataset_id,
        columns=columns,
        create_dataset_column_func=self._ragas_api_client.create_dataset_column,
    )

    return Dataset(
        name=dataset_name,
        model=model,
        project_id=self.project_id,
        dataset_id=dataset_id,
        ragas_api_client=self._ragas_api_client,
    )

In [None]:
import ragas_experimental.typing as rt

In [None]:
# create an example dataset
class TestModel(BaseModel):
    # Example schema used by the demo cells below to exercise dataset creation.
    id: int
    name: str
    description: str
    # Plain Literal field with no extra column annotation.
    tags: t.Literal["tag1", "tag2", "tag3"]
    # Literal field additionally annotated with rt.Select and a colors list.
    tags_color_coded: t.Annotated[t.Literal["red", "green", "blue"], rt.Select(colors=["red", "green", "blue"])]
    # String field annotated with rt.Url; the only field with a default value.
    url: t.Annotated[str, rt.Url()] = "https://www.google.com"

In [None]:

# No name is passed, so the dataset is named after the model class.
test_dataset = project.create_dataset(TestModel)
test_dataset

Dataset(name=TestModel, model=TestModel, len=0)

In [None]:
# | export
@patch
def get_dataset_by_id(self: Project, dataset_id: str, model) -> Dataset:
    """Get an existing dataset of this project by its id.

    Args:
        dataset_id: Id of the dataset to load.
        model: Model class passed through to the returned `Dataset`.

    Returns:
        Dataset: Object bound to the dataset with the given id, named as
        reported by the API.
    """
    # The lookup is only needed for the dataset's name; the id is already known.
    fetch_dataset = async_to_sync(self._ragas_api_client.get_dataset)
    info = fetch_dataset(project_id=self.project_id, dataset_id=dataset_id)

    return Dataset(
        name=info["name"],
        model=model,
        project_id=self.project_id,
        dataset_id=dataset_id,
        ragas_api_client=self._ragas_api_client,
    )

In [None]:
# Round-trip check: load the dataset created above, this time by its id.
project.get_dataset_by_id(test_dataset.dataset_id, TestModel)

Dataset(name=TestModel, model=TestModel, len=0)

In [None]:
# | export
@patch
def get_dataset(self: Project, dataset_name: str, model) -> Dataset:
    """Get an existing dataset of this project by its name.

    Args:
        dataset_name: Name of the dataset to look up.
        model: Model class passed through to the returned `Dataset`.

    Returns:
        Dataset: Object built from the name and id reported by the API.
    """
    # Resolve the name to the stored dataset record (provides both name and id).
    find_by_name = async_to_sync(self._ragas_api_client.get_dataset_by_name)
    info = find_by_name(project_id=self.project_id, dataset_name=dataset_name)

    return Dataset(
        name=info["name"],
        model=model,
        project_id=self.project_id,
        dataset_id=info["id"],
        ragas_api_client=self._ragas_api_client,
    )

In [None]:
# Same lookup as above, but by dataset name instead of id.
project.get_dataset("TestModel", TestModel)

Dataset(name=TestModel, model=TestModel, len=0)