<!-- TOP OF README ANCHOR -->
<a name="top"></a>
<!-- PROJECT LOGO -->
<br />
<div align="center">
  <p>
    <img src="https://github.com/fangorntreabeard/cval-lib/blob/main/images/logo.jpg?raw=true" alt="Cval logo" width="155" height="155">
  </p>
<h3 align="center">CVAL REST API LIBRARY</h3>
  <p align="center">
    A library for interacting with the cval.ai REST API
    <br/>
    <b>
      <a href="https://cval.ai">REST API docs</a>
      ·
      <a href="https://github.com/fangorntreabeard/cval-lib/issues">Report Bug</a>
    </b>
  </p>
</div>


# Getting started

To start using the CVAL REST API, you need to **obtain** a **client/user API key**.
Once you have your API key, you can use it to authenticate your requests and interact with the CVAL REST API endpoints.
Refer to our API documentation for detailed information on available endpoints, request formats, and response structures.


## Installation


In [21]:
!pip install cval-lib==0.0.2.27



# User guide

##### Set your user_api_key


In [2]:
import os

from cval_lib.connection import CVALConnection

# Read the key from the CVAL_USER_API_KEY environment variable so that a real secret
# never has to be saved inside the notebook. The literal placeholder is kept as a
# fallback and must be replaced with your key if the variable is not set.
USER_API_KEY = os.environ.get('CVAL_USER_API_KEY', 'USER_API_KEY')

# `cval` is the entry point used by every following cell.
cval = CVALConnection(USER_API_KEY)

### Dataset
 > In CVAL, a dataset is a space in which data for machine learning is stored.
 Creating a dataset is similar to creating a folder.


##### Create dataset

In [3]:
# Describe the new dataset first, then create it; the returned id is kept in
# `ds_id` because the following cells address the dataset through it.
new_dataset = dict(name='on-premise-scheme-ds', description='')
ds_id = cval.dataset().create(**new_dataset)
print(ds_id)

eb28dec2-1923-4e4d-a844-9278fb8b4926


##### Update dataset

In [4]:
# :NOTE: the dataset handle can store the state (ds_id), so `ds` is kept
# around and reused by the next cell.
ds = cval.dataset()
updated = ds.update(ds_id, name='sample name', description='any123 string data')
print(updated)

{'dataset_name': 'sample name', 'dataset_description': 'any123 string data', 'dataset_id': 'eb28dec2-1923-4e4d-a844-9278fb8b4926'}


##### Get dataset


In [5]:
# `ds` is the handle from the update cell, so `get()` is called without an id.
current_dataset = ds.get()
print(current_dataset)

# List datasets; the items are unpacked so they are printed separated by spaces.
# NOTE(review): the positional argument 0 is presumably a page/offset index -- confirm in the API docs.
listed_datasets = cval.dataset().get_all(0)
print(*listed_datasets)

dataset_name='sample name' dataset_description='any123 string data'
dataset_name=None dataset_description=None dataset_id='9bc85c20-1fce-42e5-b969-c235831b8d68' dataset_name=None dataset_description=None dataset_id='6e6b11c9-0427-4435-b810-0de9b0a2b239' dataset_name=None dataset_description=None dataset_id='5749a101-42e0-4bef-918e-e53b723f5864' dataset_name=None dataset_description=None dataset_id='d878cd6a-0619-4fc2-8acb-bf3e62e0400d' dataset_name=None dataset_description=None dataset_id='fab0fa7b-00ac-4e24-b059-ce07a9706abf' dataset_name=None dataset_description=None dataset_id='87ace64a-5637-4fbe-87e8-77fbdb8a4a53' dataset_name=None dataset_description=None dataset_id='4af75617-382f-48c6-9cf8-586ca1178e9a' dataset_name=None dataset_description=None dataset_id='8a3a514a-6574-469a-863f-dfc09338293e' dataset_name=None dataset_description=None dataset_id='5c2862b0-f8ea-44ef-8cbd-59d83222702c' dataset_name=None dataset_description=None dataset_id='0fe4b8d2-aa18-4a04-9314-a81dd203649b' da

### :note:
> The next example covers embeddings. Because an embedding is a large data object and the way it is created is entirely defined by the user, the embedding methods work through request schemas (models).


### Embeddings
> Embeddings are vector representations of images obtained using PyTorch or any other library.


##### Create embeddings

In [7]:
from random import random
import uuid
from cval_lib.connection import CVALConnection
from cval_lib.models.embedding import EmbeddingModel, FrameEmbeddingModel

# Example data only: 100 frames, each carrying a single embedding made of
# 500 random floats. Frame ids and embedding ids are fresh UUID hex strings.
embeddings = tuple(
    FrameEmbeddingModel(
        frame_id=uuid.uuid4().hex,
        embeddings=[
            EmbeddingModel(
                embedding_id=uuid.uuid4().hex,
                embedding=[random() for _ in range(500)],
            ),
        ],
    )
    for _ in range(100)
)

print(embeddings)

(FrameEmbeddingModel(frame_id='c3650f69821843e5b92b3301594daf7a', embeddings=[EmbeddingModel(embedding_id='fc832ce1fead4eb8b0a56ed992cd7eb6', embedding=[0.719192973959289, 0.19199759253516757, 0.4681776430939304, 0.5053458521212363, 0.7834515344506223, 0.9639092724360117, 0.012543304368498265, 0.03463089100455674, 0.14858427348169223, 0.5326941051139692, 0.34802678671420173, 0.8799326775292399, 0.31942909106913675, 0.8021421055754204, 0.47516369199492203, 0.9807809870902118, 0.6733691074308042, 0.16551850445365401, 0.324760968627278, 0.7384523103201399, 0.17168772716114644, 0.46066819812460646, 0.7775846031332269, 0.14115606843957718, 0.9892353467908306, 0.29788363062658385, 0.2571856961072485, 0.19674516601382364, 0.7939304305154282, 0.5463338933877397, 0.04261256217570841, 0.16793852779886953, 0.2279468548450978, 0.4140518240843635, 0.46126978528474527, 0.25023876379017906, 0.8581607604279333, 0.41273961714143337, 0.895437687656651, 0.8798990283859308, 0.5914245685506562, 0.084377756

##### Upload & check embeddings


In [9]:
# Handle for the 'training' part of the dataset identified by `ds_id`.
emb = cval.embedding(ds_id, 'training')

# Send all generated embeddings in one call, then read the stored data and
# its metadata back to check the upload.
emb.upload_many(embeddings)
stored_embeddings = emb.get_many()
print(stored_embeddings)
embeddings_meta = emb.get_meta()
print(embeddings_meta)

[FrameEmbeddingModel(frame_id='d9831452f5314124872e37f3816d0f3f', embeddings=[EmbeddingModel(embedding_id='a69d92dd07e9448f89ae5fe31d5e5a79', embedding=[0.18142729932975143, 0.7144467724357612, 0.21523370431499877, 0.10042622649492328, 0.7458277737843871, 0.9349572876013842, 0.6589035047558732, 0.03421428380962577, 0.06691861191444681, 0.6134788315737948, 0.31181666854749057, 0.8472723750230662, 0.513345917855292, 0.435920573415707, 0.7848205342599809, 0.6999149055375846, 0.6961882467197574, 0.07745709132368261, 0.4919156243573425, 0.5352711843927498, 0.9639007603200875, 0.28850925508882785, 0.0752169465256245, 0.32011432708514354, 0.3455074207150991, 0.9178274696410609, 0.3033053849890215, 0.07209896335952126, 0.17964953615957258, 0.8497126286802563, 0.5114319289076885, 0.050057743321844206, 0.696289224998694, 0.1671535715644873, 0.9754472220757638, 0.13308178054295616, 0.9520112671237588, 0.23684029542364415, 0.413650027379866, 0.12346544544529159, 0.19900248552233435, 0.252994804920

### :note:
> The following example shows how to invoke active learning.

### Active learning

##### Get predictions data


In [13]:
from random import random
import uuid
from cval_lib.models.detection import BBoxScores, FramePrediction

# :NOTE: example only -- 10 frames, each with 10 boxes of category 1 and a random score
frames_predictions = [
    FramePrediction(
        frame_id=str(uuid.uuid4().hex),
        predictions=[
            BBoxScores(category_id=1, score=random())
            for _ in range(10)
        ],
    )
    for _ in range(10)
]
print(frames_predictions)


[FramePrediction(frame_id='e825acbacafb4269889336976db0f148', predictions=[BBoxScores(category_id='1', score=0.7816575263989136, embedding_id=None), BBoxScores(category_id='1', score=0.8993917350419026, embedding_id=None), BBoxScores(category_id='1', score=0.79980126637699, embedding_id=None), BBoxScores(category_id='1', score=0.4799045300307191, embedding_id=None), BBoxScores(category_id='1', score=0.48295548779121145, embedding_id=None), BBoxScores(category_id='1', score=0.01754114337704593, embedding_id=None), BBoxScores(category_id='1', score=0.23978973575483653, embedding_id=None), BBoxScores(category_id='1', score=0.8221639051441663, embedding_id=None), BBoxScores(category_id='1', score=0.15276448537954823, embedding_id=None), BBoxScores(category_id='1', score=0.6378096133241542, embedding_id=None)]), FramePrediction(frame_id='3c2c5f90b55f41cb9d32b5daad6a0d2c', predictions=[BBoxScores(category_id='1', score=0.8503262164507673, embedding_id=None), BBoxScores(category_id='1', score

##### Construct config

In [14]:
from cval_lib.models.detection import DetectionSamplingOnPremise

# Collect the sampling settings first, then unpack them into the request model.
# NOTE(review): num_of_samples (200) is larger than the 10 example frames built
# in the previous cell; the recorded output of the run returns 10 frame ids.
sampling_settings = dict(
    num_of_samples=200,
    bbox_selection_policy='min',
    selection_strategy='margin',
    sort_strategy='ascending',
    frames=frames_predictions,
)
request = DetectionSamplingOnPremise(**sampling_settings)

##### Run active learning

In [15]:
# NOTE: despite its name, `emb` holds the detection handle from here on.
emb = cval.detection()

# Submit the sampling request and show the task object that comes back.
sampling_task = emb.on_premise_sampling(request)
print(sampling_task)

task_id='a2e4824e-1a4c-4a8e-bd42-d2649f7e6778' dataset_id=None time_start=1690493668.269995 time_end=1690493668.2699943 type_of_task='detection' action='sampling' weights=None result=['8a8cfbf3d7e544cca17166d27cf3546e', '32efd1fb04224e9097e77f2da8053cff', '5e5b8343bee34aaa8a79751af6ba55db', 'e825acbacafb4269889336976db0f148', '93d316b47ba44df290522b46d4ceb129', '049baf2728f147dca16ee970a2d7947a', 'beef6fa1fb5f48539db1dd55187f911b', 'b10a864aedce45ca8aabcaef84b14a4d', '854204035f304b59b3dc5683a48fce23', '3c2c5f90b55f41cb9d32b5daad6a0d2c']


### :note:
> The following method is most relevant when we are dealing with long-running tasks and, accordingly, with asynchronous interaction.

### Polling
> Polling refers to a client program actively sampling the status of an external device as a synchronous activity.


In [20]:
import uuid
from random import random, randint
from time import sleep

# A larger example request: 10000 frames with 20 boxes each, categories 0..3
# and random scores. In the recorded run the task is still unfinished
# (result=None) when the submit call returns, which is what makes polling necessary.
frames_predictions = [
    FramePrediction(
        frame_id=str(uuid.uuid4().hex),
        predictions=[
            BBoxScores(category_id=randint(0, 3), score=random())
            for _ in range(20)
        ],
    )
    for _ in range(10000)
]

request = DetectionSamplingOnPremise(
    num_of_samples=200,
    bbox_selection_policy='min',
    selection_strategy='margin',
    sort_strategy='ascending',
    frames=frames_predictions,
)

# NOTE: `emb` holds the detection handle here; it remembers the submitted task,
# so `emb.result.get()` below is called without a task id.
emb = cval.detection()
print(emb.on_premise_sampling(request))

# Poll with exponential backoff (1 s, 2 s, 4 s, ...). The result is fetched
# before sleeping and the loop sleeps only while it is still missing, so no time
# is spent on a final sleep after the task has already finished.
sleep_sec = 1
result = emb.result.get().result
while result is None:
    print(f'Polling... {sleep_sec} s')
    sleep(sleep_sec)
    sleep_sec *= 2
    result = emb.result.get().result
print(result)


task_id='3cc10a61-c03a-478d-be03-3d06cf30bed5' dataset_id=None time_start=1690494445.4152808 time_end=None type_of_task='detection' action='sampling' weights=None result=None
Polling... 1 s
Polling... 2 s
['940e7ea54eb44e0cad4b6f88f6a83b10', '5455dda4f1334e94a32fb3c656d34f94', '016e2b160ec2494a845be3d2aa37ed4c', '77d74953e84d4f44b5c59a4b4dd534cb', '7033cf0258f94afe8027f1fcd838e328', '1bff54fb21fe440099a1565fa305e2e7', '05f0142146a045e09065b6b04ce11916', '96bcb5dc3f6b4b3895ce6669558b2a80', 'fe0bb91c1b2d4e4fa60c56a21ffbd732', 'b2722fd7d92a4fc1ae52391f622966c9', '45344c72b8954a8aaff49255a042d1bd', '76a111e8b61e4ba3ba846ea76b3a5be3', '2f2174e487174259968b8c8e771ffd9c', 'a96a2cac3b284f70bd2979a6de2979ab', '925d87f030f74b58a527437bb219d000', 'becf523af65f430989e751253418c355', '031cc30d14ee446b9297c9646b4d7a87', '24d5f346840842f0be655bf87d04020a', '9a29041bd6dc4397b8dc28ac7c6d0a5a', '637aebff372f475ea427426bd818ff90', '0ec569c132ce48358cf2de738ac9b2c6', '01bd91f3992a451cad5a709ce572c51c', '0

### Clustering. Embedding usage

In [23]:
import math
import random
import uuid
from time import sleep

from cval_lib.connection import CVALConnection
from cval_lib.models.detection import DetectionSamplingOnPremise
from cval_lib.models.embedding import FrameEmbeddingModel, EmbeddingModel


# :note: generate embeddings and scores; a random generator is used here as an example:

def get_frames(num_images: int, num_bboxes: int, emb_sz: int, categ_sz: int):
    """Generate random example frames: embeddings plus matching predictions.

    Every frame gets a fresh UUID hex id and ``num_bboxes`` boxes. Each box has
    its own UUID hex id that is shared between the box's prediction entry
    (``embedding_id``) and its ``EmbeddingModel``, which links the two.

    :param num_images: number of frames to generate
    :param num_bboxes: number of boxes per frame
    :param emb_sz: length of every random embedding vector
    :param categ_sz: upper bound (inclusive) for the random ``category_id``; 0 is the lower bound
    :return: tuple ``(embeddings, predictions)`` -- a list of ``FrameEmbeddingModel``
        and a list of plain dicts with ``frame_id`` and ``predictions`` keys
    """
    frame_embeddings = []
    frame_predictions = []

    for _frame_no in range(num_images):
        frame_id = str(uuid.uuid4().hex)
        box_scores = []
        box_embeddings = []

        for _box_no in range(num_bboxes):
            box_id = uuid.uuid4().hex

            # Score and category are drawn before the vector, which keeps the
            # order of calls into the random generator as it always was.
            score = math.cos(random.random())
            category = random.randint(0, categ_sz)
            box_scores.append(
                dict(embedding_id=box_id, score=score, category_id=category)
            )

            vector = [random.random() for _ in range(emb_sz)]
            box_embeddings.append(
                EmbeddingModel(embedding_id=box_id, embedding=vector)
            )

        frame_embeddings.append(
            FrameEmbeddingModel(frame_id=frame_id, embeddings=box_embeddings)
        )
        frame_predictions.append(
            dict(frame_id=frame_id, predictions=box_scores)
        )

    return frame_embeddings, frame_predictions


# 100 frames, 1 box per frame, 500-dimensional embeddings, category ids 0..1.
embeddings, predictions = get_frames(100, 1, 500, 1)
ds_id = cval.dataset().create(name='asd', description='1a2')
print(ds_id)
print(cval.embedding(dataset_id=ds_id, part_of_dataset='training').upload_many(embeddings))

# :note: create config and get task_id

task_id = cval.detection().on_premise_sampling(
    DetectionSamplingOnPremise(
        num_of_samples=20,
        dataset_id=ds_id,
        selection_strategy='clustering',
        frames=predictions,
        bbox_selection_policy='sum',
    )
).task_id

# :note: start polling with exponential backoff (1, 2, 4, ... seconds).
# The result is fetched before sleeping and the loop sleeps only while it is
# still missing, so no time is spent on a final sleep after the task finished.

sleep_sec = 1
result = cval.result().get(task_id).result
while result is None:
    print(f'Polling... {sleep_sec}')
    sleep(sleep_sec)
    sleep_sec *= 2
    result = cval.result().get(task_id).result

print(result)


dcd90a12-efd1-408f-a773-a95af640b367
frames_quantity=100 frames=[FrameEmbeddingResponseModel(frame_id='700bb570f3ad413dafc814ceceb6dc09', embeddings_quantity=1, embeddings=['2af55faaeb4d4bf0a585c563f0c4ca3f']), FrameEmbeddingResponseModel(frame_id='0070c97e06b24b7e928dcbbfd40dc52e', embeddings_quantity=1, embeddings=['711e26ff6e324e2f910ebbe688b621f4']), FrameEmbeddingResponseModel(frame_id='c3c330860131483993581d926669572a', embeddings_quantity=1, embeddings=['208b8e0af80b4f0e93bc1ae3df41ad7c']), FrameEmbeddingResponseModel(frame_id='5ec279ab4e9c424cb879fd4c02974a4c', embeddings_quantity=1, embeddings=['59e15729b4c14ce4b8cb03505eb22203']), FrameEmbeddingResponseModel(frame_id='2fefd6c0ce284665b4ed407db2bb4b39', embeddings_quantity=1, embeddings=['dc475c994f914c059f48dcf05d252fc5']), FrameEmbeddingResponseModel(frame_id='b008c69702ec44c487e25c88c241d3b3', embeddings_quantity=1, embeddings=['1424e8b62abd422687598805f12fb40e']), FrameEmbeddingResponseModel(frame_id='67c48b279151487bafc6f

### Method combination

In [24]:
import copy
import json
import math
import random
import uuid
from pprint import pprint
from time import sleep

from cval_lib.connection import CVALConnection
from cval_lib.models.detection import DetectionSamplingOnPremise
from cval_lib.models.embedding import FrameEmbeddingModel, EmbeddingModel

# :note: score generator

def get_scores(num_images: int, num_bboxes: int, categ_sz: int):
    """Generate random example predictions for ``num_images`` frames.

    Each frame is a dict with a UUID hex ``frame_id`` and a ``predictions`` list
    of ``num_bboxes`` dicts. Every box dict has a fresh UUID hex ``embedding_id``,
    a ``score`` (cosine of a uniform random number) and a random ``category_id``.

    :param num_images: number of frames to generate
    :param num_bboxes: number of boxes per frame
    :param categ_sz: upper bound (inclusive) for the random ``category_id``; 0 is the lower bound
    :return: list of frame dicts
    """
    _predictions = []
    for _frame_no in range(num_images):
        # `.hex` is already a str, so no extra conversion is needed.
        image_name = uuid.uuid4().hex
        # One dict per box; the embedding id is generated exactly once per box
        # (the previous unused extra UUID per box has been dropped).
        scr = [
            {
                'embedding_id': uuid.uuid4().hex,
                "score": math.cos(random.random()),
                "category_id": random.randint(0, categ_sz),
            }
            for _box_no in range(num_bboxes)
        ]
        _predictions.append(
            {
                "frame_id": image_name,
                "predictions": scr,
            },
        )
    return _predictions

# :note: embeddings generator for images

def get_embeddings(_frames: list[str], _predictions, emb_sz=500):
    """Build random example embeddings for the given frames.

    For every id in ``_frames`` (in order), each entry of ``_predictions`` whose
    ``frame_id`` matches yields one ``FrameEmbeddingModel``. It holds one
    ``EmbeddingModel`` per box of that entry, re-using the box's
    ``embedding_id``; the vectors themselves are random example data.

    :param _frames: frame ids to generate embeddings for
    :param _predictions: list of dicts with ``frame_id`` and ``predictions`` keys
    :param emb_sz: length of every generated embedding vector
    :return: list of ``FrameEmbeddingModel``, ordered by ``_frames`` and then by
        position in ``_predictions``
    """
    # Index the predictions by frame id once, instead of rescanning the whole
    # list for every requested frame. Lists keep the original relative order.
    by_frame = {}
    for prediction in _predictions:
        by_frame.setdefault(prediction.get('frame_id'), []).append(prediction)

    _embeddings = []
    for frame_id in _frames:
        for prediction in by_frame.get(frame_id, ()):
            emb = [
                EmbeddingModel(**{
                    "embedding_id": box.get('embedding_id'),
                    "embedding": [random.random() for _ in range(emb_sz)],
                }) for box in prediction.get('predictions')
            ]

            _embeddings.append(
                FrameEmbeddingModel(**{
                    "frame_id": prediction.get('frame_id'),
                    "embeddings": emb,
                }),
            )
    return _embeddings

# :note: 500 frames with 14 scored boxes each, 2 categories (ids 0 and 1)

predictions = get_scores(500, 14, 1)

ds_id = cval.dataset().create(name='asd', description='1a2')


def _poll_result(task_id):
    """Wait until the task identified by ``task_id`` has a result and return it.

    The result is fetched first; while it is still ``None`` the function prints
    the current interval, sleeps for it and doubles it (1, 2, 4, ... seconds).
    No sleep happens after the result has arrived.
    """
    sleep_sec = 1
    result = cval.result().get(task_id).result
    while result is None:
        print(f'Polling... {sleep_sec}')
        sleep(sleep_sec)
        sleep_sec *= 2
        result = cval.result().get(task_id).result
    return result


# :note: first sample -- entropy-based selection of 20 frames

task_id = cval.detection().on_premise_sampling(
    DetectionSamplingOnPremise(
        num_of_samples=20,
        dataset_id=ds_id,
        selection_strategy='entropy',
        sort_strategy='ascending',
        bbox_selection_policy='sum',
        frames=predictions,
    )
).task_id

# :note: start polling for first sample

result = _poll_result(task_id)

pprint(result)

# :note: upload embedding for first sample

cval.embedding(dataset_id=ds_id, part_of_dataset='training').upload_many(get_embeddings(result, predictions))

# :note: start polling for second sample

# A set makes the membership test below constant-time per prediction.
selected_frames = set(result)

task_id = cval.detection().on_premise_sampling(
    DetectionSamplingOnPremise(
        num_of_samples=2,
        # :note: requires: len(first_sample) > len(second_sample)
        dataset_id=ds_id,
        selection_strategy='clustering',
        frames=[p for p in predictions if p.get('frame_id') in selected_frames],
    )
).task_id

result = _poll_result(task_id)

pprint(result)

Polling... 1
['342b4f7c36c548ee9b6b3fe51803c56a',
 '60983453e6e2457c9ac64edf3675c66c',
 '32d2a9ae05b0498eadf106f4943a2a53',
 '5e2f87d8caaf4b198eee0b34eb723b58',
 'b0804a8eff1d4bf1ac5f4881bc62a78c',
 'bb153859c0474f4ebda65351fa0a9f89',
 'b481ede0b50d4a61b7ba4e13445fbd31',
 'e5c50009fee245ebaad02ad67da61ab6',
 'f80551f614ae47b69735830a4bcd1a51',
 'bfb14e3c0a434f4d936c0bf2a7dd7490',
 '8a73d85b69ab40e699a17760b3e8944e',
 '0bceaa6c9e054a13a6a111fd57949196',
 '36bb6bffb41545f699ecfb859ed2eaf4',
 '6ecb29071cd14c24905484b8cf62bfd6',
 '745ea57a905d411cae9552a02a3c21aa',
 '5d2b69540065495999916bf3478da18b',
 '82009fa9da9c4b4b9a9d0072a100080d',
 'b5623e4bfa2543be80b1b0fc3a359401',
 '07377213c0f14d63af7b5e1dcd0873c7',
 'e57deb74171146e19dc7b119c6e00718']
Polling... 1
Polling... 2
['bfb14e3c0a434f4d936c0bf2a7dd7490', 'e5c50009fee245ebaad02ad67da61ab6']
