<!-- TOP OF README ANCHOR -->
<a name="top"></a>
<!-- PROJECT LOGO -->
<br />
<div align="center">
  <p>
    <img src="https://github.com/fangorntreabeard/cval-lib/blob/main/images/logo.jpg?raw=true" alt="Cval logo" width="155" height="155">
  </p>
<h3 align="center">CVAL REST API LIBRARY</h3>
  <p align="center">
    A library for interacting with the cval.ai REST API
    <br/>
    <b>
      <a href="https://cval.ai">REST API docs</a>
      ·
      <a href="https://github.com/fangorntreabeard/cval-lib/issues">Report Bug</a>
    </b>
  </p>
</div>


# Getting started

To start using the CVAL REST API, you need to **obtain** a **client/user API key**.
Once you have your API key, you can use it to authenticate your requests and interact with the CVAL REST API endpoints.
Refer to our API documentation for detailed information on available endpoints, request formats, and response structures.


## Installation


In [None]:
!pip install cval-lib==0.0.2.41

# User guide

##### Set your user_api_key


In [None]:
from cval_lib.connection import CVALConnection

# Placeholder: replace '...' with your own client/user API key.
USER_API_KEY = '...'
# Connection object reused by the following cells (cval.dataset(), cval.embedding(), ...).
cval = CVALConnection(USER_API_KEY)

### Dataset
 > In CVAL, datasets are spaces in which data for machine learning is stored.
 Creating a dataset is similar to creating a folder.


##### Create dataset

In [None]:
# The value returned by create() is kept as `ds_id` and passed to later calls
# (dataset update, embedding upload) to address this dataset.
ds_id = cval.dataset().create(name='on-premise-scheme-ds', description='')

print(ds_id)

##### Update dataset

In [None]:
# Keep the dataset handle in `ds` so it can be reused by the next cell.
ds = cval.dataset()
print(ds.update(ds_id, description='any123 string data', name='sample name'))

# :NOTE: the dataset handle can store the state (ds_id): the next cell calls
# ds.get() without passing an id -- presumably the id from update() is reused; confirm.

##### Get dataset


In [None]:
# No id is passed here: `ds` is the handle on which update(ds_id, ...) was called above.
print(ds.get())
# get_all() returns an iterable that is unpacked into print().
# NOTE(review): the meaning of the argument 0 (page / offset?) is not visible here -- confirm.
print(*cval.dataset().get_all(0))

### :note:
> The next example of using the library concerns embeddings. Since an embedding is a large data object and the way it is created is defined entirely by the user, the embedding methods work through query schemas (models).


### Embeddings
> Embeddings are vector representations of images obtained using PyTorch or any other library.


##### Create embeddings

In [None]:
from random import random
import uuid
from cval_lib.connection import CVALConnection
from cval_lib.models.embedding import EmbeddingModel, FrameEmbeddingModel

# Example data only: 100 frames, each with a single random 500-dimensional
# embedding, converted to plain dicts with .dict().
embeddings = [
    FrameEmbeddingModel(
        frame_id=uuid.uuid4().hex,
        embeddings=[
            EmbeddingModel(
                embedding_id=uuid.uuid4().hex,
                embedding=[random() for _ in range(500)],
            ),
        ],
    ).dict()
    for _ in range(100)
]


print(embeddings)


##### Upload & check embeddings


In [None]:
# Embedding handle for the 'training' part of the dataset created above.
emb = cval.embedding(ds_id, 'training')
# Upload the list of frame-embedding dicts built in the previous cell.
emb.upload_many(embeddings)
# Read back what is stored for this dataset part.
print(emb.get_many())
print(emb.get_meta())

### :note:
> The following example is used to invoke active learning

### Active learning

##### Get predictions data


In [None]:
from random import random
import uuid
from cval_lib.models.detection import BBoxScores, FramePrediction

# :NOTE: example only -- 100 frames, each with 10 random scores for category 1
frames_predictions = [
    FramePrediction(
        frame_id=uuid.uuid4().hex,
        predictions=[
            BBoxScores(category_id=1, score=random())
            for _ in range(10)
        ],
    )
    for _ in range(100)
]

##### Construct config

In [None]:
from cval_lib.models.detection import DetectionSamplingOnPremise

# Sampling request built from the `frames_predictions` generated in the previous cell.
# NOTE(review): 200 samples are requested although only 100 frames were generated
# above -- confirm this is intended / how the service handles it.
request = DetectionSamplingOnPremise(
 num_of_samples=200,
 bbox_selection_policy='min',
 selection_strategy='margin',
 sort_strategy='ascending',
 frames=frames_predictions,
)

##### Run active learning

In [None]:
# NOTE: despite its name, `emb` holds the detection handle here, not an embedding handle.
emb = cval.detection()
# Submit the request built in the previous cell and print what the call returns.
print(emb.on_premise_sampling(request))

### :note:
> The following method is most relevant when we are dealing with long-term tasks and, accordingly, with asynchronous interaction.

### Polling
> Polling refers to actively sampling the status of an external device by a client program as a synchronous activity.


In [None]:
import uuid
from random import random, randint
from time import sleep

# Example data only: 10000 frames, each with 20 random scores spread over
# category ids 0..3 (randint bounds are inclusive).
frames_predictions = [
    FramePrediction(
        frame_id=uuid.uuid4().hex,
        predictions=[
            BBoxScores(category_id=randint(0, 3), score=random())
            for _ in range(20)
        ],
    )
    for _ in range(10000)
]

request = DetectionSamplingOnPremise(
    num_of_samples=200,
    bbox_selection_policy='min',
    selection_strategy='margin',
    sort_strategy='ascending',
    frames=frames_predictions,
)

# NOTE: `emb` holds the detection handle; the name is kept for compatibility
# with the rest of the notebook.
emb = cval.detection()
print(emb.on_premise_sampling(request))

result = None
sleep_sec = 1
# Poll with exponential back-off. A result that is None or a dict is treated
# as "not finished yet"; the loop exits as soon as anything else is returned,
# without sleeping once more after the final answer has arrived.
while True:
    result = emb.result.get().result
    if result is not None and not isinstance(result, dict):
        break
    print(f'Polling... {sleep_sec} s')
    sleep(sleep_sec)
    sleep_sec *= 2
print(result)


### Embedding usage

#### Generate embeddings

In [None]:
from cval_lib.models.embedding import FrameEmbeddingModel, EmbeddingModel
from cval_lib.models.detection import DetectionSamplingOnPremise

def get_frames(num_images: int, num_bboxes: int, emb_sz: int, categ_sz: int):
    """Generate random frame embeddings and matching prediction scores.

    Every frame gets ``num_bboxes`` boxes; each box contributes one embedding
    and one score entry that share the same ``embedding_id``.

    Args:
        num_images: number of frames to generate.
        num_bboxes: number of boxes (embedding + score pairs) per frame.
        emb_sz: length of every random embedding vector.
        categ_sz: largest category id; ids are drawn from ``0..categ_sz``
            inclusive, because ``random.randint`` includes both bounds.

    Returns:
        A tuple ``(embeddings, predictions)``: a list of
        ``FrameEmbeddingModel`` objects and a list of
        ``{"frame_id": ..., "predictions": [...]}`` dicts, in the same frame
        order.
    """
    # Imported locally on purpose: earlier cells bind the name `random` to the
    # function `random.random` (via `from random import random`), and `math`
    # is only imported in a later cell, so relying on the notebook globals
    # would fail when the cells are run in order.
    import math
    import random
    import uuid

    _predictions = []
    _embeddings = []
    for _ in range(num_images):
        emb = []
        scr = []
        image_name = uuid.uuid4().hex
        for _ in range(num_bboxes):
            # The same id links the score entry to its embedding.
            _id = uuid.uuid4().hex
            scr.append(
                {
                    "embedding_id": _id,
                    "score": math.cos(random.random()),
                    "category_id": random.randint(0, categ_sz),
                },
            )
            emb.append(
                EmbeddingModel(
                    embedding_id=_id,
                    embedding=[random.random() for _ in range(emb_sz)],
                )
            )

        _embeddings.append(
            FrameEmbeddingModel(frame_id=image_name, embeddings=emb),
        )
        _predictions.append(
            {
                "frame_id": image_name,
                "predictions": scr,
            },
        )
    return _embeddings, _predictions


# 1000 frames, 1 box per frame, 500-dimensional embeddings, category ids 0..1.
embeddings, predictions = get_frames(1000, 1, 500, 1)

# A fresh dataset for this example; `ds_id` and `predictions` are reused by the next cells.
ds_id = cval.dataset().create(name='asd', description='1a2')
print(ds_id)
# Upload the generated embeddings to the 'training' part of the new dataset.
print(cval.embedding(dataset_id=ds_id, part_of_dataset='training').upload_many(embeddings))

#### Clustering

In [None]:
import math
import random
import uuid
from time import sleep

# :note: `predictions` and `ds_id` come from the "Generate embeddings" cell above
# (random example data).


# :note: create config and get task_id

task_id = cval.detection().on_premise_sampling(
    DetectionSamplingOnPremise(
        num_of_samples=20,
        dataset_id=ds_id,
        selection_strategy='clustering',
        frames=predictions,
        sort_strategy='ascending',
        bbox_selection_policy='sum',
    )
).task_id

result = None
sleep_sec = 1

# :note: poll with exponential back-off. A result that is None or a dict is
# treated as "not finished yet"; the loop stops right after the final result
# is received instead of sleeping one more time.
while True:
    result = cval.result().get(task_id).result
    if result is not None and not isinstance(result, dict):
        break
    print(f'Polling... {sleep_sec}')
    sleep(sleep_sec)
    sleep_sec *= 2

print(result)


In [None]:
import math
import random
import uuid
from time import sleep

# :note: create config and get task_id (same data as the clustering example,
# but with the 'hierarchical' selection strategy)

task_id = cval.detection().on_premise_sampling(
    DetectionSamplingOnPremise(
        num_of_samples=20,
        dataset_id=ds_id,
        selection_strategy='hierarchical',
        frames=predictions,
        sort_strategy='ascending',
        bbox_selection_policy='sum',
    )
).task_id

result = None
sleep_sec = 1

# :note: poll with exponential back-off. A result that is None or a dict is
# treated as "not finished yet"; the loop stops right after the final result
# is received instead of sleeping one more time.
while True:
    result = cval.result().get(task_id).result
    if result is not None and not isinstance(result, dict):
        break
    print(f'Polling... {sleep_sec}')
    sleep(sleep_sec)
    sleep_sec *= 2

print(result)


## Entropy. Probabilities usage

In [None]:
import random
import uuid

from cval_lib.connection import CVALConnection
from cval_lib.models.detection import (
    DetectionSamplingOnPremise,
    FramePrediction,
    BBoxScores,
)


# :NOTE: example only -- random list generator
def generate_random_array(sz):
    """Return ``sz`` random non-negative floats normalised to sum to 1.

    Args:
        sz: number of values to generate; ``0`` yields an empty list.

    Returns:
        A list of ``sz`` floats, each a uniform draw from ``[0, 1)`` divided
        by the sum of all draws.
    """
    random_numbers = [random.random() for _ in range(sz)]
    # Compute the normaliser once instead of once per element.
    total = sum(random_numbers)
    return [value / total for value in random_numbers]


nc = 10  # number of classes: length of every probability / weight vector below
prediction_per_frame = 100  # predictions generated for each frame
frames = 100  # number of frames in the request


# Entropy-based sampling request filled with random example data.
req = DetectionSamplingOnPremise(
    bbox_selection_policy='max',
    sort_strategy='ascending',
    selection_strategy='entropy',
    probs_weights=list(generate_random_array(nc)),
    # probs_weights determines the significance (weight) of the prediction probability for each class.
    # The order in the list corresponds to the order of the classes.
    num_of_samples=10,
    frames=[
        FramePrediction(
            frame_id=uuid.uuid4().hex,
            predictions=[
                BBoxScores(
                    probabilities=list(generate_random_array(nc)),
                    # The probabilities of each object category for one predicted bounding box.
                    # The order in the list is determined by the category number; the sum must be equal to 1.
                )
                for _ in range(prediction_per_frame)
            ]
        )
        for _ in range(frames)
    ]
)

# The connection can also be used as a context manager ("with" statement).
# `sleep` is imported here because this cell does not import it itself.
from time import sleep

# NOTE(review): `cval` is rebound by the `with` statement and later cells keep
# using it after the block has exited -- confirm the connection stays usable.
with CVALConnection(USER_API_KEY) as cval:
    task_id = cval.detection().on_premise_sampling(req).task_id

    # Reset the polling state: without this, `result` left over from an
    # earlier cell would skip the loop and the stale result would be printed.
    result = None
    sleep_sec = 1

    # Poll inside the `with` block so the connection is still open. A result
    # that is None or a dict is treated as "not finished yet".
    while True:
        result = cval.result().get(task_id).result
        if result is not None and not isinstance(result, dict):
            break
        print(f'Polling... {sleep_sec}')
        sleep(sleep_sec)
        sleep_sec *= 2

print(result)

### Method combination

In [None]:
import copy
import json
import math
import random
import uuid
from pprint import pprint
from time import sleep

from cval_lib.connection import CVALConnection
from cval_lib.models.detection import DetectionSamplingOnPremise
from cval_lib.models.embedding import FrameEmbeddingModel, EmbeddingModel

# :note: score generator

def get_scores(num_images: int, num_bboxes: int, categ_sz: int):
    """Generate random per-frame prediction scores for the examples.

    Args:
        num_images: number of frames to generate.
        num_bboxes: number of predictions generated for every frame.
        categ_sz: largest category id; ids are drawn from ``0..categ_sz``
            inclusive, because ``random.randint`` includes both bounds.

    Returns:
        A list of ``{"frame_id": str, "predictions": [...]}`` dicts. Each
        prediction is a dict with a fresh ``embedding_id``, a ``score`` equal
        to ``cos(u)`` for a uniform ``u`` in ``[0, 1)``, and a ``category_id``.
    """
    _predictions = []
    for _ in range(num_images):
        image_name = uuid.uuid4().hex
        scr = [
            {
                'embedding_id': uuid.uuid4().hex,
                "score": math.cos(random.random()),
                "category_id": random.randint(0, categ_sz),
            }
            for _ in range(num_bboxes)
        ]
        _predictions.append(
            {
                "frame_id": image_name,
                "predictions": scr,
            },
        )
    return _predictions

# :note: embeddings generator for images

def get_embeddings(_frames: list[str], _predictions, emb_sz=500):
    """Build random embeddings for the selected frames.

    Args:
        _frames: frame ids to generate embeddings for.
        _predictions: prediction dicts with ``frame_id`` and ``predictions``
            keys; every entry of ``predictions`` provides an ``embedding_id``.
        emb_sz: length of every random embedding vector.

    Returns:
        A list of ``FrameEmbeddingModel`` objects, one per prediction entry
        whose ``frame_id`` appears in ``_frames``, ordered by ``_frames`` and
        then by position in ``_predictions``. Frames without a matching
        prediction are skipped.
    """
    # Index the predictions once so each frame is a dictionary lookup rather
    # than a scan of the whole list; per-frame order is preserved.
    by_frame = {}
    for prediction in _predictions:
        by_frame.setdefault(prediction.get('frame_id'), []).append(prediction)

    _embeddings = []
    for frame_id in _frames:
        for prediction in by_frame.get(frame_id, ()):
            emb = [
                EmbeddingModel(**{
                    "embedding_id": box.get('embedding_id'),
                    "embedding": [random.random() for _ in range(emb_sz)],
                }) for box in prediction.get('predictions')
            ]

            _embeddings.append(
                FrameEmbeddingModel(**{
                    "frame_id": prediction.get('frame_id'),
                    "embeddings": emb,
                }),
            )
    return _embeddings

# :note: 500 frames with 14 predictions each, 2 categories (ids 0 and 1)

predictions = get_scores(500, 14, 1)

ds_id = cval.dataset().create(name='asd', description='1a2')

# :note: first sample -- entropy-based selection of 20 frames

task_id = cval.detection().on_premise_sampling(
    DetectionSamplingOnPremise(
        num_of_samples=20,
        dataset_id=ds_id,
        selection_strategy='entropy',
        sort_strategy='ascending',
        bbox_selection_policy='sum',
        frames=predictions,
    )
).task_id

result = None
sleep_sec = 1

# :note: start polling for first sample. A result that is None or a dict is
# treated as "not finished yet"; the loop stops right after the final result
# is received instead of sleeping one more time.

while True:
    result = cval.result().get(task_id).result
    if result is not None and not isinstance(result, dict):
        break
    print(f'Polling... {sleep_sec}')
    sleep(sleep_sec)
    sleep_sec *= 2

pprint(result)
print(task_id)
# :note: upload embeddings for the frames of the first sample only

cval.embedding(dataset_id=ds_id, part_of_dataset='training').upload_many(get_embeddings(result, predictions))

# :note: second sample -- clustering restricted to the frames selected by the first sample

task_id = cval.detection().on_premise_sampling(
    DetectionSamplingOnPremise(
        num_of_samples=2,
        # :note: requires: len(first_sample) > len(second_sample)
        dataset_id=ds_id,
        bbox_selection_policy='mean',
        sort_strategy='ascending',
        selection_strategy='clustering',
        # `result` still holds the first sample here; keep only its frames.
        frames=[p for p in predictions if p.get('frame_id') in result],
    )
).task_id


result = None
sleep_sec = 1

# :note: start polling for second sample. A result that is None or a dict is
# treated as "not finished yet"; the loop stops right after the final result
# is received instead of sleeping one more time.
while True:
    result = cval.result().get(task_id).result
    if result is not None and not isinstance(result, dict):
        break
    print(f'Polling... {sleep_sec}')
    sleep(sleep_sec)
    sleep_sec *= 2

pprint(result)