In [13]:
import requests
import numpy as np
from PIL import Image
import io
import base64

# Function to encode an image for sending via HTTP
def encode_image(image):
    """Serialize a PIL image to JPEG and return the encoded bytes.

    JPEG has no alpha channel or palette, so saving e.g. an ``RGBA`` or ``P``
    image (common for PNG/GIF files) raises ``OSError``. Images in a mode the
    JPEG writer rejects are converted to ``RGB`` first; images in a mode it
    already accepts are saved unchanged.

    Args:
        image: A ``PIL.Image.Image`` (anything exposing ``mode``,
            ``convert`` and ``save``).

    Returns:
        bytes: The raw JPEG byte stream (not base64-encoded).
    """
    # Modes Pillow's JPEG encoder writes directly; everything else needs converting.
    jpeg_modes = {"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"}
    if image.mode not in jpeg_modes:
        # convert() returns a new image, so the caller's object is not modified.
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return buffered.getvalue()  # Return bytes directly

# Function to generate a random image or load from a file
def get_image(image_path=None):
    """Return a PIL image to run inference on.

    Args:
        image_path: Path of an image file to open. When omitted (or otherwise
            falsy) a synthetic image is generated instead.

    Returns:
        The image opened from ``image_path``, or a 224x224x3 ``uint8`` image
        filled with values drawn from ``np.random.rand``.
    """
    if not image_path:
        # No file given: build a random test image.
        noise = np.random.rand(224, 224, 3) * 255
        return Image.fromarray(np.uint8(noise))
    return Image.open(image_path)

# Define the endpoints
# Prediction URL for each taxonomic group, keyed by group name; every model
# is served from the same local host/port and differs only in its model id.
endpoints = {
    group: f"http://localhost:8086/predictions/{model_id}"
    for group, model_id in (
        ("aves", "Mb1A2"),
        ("insecta", "Mi1A2"),
        ("reptile", "Mr1A2"),
        ("plant", "Mp1A2"),
    )
}



In [14]:
def convert_cat_idx_to_taxonID(results, taxon_map):
    """Attach the taxon ID of each task's top prediction to the raw results.

    Args:
        results: Mapping of task key (e.g. ``'L10'``) to that task's result
            dict, which must contain ``'pred_label'`` (the category index).
        taxon_map: Mapping of task key to a ``{str(cat_idx): taxon_id}`` dict.

    Returns:
        dict: For every task, a shallow copy of its result dict with an added
        ``'taxonID'`` entry. ``"Unknown"`` is used when the task or the
        category index has no entry in ``taxon_map``. ``results`` itself is
        not modified.

    Raises:
        KeyError: If a task's result dict has no ``'pred_label'``.
    """
    converted_results = {}
    for task, task_data in results.items():
        pred_label = task_data['pred_label']
        # A task missing from the map falls back to "Unknown" just like a
        # missing index does, rather than raising KeyError.
        taxon_id = taxon_map.get(task, {}).get(str(pred_label), "Unknown")
        converted_task_data = task_data.copy()
        converted_task_data['taxonID'] = taxon_id
        converted_results[task] = converted_task_data
    return converted_results

In [15]:
import json

# Load taxon maps from JSON files
def load_taxon_maps(maps_dir="/home/caleb/repo/polliOS-core/assets/taxa/maps"):
    """Load the per-model taxon maps from their JSON files.

    Args:
        maps_dir: Directory that contains the four map files. Defaults to the
            previously hard-coded location, so calling with no arguments
            reads the same paths as before.

    Returns:
        dict: Model key (``"reptile"``, ``"aves"``, ``"insecta"``,
        ``"plant"``) mapped to the parsed content of that model's JSON file.

    Raises:
        FileNotFoundError: If a map file is missing from ``maps_dir``.
        json.JSONDecodeError: If a map file is not valid JSON.
    """
    import os  # function-scope so the block does not depend on another cell's imports

    map_files = {
        "reptile": "S2.E24.1.5.reptilia.json",
        "aves": "S2.E24.1.4.aves.json",
        "insecta": "S2.E24.1.4.arthro.json",
        "plant": "S2.E24.4.3.angio.json",
    }
    taxon_maps = {}
    for model_key, filename in map_files.items():
        # `with` closes each handle promptly; a bare json.load(open(...))
        # leaves the file open until garbage collection.
        with open(os.path.join(maps_dir, filename), encoding="utf-8") as map_file:
            taxon_maps[model_key] = json.load(map_file)
    return taxon_maps

# Load every taxon map once; later cells look models up in this module-level dict.
taxon_maps = load_taxon_maps()

Return: {<str(task_key_0)>: {'pred_scores': [float scores for all categories in task], 'pred_label': <cat_idx (int)>, 'pred_score': <float>}, <str(task_key_1)>: {'pred_scores': [float scores for all categories in task], 'pred_label': <cat_idx (int)>, 'pred_score': <float>}, ...}
Where pred_label is the highest-scoring cat_idx. The cat_idx of every other category is its position in the pred_scores list.
All models require conversion from cat_idx to taxon_id.

# Convert to taxonIDs
Load from the taxon map JSONs, which have a structure like:
```
{"L10": {"0": "73792", "1": "39807", "2": "28398", "3": "36189", "4": "539039", "5": "40092", "6": "36307", ...}, "L20": {"0": "345", "1": "6435", "2": "243153", ...}, ...}
# {<"task_key_0">: {"cat_idx[0]": "taxon_id", "cat_idx[1]": "taxon_id", "cat_idx[2]": "taxon_id",...}}
```

In [16]:
def convert_cat_idx_to_taxonID(results, taxon_map):
    """Attach the taxon ID of each task's top prediction to the raw results.

    Args:
        results: Mapping of task key (e.g. ``'L10'``) to that task's result
            dict, which must contain ``'pred_label'`` (the category index).
        taxon_map: Mapping of task key to a ``{str(cat_idx): taxon_id}`` dict.

    Returns:
        dict: For every task, a shallow copy of its result dict with an added
        ``'taxonID'`` entry. ``"Unknown"`` is used when the task or the
        category index has no entry in ``taxon_map``. ``results`` itself is
        not modified.

    Raises:
        KeyError: If a task's result dict has no ``'pred_label'``.
    """
    converted_results = {}
    for task, task_data in results.items():
        pred_label = task_data['pred_label']
        # A task missing from the map falls back to "Unknown" just like a
        # missing index does, rather than raising KeyError.
        taxon_id = taxon_map.get(task, {}).get(str(pred_label), "Unknown")
        converted_task_data = task_data.copy()
        converted_task_data['taxonID'] = taxon_id
        converted_results[task] = converted_task_data
    return converted_results

def convert_scores_to_taxonID_sorted(results, taxon_map, min_score=None, top_k=None):
    """Convert each task's score list into ``{taxon_id: score}``, best first.

    Args:
        results: Mapping of task key to that task's result dict, which must
            contain ``'pred_scores'``: a list whose position is the category
            index.
        taxon_map: Mapping of task key to a ``{str(cat_idx): taxon_id}`` dict.
        min_score: If given, scores strictly below this value are dropped.
        top_k: If given, only the ``top_k`` highest-scoring entries are kept.

    Returns:
        dict: Task key mapped to a dict of ``taxon_id -> score`` ordered by
        descending score. Indices (or whole tasks) without an entry in
        ``taxon_map`` are reported under ``"Unknown"``. When several indices
        resolve to the same taxon ID, the highest of their scores is kept.

    Raises:
        KeyError: If a task's result dict has no ``'pred_scores'``.
    """
    converted_results = {}
    for task, task_data in results.items():
        # Tolerate a task that is absent from the map, matching the
        # per-index "Unknown" fallback below.
        idx_to_taxon = taxon_map.get(task, {})
        taxon_scores = {}
        for idx, score in enumerate(task_data['pred_scores']):
            if min_score is not None and score < min_score:
                continue
            taxon_id = idx_to_taxon.get(str(idx), "Unknown")
            # Several indices can share one key (every unmapped index becomes
            # "Unknown"); keep the best score instead of whichever came last.
            if taxon_id not in taxon_scores or score > taxon_scores[taxon_id]:
                taxon_scores[taxon_id] = score

        # Sort by score in descending order
        ranked = sorted(taxon_scores.items(), key=lambda item: item[1], reverse=True)

        # If top_k is specified, keep only the top_k elements
        if top_k is not None:
            ranked = ranked[:top_k]

        converted_results[task] = dict(ranked)
    return converted_results

In [17]:
# Load or generate an image
image = get_image()  # Pass a path to load from a file, or leave empty to generate

# Encode the image
encoded_image = encode_image(image)

# Dictionary to store results
results = {}

# The body is the raw JPEG byte stream, so the same headers apply to every endpoint.
headers = {'Content-Type': 'application/octet-stream'}

# Seconds to wait for each endpoint. requests applies no timeout by default,
# so a hung model server would otherwise block this cell forever. Tune as needed.
REQUEST_TIMEOUT_S = 30

# Send requests to each endpoint
for model_name, url in endpoints.items():
    response = requests.post(url, data=encoded_image, headers=headers, timeout=REQUEST_TIMEOUT_S)
    # Fail here on an HTTP error instead of storing the error payload as if
    # it were a set of predictions.
    response.raise_for_status()
    results[model_name] = response.json()

# Print the results
for model, result in results.items():
    print(f"Results for {model}: {result}")

Results for aves: {'L10': {'pred_scores': [0.0005501395789906383, 0.0005462570698000491, 0.0004563193069770932, 0.0012520987074822187, 0.0006427286425605416, 0.0006430401117540896, 0.0005530781345441937, 0.009178576059639454, 0.0006280964589677751, 0.00021246586402412504, 0.0006578544853255153, 0.0011499135289341211, 0.0018389563774690032, 0.0002064660075120628, 0.00016931738355197012, 0.002526964293792844, 0.0005127104232087731, 0.00043287166045047343, 0.002883902983739972, 0.0007910248823463917, 0.00045638129813596606, 0.0016707998001948, 0.0001870145497377962, 0.0004530157020781189, 0.00011199833534192294, 0.00038657578988932073, 0.0003485528286546469, 0.0006746007711626589, 0.00029642562731169164, 0.0007058262708596885, 0.0013825715286657214, 0.0015443017473444343, 0.0006893696263432503, 0.000430368963861838, 5.952530773356557e-05, 0.0013253754004836082, 0.00031386554474011064, 0.0010525314137339592, 0.0004555826890282333, 0.00047323142644017935, 0.0003161248750984669, 0.0007424990

In [18]:
# Expects `results` (model name -> raw response dict) and `taxon_maps` to be
# defined already. Pair each model with the map that decodes its indices.
model_taxon_maps = {
    name: taxon_maps[name] for name in ("aves", "insecta", "reptile", "plant")
}

# (0) Raw results with the taxonID of each task's top prediction added
converted_results_0 = {
    model_name: convert_cat_idx_to_taxonID(model_results, model_taxon_maps[model_name])
    for model_name, model_results in results.items()
}
for model, result in converted_results_0.items():
    print(f"Converted Results (0) for {model}: {result}")

# (1) The two highest-scoring taxonIDs per task, best first
converted_results_1 = {
    model_name: convert_scores_to_taxonID_sorted(
        model_results, model_taxon_maps[model_name], top_k=2
    )
    for model_name, model_results in results.items()
}
for model, result in converted_results_1.items():
    print(f"Converted Results (1) for {model}: {result}")

Converted Results (0) for aves: {'L10': {'pred_scores': [0.0005501395789906383, 0.0005462570698000491, 0.0004563193069770932, 0.0012520987074822187, 0.0006427286425605416, 0.0006430401117540896, 0.0005530781345441937, 0.009178576059639454, 0.0006280964589677751, 0.00021246586402412504, 0.0006578544853255153, 0.0011499135289341211, 0.0018389563774690032, 0.0002064660075120628, 0.00016931738355197012, 0.002526964293792844, 0.0005127104232087731, 0.00043287166045047343, 0.002883902983739972, 0.0007910248823463917, 0.00045638129813596606, 0.0016707998001948, 0.0001870145497377962, 0.0004530157020781189, 0.00011199833534192294, 0.00038657578988932073, 0.0003485528286546469, 0.0006746007711626589, 0.00029642562731169164, 0.0007058262708596885, 0.0013825715286657214, 0.0015443017473444343, 0.0006893696263432503, 0.000430368963861838, 5.952530773356557e-05, 0.0013253754004836082, 0.00031386554474011064, 0.0010525314137339592, 0.0004555826890282333, 0.00047323142644017935, 0.0003161248750984669

Here's a structured documentation for the inference interface, the mapping JSONs, and the results conversion methods:

```markdown
## Inference Interface Documentation

### 1. Inference Interface
The inference interface accepts images for classification. The image should be encoded as a JPEG byte stream before submission. This byte stream is sent directly to the TorchServe endpoints via HTTP POST requests with the content type set to `application/octet-stream`.

#### Input:
- **Image**: JPEG byte stream of the image.

#### Output:
- **Results**: The response from the server is a JSON object where each key represents a task (e.g., 'L10', 'L20') and contains:
  - `pred_scores`: A list of floating-point scores for all categories in the task.
  - `pred_label`: The category index (integer) of the highest scoring category.
  - `pred_score`: The score (float) of the highest scoring category.

### 2. Structure of Input Mapping JSONs
The taxon mapping JSONs are structured per task, where each task key maps to another dictionary. This inner dictionary maps category indices (`cat_idx`) as strings to taxon IDs (`taxonID`) as strings.

#### Example Structure:
```json
{
  "L10": {
    "0": "73792",
    "1": "39807",
    ...
  },
  "L20": {
    "0": "345",
    "1": "6435",
    ...
  }
}
```

### 3. Structure of Results After Conversion
Two types of conversions are applied to the raw results:

#### A. Conversion to Taxon IDs
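This conversion adds a `taxonID` field to each task's result by looking up its `pred_label` in the mapping JSONs. The original `pred_label` (category index) is kept unchanged, and `"Unknown"` is used when the index has no entry in the map.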

- **Method Name**: `convert_cat_idx_to_taxonID`
- **Output Structure**:
  ```json
  {
    "L10": {
      "pred_scores": [...],
      "pred_label": 0,        // Unchanged category index (cat_idx)
      "pred_score": 0.0372,
      "taxonID": "73792"      // Added field
    },
    ...
  }
  ```

#### B. Conversion to Sorted TaxonID-Score Pairs
This conversion transforms the scores list into a dictionary of `taxonID: score` pairs, sorted by score in descending order. Optional parameters allow filtering by a minimum score or limiting to the top-k results.

- **Method Name**: `convert_scores_to_taxonID_sorted` (optional parameters: `min_score`, `top_k`)
- **Output Structure**:
  ```json
  {
    "L10": {
      "73792": 0.0372,
      "39807": 0.0351,
      ...
    },
    ...
  }
  ```

### Conclusion
These documentation notes outline the process from submitting images for inference, understanding the structure of the mapping JSONs, to interpreting the converted results. This ensures clarity and consistency when integrating and utilizing these processes across different codebases.
```