# Inferencia de red ResNet para clasificación de signo a texto.

---
---

# Índice.

- [Configuración](#configuración)
  - [Configuración de la red](#configuración-de-la-red)
- [Elección del modelo a inferir](#elección-del-modelo-a-inferir)
  - [Carga del modelo](#carga-del-modelo)
- [Inferencia](#inferencia)
  - [Por webcam](#por-webcam-con-pytorch)
  - [Por archivo](#desde-archivo)

## Configuración

---

In [1]:
from config.const import *
from config.torch_config import get_transform
from config.dataset import get_dataset_path
from lib.video_dataset import VideoFrameDataset


  from .autonotebook import tqdm as notebook_tqdm


### Configuración de la red


In [2]:
# Dataset sub-directories and model names; the two lists are indexed together,
# so entries at the same position form one dataset/model pair.
DATASETS = [
    "WLASL/videos",
    "actions/frames",
]
MODELS_NAME = [
    "WLASL_9",
    "actions_small",
]

# Position of the dataset/model pair to use.
index = 0


In [3]:
# Resolve the dataset and model locations for the pair selected by `index`.
data_path, model_path = get_dataset_path(
    dataset=DATASETS[index],
    model_name=MODELS_NAME[index],
)

# Transform built for IMAGE_SIZE; it is reused by the dataset and by the
# webcam inference call.
multiple_transform = get_transform(IMAGE_SIZE)


In [4]:
# Build the video-frame dataset rooted at `data_path`, using the shared
# transform and the segment/frame/image-size settings (NUM_SEGMENTS,
# FRAMES_PER_SEGMENT and IMAGE_SIZE presumably come from the
# `config.const` star import — TODO confirm).
dataset = VideoFrameDataset(
    root_path=data_path,
    transform=multiple_transform,
    num_segments=NUM_SEGMENTS,
    frames_per_segment=FRAMES_PER_SEGMENT,
    image_size=IMAGE_SIZE,
)

# Class labels exposed by the dataset; used to turn a predicted index into a
# label when printing inference results.
classes = dataset.classes


## Elección del modelo a inferir

---

### Carga del modelo

In [5]:
from torch import load

In [6]:
# NOTE: torch.load unpickles arbitrary Python objects, so only load
# checkpoints that come from a trusted source.
# The loaded object is used directly as the network (it is passed as-is to
# video_webcam_inference), so put it in evaluation mode: dropout and
# batch-norm layers must not run in training mode during inference.
model = load(model_path).eval()


### Carga del modelo onnx

In [7]:
import onnx

In [8]:
import os

# Derive the ONNX file location by swapping only the file extension.
# A plain str.replace(".pth", ".onnx") would also rewrite a ".pth" that
# happens to occur earlier in the path (e.g. in a directory name).
onnx_path = os.path.splitext(model_path)[0] + ".onnx"
onnx_model = onnx.load(onnx_path)


In [11]:
import onnxruntime as ort

In [13]:
# Create the ONNX Runtime session for the exported model.
# Providers are listed in order of preference: CUDA first, CPU as fallback.
ort_session = ort.InferenceSession(
    onnx_path,
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)


## Inferencia

---

In [14]:
import sys

# Add the parent directory to the module search path before importing;
# presumably the `common` package lives there (TODO confirm). The path is
# relative, so it depends on the notebook's working directory.
sys.path.append("../")

from common.inference import video_webcam_inference


### Con ONNX session

In [17]:
import numpy as np

# Smoke-test the ONNX session with a random float32 tensor of shape
# 1 x (FRAMES_PER_SEGMENT * NUM_SEGMENTS) x 3 x IMAGE_SIZE x IMAGE_SIZE
# (presumably batch, frames, channels, height, width — TODO confirm).
dummy_shape = (1, FRAMES_PER_SEGMENT * NUM_SEGMENTS, 3, IMAGE_SIZE, IMAGE_SIZE)
dummy_clip = np.random.randn(*dummy_shape).astype(np.float32)

# None requests every model output; the tensor is fed under the name "input".
outputs = ort_session.run(None, {"input": dummy_clip})


# Print the label whose score is highest in the first row of the first output.
print(classes[outputs[0][0].argmax(0)])


all


### Ejemplo de video con onnx

### Por webcam con PyTorch

In [None]:
# Run webcam inference with the PyTorch model, the dataset's class labels and
# the shared transform.
# NOTE(review): "cuda" is hard-coded (presumably the target device); this
# likely fails on a machine without a GPU — confirm whether a CPU fallback is
# wanted.
# fps_interval is NUM_SEGMENTS * FRAMES_PER_SEGMENT, the same frame count used
# for the ONNX dummy input.
video_webcam_inference(
    model,
    classes,
    "cuda",
    multiple_transform,
    fps_interval=NUM_SEGMENTS * FRAMES_PER_SEGMENT,
)
