## Conver CLIP model to ONNX

In [1]:
!pip install git+https://github.com/Lednik7/CLIP-ONNX.git
!pip install git+https://github.com/openai/CLIP.git
!pip install onnxruntime-gpu

Collecting git+https://github.com/Lednik7/CLIP-ONNX.git
  Cloning https://github.com/Lednik7/CLIP-ONNX.git to /tmp/pip-req-build-5dg715yz
  Running command git clone --filter=blob:none --quiet https://github.com/Lednik7/CLIP-ONNX.git /tmp/pip-req-build-5dg715yz
  Resolved https://github.com/Lednik7/CLIP-ONNX.git to commit ebd4852b7d3ebf116709abf33b26832acaba947b
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torch==1.13.1 (from clip_onnx==1.2)
  Downloading torch-1.13.1-cp311-cp311-manylinux1_x86_64.whl.metadata (24 kB)
Collecting onnxruntime>=1.11.1 (from clip_onnx==1.2)
  Downloading onnxruntime-1.20.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting onnx>=1.11.0 (from clip_onnx==1.2)
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==1.13.1->clip_onnx==1.2)
  Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylin

In [3]:
import torch
import numpy as np
import cv2
import clip

In [7]:
from clip_onnx import clip_onnx
from clip_onnx.utils import DEFAULT_EXPORT

visual_path = "clip_visual_model.onnx"

DEFAULT_EXPORT["opset_version"] = 15
textual_export_params = DEFAULT_EXPORT.copy()
textual_export_params["dynamic_axes"] = {'input': {1: 'batch_size'},
                                         'output': {0: 'batch_size'}}


In [9]:
!wget -c -O CLIP.png https://github.com/openai/CLIP/blob/main/CLIP.png?raw=true

--2025-02-20 19:22:17--  https://github.com/openai/CLIP/blob/main/CLIP.png?raw=true
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/openai/CLIP/raw/refs/heads/main/CLIP.png [following]
--2025-02-20 19:22:17--  https://github.com/openai/CLIP/raw/refs/heads/main/CLIP.png
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/openai/CLIP/refs/heads/main/CLIP.png [following]
--2025-02-20 19:22:17--  https://raw.githubusercontent.com/openai/CLIP/refs/heads/main/CLIP.png
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 252444 (247K)

In [12]:
model, preprocess = clip.load("ViT-B/32", device="cpu")

100%|████████████████████████████████████████| 338M/338M [00:03<00:00, 101MiB/s]


In [15]:
image = preprocess(Image.open("CLIP.png")).unsqueeze(0).cpu() # [1, 3, 224, 224]

In [None]:
image_onnx = image.detach().cpu().numpy().astype(np.float32)

In [16]:
onnx_model = clip_onnx(model.cpu(), visual_path=visual_path)
onnx_model.convert2onnx(image, verbose=True)

[CLIP ONNX] Convert only visual model
[CLIP ONNX] Start convert visual model
[CLIP ONNX] Start check visual model
[CLIP ONNX] Models converts successfully


## Create text classes embedding base

In [None]:
onnx_model.start_sessions(providers=["CPUExecutionProvider"]) # cpu mode

In [None]:
# Define general product categories
categories = ["Person", "Group of People", "Clothing", "Electronic device", "Furniture", 'Animals', 'Landscape', 'City', 'Plants', 'Vehicle']

In [None]:
text_inputs = clip.tokenize(categories)
with torch.no_grad():
    text_features = model.encode_text(text_inputs)

In [None]:
import pandas as pd

In [None]:
# Create a Pandas DataFrame from the NumPy array
df_text_emb = pd.DataFrame(text_features.numpy())

In [None]:
df_text_emb['category'] = categories

In [None]:
df_text_emb.to_csv('text_emb.csv', index=False)

## Create images embedding base (ViT)

In [None]:
MEAN = np.array([0.48145466, 0.4578275, 0.40821073]).reshape(-1,1,1)
STD = np.array([0.26862954, 0.26130258, 0.27577711]).reshape(-1,1,1)


def preprocess_crop(img):
  h, w = img.shape[:2]
  r = 224 / min(h, w)
  img = cv2.resize(img, (None, None),fx=r, fy=r, interpolation=cv2.INTER_CUBIC)
  h, w = img.shape[:2]
  img = img[h//2 - 112 : h//2 + 112, w//2 - 112 : w//2 + 112]
  #img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  img = img / 255.0
  img = np.transpose(img, (2, 0, 1))
  img = (img - MEAN) / STD
  return img.astype(np.float32)

In [None]:
def preprocess_pad(image_path, target_size=(224, 224)):
    """
    Preprocess an image for CLIP using only NumPy and PyTorch.
    - Loads image with OpenCV (BGR → RGB)
    - Resizes with aspect ratio preservation and padding
    - Converts to float32, normalizes, and formats for PyTorch
    """
    # Load image using OpenCV and convert BGR to RGB
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Resize while maintaining aspect ratio (adding padding)
    h, w, _ = image.shape
    scale = min(target_size[0] / h, target_size[1] / w)
    new_w, new_h = int(w * scale), int(h * scale)
    image_resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    # Calculate padding to center the image
    pad_w, pad_h = target_size[1] - new_w, target_size[0] - new_h
    top, bottom = pad_h // 2, pad_h - (pad_h // 2)
    left, right = pad_w // 2, pad_w - (pad_w // 2)

    # Apply padding with a black background
    image_padded = cv2.copyMakeBorder(image_resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))

    # Convert to float32 and normalize
    image_tensor = image_padded / 255.0  # Scale to [0, 1]
    # Rearrange dimensions: (H, W, C) → (C, H, W)
    image_tensor = np.transpose(image_tensor, (2, 0, 1))
    #image_tensor = image_tensor.permute(2, 0, 1)#.unsqueeze(0)  # Add batch dimension
    image_tensor = (image_tensor - MEAN) / STD  # Normalize


    return image_tensor.astype(np.float32)


In [1]:
!gdown  1Alo1T28gw_qv-6n406iTa2rOE5OleeXG

Downloading...
From (original): https://drive.google.com/uc?id=1Alo1T28gw_qv-6n406iTa2rOE5OleeXG
From (redirected): https://drive.google.com/uc?id=1Alo1T28gw_qv-6n406iTa2rOE5OleeXG&confirm=t&uuid=d365a45c-f2fa-43cd-867d-9740cc3ca47e
To: /content/test_task_data.zip
100% 2.14G/2.14G [00:20<00:00, 103MB/s] 


In [2]:
!unzip /content/test_task_data.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: test_task_data/c83c37c39cd803e7.jpg  
  inflating: test_task_data/b9d55516466d1669.jpg  
  inflating: test_task_data/c9493033bf27560f.jpg  
  inflating: test_task_data/a59916cd17a64cb6.jpg  
  inflating: test_task_data/c3bca9f6e9522843.jpg  
  inflating: test_task_data/b24c9f75e8181a8f.jpg  
  inflating: test_task_data/dedc9999cc0659a2.jpg  
  inflating: test_task_data/844e7fb1d06e4f24.jpg  
  inflating: test_task_data/80accf33a6599b8e.jpg  
  inflating: test_task_data/8f8761722c8db672.jpg  
  inflating: test_task_data/9c18a0e7dbcc6d70.jpg  
  inflating: test_task_data/9e4741693ea6a371.jpg  
  inflating: test_task_data/e6e21ebbb914032e.jpg  
  inflating: test_task_data/8ce22f9f72a94615.jpg  
  inflating: test_task_data/b430838b0e3cfdd9.jpg  
  inflating: test_task_data/98c5671a15c77873.jpg  
  inflating: test_task_data/98b9a9a6ada3a6a1.jpg  
  inflating: test_task_data/9f48ad42b7520bad.jpg  
  inflating: test

### for crop preproccesing

In [None]:
import os

In [None]:
with open("image_emb_crop_ViT.csv", "w") as myfile:
    myfile.write('filename,' + ','.join([str(i) for i in range(512)]) + ', category\n')

In [None]:
for i, file in enumerate(os.listdir('test_task_data')):
    if not i%20:
        print(i, '/ 8000')

    image_onnx_input = np.expand_dims(preprocess_crop(cv2.imread(f'test_task_data/{file}')), 0)
    image_features = torch.from_numpy(onnx_model.visual_run(image_onnx_input))

    similarities = torch.nn.functional.cosine_similarity(image_features, text_features)
    # Assign the highest similarity category
    best_category = categories[torch.argmax(similarities).item()]

    with open("image_emb_crop_ViT.csv", "a") as myfile:
        myfile.write(file +',' + ','.join([str(i) for i in image_features[0].numpy()]) + f', {best_category}\n')

0 / 8000
20 / 8000
40 / 8000
60 / 8000
80 / 8000
100 / 8000
120 / 8000
140 / 8000
160 / 8000
180 / 8000
200 / 8000
220 / 8000
240 / 8000
260 / 8000
280 / 8000
300 / 8000
320 / 8000
340 / 8000
360 / 8000
380 / 8000
400 / 8000
420 / 8000
440 / 8000
460 / 8000
480 / 8000
500 / 8000
520 / 8000
540 / 8000
560 / 8000
580 / 8000
600 / 8000
620 / 8000
640 / 8000
660 / 8000
680 / 8000
700 / 8000
720 / 8000
740 / 8000
760 / 8000
780 / 8000
800 / 8000
820 / 8000
840 / 8000
860 / 8000
880 / 8000
900 / 8000
920 / 8000
940 / 8000
960 / 8000
980 / 8000
1000 / 8000
1020 / 8000
1040 / 8000
1060 / 8000
1080 / 8000
1100 / 8000
1120 / 8000
1140 / 8000
1160 / 8000
1180 / 8000
1200 / 8000
1220 / 8000
1240 / 8000
1260 / 8000
1280 / 8000
1300 / 8000
1320 / 8000
1340 / 8000
1360 / 8000
1380 / 8000
1400 / 8000
1420 / 8000
1440 / 8000
1460 / 8000
1480 / 8000
1500 / 8000
1520 / 8000
1540 / 8000
1560 / 8000
1580 / 8000
1600 / 8000
1620 / 8000
1640 / 8000
1660 / 8000
1680 / 8000
1700 / 8000
1720 / 8000
1740 / 8000


### for padding preproccesing

In [None]:
with open("image_emb_pad_ViT.csv", "w") as myfile:
    myfile.write('filename,' + ', '.join([str(i) for i in range(512)]) + ', category\n')

In [None]:
for i, file in enumerate(os.listdir('test_task_data')):
    if not i%20:
        print(i, '/ 8000')

    image_onnx_input = np.expand_dims(preprocess_pad(f'test_task_data/{file}'), 0)
    image_features = torch.from_numpy(onnx_model.visual_run(image_onnx_input))

    similarities = torch.nn.functional.cosine_similarity(image_features, text_features)
    # Assign the highest similarity category
    best_category = categories[torch.argmax(similarities).item()]

    with open("image_emb_pad_ViT.csv", "a") as myfile:
        myfile.write(file +',' + ', '.join([str(i) for i in image_features[0].numpy()]) + f', {best_category}\n')

0 / 8000
20 / 8000
40 / 8000
60 / 8000
80 / 8000
100 / 8000
120 / 8000
140 / 8000
160 / 8000
180 / 8000
200 / 8000
220 / 8000
240 / 8000
260 / 8000
280 / 8000
300 / 8000
320 / 8000
340 / 8000
360 / 8000
380 / 8000
400 / 8000
420 / 8000
440 / 8000
460 / 8000
480 / 8000
500 / 8000
520 / 8000
540 / 8000
560 / 8000
580 / 8000
600 / 8000
620 / 8000
640 / 8000
660 / 8000
680 / 8000
700 / 8000
720 / 8000
740 / 8000
760 / 8000
780 / 8000
800 / 8000
820 / 8000
840 / 8000
860 / 8000
880 / 8000
900 / 8000
920 / 8000
940 / 8000
960 / 8000
980 / 8000
1000 / 8000
1020 / 8000
1040 / 8000
1060 / 8000
1080 / 8000
1100 / 8000
1120 / 8000
1140 / 8000
1160 / 8000
1180 / 8000
1200 / 8000
1220 / 8000
1240 / 8000
1260 / 8000
1280 / 8000
1300 / 8000
1320 / 8000
1340 / 8000
1360 / 8000
1380 / 8000
1400 / 8000
1420 / 8000
1440 / 8000
1460 / 8000
1480 / 8000
1500 / 8000
1520 / 8000
1540 / 8000
1560 / 8000
1580 / 8000
1600 / 8000
1620 / 8000
1640 / 8000
1660 / 8000
1680 / 8000
1700 / 8000
1720 / 8000
1740 / 8000


In [None]:
with open("image_names.csv", "w") as myfile:
    myfile.write('name\n')

In [None]:
for i, file in enumerate(os.listdir('test_task_data')):

    with open("image_names.csv", "a") as myfile:
        myfile.write(file + '\n')

In [None]:
df_pad = pd.read_csv('image_emb_pad.csv', delimiter=', ')
df_crop = pd.read_csv('image_emb_crop.csv', delimiter=', ')


  df_pad = pd.read_csv('image_emb_pad.csv', delimiter=', ')
  df_crop = pd.read_csv('image_emb_crop.csv', delimiter=', ')


## Convert padddle model to onnx

In [None]:
!pip install paddlepaddle #--upgrade -i https://mirror.baidu.com/pypi/simple

Collecting paddlepaddle
  Downloading paddlepaddle-2.6.2-cp311-cp311-manylinux1_x86_64.whl.metadata (8.6 kB)
Collecting astor (from paddlepaddle)
  Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting opt-einsum==3.3.0 (from paddlepaddle)
  Downloading opt_einsum-3.3.0-py3-none-any.whl.metadata (6.5 kB)
Downloading paddlepaddle-2.6.2-cp311-cp311-manylinux1_x86_64.whl (126.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading opt_einsum-3.3.0-py3-none-any.whl (65 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.5/65.5 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Installing collected packages: opt-einsum, astor, paddlepaddle
  Attempting uninstall: opt-einsum
    Found existing installation: opt_einsum 3.4.0
    Uninstalling opt_einsum-3.4.0:
      Successfully uninstalled opt_einsum-3.4.0
Successful

In [None]:
import paddle
paddle.utils.run_check()

Running verify PaddlePaddle program ... 
PaddlePaddle works well on 1 CPU.
PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now.


In [None]:
!wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/PP-ShiTuV2/general_PPLCNetV2_base_pretrained_v1.0_infer.tar && tar -xf general_PPLCNetV2_base_pretrained_v1.tar

--2025-02-20 09:36:09--  https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/PP-ShiTuV2/general_PPLCNetV2_base_pretrained_v1.0_infer.tar
Resolving paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)... 103.235.47.176, 2409:8c04:1001:1203:0:ff:b0bb:4f27
Connecting to paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)|103.235.47.176|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17103396 (16M) [application/x-tar]
Saving to: ‘general_PPLCNetV2_base_pretrained_v1.0_infer.tar’


2025-02-20 09:36:32 (809 KB/s) - ‘general_PPLCNetV2_base_pretrained_v1.0_infer.tar’ saved [17103396/17103396]

tar: general_PPLCNetV2_base_pretrained_v1.tar: Cannot open: No such file or directory
tar: Error is not recoverable: exiting now


In [None]:
!wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/PPShiTuV2/general_PPLCNetV2_base_pretrained_v1.0.pdparams

--2025-02-20 09:36:32--  https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/PPShiTuV2/general_PPLCNetV2_base_pretrained_v1.0.pdparams
Resolving paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)... 103.235.47.176, 2409:8c04:1001:1203:0:ff:b0bb:4f27
Connecting to paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)|103.235.47.176|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 413615732 (394M) [application/octet-stream]
Saving to: ‘general_PPLCNetV2_base_pretrained_v1.0.pdparams’


2025-02-20 09:37:23 (8.01 MB/s) - ‘general_PPLCNetV2_base_pretrained_v1.0.pdparams’ saved [413615732/413615732]



In [None]:
!tar -xvzf /content/general_PPLCNetV2_base_pretrained_v1.0_infer.tar

general_PPLCNetV2_base_pretrained_v1.0_infer/
general_PPLCNetV2_base_pretrained_v1.0_infer/inference.pdiparams
general_PPLCNetV2_base_pretrained_v1.0_infer/inference.pdiparams.info
general_PPLCNetV2_base_pretrained_v1.0_infer/inference.pdmodel


In [None]:
!pip install paddle2onnx onnxruntime onnxsim

Collecting paddle2onnx
  Downloading paddle2onnx-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.20.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting onnxsim
  Downloading onnxsim-0.4.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting onnx (from onnxsim)
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading paddle2onnx-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownload

In [None]:
!paddle2onnx --model_dir=/content/general_PPLCNetV2_base_pretrained_v1.0_infer \
--model_filename=inference.pdmodel \
--params_filename=inference.pdiparams \
--save_file=/content/inference.onnx \
--opset_version=16 \
--enable_onnx_checker=True

[Paddle2ONNX] Start to parse PaddlePaddle model...
[Paddle2ONNX] Model file path: /content/general_PPLCNetV2_base_pretrained_v1.0_infer/inference.pdmodel
[Paddle2ONNX] Parameters file path: /content/general_PPLCNetV2_base_pretrained_v1.0_infer/inference.pdiparams
[Paddle2ONNX] Start to parsing Paddle model...
[Paddle2ONNX] Use opset_version = 16 for ONNX export.
[Paddle2ONNX] PaddlePaddle model is exported as ONNX format now.


## Create images embedding base (Paddle)

In [None]:
import onnx
from onnxsim import simplify

In [None]:
# load your predefined ONNX model
model = onnx.load('/content/inference.onnx')

# convert model
model_simp, check = simplify(model)

if check:
  onnx.save(model_simp, '/content/paddle_features_simp_opp16.onnx')

In [None]:
import onnxruntime

In [None]:
onnx_model = onnxruntime.InferenceSession('/content/paddle_features_simp_opp16.onnx', providers=['CPUExecutionProvider'])

In [None]:
def apply_emb_model(image):
    """
    Apply model to image or images<br>
    """
    model_input = []
    if (not isinstance(image, list) or
        (isinstance(image, np.ndarray) and len(image.shape) == 3)):
        image = [image]
    for img in image:
        img_in = self.preprocess(img)
        img_in = np.ascontiguousarray(img_in)
        model_input.append(img_in)

    onnx_input_image = {onnx_model_input_name: model_input}
    output, = onnx_model.run(None, onnx_input_image)
    return output

In [None]:
onnx_model.disable_fallback()
onnx_model_input_name = onnx_model.get_inputs()[0].name
"""embedding model input names"""
onnx_model_output_name = onnx_model.get_outputs()[0].name
onnx_input_image = {onnx_model_input_name: [np.ascontiguousarray(np.ones((3, 224, 224), dtype=np.uint8))]}
output, = onnx_model.run(None, onnx_input_image)

In [None]:
onnx_input_image = {onnx_model_input_name: [np.ascontiguousarray(preprocess_crop(cv2.imread(f'test_task_data/{file}')))]}
output, = onnx_model.run(None, onnx_input_image)

### for crop preproccesing

In [None]:
with open("image_emb_crop_paddle.csv", "w") as myfile:
    myfile.write('filename, ' + ', '.join([str(i) for i in range(512)]) + '\n')

In [None]:
for i, file in enumerate(os.listdir('test_task_data')):
    if not i%20:
        print(i, '/ 8000')

    image_onnx_input = {onnx_model_input_name: [np.ascontiguousarray(preprocess_crop(cv2.imread(f'test_task_data/{file}')))]}
    image_features, = onnx_model.run(None, image_onnx_input)

    with open("image_emb_crop_paddle.csv", "a") as myfile:
        myfile.write(f'{file}, ' + ', '.join([str(j) for j in image_features[0]]) + '\n')

0 / 8000
20 / 8000
40 / 8000
60 / 8000
80 / 8000
100 / 8000
120 / 8000
140 / 8000
160 / 8000
180 / 8000
200 / 8000
220 / 8000
240 / 8000
260 / 8000
280 / 8000
300 / 8000
320 / 8000
340 / 8000
360 / 8000
380 / 8000
400 / 8000
420 / 8000
440 / 8000
460 / 8000
480 / 8000
500 / 8000
520 / 8000
540 / 8000
560 / 8000
580 / 8000
600 / 8000
620 / 8000
640 / 8000
660 / 8000
680 / 8000
700 / 8000
720 / 8000
740 / 8000
760 / 8000
780 / 8000
800 / 8000
820 / 8000
840 / 8000
860 / 8000
880 / 8000
900 / 8000
920 / 8000
940 / 8000
960 / 8000
980 / 8000
1000 / 8000
1020 / 8000
1040 / 8000
1060 / 8000
1080 / 8000
1100 / 8000
1120 / 8000
1140 / 8000
1160 / 8000
1180 / 8000
1200 / 8000
1220 / 8000
1240 / 8000
1260 / 8000
1280 / 8000
1300 / 8000
1320 / 8000
1340 / 8000
1360 / 8000
1380 / 8000
1400 / 8000
1420 / 8000
1440 / 8000
1460 / 8000
1480 / 8000
1500 / 8000
1520 / 8000
1540 / 8000
1560 / 8000
1580 / 8000
1600 / 8000
1620 / 8000
1640 / 8000
1660 / 8000
1680 / 8000
1700 / 8000
1720 / 8000
1740 / 8000


### for padding preproccesing

In [None]:
with open("image_emb_pad_paddle.csv", "w") as myfile:
    myfile.write('filename, ' + ', '.join([str(i) for i in range(512)]) + '\n')

In [None]:
for i, file in enumerate(os.listdir('test_task_data')):
    if not i%20:
        print(i, '/ 8000')

    image_onnx_input = {onnx_model_input_name: [np.ascontiguousarray(preprocess_pad(f'test_task_data/{file}'))]}
    image_features, = onnx_model.run(None, image_onnx_input)

    with open("image_emb_pad_paddle.csv", "a") as myfile:
        myfile.write(f'{file}, ' + ', '.join([str(j) for j in image_features[0]]) + '\n')

0 / 8000
20 / 8000
40 / 8000
60 / 8000
80 / 8000
100 / 8000
120 / 8000
140 / 8000
160 / 8000
180 / 8000
200 / 8000
220 / 8000
240 / 8000
260 / 8000
280 / 8000
300 / 8000
320 / 8000
340 / 8000
360 / 8000
380 / 8000
400 / 8000
420 / 8000
440 / 8000
460 / 8000
480 / 8000
500 / 8000
520 / 8000
540 / 8000
560 / 8000
580 / 8000
600 / 8000
620 / 8000
640 / 8000
660 / 8000
680 / 8000
700 / 8000
720 / 8000
740 / 8000
760 / 8000
780 / 8000
800 / 8000
820 / 8000
840 / 8000
860 / 8000
880 / 8000
900 / 8000
920 / 8000
940 / 8000
960 / 8000
980 / 8000
1000 / 8000
1020 / 8000
1040 / 8000
1060 / 8000
1080 / 8000
1100 / 8000
1120 / 8000
1140 / 8000
1160 / 8000
1180 / 8000
1200 / 8000
1220 / 8000
1240 / 8000
1260 / 8000
1280 / 8000
1300 / 8000
1320 / 8000
1340 / 8000
1360 / 8000
1380 / 8000
1400 / 8000
1420 / 8000
1440 / 8000
1460 / 8000
1480 / 8000
1500 / 8000
1520 / 8000
1540 / 8000
1560 / 8000
1580 / 8000
1600 / 8000
1620 / 8000
1640 / 8000
1660 / 8000
1680 / 8000
1700 / 8000
1720 / 8000
1740 / 8000


## Cluster data and create classifier model

In [None]:
df_pad = pd.read_csv('image_emb_pad_paddle.csv', delimiter=', ')
df_crop = pd.read_csv('image_emb_crop_paddle.csv', delimiter=', ')

  df_pad = pd.read_csv('image_emb_pad_paddle.csv', delimiter=', ')
  df_crop = pd.read_csv('image_emb_crop_paddle.csv', delimiter=', ')


### for padding preproccesing

In [None]:
from sklearn.cluster import KMeans

# Define number of clusters (experiment with values)
num_clusters = 10

# Fit K-Means clustering on image embeddings
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(df_pad.iloc[:, 1:])

# Save cluster assignments
np.save("cluster_labels_pad.npy", cluster_labels)

print(f"Clustered {8000} images into {num_clusters} metaclasses.")

Clustered 80 images into 10 metaclasses.


In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Split data for training the classifier
X_train, X_test, y_train, y_test = train_test_split(df_pad.iloc[:, 1:], cluster_labels, test_size=0.2, random_state=42)

# Train a Support Vector Machine (SVM) classifier to predict clusters
classifier = SVC(kernel="linear", probability=True)
classifier.fit(X_train, y_train)

# Save classifier for future use
import pickle
with open("cluster_classifier.pkl", "wb") as f:
    pickle.dump(classifier, f)

print("Cluster classifier trained and saved!")

Cluster classifier trained and saved!


In [None]:
classifier.score(X_test, y_test)

0.908125

### for crop preproccesing

In [None]:
# Fit K-Means clustering on image embeddings
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(df_crop.iloc[:, 1:])

# Save cluster assignments
np.save("cluster_labels_crop.npy", cluster_labels)

print(f"Clustered {8000} images into {num_clusters} metaclasses.")

Clustered 80 images into 10 metaclasses.


In [None]:
# Split data for training the classifier
X_train, X_test, y_train, y_test = train_test_split(df_crop.iloc[:, 1:], cluster_labels, test_size=0.2, random_state=42)

# Train a Support Vector Machine (SVM) classifier to predict clusters
classifier = SVC(kernel="linear", probability=True)
classifier.fit(X_train, y_train)

# Save classifier for future use
import pickle
with open("cluster_classifier_crop.pkl", "wb") as f:
    pickle.dump(classifier, f)

print("Cluster classifier trained and saved!")

Cluster classifier trained and saved!


In [None]:
classifier.score(X_test, y_test)

0.92375