In [7]:
import cv2
import shutil
import json
import os
import logging
import sys
import torch.multiprocessing as mp
import warnings

from glob import glob
from PIL import Image, ImageOps
from tqdm import tqdm

# Point the Hugging Face cache at the /mnt/opr volume. This is set before the
# `chat` import further down; presumably so the model download lands there —
# TODO confirm the volume has enough free space for the model shards.
os.environ['HF_HOME'] = '/mnt/opr/levlevi/tmp'

# Local checkout that provides the `chat` module imported by this notebook.
MINI_CPM_DIR = '/mnt/opr/levlevi/player-re-id/src/testing/ocr_model_comparisons/mini-cpm-testing/mini_cpm/MiniCPM-V'
if not os.path.exists(MINI_CPM_DIR):
    raise FileNotFoundError(f"Directory {MINI_CPM_DIR} does not exist")

# Re-running this cell must not keep appending the same entry to sys.path.
if MINI_CPM_DIR not in sys.path:
    sys.path.append(MINI_CPM_DIR)
# NOTE: this changes the kernel's working directory for every later cell.
os.chdir(MINI_CPM_DIR)

from chat import MiniCPMVChat, img2base64

# Suppress FutureWarning messages for the rest of the session.
warnings.simplefilter('ignore', FutureWarning)

# Timestamped INFO-level logging; `logger` is the notebook-wide logger used by the helpers.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Hugging Face model identifier passed to MiniCPMVChat.
MODEL_NAME = 'openbmb/MiniCPM-Llama3-V-2_5'

# NOTE(review): MINI_CPM_DIR used to be reassigned here to a different path
# ('.../src/testing/mini_cpm_testing/mini_cpm/MiniCPM-V') after the first value
# had already been validated and used for sys.path / chdir. The stale override
# is removed so the constant keeps the value that was actually used.

# Prompt sent with every image. The JSON template key now matches what is being
# asked for (race); it was previously "jersey_number" with a trailing comma,
# which is not valid JSON. Any downstream parser keyed on "jersey_number" must
# be updated — TODO confirm.
PROMPT = """Analyze the basketball player shown in the provided still tracklet frame and describe the following details:
1. Player Race: Identify the player's race as either 'black', 'mixed', 'white', or 'other'.
Based on the frame description, produce an output prediction in the following JSON format:
{
  "race": "<predicted_race>"
}
[EOS]"""

The history saving thread hit an unexpected error (OperationalError('database or disk is full')).History will not be written to the database.


In [8]:
def load_model_and_tokenizer(device: int = 0):
    """Build a MiniCPMVChat instance for MODEL_NAME and return it.

    Args:
        device: Passed straight through as the second argument to MiniCPMVChat
            (presumably a GPU index — TODO confirm against chat.py).

    Returns:
        The constructed MiniCPMVChat object.

    Raises:
        Exception: Any error raised during construction is logged and re-raised.
    """
    logger.info("Loading model and tokenizer...")
    try:
        chat_model = MiniCPMVChat(MODEL_NAME, device)
        logger.info("Model and tokenizer loaded successfully.")
    except Exception as load_err:
        logger.error(f"Failed to load model or tokenizer: {load_err}")
        raise
    return chat_model

def ocr(image_base64, model):
    """Ask the model the module-level PROMPT about one base64-encoded image.

    Args:
        image_base64: Encoded image placed under the "image" key of the request.
        model: Object exposing ``chat(inputs)``.

    Returns:
        Whatever ``model.chat`` returns, or an empty string if any exception
        occurs (the error is logged, not raised).
    """
    try:
        chat_messages = [{'role': 'user', 'content': PROMPT}]
        payload = {
            "image": image_base64,
            # The message list is serialized to a JSON string before being sent.
            "question": json.dumps(chat_messages),
        }
        return model.chat(payload)
    except Exception as chat_err:
        logger.error(f"Failed to perform OCR: {chat_err}")
        return ""

def load_and_convert_image(fp: str):
    """Encode the image at ``fp`` via ``img2base64``.

    Args:
        fp: Path handed to ``img2base64``.

    Returns:
        The value returned by ``img2base64``, or None if it raised (the error
        is logged rather than propagated).
    """
    try:
        encoded = img2base64(fp)
    except Exception as convert_err:
        logger.error(f"Failed to load or convert image {fp}: {convert_err}")
        encoded = None
    return encoded

def process_image(image_fp: str, model):
    """Encode one image file and run ``ocr`` on it.

    Args:
        image_fp: Path of the image to process.
        model: Model object forwarded to ``ocr``.

    Returns:
        The result of ``ocr``, or None when the image could not be encoded
        (``load_and_convert_image`` returned a falsy value).
    """
    encoded = load_and_convert_image(image_fp)
    # Guard: nothing to send to the model if encoding failed or came back empty.
    if not encoded:
        return None
    return ocr(encoded, model)

def process_image_file_paths(img_paths, model):
    """Run ``process_image`` over every path and collect the non-empty answers.

    Args:
        img_paths: Iterable of image file paths. It no longer has to support
            ``len()``; tqdm picks up the total on its own when it is available.
        model: Model object forwarded to ``process_image``.

    Returns:
        dict mapping each image path to its result. Paths whose result is falsy
        (None or an empty string) are left out.
    """
    results = {}
    # The loop index was never used, so iterate over the paths directly.
    for img_path in tqdm(img_paths):
        result = process_image(img_path, model)
        if result:
            results[img_path] = result
    return results

In [9]:
import pandas as pd

# Benchmark annotations CSV; only its 'file_path' column is read here.
annotations_df_fp = '/mnt/opr/levlevi/player-re-id/src/testing/race_and_team_id_comparisons/100-img-race-team-id-benchmark.csv'
annotations_df = pd.read_csv(annotations_df_fp)

# Image paths to evaluate, in the order they appear in the CSV.
img_file_paths = annotations_df.loc[:, 'file_path'].tolist()

# Load the model with its default device, then query it once per image.
model = load_model_and_tokenizer()
results = process_image_file_paths(img_paths=img_file_paths, model=model)

2024-06-25 07:17:45,407 - INFO - Loading model and tokenizer...
2024-06-25 07:17:45,532 - INFO - vision_config is None, using default vision config
Downloading shards:  43%|████▎     | 3/7 [00:00<00:00,  9.01it/s]
2024-06-25 07:17:45,946 - ERROR - Failed to load model or tokenizer: [Errno 28] No space left on device


OSError: [Errno 28] No space left on device