In [1]:
import ollama
import glob
import random
import time
import statistics
import re

In [2]:
instruction = "Please disregard any previous instructions. You are a professional car designer tasked with evaluating the stylishness of cars using numerical scores. Assess the stylishness of the car shown in the images below by assigning a numerical score between 0 and 1, where 0 represents 'not stylish at all' and 1 signifies 'extremely stylish.' Provide the score with four decimal places (for example, 0.1322)."

모델 별 추론 속도 테스트

In [3]:
# Inference speed benchmark functions
def chat(model, instruction, image_path=None):
    messages = {'role': 'user', 'content': instruction}
    if image_path is not None:
        messages['images'] = [image_path]

    messages = [messages] # List로 변환 
    response = ollama.chat(model=model, messages=messages)
    content = response['message']['content']
    return content

def speed_test(model, instruction, image_paths):
    elapsed_times = []
    chat(model=model, instruction="Hello") # 모델 준비

    for i, image_path in enumerate(image_paths):
        print(f'{i} - {image_path} - ', end="")
        
        start_time = time.time()
        content = chat(model=model, instruction=instruction, image_path=image_path)
        elapsed_time = time.time() - start_time
        
        print(f"{elapsed_time:.1f} s")
        print(content + "\n")
        elapsed_times.append(elapsed_time)

    mean = statistics.mean(elapsed_times)
    stdev = statistics.stdev(elapsed_times)
    print(f"{model} - 이미지 1장 처리시간: {mean:.2f}±{stdev:.2f} s\n")

In [4]:
image_dir_path = 'data/confirmed_fronts'
n_test = 10
image_paths = glob.glob(image_dir_path + '/*/*/*', recursive=True)

random.seed(42)
select_image_paths = random.choices(image_paths, k=n_test)

speed_test(model='minicpm-v', instruction=instruction, image_paths=select_image_paths)
speed_test(model='llava:34b', instruction=instruction, image_paths=select_image_paths)
speed_test(model='llama3.2-vision:11b', instruction=instruction, image_paths=select_image_paths)


0 - data/confirmed_fronts/BMW/2017/BMW$$i3$$2017$$White$$8_27$$131$$image_10.jpg - 3.3 s
To evaluate the styling of this BMW i3 electric vehicle for its potential as a designer car, we consider several factors:

1. **Exterior Design**: The front end features sleek LED headlights and distinctive blue accents which are modern and innovative. However, the black panel above the grille gives it an unconventional look that may not appeal to all tastes.

2. **Brand Appeal**: As a BMW i3, it has brand recognition and association with luxury and performance electric vehicles, adding to its stylishness for those who value eco-friendly alternatives without sacrificing design or prestige.

3. **Aerodynamics**: The overall shape is aerodynamic which enhances both the aesthetic appeal and efficiency of the vehicle.

4. **Color Scheme**: The white color combined with black accents provides a clean yet striking look that can be considered stylish, although personal preference plays a significant role 

추론 결과의 일관성 확인

In [6]:
image_paths = ['1.jpg', '2.jpg', '3.jpg']
n_trial = 10
pattern = r"\d\.\d+"

def consistency(model, instruction, image_paths, n_trial):
    print(f'{model} 추론결과 일관성 확인')
    for image_path in image_paths:
        print(image_path, end='')    
        scores = []

        for i in range(n_trial):
            while True:
                content = chat(model, instruction, image_path)
                search = re.search(pattern, content) # 소수점 네자리 수 찾기 (ex. 0.9684)
                
                if search: # 찾으면 출력 및 탈출
                    score = float(search.group())
                    if (0 <= score and score <= 1):
                        scores.append(score)
                        print(f' {score:.4f}', end = '')
                        break
                
        mean = statistics.mean(scores)
        stdev = statistics.stdev(scores)
        print(f' - {mean:.4f}±{stdev:.4f}')

consistency(model='minicpm-v', instruction=instruction, image_paths=image_paths, n_trial=n_trial)
consistency(model='llava:34b', instruction=instruction, image_paths=image_paths, n_trial=n_trial)
consistency(model='llama3.2-vision:11b', instruction=instruction, image_paths=image_paths, n_trial=n_trial)


minicpm-v 추론결과 일관성 확인
1.jpg 0.8000 0.9345 0.7891 0.7832 0.9123 0.9000 0.8571 0.7851 0.8634 0.9456 - 0.8570±0.0644
2.jpg 0.7321 0.9500 0.7325 0.8756 0.8531 0.8500 0.8956 0.9523 0.8500 0.8764 - 0.8568±0.0754
3.jpg 0.4286 0.8500 0.7456 0.6741 0.7500 0.6754 0.8571 0.8571 0.1258 0.8576 - 0.6821±0.2363
llava:34b 추론결과 일관성 확인
1.jpg 0.8792 0.9658 0.4579 0.7392 0.8435 0.8519 0.9675 0.7568 0.8276 0.8312 - 0.8121±0.1450
2.jpg 0.8619 0.7943 0.7815 0.7368 0.8594 0.7849 0.6479 0.5742 0.5289 0.6458 - 0.7216±0.1163
3.jpg 0.8291 0.7256 0.4823 0.2148 0.5831 0.8475 0.6794 0.5182 0.6548 0.7436 - 0.6278±0.1888
llama3.2-vision:11b 추론결과 일관성 확인
1.jpg 0.8679 0.8761 0.7173 0.9876 0.9997 0.7686 0.8758 0.9872 0.8758 0.7778 - 0.8734±0.0980
2.jpg 0.8756 0.9281 0.2400 0.8500 0.9988 0.8000 0.8337 0.9874 0.9876 0.8123 - 0.8314±0.2210
3.jpg 0.6254 0.8321 0.8000 0.8757 0.6259 0.8727 0.7892 0.8121 0.8757 0.8778 - 0.7987±0.0971


In [5]:
import glob
import os
import re

In [6]:
image_dir_path = 'data/confirmed_fronts'
dir_paths = glob.glob(image_dir_path + '/*/*', recursive=True)
years = [int(os.path.basename(dir_path)) for dir_path in dir_paths]
print(f'min year: {min(years)}, max year: {max(years)}')

min year: 2000, max year: 2018


In [7]:
years = range(2015, 2019)
model = 'llava:34b'
pattern = r"\d\.\d+"

for year in years[0:1]:
    image_paths = glob.glob(image_dir_path + f'/*/{year}/*', recursive=True)
    for image_path in image_paths[0:1]:
        
        while(True):
            # vision-language model을 이용하여 이미지의 평가값 추출
            response = ollama.chat(
            model=model,
            messages=[{
                'role': 'user',
                'content': instruction,
                'images': [image_path]
            }])

            content = response['message']['content']
            search = re.search(pattern, content) # 소수점 네자리 수 찾기 (ex. 0.9684)
            
            if search: # 찾으면 출력 및 탈출
                print(search.group())
                break



0.8476


In [17]:
chat(model='minicpm-v', instruction=instruction, image_path='/data/ephemeral/home/Dongjin/level4-cv-finalproject-hackathon-cv-02-lv3/other_group_datas/DVM-CAR/1.jpg')

'Considering various aspects such as color scheme, design elements like front grille, headlights, hood shape, side profile, rear spoiler, and overall aesthetic appeal, I would assign a numerical style index for this car to be approximately **0.8976** on the stylishness scale (where 1 represents extremely stylish). The blue sports coupe exudes a sense of modernity with its aerodynamic lines and aggressive styling cues. While some might find certain design elements controversial or polarizing, the overall presentation aligns well within contemporary automotive trends for sporty performance vehicles.'