In [1]:
import html
import os
from time import sleep

import pandas as pd
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load key=value pairs from a local .env file into the process environment so
# that os.environ['HF_TOKEN'] can be resolved further down in the notebook.
load_dotenv()

True

In [3]:
def identify_tank(prompt: str, image_url: str, model: str, api_key: str,
                  delay: float = 5) -> str:
    """Ask a chat-completion vision model to classify the image at a URL.

    Sends one request whose single user message carries both the text prompt
    and the image URL, pauses, and returns the normalised reply.

    Args:
        prompt: Instruction text sent together with the image.
        image_url: URL of the image to classify.
        model: Model identifier passed to the chat-completion endpoint.
        api_key: Token used to construct the ``InferenceClient``.
        delay: Seconds to sleep after the request. Defaults to the 5-second
            pause this function always used (presumably to stay under the
            provider's rate limit -- confirm). Values <= 0 skip the pause.

    Returns:
        The reply lower-cased with surrounding whitespace, quotes, markdown
        emphasis and sentence punctuation removed, so that an answer such as
        ``"Soviet.\\n"`` becomes ``"soviet"``. An empty string is returned
        when the response carries no text content.
    """
    client = InferenceClient(api_key=api_key)

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                'role': 'user',
                'content': [
                    {'type': 'text', 'text': prompt},
                    {'type': 'image_url', 'image_url': {'url': image_url}},
                ],
            }
        ],
    )

    if delay > 0:
        sleep(delay)

    # `content` can be None; treat that as an empty answer instead of crashing
    # on `.lower()` in the middle of a long batch of paid requests.
    reply = completion.choices[0].message.content or ''

    # Models frequently decorate a one-word answer ("Soviet.", "**German**",
    # trailing newline). Trim that noise so the result can be compared with
    # plain lowercase labels by simple equality.
    return reply.lower().strip(' \t\r\n.,;:!?"\'`*')

In [4]:
# Short alias -> model string passed to the chat-completion API.
# NOTE(review): the ":suffix" appears to select the inference provider -- confirm.
models = dict(
    qwen='Qwen/Qwen2.5-VL-7B-Instruct:hyperbolic',
    gemma='google/gemma-3-27b-it:nebius',
)

# The one instruction sent with every image.
PROMPT = (
    'Write one word: Soviet if the picture shows a Soviet tank, '
    'German if the picture shows a German tank.'
)

# Raises KeyError immediately if the token is missing from the environment.
HF_TOKEN = os.environ['HF_TOKEN']

# Table of images to classify; later cells read its 'url' and 'country' columns.
urls = pd.read_csv('urls.csv')

In [5]:
def _ask_qwen(image_url):
    """Classify one image URL with the Qwen model using the shared prompt."""
    return identify_tank(
        prompt=PROMPT,
        image_url=image_url,
        model=models['qwen'],
        api_key=HF_TOKEN,
    )


# One request per row of the 'url' column; results are stored next to the inputs.
urls['qwen_prediction'] = urls['url'].apply(_ask_qwen)

In [6]:
def _ask_gemma(image_url):
    """Classify one image URL with the Gemma model using the shared prompt."""
    return identify_tank(
        prompt=PROMPT,
        image_url=image_url,
        model=models['gemma'],
        api_key=HF_TOKEN,
    )


# One request per row of the 'url' column; results are stored next to the inputs.
urls['gemma_prediction'] = urls['url'].apply(_ask_gemma)

In [13]:
def url_to_html_img(url):
    """Render an image URL as an ``<img>`` tag for display in a styled table.

    Args:
        url: Cell value from the ``url`` column.

    Returns:
        An ``<img>`` tag at most 250px wide and tall, or an empty string when
        the value is not a string (missing values such as NaN/None included).
    """
    # A non-string check already covers NaN/None, so no separate isna test.
    if not isinstance(url, str):
        return ""
    # Escape the URL before placing it in an attribute: a raw '"' would end
    # the attribute early (allowing markup injection) and a bare '&' in a
    # query string is invalid HTML.
    safe_url = html.escape(url, quote=True)
    return f'<img src="{safe_url}" width="250" style="max-height: 250px; object-fit: contain;">'


def format_prediction(value, true_country):
    """Turn a model prediction into a check mark or cross for display.

    Args:
        value: Predicted label for one row.
        true_country: Ground-truth label for the same row.

    Returns:
        '✅' when the prediction equals the ground truth, '❌' when it does
        not, and '' when the prediction is missing or not a string.
    """
    unusable = pd.isna(value) or not isinstance(value, str)
    if unusable:
        return ''
    if value == true_country:
        return '✅'
    return '❌'


# Work on a copy so the raw predictions in `urls` stay available for metrics.
df_formatted = urls.copy()
for prediction_col in ('qwen_prediction', 'gemma_prediction'):
    # Replace each raw prediction with a check mark / cross against 'country'.
    df_formatted[prediction_col] = urls.apply(
        lambda row, col=prediction_col: format_prediction(row[col], row['country']),
        axis=1,
    )

# Show the 'url' column as images and centre every body and header cell.
df_styled = df_formatted.style.format({'url': url_to_html_img})
df_styled.set_properties(**{'text-align': 'center'}).set_table_styles(
    [
        {'selector': cell_tag, 'props': [('text-align', 'center')]}
        for cell_tag in ('td', 'th')
    ]
)

Unnamed: 0,url,country,difficulty,qwen_prediction,gemma_prediction
0,,soviet,easy,✅,✅
1,,soviet,easy,✅,✅
2,,soviet,easy,✅,✅
3,,soviet,easy,✅,✅
4,,soviet,easy,✅,✅
5,,soviet,easy,✅,✅
6,,soviet,easy,❌,✅
7,,soviet,easy,✅,✅
8,,soviet,easy,✅,✅
9,,soviet,easy,✅,✅


In [14]:
y_true = urls['country']
y_pred_qwen = urls['qwen_prediction']
y_pred_gemma = urls['gemma_prediction']

# Same three metrics for each model; the second heading carries the blank
# lines that separate the two reports.
for heading, y_pred in (
    ('Метрики классификации Qwen:', y_pred_qwen),
    ('\n\nМетрики классификации Gemma:', y_pred_gemma),
):
    print(heading)
    print('Accuracy:', accuracy_score(y_true, y_pred))
    print('\nClassification Report:')
    print(classification_report(y_true, y_pred))
    print(confusion_matrix(y_true, y_pred))

Метрики классификации Qwen:
Accuracy: 0.90625

Classification Report:
              precision    recall  f1-score   support

      german       0.88      0.94      0.91        16
      soviet       0.93      0.88      0.90        16

    accuracy                           0.91        32
   macro avg       0.91      0.91      0.91        32
weighted avg       0.91      0.91      0.91        32

[[15  1]
 [ 2 14]]


Метрики классификации Gemma:
Accuracy: 0.9375

Classification Report:
              precision    recall  f1-score   support

      german       0.94      0.94      0.94        16
      soviet       0.94      0.94      0.94        16

    accuracy                           0.94        32
   macro avg       0.94      0.94      0.94        32
weighted avg       0.94      0.94      0.94        32

[[15  1]
 [ 1 15]]


#### Результаты предсказаний на предыдущем запуске


In [None]:
def url_to_html_img(url):
    """Render an image URL as an ``<img>`` tag for display in a styled table.

    Args:
        url: Cell value from the ``url`` column.

    Returns:
        An ``<img>`` tag at most 200px wide and tall, or an empty string when
        the value is not a string (missing values such as NaN/None included).
    """
    # A non-string check already covers NaN/None, so no separate isna test.
    if not isinstance(url, str):
        return ""
    # Escape the URL before placing it in an attribute: a raw '"' would end
    # the attribute early (allowing markup injection) and a bare '&' in a
    # query string is invalid HTML.
    safe_url = html.escape(url, quote=True)
    return f'<img src="{safe_url}" width="200" style="max-height: 200px; object-fit: contain;">'


def format_prediction(value, true_country):
    """Turn a model prediction into a check mark or cross for display.

    Args:
        value: Predicted label for one row.
        true_country: Ground-truth label for the same row.

    Returns:
        '✅' when the prediction equals the ground truth, '❌' when it does
        not, and '' when the prediction is missing or not a string.
    """
    unusable = pd.isna(value) or not isinstance(value, str)
    if unusable:
        return ''
    if value == true_country:
        return '✅'
    return '❌'


# Work on a copy so the raw predictions in `urls` stay available for metrics.
df_formatted = urls.copy()
for prediction_col in ('qwen_prediction', 'gemma_prediction'):
    # Replace each raw prediction with a check mark / cross against 'country'.
    df_formatted[prediction_col] = urls.apply(
        lambda row, col=prediction_col: format_prediction(row[col], row['country']),
        axis=1,
    )

# Show the 'url' column as images and centre every body and header cell.
df_styled = df_formatted.style.format({'url': url_to_html_img})
df_styled.set_properties(**{'text-align': 'center'}).set_table_styles(
    [
        {'selector': cell_tag, 'props': [('text-align', 'center')]}
        for cell_tag in ('td', 'th')
    ]
)

Unnamed: 0,url,country,difficulty,qwen_prediction,gemma_prediction
0,,soviet,easy,✅,✅
1,,soviet,easy,✅,✅
2,,soviet,easy,✅,✅
3,,soviet,easy,✅,✅
4,,soviet,easy,✅,✅
5,,soviet,easy,✅,✅
6,,soviet,easy,✅,✅
7,,soviet,easy,✅,✅
8,,soviet,easy,✅,✅
9,,soviet,easy,✅,✅


In [None]:
y_true = urls['country']
y_pred_qwen = urls['qwen_prediction']
y_pred_gemma = urls['gemma_prediction']

# Same three metrics for each model; the second heading carries the blank
# lines that separate the two reports.
for heading, y_pred in (
    ('Метрики классификации Qwen:', y_pred_qwen),
    ('\n\nМетрики классификации Gemma:', y_pred_gemma),
):
    print(heading)
    print('Accuracy:', accuracy_score(y_true, y_pred))
    print('\nClassification Report:')
    print(classification_report(y_true, y_pred))
    print(confusion_matrix(y_true, y_pred))

Метрики классификации Qwen:
Accuracy: 0.9375

Classification Report:
              precision    recall  f1-score   support

      german       1.00      0.88      0.93        16
      soviet       0.89      1.00      0.94        16

    accuracy                           0.94        32
   macro avg       0.94      0.94      0.94        32
weighted avg       0.94      0.94      0.94        32

[[14  2]
 [ 0 16]]


Метрики классификации Gemma:
Accuracy: 0.9375

Classification Report:
              precision    recall  f1-score   support

      german       0.94      0.94      0.94        16
      soviet       0.94      0.94      0.94        16

    accuracy                           0.94        32
   macro avg       0.94      0.94      0.94        32
weighted avg       0.94      0.94      0.94        32

[[15  1]
 [ 1 15]]
