In [62]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from PIL import Image, ImageEnhance, ImageOps
import ssl
import os
import pandas as pd
from scipy.spatial.distance import cosine
from torch.utils.data import DataLoader, random_split
import random
from collections import Counter

In [16]:
ssl._create_default_https_context = ssl._create_stdlib_context

### Модель для определение схожести пар объектов

In [117]:
# Загрузка предобученной модели ResNet18
model = models.resnet18(pretrained=True)
model = nn.Sequential(*list(model.children())[:-1])  # Удаление последнего слоя классификации
model.eval()

# Преобразования для изображений
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [97]:
def extract_features(image_path, model):
    image = Image.open(image_path).convert('RGB')
    image = preprocess(image).unsqueeze(0)
    with torch.no_grad():
        features = model(image).squeeze().numpy()
    return features

In [98]:
def are_images_similar(image_path1, image_path2, model, threshold=0.5):
    features1 = extract_features(image_path1, model)
    features2 = extract_features(image_path2, model)
    similarity = 1 - cosine(features1, features2)
    return similarity > threshold, similarity

### Модель для классификации изображений

In [118]:
# Определяем трансформации для предобработки данных
# transform = transforms.Compose([
#     transforms.Resize((128, 128)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Задаем путь к папке с данными
data_dir = '/Users/annapetrov/Desktop/ozon_хакатон/dataset_new'

# Загружаем данные
full_data = datasets.ImageFolder(root=data_dir, transform=transform)

# Разделяем данные на обучающие и валидационные
train_size = int(0.8 * len(full_data))
valid_size = len(full_data) - train_size
train_data, valid_data = random_split(full_data, [train_size, valid_size])

train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_data, batch_size=32, shuffle=False, num_workers=4)

# Количество классов
num_classes = len(full_data.classes)

In [100]:
full_data.classes

['кроссовки',
 'ноутбук',
 'планшет',
 'смартфон',
 'стол',
 'стул',
 'телевизор',
 'туфли']

In [101]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(128 * 16 * 16, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

model = SimpleCNN(num_classes)

In [102]:
model_torch = torch.load('models/model_torch_save.pth')

  model_torch = torch.load('models/model_torch_save.pth')


In [158]:
def classify_image(model, image_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    
    # Трансформируем изображение
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        outputs = model(image)
        _, preds = torch.max(outputs, 1)
        class_id = preds.item()
        
    if class_id < len(full_data.classes):
        return full_data.classes[class_id]
    else:
        print(f"Warning: Class index {class_id} out of range for image {image_path}")
        return None

### Добавление до 10 изобаржений в объектах

In [104]:
dataset_dir = '/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders'

In [37]:
# Функция для трансформации изображения
def transform_image(image_path):
    image = Image.open(image_path)
    
    # Применение случайной трансформации
    transform_type = random.choice(['rotate', 'flip', 'color', 'contrast', 'invert'])
    
    if transform_type == 'rotate':
        angle = random.randint(0, 360)
        image = image.rotate(angle)
    elif transform_type == 'flip':
        image = ImageOps.mirror(image)
    elif transform_type == 'color':
        enhancer = ImageEnhance.Color(image)
        factor = random.uniform(0.5, 1.5)
        image = enhancer.enhance(factor)
    elif transform_type == 'contrast':
        enhancer = ImageEnhance.Contrast(image)
        factor = random.uniform(0.5, 1.5)
        image = enhancer.enhance(factor)
    elif transform_type == 'invert':
        image = ImageOps.invert(image.convert("RGB"))

    return image

In [40]:
# Проход по всем подпапкам
for category in os.listdir(dataset_dir):
    category_path = os.path.join(dataset_dir, category)
    
    if os.path.isdir(category_path):
        for item in os.listdir(category_path):
            item_path = os.path.join(category_path, item)
            
            if os.path.isdir(item_path):
                images = [f for f in os.listdir(item_path) if f.endswith(('.png', '.jpg', '.jpeg'))]
                
                num_images = len(images)
                
                if num_images < 10:
                    # Если изображений меньше 10, делаем дубликаты с трансформацией
                    for i in range(10 - num_images):
                        img_to_duplicate = random.choice(images)
                        img_path = os.path.join(item_path, img_to_duplicate)
                        
                        new_image = transform_image(img_path)
                        new_image_name = f"img_dup_{i+1}.jpg"
                        new_image.save(os.path.join(item_path, new_image_name))
                
                elif num_images > 10:
                    # Если изображений больше 10, удаляем лишние
                    for img_to_remove in random.sample(images, num_images - 10):
                        os.remove(os.path.join(item_path, img_to_remove))

### Проверяем схожесть объектов

In [120]:
root_folder = "/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders"
folder1 = os.path.join(root_folder, "планшет")
folder2 = os.path.join(root_folder, "ноутбук")

objects1 = [os.path.join(folder1, obj) for obj in os.listdir(folder1) 
            if os.path.isdir(os.path.join(folder1, obj))]
objects2 = [os.path.join(folder2, obj) for obj in os.listdir(folder2) 
            if os.path.isdir(os.path.join(folder2, obj))]

In [143]:
obj1 = objects1[3]
obj2 = objects2[5]

In [144]:
obj1, obj2

('/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/планшет/775720580',
 '/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/814389213')

In [145]:
images1 = [os.path.join(obj1, img) for img in os.listdir(obj1) if img.endswith(('.jpg', '.png', '.jpeg'))]
images2 = [os.path.join(obj2, img) for img in os.listdir(obj2) if img.endswith(('.jpg', '.png', '.jpeg'))]

In [146]:
total_comparisons = 0
similar_count = 0
data = []

In [147]:
# Сравнение каждой картинки из одного объекта с каждой картинкой другого объекта
for img1 in images1:
    for img2 in images2:
        total_comparisons += 1
        similar, similarity_score = are_images_similar(img1, img2, model)
        if similar:
            similar_count += 1
        data.append({
            "Object1": os.path.basename(obj1),
            "Object2": os.path.basename(obj2),
            "Image1": img1,
            "Image2": img2,
            "Similar": similar,
            "Similarity_Score": similarity_score
        })

In [148]:
# Подсчитываем процент схожих изображений
if total_comparisons > 0:
    similarity_percentage = similar_count / total_comparisons

    # Проверяем, превышает ли процент схожести пороговое значение
    if similarity_percentage >= 0.6:
        print(f"{obj1}\n{obj2}\nсхожи ({similarity_percentage*100:.2f}% совпадений).\n\n")
    else:
        print(f"{obj1}\n{obj2}\nНЕ схожи ({similarity_percentage*100:.2f}% совпадений).\n\n")

/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/планшет/775720580
/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/814389213
схожи (100.00% совпадений).




In [149]:
df_result = pd.DataFrame(data)

In [150]:
df_result

Unnamed: 0,Object1,Object2,Image1,Image2,Similar,Similarity_Score
0,775720580,814389213,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,True,0.640726
1,775720580,814389213,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,True,0.687709
2,775720580,814389213,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,True,0.693105
3,775720580,814389213,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,True,0.636644
4,775720580,814389213,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,True,0.688356
...,...,...,...,...,...,...
95,775720580,814389213,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,True,0.613469
96,775720580,814389213,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,True,0.714053
97,775720580,814389213,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,True,0.698274
98,775720580,814389213,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,/Users/annapetrov/Desktop/ozon_хакатон/dataset...,True,0.669770


### Классифицируем каждое изображение и выделяем главную метку для каждого объекта

In [151]:
# Функция для определения главной метки объекта
def determine_main_label(labels):
    # Подсчет частоты каждой метки
    label_counts = Counter(labels)
    # Выбор метки с наибольшей частотой
    main_label = label_counts.most_common(1)[0][0]
    return main_label

In [152]:
for img1 in images1:
    labels_1 = []
    label = classify_image(model_torch, img1)
    labels_1.append(label)

for img2 in images2:
    labels_2 = []
    label = classify_image(model_torch, img2)
    labels_2.append(label)

In [153]:
main_label_1 = determine_main_label(labels_1)
main_label_2 = determine_main_label(labels_2)

print(f"Object1: {obj1.split('/')[-2]}/{obj1.split('/')[-1]} - Main Label: {main_label_1}")
print(f"Object2: {obj2.split('/')[-2]}/{obj2.split('/')[-1]} - Main Label: {main_label_2}")

Object1: планшет/775720580 - Main Label: планшет
Object2: ноутбук/814389213 - Main Label: ноутбук


### Test all objects

In [None]:
root_folder = "/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders"
folder1 = os.path.join(root_folder, "планшет")
folder2 = os.path.join(root_folder, "ноутбук")

objects1 = [os.path.join(folder1, obj) for obj in os.listdir(folder1) 
            if os.path.isdir(os.path.join(folder1, obj))]
objects2 = [os.path.join(folder2, obj) for obj in os.listdir(folder2) 
            if os.path.isdir(os.path.join(folder2, obj))]

In [159]:
all_count = 0
correct_count = 0

for obj1 in objects1:
    for obj2 in objects2:
        all_count+=1

        images1 = [os.path.join(obj1, img) for img in os.listdir(obj1) if img.endswith(('.jpg', '.png', '.jpeg'))]
        images2 = [os.path.join(obj2, img) for img in os.listdir(obj2) if img.endswith(('.jpg', '.png', '.jpeg'))]

        total_comparisons = 0
        similar_count = 0
        data = []
        similar_bool = None

        for img1 in images1:
            for img2 in images2:
                total_comparisons += 1
                similar, similarity_score = are_images_similar(img1, img2, model)
                if similar:
                    similar_count += 1

        if total_comparisons > 0:
            similarity_percentage = similar_count / total_comparisons

            # Проверяем, превышает ли процент схожести пороговое значение
            if similarity_percentage >= 0.6:
                similar_bool = True
            else:
                similar_bool = False

        for img1 in images1:
            labels_1 = []
            label = classify_image(model_torch, img1)
            labels_1.append(label)

        for img2 in images2:
            labels_2 = []
            label = classify_image(model_torch, img2)
            labels_2.append(label)

        main_label_1 = determine_main_label(labels_1)
        main_label_2 = determine_main_label(labels_2)

        if similar_bool == True and main_label_1 == main_label_2:
            print("СХОЖИ")
            print(obj1, obj2)
            print()
            if obj1.split('/')[-2] == obj2.split('/')[-2]:
                correct_count+=1
            



In [160]:
all_count, correct_count

(100, 0)

### Test 2

In [161]:
root_folder = "/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders"
folder1 = os.path.join(root_folder, "ноутбук")
folder2 = os.path.join(root_folder, "ноутбук")

objects1 = [os.path.join(folder1, obj) for obj in os.listdir(folder1) 
            if os.path.isdir(os.path.join(folder1, obj))]
objects2 = [os.path.join(folder2, obj) for obj in os.listdir(folder2) 
            if os.path.isdir(os.path.join(folder2, obj))]

In [162]:
all_count = 0
correct_count = 0

for obj1 in objects1:
    for obj2 in objects2:
        all_count+=1

        images1 = [os.path.join(obj1, img) for img in os.listdir(obj1) if img.endswith(('.jpg', '.png', '.jpeg'))]
        images2 = [os.path.join(obj2, img) for img in os.listdir(obj2) if img.endswith(('.jpg', '.png', '.jpeg'))]

        total_comparisons = 0
        similar_count = 0
        data = []
        similar_bool = None

        for img1 in images1:
            for img2 in images2:
                total_comparisons += 1
                similar, similarity_score = are_images_similar(img1, img2, model)
                if similar:
                    similar_count += 1

        if total_comparisons > 0:
            similarity_percentage = similar_count / total_comparisons

            # Проверяем, превышает ли процент схожести пороговое значение
            if similarity_percentage >= 0.6:
                similar_bool = True
            else:
                similar_bool = False

        for img1 in images1:
            labels_1 = []
            label = classify_image(model_torch, img1)
            labels_1.append(label)

        for img2 in images2:
            labels_2 = []
            label = classify_image(model_torch, img2)
            labels_2.append(label)

        main_label_1 = determine_main_label(labels_1)
        main_label_2 = determine_main_label(labels_2)

        if similar_bool == True and main_label_1 == main_label_2:
            print("СХОЖИ")
            print(obj1, obj2)
            print()
            if obj1.split('/')[-2] == obj2.split('/')[-2]:
                correct_count+=1
            

СХОЖИ
/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/876925642 /Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/876925642

СХОЖИ
/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/876925642 /Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/769963030

СХОЖИ
/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/876925642 /Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/1081452165

СХОЖИ
/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/876925642 /Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/881783789

СХОЖИ
/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/876925642 /Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/870321139

СХОЖИ
/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноутбук/876925642 /Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/ноут

In [163]:
all_count, correct_count

(100, 98)

### Test

In [61]:
img1_path = os.path.join('dataset_objects_folders/кроссовки/173547555/image_1.jpg', img1)
img2_path = os.path.join('dataset_objects_folders/ноутбук/814389213/image_0.jpg', img2)
similar, similarity_score = are_images_similar(img1_path, img2_path, model)
clissify1 = classify_image(model_torch, img1_path)
classify2 = classify_image(model_torch, img2_path)

In [71]:
img1_path

'/Users/annapetrov/Desktop/ozon_хакатон/dataset_objects_folders/кроссовки/175976781/image_6.jpg'

In [67]:
clissify1

'кроссовки'

In [68]:
classify2

'ноутбук'