In [40]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from PIL import Image
import base64
import os
import shutil
import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Dropout, Dense
from keras.applications import MobileNet
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.mobilenet import preprocess_input
from pathlib import Path
import matplotlib.pyplot as plt

In [18]:
# Working directory of the kernel; used below as the base for the dataset folder path.
curr_dir = os.getcwd()

### Count weak examples

In [20]:
def count_photos_in_folders(main_folder):
    """Count the image files stored directly in each subfolder of ``main_folder``.

    Only the immediate subfolders are inspected (no recursion). An entry is
    counted as a photo when its name ends with ``.png``, ``.jpg`` or ``.jpeg``
    (case-insensitive). Entries of ``main_folder`` that are not directories
    are ignored.

    Args:
        main_folder: Path of the directory whose subfolders are inspected.

    Returns:
        dict mapping subfolder name -> number of photos found in it. Every
        subfolder gets an entry, including folders without any photos or
        without any entries at all (count ``0``).
    """
    photo_extensions = ('.png', '.jpg', '.jpeg')
    folder_photo_count = {}

    for folder_name in os.listdir(main_folder):
        folder_path = os.path.join(main_folder, folder_name)
        if not os.path.isdir(folder_path):
            continue
        # Record the count once per folder, independent of how many entries it
        # has, so that completely empty folders are reported with 0 instead of
        # being left out of the result.
        folder_photo_count[folder_name] = sum(
            1
            for file in os.listdir(folder_path)
            if file.lower().endswith(photo_extensions)
        )

    return folder_photo_count

In [22]:
# Dataset root: the 'clear_data_by_name' directory inside the working directory.
main_folder = os.path.join(curr_dir, 'clear_data_by_name')
# Mapping of subfolder name -> photo count, as returned by count_photos_in_folders.
results = count_photos_in_folders(main_folder)

In [24]:
# Print every folder whose photo count does not exceed the threshold `n`.
n = 5
for folder, count in results.items():
    if count > n:
        # Enough photos in this folder; nothing to report.
        continue
    print(f'Папка: {folder}, Количество фотографий: {count}')

Папка: 1941-1945, Количество фотографий: 3
Папка: Библиотечная лужайка, Количество фотографий: 3
Папка: Дева Мария, Количество фотографий: 1
Папка: Макет ракеты-носителя Союз-ТМ, Количество фотографий: 2
Папка: П_И_ Чайковский, Количество фотографий: 1
Папка: Сердце, Количество фотографий: 2
Папка: Такса, Количество фотографий: 2
Папка: Фавор, Количество фотографий: 2
Папка: Чернобыль - трагедия XX века, Количество фотографий: 4


### MobileNet

In [13]:
def mean_score(scores):
    """Return the weighted sum of the ratings 1..10 using ``scores`` as weights.

    Args:
        scores: Array-like that broadcasts against the ten ratings 1..10;
            in this notebook it is the model's 10-way softmax output.

    Returns:
        ``sum(scores[i] * (i + 1))`` over the ten rating buckets, i.e. the
        mean rating when ``scores`` sums to one.
    """
    ratings = np.arange(1, 11)
    weighted = scores * ratings
    return np.sum(weighted)

def std_score(scores):
    """Return the spread of the ratings 1..10 around their weighted mean.

    Args:
        scores: Array-like that broadcasts against the ten ratings 1..10;
            in this notebook it is the model's 10-way softmax output.

    Returns:
        Square root of ``sum(scores[i] * ((i + 1) - mean) ** 2)``, where
        ``mean`` is ``mean_score(scores)``; this is the standard deviation of
        the rating when ``scores`` sums to one.
    """
    ratings = np.arange(1, 11, 1)
    center = mean_score(scores)
    squared_deviation = (ratings - center) ** 2
    variance = np.sum(squared_deviation * scores)
    return np.sqrt(variance)

In [35]:
# For every subfolder of the dataset, score each image with the model and save
# a copy of the highest-scoring one as 'best_image<ext>' in the same folder.
main_folder = os.path.join(curr_dir, 'clear_data_by_name')

with tf.device('/CPU:0'):
    # MobileNet backbone without its classification top, followed by a 10-way
    # softmax head. weights=None because all weights are loaded from
    # 'mobilenet_weights.h5' right after the model is assembled.
    base_model = MobileNet((None, None, 3),
                           alpha=1,
                           include_top=False,
                           pooling='avg',
                           weights=None)
    x = Dropout(0.75)(base_model.output)
    x = Dense(10, activation='softmax')(x)

    model = Model(base_model.input, x)
    model.load_weights('mobilenet_weights.h5')

    target_size = (224, 224)

    for folder_name in os.listdir(main_folder):
        folder_path = os.path.join(main_folder, folder_name)
        if not os.path.isdir(folder_path):
            # A stray file next to the data folders cannot be listed; skip it.
            continue

        best_score = -np.inf
        best_image = None
        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)
            if not os.path.isfile(file_path):
                continue
            if os.path.splitext(file)[0] == 'best_image':
                # Copy written by an earlier run of this cell. Scoring it would
                # let it win over its own source file, and copying a file onto
                # itself makes shutil.copy raise SameFileError.
                continue

            img = load_img(file_path, target_size=target_size)
            # Add the batch axis and apply the MobileNet input preprocessing.
            image_batch = preprocess_input(
                np.expand_dims(img_to_array(img), axis=0)
            )

            scores = model.predict(image_batch, batch_size=1, verbose=0)[0]
            mean = mean_score(scores)

            # Strict comparison: on a tie the first image listed is kept.
            if mean > best_score:
                best_score = mean
                best_image = file

        if best_image is not None:
            best_image_path = os.path.join(folder_path, best_image)
            # Keep the source extension (including its dot; empty when the
            # file name has none).
            extension = os.path.splitext(best_image)[1]
            best_image_new_path = os.path.join(folder_path,
                                               f'best_image{extension}')
            shutil.copy(best_image_path, best_image_new_path)

### Generate best image table

In [41]:
# Build a table with one row per unique `Name`: the path of the 'best_image'
# file found in that name's image folder and the file content as base64 text.
data = pd.read_csv('clear_processed_data.csv')
unique_names = data['Name'].unique()

print(unique_names.shape)

# First row of every name, in order of first appearance (the same order as
# `unique_names`). Rows without a name are dropped because they cannot be
# matched to a folder.
first_rows = data.dropna(subset=['Name']).drop_duplicates(subset='Name')

# Collect plain records and build the frame once; growing a DataFrame with
# pd.concat inside the loop copies the whole table on every iteration.
records = []
for name, image_path in zip(first_rows['Name'], first_rows['image_path']):
    folder_path = os.path.dirname(image_path)

    for file in os.listdir(folder_path):
        # Match the file stem exactly so that unrelated files which merely
        # contain 'best_image' in their name are not picked up.
        if os.path.splitext(file)[0] == 'best_image':
            best_path = os.path.join(folder_path, file)
            with open(best_path, "rb") as image_file:
                base64_string = base64.b64encode(image_file.read()).decode('utf-8')

            records.append({
                'Name': name,
                'Best_Image_Path': best_path,
                'Best_Image_Base64': base64_string,
            })
            break

# Explicit columns keep the header stable even when no record was collected.
best_images_data = pd.DataFrame(
    records, columns=['Name', 'Best_Image_Path', 'Best_Image_Base64']
)

best_images_data.to_csv('best_images.csv', index=False)

(356,)


In [42]:
# Preview the first rows of the table that was written to 'best_images.csv'.
best_images_data.head()

Unnamed: 0,Name,Best_Image_Path,Best_Image_Base64
0,"Здание бывшей гостиницы ""Мадрид""",c:\Users\alex\Desktop\NTO\clear_data_by_name\З...,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUEBAUEAwUFBA...
1,Серго Орджоникидзе,c:\Users\alex\Desktop\NTO\clear_data_by_name\С...,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUEBAUEAwUFBA...
2,Н. И. Кузнецову,c:\Users\alex\Desktop\NTO\clear_data_by_name\Н...,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUEBAUEAwUFBA...
3,Малышеву,c:\Users\alex\Desktop\NTO\clear_data_by_name\М...,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUEBAUEAwUFBA...
4,Комсомолу Урала,c:\Users\alex\Desktop\NTO\clear_data_by_name\К...,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUEBAUEAwUFBA...
