In [None]:
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np

# ImageNet-pretrained VGG16 used purely as a feature extractor:
# the classification head is dropped and inputs are 400x400 RGB images.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(400, 400, 3),
)

In [None]:
from PIL import Image
from tensorflow.keras.preprocessing.image import img_to_array

def get_feature_vector(img_path):
    """Extract a flattened VGG16 feature vector for one image file.

    The image is converted to 3-channel RGB, resized to 400x400 (the size
    this function feeds to ``base_model``), run through VGG16's
    ``preprocess_input`` and passed to ``base_model.predict``.

    Args:
        img_path: Path of the image file to load.

    Returns:
        A 1-D numpy array holding the flattened ``base_model`` output.
    """
    # The context manager closes the underlying file handle; convert() loads
    # the pixel data first, so the converted copy stays valid afterwards.
    with Image.open(img_path) as raw_img:
        # Force RGB: PNGs are often RGBA, palette ("P") or greyscale ("L"),
        # none of which give the 3 channels the model input expects.
        img = raw_img.convert('RGB').resize((400, 400))

    # Convert image to numpy array and add the leading batch dimension
    x = img_to_array(img)
    x = np.expand_dims(x, axis=0)

    # Apply the VGG16 input preprocessing
    x = preprocess_input(x)

    # Run the single-image batch through the model
    features = base_model.predict(x)

    # Flatten the feature maps and return them as a 1-D array
    return features.flatten()


In [None]:
import os

# Define the directory path where the logos are stored
logo_dir = './test'

# os.listdir() returns entries in arbitrary order. Sorting makes the row order
# of the saved matrix reproducible, so row i can always be mapped back to the
# i-th name of the sorted PNG listing.
png_filenames = sorted(
    filename for filename in os.listdir(logo_dir) if filename.endswith('.png')
)

# One feature vector per PNG image, in sorted filename order
feature_vectors = []
for filename in png_filenames:
    img_path = os.path.join(logo_dir, filename)
    feature_vectors.append(get_feature_vector(img_path))

# Save the feature vectors to disk (np.save converts the list to an array)
np.save('feature_vectors.npy', feature_vectors)


In [None]:
# Sanity check: read the saved feature matrix back and report its dimensions
f_vect = np.load(file='./feature_vectors.npy')
print(np.shape(f_vect))

## Subdividing the dataset for parallel processing

In [None]:
def split_file_list(list_path, chunk_size=10000, output_pattern="subset_files_{}.txt"):
    """Split a line-oriented listing file into consecutively numbered chunk files.

    Lines are copied verbatim (line endings included). Every chunk holds
    ``chunk_size`` lines except possibly the last one; an empty listing
    produces no output files.

    Args:
        list_path: Path of the text file to split.
        chunk_size: Maximum number of lines per output file.
        output_pattern: ``str.format`` pattern for the output paths; ``{}`` is
            replaced by the zero-based chunk index.

    Returns:
        The list of output paths that were written, in chunk order.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    written_paths = []

    def _write_chunk(lines):
        # The chunk index is simply the number of chunks written so far
        out_path = output_pattern.format(str(len(written_paths)))
        with open(out_path, 'w') as out_file:
            out_file.write("".join(lines))
        written_paths.append(out_path)

    chunk = []
    with open(list_path, 'r') as list_file:
        # Stream the listing so only one chunk is held in memory at a time
        for line in list_file:
            chunk.append(line)
            if len(chunk) == chunk_size:
                _write_chunk(chunk)
                chunk = []

    # Flush the final, shorter chunk (if any)
    if chunk:
        _write_chunk(chunk)

    return written_paths


subset_paths = split_file_list("./LLD-logo_files/files.txt")



## Trying Batch Processing

In [None]:
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np
from tqdm import tqdm_notebook

# ImageNet-pretrained VGG16 used purely as a feature extractor:
# the classification head is dropped and inputs are 224x224 RGB images.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)


In [48]:
from PIL import Image
from tensorflow.keras.preprocessing.image import img_to_array

def get_feature_vector(img_path, verbose=True):
    """Extract a flattened VGG16 feature vector for one image file.

    The image is converted to 3-channel RGB, resized to 224x224 (the size
    this function feeds to ``base_model``), run through VGG16's
    ``preprocess_input`` and passed to ``base_model.predict``.

    Args:
        img_path: Path of the image file to load.
        verbose: When True (the default, matching the previous behaviour)
            the path is printed before the image is processed. Pass False
            when processing many images to keep the cell output readable.

    Returns:
        A 1-D numpy array holding the flattened ``base_model`` output.
    """
    if verbose:
        print(img_path)

    # The context manager closes the underlying file handle; convert() loads
    # the pixel data first, so the converted copy stays valid afterwards.
    with Image.open(img_path) as raw_img:
        # Force RGB for every mode, not only RGBA: palette ("P") and
        # greyscale ("L"/"LA") PNGs would otherwise reach the model with the
        # wrong number of channels.
        img = raw_img.convert('RGB').resize((224, 224))

    # Convert image to numpy array and add the leading batch dimension
    x = img_to_array(img)
    x = np.expand_dims(x, axis=0)

    # Apply the VGG16 input preprocessing
    x = preprocess_input(x)

    # Run the single-image batch through the model
    features = base_model.predict(x)

    # Flatten the feature maps and return them as a 1-D array
    return features.flatten()

def divide_dataset_into_subsets(path):
    """Load every ``.txt`` listing found in ``path``.

    Files are read in natural (number-aware) filename order, so for listings
    named ``subset_files_0.txt``, ``subset_files_1.txt``, ... the i-th entry
    of the result corresponds to subset ``i`` regardless of the order in
    which the operating system enumerates the directory.

    Args:
        path: Directory containing the subset listing files.

    Returns:
        A list with one entry per ``.txt`` file; each entry is the list of
        lines of that file with line endings stripped.
    """
    import os
    import re

    def _natural_key(name):
        # re.split with a capture group alternates text / digit runs, so the
        # odd positions are always the numeric parts. Comparing them as ints
        # puts "subset_files_2" before "subset_files_10".
        parts = re.split(r'(\d+)', name)
        return [int(part) if pos % 2 else part for pos, part in enumerate(parts)]

    all_subset_names = []
    for file_name in sorted(os.listdir(path), key=_natural_key):
        if file_name.endswith('.txt'):
            file_path = os.path.join(path, file_name)
            with open(file_path, 'r') as file:
                all_subset_names.append(file.read().splitlines())

    return all_subset_names
def process_photos(photo_subset):
    """Compute the feature vector of every photo in a subset.

    Args:
        photo_subset: Iterable of image paths, each passed unchanged to
            ``get_feature_vector``.

    Returns:
        A list of feature vectors in the same order as ``photo_subset``.
    """
    # tqdm_notebook is deprecated; tqdm.auto selects the notebook widget when
    # it is available and falls back to the console bar otherwise.
    from tqdm.auto import tqdm

    # The progress bar already reports per-photo progress, so no extra
    # per-item print is needed.
    return [
        get_feature_vector(photo_path)
        for photo_path in tqdm(photo_subset, desc='Processing subset')
    ]


In [None]:
import os
import numpy as np
import pandas as pd

# Define the directory path where the logos are stored
logo_dir = './LLD-logo_files/LLD-logo-files/'

# Read the listing for this subset: one image filename per line
with open("./LLD-logo_files/subsets/subset_files_12.txt") as f:
    filenames = f.read().splitlines()

# Collect one feature vector and one label per PNG entry, in listing order
feature_vectors = []
labels = []
for filename in filenames:
    # Skip blank lines and anything that is not a PNG image
    if filename.endswith('.png'):
        img_path = os.path.join(logo_dir, filename)
        feature_vectors.append(get_feature_vector(img_path))
        # splitext strips only the final extension, so "a.b.png" keeps the
        # label "a.b" (split('.')[0] would have cut it down to "a").
        labels.append(os.path.splitext(filename)[0])

# Convert the feature_vectors and labels lists to numpy arrays
feature_vectors = np.array(feature_vectors)
labels = np.array(labels)

# A DataFrame column built from a dict must be 1-D; handing pandas the 2-D
# feature matrix raises ValueError. list() yields one row vector per cell.
data = pd.DataFrame({'Feature Vector': list(feature_vectors), 'Label': labels})

# NOTE(review): to_csv writes each vector via its string form, and numpy
# abbreviates arrays of more than 1000 elements with "...", so this CSV is
# not a lossless copy of the features. Use np.save on `feature_vectors` when
# the values must be read back.
data.to_csv('annotated_feature_vectors_12.csv', index=False)


In [40]:
import os
import numpy as np
import pandas as pd

# Define the directory path where the logos are stored
logo_dir = './LLD-logo_files/LLD-logo-files/'

# Smoke test: only the first 10 entries of the subset listing are processed
with open("./LLD-logo_files/subsets/subset_files_12.txt") as f:
    filenames = f.read().splitlines()[:10]

# Collect one feature vector and one label per PNG entry, in listing order.
# get_feature_vector always returns a non-empty flattened array, so no
# emptiness check is needed and both lists stay the same length.
feature_vectors = []
labels = []
for filename in filenames:
    if filename.endswith('.png'):
        img_path = os.path.join(logo_dir, filename)
        feature_vectors.append(get_feature_vector(img_path))
        # splitext strips only the final extension, so "a.b.png" keeps the
        # label "a.b" (split('.')[0] would have cut it down to "a").
        labels.append(os.path.splitext(filename)[0])

# Convert the feature_vectors and labels lists to numpy arrays
feature_vectors = np.array(feature_vectors)
labels = np.array(labels)

# Save the feature matrix as a numpy file (this is the lossless copy)
np.save('feature_vectors_12.npy', feature_vectors)

# One 1-D row vector per DataFrame cell; rows are already flat
dataframe_vectors = list(feature_vectors)
assert len(dataframe_vectors) == len(labels), "feature vectors and labels are built in lockstep"

data = pd.DataFrame({'Label': labels, 'Feature Vector': dataframe_vectors})

# NOTE(review): to_csv writes each vector via its string form, and numpy
# abbreviates arrays of more than 1000 elements with "...", so the CSV is a
# human-readable index only; read features back from the .npy file.
data.to_csv('annotated_feature_vectors_12.csv', index=False)


./LLD-logo_files/LLD-logo-files/worldoptions.png
./LLD-logo_files/LLD-logo-files/worldoweb.png
./LLD-logo_files/LLD-logo-files/worldpac.png
./LLD-logo_files/LLD-logo-files/worldpackers.png
./LLD-logo_files/LLD-logo-files/worldpadeltour.png
./LLD-logo_files/LLD-logo-files/worldpainter.png
./LLD-logo_files/LLD-logo-files/worldpay.png
./LLD-logo_files/LLD-logo-files/worldpolicy.png
./LLD-logo_files/LLD-logo-files/worldpoliticsreview.png
./LLD-logo_files/LLD-logo-files/worldpressphoto.png


In [42]:
import os
import numpy as np
import pandas as pd

# Define the directory path where the logos are stored
logo_dir = './LLD-logo_files/LLD-logo-files/'

# Read the listing for this subset: one image filename per line
with open("./LLD-logo_files/subsets/subset_files_12.txt") as f:
    filenames = f.read().splitlines()

# Collect one feature vector and one label per PNG entry, in listing order.
# get_feature_vector always returns a non-empty flattened array, so no
# emptiness check is needed and both lists stay the same length.
feature_vectors = []
labels = []
for filename in filenames:
    if filename.endswith('.png'):
        img_path = os.path.join(logo_dir, filename)
        feature_vectors.append(get_feature_vector(img_path))
        # splitext strips only the final extension, so "a.b.png" keeps the
        # label "a.b" (split('.')[0] would have cut it down to "a").
        labels.append(os.path.splitext(filename)[0])

# Convert the feature_vectors and labels lists to numpy arrays
feature_vectors = np.array(feature_vectors)
labels = np.array(labels)

# Round the feature vectors to 6 decimal places before persisting them
feature_vectors = np.round(feature_vectors, decimals=6)

# Save the feature matrix as a numpy file (this is the lossless copy)
np.save('feature_vectors_12.npy', feature_vectors)

# One 1-D row vector per DataFrame cell; rows are already flat
dataframe_vectors = list(feature_vectors)
assert len(dataframe_vectors) == len(labels), "feature vectors and labels are built in lockstep"

data = pd.DataFrame({'Label': labels, 'Feature Vector': dataframe_vectors})

# NOTE(review): to_csv writes each vector via its string form, and numpy
# abbreviates arrays of more than 1000 elements with "...", so the CSV is a
# human-readable index only; read features back from the .npy file.
data.to_csv('annotated_feature_vectors_12.csv', index=False)


./LLD-logo_files/LLD-logo-files/worldoptions.png
./LLD-logo_files/LLD-logo-files/worldoweb.png
./LLD-logo_files/LLD-logo-files/worldpac.png
./LLD-logo_files/LLD-logo-files/worldpackers.png
./LLD-logo_files/LLD-logo-files/worldpadeltour.png
./LLD-logo_files/LLD-logo-files/worldpainter.png
./LLD-logo_files/LLD-logo-files/worldpay.png
./LLD-logo_files/LLD-logo-files/worldpolicy.png
./LLD-logo_files/LLD-logo-files/worldpoliticsreview.png
./LLD-logo_files/LLD-logo-files/worldpressphoto.png
./LLD-logo_files/LLD-logo-files/worldrag.png
./LLD-logo_files/LLD-logo-files/worldreader.png
./LLD-logo_files/LLD-logo-files/worldremit.png
./LLD-logo_files/LLD-logo-files/worldrowing.png
./LLD-logo_files/LLD-logo-files/worlds-away.png
./LLD-logo_files/LLD-logo-files/worldsbestbars.png
./LLD-logo_files/LLD-logo-files/worldsciencefestival.png
./LLD-logo_files/LLD-logo-files/worldscienceu.png
./LLD-logo_files/LLD-logo-files/worldscreen.png
./LLD-logo_files/LLD-logo-files/worldshare.png
./LLD-logo_files/LLD-

In [44]:
# Reload the feature matrix persisted for subset 12
feature_vectors = np.load(file='feature_vectors_12.npy')


In [49]:
# Project-relative path, like every other path in this notebook, so the cell
# does not depend on one particular machine's directory layout.
query_image_path = './LLD-logo_files/fakes/yabupushelberg2.png'

# Feature vector of the query logo, extracted the same way as the dataset rows
query_feature_vector = get_feature_vector(query_image_path)


C:/Users/ACER/Documents/GitHub/mini-project/LLD-logo_files/fakes/yabupushelberg2.png


In [50]:
from sklearn.metrics.pairwise import cosine_similarity

# cosine_similarity works on 2-D inputs, so present the query as a single-row
# matrix; the result then has one row with one score per dataset vector.
query_row = query_feature_vector.reshape(1, -1)
similarity_scores = cosine_similarity(X=query_row, Y=feature_vectors)


In [51]:
# Rank dataset rows by similarity to the query: argsort is ascending, so the
# reversed order lists the best match first.
ascending_order = np.argsort(similarity_scores[0])
sorted_indices = ascending_order[::-1]


In [52]:
# Number of best matches to keep
k = 10
top_k_indices = sorted_indices[:k]

# Rows of `feature_vectors` exist only for the '.png' entries of `filenames`
# (blank or non-PNG lines were skipped during extraction), so the row index
# must be looked up in the equally filtered list. Indexing `filenames`
# directly would be off by one for every skipped line.
png_filenames = [name for name in filenames if name.endswith('.png')]
assert len(png_filenames) == len(feature_vectors), (
    "filenames and feature_vectors are out of sync; "
    "re-run the extraction cell for this subset"
)

# (filename, cosine similarity) pairs, best match first
similar_images = [
    (png_filenames[index], similarity_scores[0][index])
    for index in top_k_indices
]


In [53]:
# Display the collected (filename, cosine similarity) pairs
similar_images

[('yabupushelberg.png', 0.7308096),
 ('xyht.png', 0.44533214),
 ('zkipster.png', 0.41436666),
 ('yosisamra.png', 0.4047302),
 ('worldversus.png', 0.39549258),
 ('yabancidizi.png', 0.39183703),
 ('zizki.png', 0.38815054),
 ('zems.png', 0.38811046),
 ('zago.png', 0.3856465),
 ('zindigo.png', 0.37309796)]

In [54]:
import os
import shutil


# Forward slashes are accepted on Windows as well as POSIX systems, whereas
# a backslash-separated path only resolves on Windows.
source_dir = './LLD-logo_files/LLD-logo-files/'
destination_dir = './results/'

# Create the destination directory if it doesn't exist
# (exist_ok avoids the check-then-create race)
os.makedirs(destination_dir, exist_ok=True)

# Copy the similar images to the destination directory
for image_name, _ in similar_images:
    source_path = os.path.join(source_dir, image_name)
    destination_path = os.path.join(destination_dir, image_name)
    shutil.copyfile(source_path, destination_path)

print("Image files copied successfully!")


Image files copied successfully!
