In [6]:
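# One-time setup: uncomment and run these two lines once to download the
# WordNet corpus that `nltk.corpus.wordnet` needs in the cells below.
# import nltk
# nltk.download('wordnet')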

In [16]:
import os
import cv2
import numpy as np
from nltk.corpus import wordnet as wn

# Root directory of the image dataset.
# Set the DATASET_DIR environment variable to point at a different location;
# the original absolute path stays as the fallback so existing setups keep working.
dataset_dir = os.environ.get(
    'DATASET_DIR',
    '/home/a-schulz/Projects/applied_artificial_intelligence/src/week_10/Datasets',
)
# Image size used for the network input (same value the image loader uses).
target_size = (224, 224)

In [28]:
from tensorflow.keras.preprocessing import image as tf_image
from tensorflow.keras.applications.vgg16 import preprocess_input
# Get all images
def read_images_from_directory(directory, target_size=(224, 224)):
    """Load and preprocess every image found in the sub-directories of ``directory``.

    Every sub-directory encountered while walking ``directory`` is treated as a
    class: its name becomes the label of the files it directly contains. Each
    image is resized to ``target_size``, converted to an array, given a leading
    batch axis of size 1 and passed through ``preprocess_input``.

    Files that cannot be loaded as images are skipped with a warning instead of
    aborting the whole load.

    Parameters:
        directory (str): Root directory that contains the class sub-directories.
        target_size (tuple): Size handed to ``load_img``. Defaults to
            ``(224, 224)``, the value that used to be hard-coded.

    Returns:
        tuple: ``(images, labels, class_names)`` as NumPy arrays.
            ``images`` holds one preprocessed single-image batch per file,
            ``labels`` the class directory name of each image (same order), and
            ``class_names`` the distinct class names in order of first appearance.
    """
    import warnings

    images = []
    labels = []
    class_names = []

    for root, dirs, _files in os.walk(directory):
        # Sorting in place also fixes the order in which os.walk descends,
        # so results are reproducible across file systems.
        dirs.sort()
        for class_name in dirs:
            class_directory = os.path.join(root, class_name)
            for filename in sorted(os.listdir(class_directory)):
                file_path = os.path.join(class_directory, filename)
                if not os.path.isfile(file_path):
                    continue

                # load_img raises on unreadable / non-image files rather than
                # returning None, so the failure has to be caught explicitly.
                try:
                    img = tf_image.load_img(file_path, target_size=target_size)
                except (OSError, ValueError) as exc:
                    warnings.warn(f"Skipping unreadable image {file_path}: {exc}")
                    continue

                img = tf_image.img_to_array(img)
                # Add a batch axis so each entry can be fed to the model on its own.
                img = np.expand_dims(img, axis=0)
                img = preprocess_input(img)

                images.append(img)
                labels.append(class_name)
                if class_name not in class_names:
                    class_names.append(class_name)

    return np.array(images), np.array(labels), np.array(class_names)

In [22]:
import re

def remove_prefix_from_list(input_list, regex_pattern):
    """
    Strip a regex-described prefix from every string of a list.

    Every match of ``regex_pattern`` is replaced by the empty string, so the
    pattern should be anchored (e.g. with ``^``) to affect the prefix only.

    Parameters:
        input_list (list): Strings to clean.
        regex_pattern (str): Regular expression matching the prefix to drop.

    Returns:
        list: A new list with the cleaned strings, in the original order.
    """
    stripped = []
    for text in input_list:
        stripped.append(re.sub(regex_pattern, '', text))
    return stripped

In [31]:
# Load the images of our own dataset together with their class folder names.
# `dataset_dir` is the value defined in the configuration cell near the top of
# the notebook; repeating the absolute path here only duplicated that setting.
images, raw_labels, class_names = read_images_from_directory(dataset_dir)

# Strip a leading "<digits>-" prefix from the folder names to get plain labels.
# The raw folder names stay available in `raw_labels`, so re-running this cell
# always starts from the same input.
labels = remove_prefix_from_list(raw_labels, r'^[0-9]+-')

In [32]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import decode_predictions
import numpy as np

# Load the VGG16 model with its ImageNet weights
model = VGG16(weights='imagenet')

# Predict all images in a single call instead of invoking model.predict once
# per image inside a Python loop. Each entry of `images` already carries a
# leading batch axis of size 1, so concatenating along axis 0 yields one batch.
image_pred_labels = []
if len(images) > 0:
    batch = np.concatenate(list(images), axis=0)
    preds = model.predict(batch)

    # decode_predictions returns one list of (id, label, probability) per image
    for decoded_preds in decode_predictions(preds, top=3):
        vgg16_labels = []
        # Print the top predictions
        for _, label, prob in decoded_preds:
            vgg16_labels.append(label)
            print(f"{label}: {prob}")
        image_pred_labels.append(vgg16_labels)
print(image_pred_labels)

kelpie: 0.8813256025314331
coyote: 0.05048876255750656
bluetick: 0.013472615741193295
Ibizan_hound: 0.6635900139808655
toy_terrier: 0.040264565497636795
English_foxhound: 0.03564338758587837
parachute: 0.2394523024559021
wing: 0.12567239999771118
geyser: 0.08932481706142426
wing: 0.4148896038532257
geyser: 0.13202735781669617
volcano: 0.1044885665178299
parachute: 0.19215597212314606
wing: 0.13615572452545166
geyser: 0.1019660010933876
tiger_cat: 0.4895579516887665
tabby: 0.3037868142127991
Egyptian_cat: 0.19396740198135376
tiger_cat: 0.7226136922836304
tabby: 0.22957421839237213
Egyptian_cat: 0.04474480450153351
megalith: 0.3911474645137787
hay: 0.0901685506105423
stone_wall: 0.06855273246765137
hay: 0.2676747143268585
fountain: 0.2384824901819229
ostrich: 0.13512930274009705
teddy: 0.18077440559864044
triceratops: 0.17558324337005615
tench: 0.1536303162574768
pelican: 0.3772205710411072
lionfish: 0.0986860916018486
dowitcher: 0.06982645392417908
sports_car: 0.8325793147087097
racer: 

In [34]:
def find_related_words(word_list, category, threshold=0.15):
    """Return the words of ``word_list`` that WordNet relates to ``category``.

    For every word, the path similarity between each of its synsets and each
    synset of ``category`` is computed. The word counts as related when the
    best of these similarities is strictly greater than ``threshold``.

    Parameters:
        word_list (iterable of str): Candidate words to test.
        category (str): Word naming the category to compare against.
        threshold (float): Exclusive lower bound for the best similarity.
            Defaults to 0.15, the value that used to be hard-coded.

    Returns:
        set: The words whose best similarity exceeds ``threshold``.
    """
    # The category's synsets do not depend on the word: look them up once.
    category_synsets = wn.synsets(category)

    related_words = set()
    for word in word_list:
        # Look the word up once per word rather than once per category synset.
        word_synsets = wn.synsets(word)

        similarities = (
            synset.path_similarity(word_synset)
            for synset in category_synsets
            for word_synset in word_synsets
        )
        # Falsy results (None or 0) are ignored, exactly as before; a word
        # without any usable similarity scores 0.
        max_similarity = max((s for s in similarities if s), default=0)

        if max_similarity > threshold:
            related_words.add(word)

    return related_words

In [None]:
# Demonstration: which of these words does WordNet relate to the category?
word_list = [
    'apple', 'banana', 'carrot', 'potato', 'pear',
    'cars', 'vehicle', 'convertible',
]
category = 'transport'

# Keep only the words that find_related_words considers related to the category
related_words = find_related_words(word_list, category)

# Report the matches, one per line
print("Related words to", category + ":")
for match in related_words:
    print(match)

Related words to transport:
convertible
cars
vehicle


In [36]:
# Assess every prediction against the true label of its image:
# 1 when at least one predicted label is related to the true label, else 0
accessment = []
for label_idx in range(len(labels)):
    label = labels[label_idx]
    matches = find_related_words(image_pred_labels[label_idx], label)
    accessment.append(1 if matches else 0)
print(accessment)

[1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0]


In [38]:
# Combine, per image: true label, predicted labels and assessment flag
summary = [*zip(labels, image_pred_labels, accessment)]
print(summary)

# Share of images whose assessment flag is 1
correct = sum(flag == 1 for flag in accessment)
accuracy = correct / len(labels)
print("Accuracy: ", accuracy)

[('Dog', ['kelpie', 'coyote', 'bluetick'], 1), ('Dog', ['Ibizan_hound', 'toy_terrier', 'English_foxhound'], 1), ('Cloud', ['parachute', 'wing', 'geyser'], 1), ('Cloud', ['wing', 'geyser', 'volcano'], 1), ('Cloud', ['parachute', 'wing', 'geyser'], 1), ('Cat', ['tiger_cat', 'tabby', 'Egyptian_cat'], 1), ('Cat', ['tiger_cat', 'tabby', 'Egyptian_cat'], 1), ('Tree', ['megalith', 'hay', 'stone_wall'], 0), ('Tree', ['hay', 'fountain', 'ostrich'], 1), ('Fisch', ['teddy', 'triceratops', 'tench'], 0), ('Fisch', ['pelican', 'lionfish', 'dowitcher'], 0), ('Car', ['sports_car', 'racer', 'convertible'], 1), ('Car', ['limousine', 'cab', 'sports_car'], 1), ('Computer', ['desktop_computer', 'monitor', 'desk'], 1), ('Computer', ['desktop_computer', 'monitor', 'screen'], 1), ('Train', ['streetcar', 'passenger_car', 'steam_locomotive'], 0), ('Train', ['passenger_car', 'electric_locomotive', 'freight_car'], 0), ('Building', ['flagpole', 'obelisk', 'cab'], 1), ('Building', ['fountain', 'stupa', 'palace'], 1

In [None]:
# Getting synonyms for words
from nltk.corpus import wordnet as wn

# Gather the lemma names of every WordNet synset found for "love"
synonyms = [
    lemma.name()
    for syn in wn.synsets("love")
    for lemma in syn.lemmas()
]

# Show each synonym once
print(set(synonyms))

{'have_sex', 'dearest', 'get_it_on', 'be_intimate', 'passion', 'lovemaking', 'get_laid', 'sleep_with', 'roll_in_the_hay', 'erotic_love', 'honey', 'have_intercourse', 'make_out', 'love', 'know', 'bed', 'do_it', 'dear', 'love_life', 'making_love', 'hump', 'bonk', 'bang', 'have_it_away', 'lie_with', 'eff', 'beloved', 'sleep_together', 'make_love', 'fuck', 'screw', 'sexual_love', 'jazz', 'enjoy', 'have_it_off', 'have_a_go_at_it'}
