In [1]:
import pandas as pd
rway_img_df_ok = pd.read_csv("collection_images_download.csv")
rway_img_df = rway_img_df_ok.copy()

# EXTRACT BRAND FROM IMAGE NAME
# Creazione delle nuove colonne
rway_img_df['brand'] = rway_img_df['image_name'].str.lower()
rway_img_df['brand'] = rway_img_df['brand'].str.split('couture').str[0]
rway_img_df['brand'] = rway_img_df['brand'].str.split('ready-to-wear').str[0]
rway_img_df['brand'] = rway_img_df['brand'].str.split('menswear').str[0]
rway_img_df['brand'] = rway_img_df['brand'].str.replace("-", " ")
rway_img_df['brand'] = rway_img_df['brand'].str.strip()

In [2]:
# subset rway_img_df where brand cointains versage,hermes
brand_keywords = ['hermess','versace','dior','chanel','rick owens', 'givenchy','mugler']
subset = rway_img_df[rway_img_df['brand'].isin(brand_keywords)]

In [3]:
subset['brand'].unique()

array(['mugler', 'chanel', 'dior', 'rick owens', 'givenchy', 'versace'],
      dtype=object)

In [5]:
# ... (your code to load the data and preprocess it)
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np

img_height, img_width = 224, 224

# Define the preprocess_image function to load, resize, and normalize the pixel values of the images.
def preprocess_image(image_path):
    image_path = './imgs/images/images/' + image_path  # Add the path prefix
    img = image.load_img(image_path, target_size=(img_height, img_width))
    x = image.img_to_array(img)
    x = np.expand_dims(x,axis=0)
    x = preprocess_input(x)
    return x

In [8]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Dropout, Input, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import Adam

# Define the custom AttentionBlock
class AttentionBlock(tf.keras.layers.Layer):
    def _init_(self, in_features_l, in_features_g, attn_features, up_factor, normalize_attn=True):
        super(AttentionBlock, self)._init_()
        self.up_factor = up_factor
        self.normalize_attn = normalize_attn
        self.W_l = tf.keras.layers.Conv2D(filters=attn_features, kernel_size=1, padding='same', activation='relu')
        self.W_g = tf.keras.layers.Conv2D(filters=attn_features, kernel_size=1, padding='same', activation='relu')
        self.phi = tf.keras.layers.Conv2D(filters=1, kernel_size=1, padding='same')

    def call(self, l, g):
        N, H, W, C = l.shape
        l_ = self.W_l(l)
        g_ = self.W_g(g)
        if self.up_factor > 1:
            g_ = tf.image.resize(g_, size=(H, W), method='bilinear')
        c = self.phi(tf.nn.relu(l_ + g_))  # batch_size x 1 x W x H

        # compute attn map
        if self.normalize_attn:
            a = tf.nn.softmax(tf.reshape(c, (N, 1, -1)), axis=2)
            a = tf.reshape(a, (N, 1, H, W))
        else:
            a = tf.sigmoid(c)
        # re-weight the local feature
        f = tf.multiply(a, l)  # batch_size x C x W x H
        if self.normalize_attn:
            output = tf.reduce_sum(f, axis=[2, 3])  # weighted sum
        else:
            output = tf.reduce_mean(f, axis=[2, 3])  # global average pooling
        return a, output

# Define the custom AttnVGG model
class AttnVGG(Model):
    def _init_(self, num_classes, normalize_attn=False, dropout=None):
        super(AttnVGG, self)._init_()
        self.vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
        self.conv_block1 = tf.keras.Sequential(self.vgg16.layers[:6])
        self.conv_block2 = tf.keras.Sequential(self.vgg16.layers[7:13])
        self.conv_block3 = tf.keras.Sequential(self.vgg16.layers[14:20])
        self.conv_block4 = tf.keras.Sequential(self.vgg16.layers[21:27])
        self.conv_block5 = tf.keras.Sequential(self.vgg16.layers[28:34])
        self.pool = tf.keras.layers.MaxPool2D(pool_size=(7, 7), strides=1)
        self.dpt = None
        if dropout is not None:
            self.dpt = Dropout(dropout)
        self.cls = Dense(units=num_classes)

        # initialize the attention blocks defined above
        self.attn1 = AttentionBlock(256, 512, 256, 4, normalize_attn=normalize_attn)
        self.attn2 = AttentionBlock(512, 512, 256, 2, normalize_attn=normalize_attn)

    def call(self, x):
        block1 = self.conv_block1(x)       # /1
        block2 = self.conv_block2(block1)  # /2
        block3 = self.conv_block3(block2)  # /4
        block4 = self.conv_block4(block3)  # /8
        block5 = self.conv_block5(block4)  # /16
        N, H, W, C = block5.shape

        g = self.pool(block5)
        g = Flatten()(g)
        a1, g1 = self.attn1(block3, block5)
        a2, g2 = self.attn2(block4, block5)
        g_hat = tf.concat((g, g1, g2), axis=1)  # batch_size x C
        if self.dpt is not None:
            g_hat = self.dpt(g_hat)
        out = self.cls(g_hat)

        return [out, a1, a2]

# Load the image data and labels
# Split the data into training and validation sets
X_train_paths, X_val_paths, y_train, y_val = train_test_split(
    subset['image_path'],
    subset['brand'],
    test_size=0.2,
    random_state=1234
)

# Define image dimensions (img_height and img_width) to resize the images during preprocessing.
img_height, img_width = 224, 224

# Map the brand labels in y_train and y_val to their corresponding integer labels using the brand_to_label dictionary.
brand_to_label = {brand: i for i, brand in enumerate(np.unique(subset['brand']))}

# Map brand labels to integer labels
y_train = np.array([brand_to_label[brand] for brand in y_train])
y_val = np.array([brand_to_label[brand] for brand in y_val])

# Calculate the number of classes (num_classes) based on the unique labels.
num_classes = len(brand_to_label)

# Preprocess the images in X_train and X_val by applying the preprocess_image function to each image path.
X_train = np.array([preprocess_image(path) for path in X_train_paths])
X_val = np.array([preprocess_image(path) for path in X_val_paths])

# Convert the integer labels in y_train and y_val to one-hot encoded vectors using tf.keras.utils.to_categorical.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes=num_classes)

# Create the AttnVGG model
normalize_attn = False  # Set to True if you want to normalize attention weights
dropout = 0.5  # Set to None if you don't want dropout, otherwise set to a dropout rate

# Input layer
input_shape = (img_height, img_width, 3)
inputs = Input(shape=input_shape)

# Create the AttnVGG model
model = AttnVGG(num_classes=num_classes, normalize_attn=normalize_attn, dropout=dropout)(inputs)

# Define the model with the input and output
model = Model(inputs=inputs, outputs=model[0])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.1), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model using the preprocessed data
history = model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_val, y_val))

TypeError: ('Keyword argument not understood:', 'num_classes')