In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import pandas as pd

### Enable GPU, (if available)

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)

if gpus:
  try:    
    for gpu in gpus:
      tf.config.experimental.set_visible_devices(gpu, 'GPU')
      tf.config.experimental.set_memory_growth(gpu, True)

    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
  except Exception as e:
    print("EXCEPTION")
    print(e)

[]


### Load metadata from disk and determine image paths. Break data into training and test set.

In [3]:
df_all = pd.read_csv('./data/clothing/fashion.csv').sample(frac = 1)
df_all['ImagePath'] = df_all.apply(lambda row: f'./data/clothing/{row.Category}/{row.Gender}/Images/images_with_product_ids/{row.Image}', axis = 1)
df = df_all[:int(df_all.shape[0] * 0.7)]
df_test = df_all[int(df_all.shape[0] * 0.7):]

### Initialize vectorizer for text embeddings

In [4]:
raw_titles = df['ProductTitle'].tolist()
output_sequence_length = 32
max_tokens = 1024

vectorizer = keras.layers.TextVectorization(
    max_tokens = max_tokens, output_mode = 'int', output_sequence_length = output_sequence_length
)

vectorizer.adapt([title for title in raw_titles])

### Prepare image and text features

In [5]:
def load_image(image_path):
    image_raw = tf.io.read_file(image_path)
    image_tensor = tf.image.decode_jpeg(image_raw, channels = 3)
    image_tensor = tf.image.convert_image_dtype(image_tensor, tf.float32)
    image_resized =  tf.image.resize(image_tensor, [224, 224])
    return image_resized
    
def featureize(df):
    return (
        tf.stack([vectorizer(title) for title in df['ProductTitle'].tolist()], axis = 0), 
        tf.stack([load_image(image_path) for image_path in df['ImagePath'].tolist()], axis = 0), 
        tf.constant((df['Category'] == 'Footwear').tolist())
    )
    
training_titles, training_images, training_isfoot = featureize(df)

### Define multi-input model

In [6]:
sentence_input = keras.Input(shape = (None,), dtype = tf.int32)
x = keras.layers.Embedding(len(vectorizer.get_vocabulary()), output_sequence_length)(sentence_input)
x = keras.layers.GlobalAveragePooling1D()(x)
sentence_output = keras.layers.Dense(32, activation = 'relu')(x)

image_input = keras.Input(shape = (224, 224, 3))
x = keras.layers.Rescaling(1.0 / 255)(image_input)
x = keras.layers.Conv2D(192, kernel_size = 2, activation = 'relu')(x)
x = keras.layers.MaxPooling2D(pool_size = (2, 2))(x)
x = keras.layers.Conv2D(192, kernel_size = 2, activation = 'relu')(x)
x = keras.layers.MaxPooling2D()(x)
x = keras.layers.Flatten()(x)
image_output = keras.layers.Dense(32, activation = 'relu')(x)

merged = keras.layers.concatenate([sentence_output, image_output], axis = -1)
output = keras.layers.Dense(1, activation = 'sigmoid')(merged)

model = keras.models.Model([sentence_input, image_input], output)

In [None]:
### Compile and train model

In [7]:
model.compile(optimizer = 'adam', loss = 'crossentropy')
model.fit([training_titles, training_images], training_isfoot, epochs = 4, batch_size = 4)

Epoch 1/4
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 97ms/step - loss: 0.6032
Epoch 2/4
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 99ms/step - loss: 0.0263
Epoch 3/4
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 99ms/step - loss: 0.0045
Epoch 4/4
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 99ms/step - loss: 0.0017


<keras.src.callbacks.history.History at 0x31a68d8a0>

### Test model against test set

In [10]:
test_titles, test_images, test_isfoot = featureize(df_test)

dataset = tf.data.Dataset.from_tensor_slices((test_titles, test_images, test_isfoot)).batch(10)
test_results = 0
i = 0

for batch_titles, batch_images, batch_isfoot in dataset:
    batch_results = model((batch_titles, batch_images), training = False) > 0.5
    test_results += tf.math.count_nonzero(tf.reshape(batch_results, [-1]) == batch_isfoot)
    i += batch_results.shape[0]

(test_results / i).numpy().item()

2025-02-08 08:12:38.444967: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


0.9988532110091743