In [475]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import load_model


## 1- Preprocessing the data

In [429]:
data = pd.read_csv('all_data.csv',index_col=0)

In [430]:
data.head()

Unnamed: 0,image_link,type,color,combi_image_link,combi_type,combi_color,match
0,A-Mango-images/A0,Shirts,Ecru,B-Mango-images/B0,Skirts,Ecru,1
1,A-Mango-images/A1,Shirts,Ecru,B-Mango-images/B1,Bags,Brown,1
2,A-Mango-images/A2,Shirts,Ecru,B-Mango-images/B2,Jewellery,Gold,1
3,A-Mango-images/A3,Shirts,Pink,B-Mango-images/B3,Trousers,Pink,1
4,A-Mango-images/A4,Shirts,Pink,B-Mango-images/B4,Shoes,Silver,1


In [431]:
new_data = pd.read_csv('all_data.csv',index_col=0)
new_data = new_data.rename(columns={'image_link': 'combi_image_link', 'type':'combi_type','color':'combi_color',
             'combi_image_link':'image_link','combi_type':'type','combi_color':'color'})

In [432]:
new_data.head()

Unnamed: 0,combi_image_link,combi_type,combi_color,image_link,type,color,match
0,A-Mango-images/A0,Shirts,Ecru,B-Mango-images/B0,Skirts,Ecru,1
1,A-Mango-images/A1,Shirts,Ecru,B-Mango-images/B1,Bags,Brown,1
2,A-Mango-images/A2,Shirts,Ecru,B-Mango-images/B2,Jewellery,Gold,1
3,A-Mango-images/A3,Shirts,Pink,B-Mango-images/B3,Trousers,Pink,1
4,A-Mango-images/A4,Shirts,Pink,B-Mango-images/B4,Shoes,Silver,1


In [433]:
data = pd.concat([data,new_data])
data.reset_index(drop=True, inplace=True)

In [434]:
data.head()

Unnamed: 0,image_link,type,color,combi_image_link,combi_type,combi_color,match
0,A-Mango-images/A0,Shirts,Ecru,B-Mango-images/B0,Skirts,Ecru,1
1,A-Mango-images/A1,Shirts,Ecru,B-Mango-images/B1,Bags,Brown,1
2,A-Mango-images/A2,Shirts,Ecru,B-Mango-images/B2,Jewellery,Gold,1
3,A-Mango-images/A3,Shirts,Pink,B-Mango-images/B3,Trousers,Pink,1
4,A-Mango-images/A4,Shirts,Pink,B-Mango-images/B4,Shoes,Silver,1


In [435]:
data['type_indices'] = pd.factorize(data['type'])[0]
data['color_indices'] = pd.factorize(data['color'])[0]
data['combi_type_indices'] = pd.factorize(data['combi_type'])[0]
data['combi_color_indices'] = pd.factorize(data['combi_color'])[0]

In [436]:
train, test = train_test_split(data, test_size=0.2, random_state=42)

## 2- Image Preprocessing

In [437]:
def preprocess_image(image_path):
    image = tf.io.read_file(image_path + '.jpg')
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])  # Resizing the image to 224x224
    image = image / 255.0  # Normalizing to [0,1]
    return image

def process_row(row):
    # Process images
    image = preprocess_image(row['image_link'])
    combi_image = preprocess_image(row['combi_image_link'])
    
    type = tf.one_hot(row['type_indices'], depth=len(data['type'].unique()))
    combi_type = tf.one_hot(row['combi_type_indices'], depth=len(data['combi_type'].unique()))
    color = tf.one_hot(row['color_indices'], depth=len(data['color'].unique()))
    combi_color = tf.one_hot(row['combi_color_indices'], depth=len(data['combi_color'].unique()))
    
    # Combine all features
    features = (image, combi_image, type, combi_type, color, combi_color)
    label = row['match']
    
    return features, label

def tf_dataset(df, batch_size=32):
    ds = tf.data.Dataset.from_tensor_slices(dict(df))
    ds = ds.map(process_row, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

# Create TensorFlow datasets
train_ds = tf_dataset(train)
test_ds = tf_dataset(test)

## 3- Model Architecture

In [438]:
# Base model for feature extraction
base_model = tf.keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze the base model

# Model architecture
def build_model():
    # Inputs
    input_image1 = layers.Input(shape=(224, 224, 3))
    input_image2 = layers.Input(shape=(224, 224, 3))
    input_category1 = layers.Input(shape=(len(data['type'].unique()),))
    input_category2 = layers.Input(shape=(len(data['combi_type'].unique()),))
    input_color1 = layers.Input(shape=(len(data['color'].unique()),))
    input_color2 = layers.Input(shape=(len(data['combi_color'].unique()),))

    # Image feature extraction
    image_feature_extractor = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
    ])

    image1_features = image_feature_extractor(input_image1)
    image2_features = image_feature_extractor(input_image2)
    
    # Combine all features
    combined_features = layers.concatenate([image1_features, image2_features, input_category1, input_category2, input_color1, input_color2])
    
    # Fully connected layers
    x = layers.Dense(512, activation='relu')(combined_features)
    x = layers.Dropout(0.5)(x)
    output = layers.Dense(1, activation='sigmoid')(x)
    
    # Model
    model = models.Model(inputs=[input_image1, input_image2, input_category1, input_category2, input_color1, input_color2], outputs=output)
    
    # Compile
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

model = build_model()
model.summary()


## 4- Training

In [439]:
history = model.fit(train_ds, validation_data=test_ds, epochs=7)

Epoch 1/7
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m904s[0m 3s/step - accuracy: 0.7285 - loss: 0.6120 - val_accuracy: 0.9485 - val_loss: 0.1573
Epoch 2/7
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m996s[0m 3s/step - accuracy: 0.9406 - loss: 0.1763 - val_accuracy: 0.9676 - val_loss: 0.1215
Epoch 3/7
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1109s[0m 3s/step - accuracy: 0.9552 - loss: 0.1283 - val_accuracy: 0.9776 - val_loss: 0.0740
Epoch 4/7
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1000s[0m 3s/step - accuracy: 0.9639 - loss: 0.1030 - val_accuracy: 0.9706 - val_loss: 0.0733
Epoch 5/7
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2359s[0m 7s/step - accuracy: 0.9656 - loss: 0.0993 - val_accuracy: 0.9857 - val_loss: 0.0641
Epoch 6/7
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m942s[0m 3s/step - accuracy: 0.9717 - loss: 0.0866 - val_accuracy: 0.9742 - val_loss: 0.0858
Epoch 7/7
[1m340/340[0m

In [440]:
model.evaluate(test_ds)


[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 2s/step - accuracy: 0.9872 - loss: 0.0534


[0.05269061401486397, 0.9871228933334351]

### Save & Load Models

In [487]:
model.save_weights('my_model.weights.h5')

In [488]:
loaded_model = build_model()
loaded_model.load_weights('my_model.weights.h5')

  trackable.load_own_variables(weights_store.get(inner_path))


## 5- Prediction

In [250]:
test.to_csv('test.csv')

In [251]:
pred_subset = test.iloc[:5] 
pred_subset.drop(columns='match',inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pred_subset.drop(columns='match',inplace=True)


In [252]:
def process_row_for_prediction(row):
    # Process images
    image = preprocess_image(row['image_link'])
    combi_image = preprocess_image(row['combi_image_link'])
    
    # Recalculate unique values or pass them as parameters if this is computationally expensive
    type = tf.one_hot(row['type_indices'], depth=len(data['type'].unique()))  
    combi_type = tf.one_hot(row['combi_type_indices'], depth=len(data['combi_type'].unique()))
    color = tf.one_hot(row['color_indices'], depth=len(data['color'].unique()))
    combi_color = tf.one_hot(row['combi_color_indices'], depth=len(data['combi_color'].unique()))
    
    # Combine all features
    features = (image, combi_image, type, combi_type, color, combi_color)
    #labels = row['match']
    
    return features,None

In [253]:
def tf_dataset_for_prediction(df, batch_size=32):
    ds = tf.data.Dataset.from_tensor_slices(dict(df))
    ds = ds.map(process_row_for_prediction, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

In [254]:
predict_ds = tf_dataset_for_prediction(pred_subset)

In [490]:
# Make predictions
predictions = model.predict(predict_ds)

# Convert predictions to a binary outcome based on a threshold
predicted_matches = (predictions > 0.5).astype(int)

# Attach these predictions to your DataFrame (Optional)
pred_subset['predicted_match'] = predicted_matches.flatten()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


In [491]:
pred_subset

Unnamed: 0,image_link,type,color,combi_image_link,combi_type,combi_color,type_indices,color_indices,combi_type_indices,combi_color_indices,predicted_match
1061,A-Mango-images/A1061,Dresses,Burgundy,B-Mango-images/B1061,Shoes,White,3,18,4,9,1
5890,A-Zara-images/A2097,Tops,Grey,B-Zara-images/B2097,Trousers,Red,6,6,3,17,0
3562,A-Mango-images/A3562,Jeans,Charcoal,B-Mango-images/B3562,Jackets,Brown,18,21,12,1,1
4660,A-Zara-images/A867,Sweaters,Ecru,B-Zara-images/B867,Shorts,Black,5,0,10,6,0
11467,B-Zara-images/B881,Skirts,Red,A-Zara-images/A881,Blazers,Red,17,13,9,17,0


## 6- Wardrobe

In [358]:
bottom = ['Trousers','Skirts','Jeans','Joggers','Shorts','Jumpsuits','Dresses and jumpsuits','Dresses']
top = ['Tops','Shirts','Sweaters','Dresses','Tshirts','Hoodie','Cardigans','Sweaters and cardigans','Blouses and shirts','Blouses','Sweatshirts','Dresses and jumpsuits','Jumpsuits']
jackets = ['Jackets', 'Gilets','Trench coats', 'Quilted coats/Padded', 'Blazers', 'Suit jackets',
       'Coats']

In [543]:
wardrobe =  test.iloc[:400][['image_link','type','color']] 
#wardrobe_combi = test.iloc[20:60][['combi_image_link','combi_type','combi_color']] 

In [544]:
wardrobe
wardrobe.to_csv('wardrobe.csv',index=False)

In [381]:
def make_combins(selected_row, other_rows):
    combin = pd.DataFrame()
    for idx, row in other_rows.iterrows():  # Iterate over rows of the Serie 
        if row['type'] == selected_row['type']:
            continue
        bottom = ['Trousers','Skirts','Jeans','Joggers','Shorts','Jumpsuits','Dresses and jumpsuits']
        top = ['Tops','Shirts','Sweaters','Dresses','Tshirts','Hoodie','Cardigans','Sweaters and cardigans','Blouses and shirts','Blouses','Sweatshirts','Dresses and jumpsuits','Jumpsuits']
        if row['type'] in bottom and selected_row['type'] in bottom:
            continue
        if row['type'] in top and selected_row['type'] in top:
            continue
        combination = {
            'image_link': selected_row['image_link'],
            'type': selected_row['type'],
            'color': selected_row['color'],
            'combi_image_link': row['image_link'],
            'combi_type':  row['type'],
            'combi_color': row['color']
        }  
        combin = pd.concat([combin, pd.DataFrame([combination])], ignore_index=True)
    return combin

In [448]:
def find_pairs(selected_row, other_rows):
    combins = make_combins(selected_row, other_rows)
    combins['type_indices'] = pd.factorize(combins['type'])[0]
    combins['color_indices'] = pd.factorize(combins['color'])[0]
    combins['combi_type_indices'] = pd.factorize(combins['combi_type'])[0]
    combins['combi_color_indices'] = pd.factorize(combins['combi_color'])[0]
    predict_ds = tf_dataset_for_prediction(combins)
    predictions = model.predict(predict_ds)

    predicted_matches = (predictions > 0.90).astype(int)
    combins['predicted_match'] = predictions

    matches = combins[predicted_matches.flatten() == 1]
    return matches

In [449]:
matches = find_pairs(wardrobe.iloc[7], wardrobe)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step


In [450]:
matches[['image_link','type','combi_image_link','combi_type','predicted_match']]

Unnamed: 0,image_link,type,combi_image_link,combi_type,predicted_match
1,A-Mango-images/A2308,Gilets,B-Zara-images/B1965,Skirts,0.918652
2,A-Mango-images/A2308,Gilets,A-Mango-images/A99,Shirts,0.998644
4,A-Mango-images/A2308,Gilets,B-Mango-images/B1252,Bags,0.988635
5,A-Mango-images/A2308,Gilets,B-Zara-images/B2595,Skirts,0.998094
6,A-Mango-images/A2308,Gilets,B-Mango-images/B2004,Shoes,0.97688
7,A-Mango-images/A2308,Gilets,A-Mango-images/A353,Shirts,0.996054
8,A-Mango-images/A2308,Gilets,B-Zara-images/B1285,Skirts,0.998206
9,A-Mango-images/A2308,Gilets,B-Mango-images/B3065,Shoes,0.985019
11,A-Mango-images/A2308,Gilets,A-Mango-images/A2483,Suit jackets,0.990936
12,A-Mango-images/A2308,Gilets,A-Mango-images/A2144,Cardigans,0.993714


In [308]:
from IPython.display import display, Image, HTML

def show_matches(matches):
    for index, row in matches.iterrows():
        display(pd.DataFrame([{
            'Image Link': row['image_link'],
            'Type': row['type'],
            'Combi Image Link': row['combi_image_link'],
            'Combi Type': row['combi_type'],
            'Predicted Match': row['predicted_match']
        }]))
        display(HTML(f"<img src='{row['image_link'] + '.jpg'}' style='width:200px; display:inline-block;'><img src='{row['combi_image_link'] + '.jpg'}' style='width:200px; display:inline-block;'>"))
        print("\n")


In [None]:
show_matches(matches[['image_link', 'type', 'combi_image_link', 'combi_type', 'predicted_match']])

In [146]:
def make_all_combins(item_df):
    all_combins = pd.DataFrame()
    for index, selected_row in item_df.iterrows():
        other_rows = item_df.drop(index)  # Exclude the selected row from other_rows
        combin = make_combins(selected_row, other_rows)
        all_combins = all_combins.append(combin, ignore_index=True)
    return all_combins

In [541]:
save_list = pd.DataFrame({'combin_id':[]})

In [542]:
save_list.to_csv('my library/save_list.csv',index=False)