# Dataset

In [None]:
import os

In [None]:
# List the entries of /content/sample_data; the returned list is shown as the cell output.
os.listdir("/content/sample_data")

['anscombe.json',
 'README.md',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'california_housing_test.csv',
 'mnist_test.csv']

In [None]:
from keras.layers import Dense, Flatten, GlobalMaxPooling2D
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.preprocessing import image
from keras.models import Sequential, Model
import numpy as np
from numpy.linalg import norm
import os
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import json
%matplotlib inline


In [None]:
# ImageNet-pretrained ResNet50 without its classification head, for 224x224 RGB input.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Mark every backbone layer as non-trainable.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Feature extractor: the ResNet50 backbone followed by global max pooling.
model = Sequential()
model.add(base_model)
model.add(GlobalMaxPooling2D())
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_max_pooling2d (Glob  (None, 2048)              0         
 alMaxPooling2D)                                                 
                                                                 
Total params: 23587712 (89.98 MB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 23587712 (89.98 MB)
_________________________________________________________________


In [None]:
def get_embedding(img_path):
    """Return the L2-normalised feature vector of one image.

    The image is loaded at 224x224, passed through ``preprocess_input`` and
    the module-level ``model``, flattened, and scaled to unit length.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        1-D numpy array holding the flattened model output divided by its
        L2 norm. If the norm is zero the unscaled (all-zero) vector is
        returned instead of dividing by zero.
    """
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    # ResNet50's preprocess_input works on raw 0-255 pixel values (it
    # zero-centres each channel without scaling), so the pixels must NOT be
    # divided by 255 beforehand.
    processed_img = preprocess_input(expanded_img_array)
    # verbose=0: one progress bar per image would flood the notebook output.
    embedding = model.predict(processed_img, verbose=0).flatten()
    length = norm(embedding)
    if length == 0:
        return embedding
    return embedding / length

# Loading Dataset from Kaggle

In [None]:
# %pip installs into the environment of the running kernel.
%pip install -q kaggle

In [None]:
from google.colab import files

# Bind the result to a name: as a bare last expression the returned dict
# (file name -> raw bytes, i.e. the Kaggle API key) is echoed into the
# notebook's saved output.
uploaded = files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"hbilalzia","key":"<REDACTED>"}'}

In [None]:
# -p: do not fail when ~/.kaggle already exists (e.g. when the cell is re-run).
! mkdir -p ~/.kaggle

In [None]:
# Copy kaggle.json from the working directory into ~/.kaggle/.
! cp kaggle.json ~/.kaggle/

In [None]:
# Mode 600: only the file owner may read or write the credentials file.
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the dnepozitek/polyvore-outfits dataset archive with the Kaggle CLI.
! kaggle datasets download dnepozitek/polyvore-outfits

Dataset URL: https://www.kaggle.com/datasets/dnepozitek/polyvore-outfits
License(s): unknown
Downloading polyvore-outfits.zip to /content
100% 6.21G/6.21G [03:45<00:00, 35.1MB/s]
100% 6.21G/6.21G [03:45<00:00, 29.6MB/s]


In [None]:
# -q: suppress the per-file "inflating" log that otherwise fills the cell output.
# -o: overwrite existing files without prompting, so a re-run does not stall.
! unzip -q -o /content/polyvore-outfits.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: polyvore_outfits/images/90489434.jpg  
  inflating: polyvore_outfits/images/90492213.jpg  
  inflating: polyvore_outfits/images/90492494.jpg  
  inflating: polyvore_outfits/images/90496029.jpg  
  inflating: polyvore_outfits/images/90497446.jpg  
  inflating: polyvore_outfits/images/90518135.jpg  
  inflating: polyvore_outfits/images/90518552.jpg  
  inflating: polyvore_outfits/images/90521419.jpg  
  inflating: polyvore_outfits/images/90524054.jpg  
  inflating: polyvore_outfits/images/90531717.jpg  
  inflating: polyvore_outfits/images/90537718.jpg  
  inflating: polyvore_outfits/images/90539482.jpg  
  inflating: polyvore_outfits/images/90540596.jpg  
  inflating: polyvore_outfits/images/90541187.jpg  
  inflating: polyvore_outfits/images/90543355.jpg  
  inflating: polyvore_outfits/images/90543409.jpg  
  inflating: polyvore_outfits/images/90543514.jpg  
  inflating: polyvore_outfits/images/90543898.jpg  

In [None]:
def josn_to_data_frame(json_list):
    """Tabulate the three-item outfits contained in ``json_list``.

    Args:
        json_list: Iterable of outfit dicts. Each dict must provide a
            "set_id" and an "items" list whose elements carry an "item_id".

    Returns:
        pandas DataFrame with the columns ``set_id``, ``Item1``, ``Item2``
        and ``Item3``: one row per outfit that has exactly three items.
        Outfits with any other number of items are skipped. The columns are
        present even when no outfit qualifies.
    """
    columns = ["set_id", "Item1", "Item2", "Item3"]
    rows = []
    # Iterate the argument itself rather than a notebook-level global.
    for entry in json_list:
        items = entry["items"]
        if len(items) != 3:
            continue
        item_ids = [item["item_id"] for item in items]
        rows.append({
            "set_id": entry["set_id"],
            "Item1": item_ids[0],
            "Item2": item_ids[1],
            "Item3": item_ids[2],
        })
    return pd.DataFrame(rows, columns=columns)

In [None]:
def txt_to_data_frame(file_path):
    """Read a whitespace-separated compatibility file into a DataFrame.

    Only lines with exactly four tokens are used. The first token is parsed
    as the integer score; the set id is the part of the second token before
    its first underscore.

    NOTE(review): only the second token decides ``set_id``; the third and
    fourth tokens are ignored. If a line can combine items from different
    sets, this loses that information - confirm against the file format.

    Args:
        file_path: Path of the compatibility text file.

    Returns:
        pandas DataFrame with the columns ``score`` (int) and ``set_id``
        (str), one row per accepted line.
    """
    scores = []
    items = []
    # Open the path passed in rather than a notebook-level global.
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split()
            if len(parts) == 4:
                scores.append(int(parts[0]))
                items.append(parts[1].split("_")[0])
    return pd.DataFrame({'score': scores, 'set_id': items})



# Test Data Frame

In [None]:
# Parse the disjoint test split and tabulate its outfits.
test_json_path = '/content/polyvore_outfits/disjoint/test.json'
with open(test_json_path, 'r') as json_file:
    data = json.load(json_file)
test_df1 = josn_to_data_frame(data)
test_df1


Unnamed: 0,set_id,Item1,Item2,Item3
0,147517821,95204606,45445077,42472948
1,179865492,151108044,149982827,150956393
2,224163225,212224021,213037964,213850329
3,223120194,209675642,212039912,212038799
4,223258844,209154036,195503105,209178276
...,...,...,...,...
2304,204302632,174852342,176834459,178542721
2305,218343341,196891288,203417954,203346506
2306,211959947,184292036,189872176,191630851
2307,167810779,121417485,83573814,71232234


In [None]:
# Read the test split's compatibility file into a (score, set_id) table and display it.
compatibility_file = '/content/polyvore_outfits/disjoint/compatibility_test.txt'
test_df2 = txt_to_data_frame(compatibility_file)
test_df2

Unnamed: 0,score,set_id
0,1,147517821
1,1,179865492
2,1,224163225
3,1,223120194
4,1,223258844
...,...,...
4613,0,199030327
4614,0,154745233
4615,0,85586321
4616,0,72403647


In [None]:
# Inner-join the outfit items with their compatibility scores on set_id.
test_df = test_df1.merge(test_df2, how='inner', on='set_id')
test_df

Unnamed: 0,set_id,Item1,Item2,Item3,score
0,147517821,95204606,45445077,42472948,1
1,179865492,151108044,149982827,150956393,1
2,224163225,212224021,213037964,213850329,1
3,223120194,209675642,212039912,212038799,1
4,223258844,209154036,195503105,209178276,1
...,...,...,...,...,...
2573,204302632,174852342,176834459,178542721,0
2574,218343341,196891288,203417954,203346506,1
2575,211959947,184292036,189872176,191630851,1
2576,167810779,121417485,83573814,71232234,1


# Train Data Frame

In [None]:
# Parse the disjoint train split and tabulate its outfits.
train_json_path = '/content/polyvore_outfits/disjoint/train.json'
with open(train_json_path, 'r') as json_file:
    data = json.load(json_file)
train_df1 = josn_to_data_frame(data)
train_df1

Unnamed: 0,set_id,Item1,Item2,Item3
0,219816430,205039314,205037505,202777492
1,222964080,209609138,210544460,174100841
2,187821847,114654675,146875636,158199633
3,170897888,130087763,127505393,129912276
4,222887917,196463164,194164707,210310276
...,...,...,...,...
1837,184119558,148121318,154298042,155186790
1838,221702059,209070427,209584693,207794259
1839,223178027,206237344,169025514,200727688
1840,217522867,195691336,195303990,200104943


In [None]:
# Read the train split's compatibility file into a (score, set_id) table and display it.
compatibility_file = '/content/polyvore_outfits/disjoint/compatibility_train.txt'
train_df2 = txt_to_data_frame(compatibility_file)
train_df2

Unnamed: 0,score,set_id
0,1,219816430
1,1,222964080
2,1,187821847
3,1,170897888
4,1,222887917
...,...,...
3679,0,194769623
3680,0,219482753
3681,0,224599250
3682,0,198682256


In [None]:
# Inner-join the outfit items with their compatibility scores on set_id.
# NOTE(review): the displayed result ends with the same item triple labelled
# both 1 and 0 - check whether set_id alone is a sufficient join key.
train_df = train_df1.merge(train_df2, how='inner', on='set_id')
train_df

Unnamed: 0,set_id,Item1,Item2,Item3,score
0,219816430,205039314,205037505,202777492,1
1,222964080,209609138,210544460,174100841,1
2,187821847,114654675,146875636,158199633,1
3,170897888,130087763,127505393,129912276,1
4,222887917,196463164,194164707,210310276,1
...,...,...,...,...,...
2004,221702059,209070427,209584693,207794259,1
2005,223178027,206237344,169025514,200727688,1
2006,217522867,195691336,195303990,200104943,1
2007,217522867,195691336,195303990,200104943,0


# Get Embeddings

In [None]:
# Attach one '<column>_embedding' column per item column of train_df.
image_dir = "/content/polyvore_outfits/images"
for item_col in ["Item1", "Item2", "Item3"]:
    embeddings = []
    for item_id in train_df[item_col]:
        image_path = os.path.join(image_dir, f"{item_id}.jpg")
        if not os.path.exists(image_path):
            # Missing image: report it and keep the row aligned with a NaN vector.
            print(f"Image {image_path} not found.")
            embeddings.append(np.full((2048,), np.nan))
            continue
        embeddings.append(get_embedding(image_path))
    train_df[item_col + '_embedding'] = embeddings

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [None]:
# Attach one '<column>_embedding' column per item column of test_df.
image_dir = "/content/polyvore_outfits/images"
for item_col in ["Item1", "Item2", "Item3"]:
    embeddings = []
    for item_id in test_df[item_col]:
        image_path = os.path.join(image_dir, f"{item_id}.jpg")
        if not os.path.exists(image_path):
            # Missing image: report it and keep the row aligned with a NaN vector.
            print(f"Image {image_path} not found.")
            embeddings.append(np.full((2048,), np.nan))
            continue
        embeddings.append(get_embedding(image_path))
    test_df[item_col + '_embedding'] = embeddings

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [None]:
# Turn each per-row embedding column into one 2-D array; scores become 1-D label arrays.
train_item_1 = np.stack(train_df['Item1_embedding'].to_numpy())
train_item_2 = np.stack(train_df['Item2_embedding'].to_numpy())
train_item_3 = np.stack(train_df['Item3_embedding'].to_numpy())
train_score = train_df['score'].to_numpy()

test_item_1 = np.stack(test_df['Item1_embedding'].to_numpy())
test_item_2 = np.stack(test_df['Item2_embedding'].to_numpy())
test_item_3 = np.stack(test_df['Item3_embedding'].to_numpy())
test_score = test_df['score'].to_numpy()

# Outfit Model

In [None]:
from keras.layers import Input, Dense, Concatenate, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam

In [None]:
# Length of one embedding vector (second axis of the stacked array); used as the Input size below.
input_shape = test_item_1.shape[1]
print(input_shape)

2048


In [None]:
def _embedding_branch(branch_name):
    """Create one item branch: named Input -> Dense(1024, relu) -> BatchNormalization."""
    branch_input = Input(shape=(input_shape,), name=branch_name)
    branch_features = Dense(1024, activation='relu')(branch_input)
    branch_features = BatchNormalization()(branch_features)
    return branch_input, branch_features


# One branch per outfit item, created in the order topwear, bottomwear, footwear.
topwear_input, topwear_dense = _embedding_branch('topwear_input')
bottomwear_input, bottomwear_dense = _embedding_branch('bottomwear_input')
footwear_input, footwear_dense = _embedding_branch('footwear_input')

# Join the three branch outputs into one feature vector.
concatenated = Concatenate()([topwear_dense, bottomwear_dense, footwear_dense])

# Shrinking stack of Dense -> BatchNormalization -> Dropout(0.5) stages.
x = concatenated
for units in (4096, 2048, 1024, 512, 256):
    x = Dense(units, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

# Single sigmoid unit as the output layer.
output = Dense(1, activation='sigmoid')(x)

In [None]:
# Wrap the graph as a three-input (topwear, bottomwear, footwear), single-output model.
Outfit_model = Model(inputs=[topwear_input, bottomwear_input, footwear_input], outputs=output)

In [None]:
# Print the layer-by-layer summary of the outfit model.
Outfit_model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 topwear_input (InputLayer)  [(None, 2048)]               0         []                            
                                                                                                  
 bottomwear_input (InputLay  [(None, 2048)]               0         []                            
 er)                                                                                              
                                                                                                  
 footwear_input (InputLayer  [(None, 2048)]               0         []                            
 )                                                                                                
                                                                                            

In [None]:
# Adam takes a scalar learning rate (or a schedule object), not a list.
Outfit_model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Stop once val_loss has not improved for 3 epochs and restore the best weights.
# It only takes effect when passed to fit() through `callbacks`.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [None]:
# Train for 50 epochs with batches of 16; the test arrays serve as validation data.
train_inputs = [train_item_1, train_item_2, train_item_3]
validation_inputs = [test_item_1, test_item_2, test_item_3]
history = Outfit_model.fit(
    x=train_inputs,
    y=train_score,
    validation_data=(validation_inputs, test_score),
    batch_size=16,
    epochs=50,
    verbose=1,
    # callbacks=[early_stopping]  (left disabled, as before)
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Embed three sample photos and add a leading batch axis to each vector.
top = np.expand_dims(get_embedding('/content/WhatsApp Image 2024-05-18 at 11.41.15 AM.jpeg'), axis=0)
bottom = np.expand_dims(get_embedding('/content/WhatsApp Image 2024-04-20 at 10.38.05 AM (2).jpeg'), axis=0)
foot = np.expand_dims(get_embedding('/content/WhatsApp Image 2024-04-20 at 10.38.19 AM.jpeg'), axis=0)
for sample_embedding in (top, bottom, foot):
    print(sample_embedding.shape)

(1, 2048)
(1, 2048)
(1, 2048)


In [None]:
# Run the outfit model on the three sample embeddings (top, bottom, foot order).
predict = Outfit_model.predict([top,bottom,foot])



In [None]:
# Show the sigmoid output for the sample outfit.
print(predict)

[[0.9997762]]


In [None]:
# Save the trained outfit model to an .h5 file under /content.
# NOTE(review): the captured output shows a message from saving_api.save_model -
# presumably the legacy-HDF5 notice; confirm whether consumers need the .h5 format.
Outfit_model.save('/content/recommendation_model.h5')

  saving_api.save_model(
