In [8]:
import pandas as pd
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D
import tensorflow as tf

In [6]:


base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(239, 239, 3))

x = GlobalAveragePooling2D()(base_model.output)
model = Model(inputs=base_model.input, outputs=x)



In [9]:
# Create a sequential model.
model = tf.keras.Sequential([

    # Add a dense layer with 128 units and the relu activation function.
    tf.keras.layers.Dense(128, activation='relu'),

    # Add a dense layer with 64 units and the relu activation function.
    tf.keras.layers.Dense(256, activation='relu'),

    tf.keras.layers.Dense(256, activation='relu'),

    tf.keras.layers.Dense(128, activation='relu'),

    # Add a dense layer with 1 unit and the sigmoid activation function.
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [10]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [11]:


csv_file_path = 'relations_full_links.csv'
df = pd.read_csv(csv_file_path)

new_columns = ['color1', 'fabric1', 'category1', 'embedding1', 'color2', 'fabric2', 'category2', 'embedding2']
df_new = pd.DataFrame(columns=new_columns)

df_subset = df.iloc[0:1000]
i = 0
for index, row in df_subset.iterrows():
    try:
        img_path = '../datathon/images/' + row['image1']
        img = image.load_img(img_path, target_size=(239, 239))
        img_array = image.img_to_array(img)
        img_array = preprocess_input(img_array)
        img_array = np.expand_dims(img_array, axis=0)
        output1 = model.predict(img_array)

        img_path = '../datathon/images/' + row['image2']
        img = image.load_img(img_path, target_size=(239, 239))
        img_array = image.img_to_array(img)
        img_array = preprocess_input(img_array)
        img_array = np.expand_dims(img_array, axis=0)
        output2 = model.predict(img_array)
    except:
        continue
    new_row = {
        'color1': row['color1'],  # Modify or use values from the original row
        'fabric1':  row['fabric1'],  # Assign a custom value
        'category1': row['category1'],
        'embedding1': output1,
        'color2': row['color2'],  # Modify or use values from the original row
        'fabric2':  row['fabric2'],  # Assign a custom value
        'category2': row['category2'],
        'embedding2': output2,
        #'image': embeeding(row['des_filename'])  # Assign a custom value 
    }
    df_new = pd.concat([df_new, pd.DataFrame([new_row])], ignore_index=True)
    
    i += 1
    if i %20 == 0:
        print(i)

print("New DataFrame with user-defined columns:")
print(df_new.head())

20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560
580
600
620
640
660
680
700
720
740
760
780
800
820
840
860
880
900
920
940
960
980
1000
New DataFrame with user-defined columns:
  color1 fabric1 category1                                         embedding1  \
0      1       3         1  [[[[0.3635261], [0.3635261], [0.3635261], [0.3...   
1     10       4         7  [[[[0.3706159], [0.37139705], [0.37139705], [0...   
2      1       4         3  [[[[0.35642537], [0.35642537], [0.35642537], [...   
3     10       4         3  [[[[0.36479807], [0.3706159], [0.36983535], [0...   
4      1       4         4  [[[[0.35642537], [0.35642537], [0.35642537], [...   

  color2 fabric2 category2                                         embedding2  
0      1       1         2  [[[[0.35792688], [0.35932282], [0.35932282], [...  
1     10       4         3  [[[[0.37139705], [0.37139705], [0.37139705], [...  
2      1       4         7  [[[[0.3

In [17]:
from sklearn.model_selection import train_test_split

print(df_new)
X = df_new.iloc[:, :8]

y = df_new.iloc[:, 8]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

    color1 fabric1 category1  \
0        1       3         1   
1       10       4         7   
2        1       4         3   
3       10       4         3   
4        1       4         4   
..     ...     ...       ...   
995      1       4         4   
996      9       4         7   
997      4       1         1   
998      8       1         5   
999      8       4         8   

                                            embedding1 color2 fabric2  \
0    [[[[0.3635261], [0.3635261], [0.3635261], [0.3...      1       1   
1    [[[[0.3706159], [0.37139705], [0.37139705], [0...     10       4   
2    [[[[0.35642537], [0.35642537], [0.35642537], [...      1       4   
3    [[[[0.36479807], [0.3706159], [0.36983535], [0...      2       3   
4    [[[[0.35642537], [0.35642537], [0.35642537], [...      1       4   
..                                                 ...    ...     ...   
995  [[[[0.35642537], [0.35642537], [0.35642537], [...     10       4   
996  [[[[0.39314234], [0.393142

IndexError: single positional indexer is out-of-bounds

In [None]:
# Evaluate the model.
loss, accuracy = model.evaluate(X_test, y_test)

# Print the accuracy of the model.
print('Accuracy:', accuracy)

In [None]:
model.fit(X_train, y_train, epochs=50)

In [13]:
!pip3 install -U scikit-learn scipy matplotlib

Collecting scikit-learn
  Downloading scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scipy
  Downloading scipy-1.11.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m361.0 kB/s[0m eta [36m0:00:00[0mMB/s[0m eta [36m0:00:01[0m
[?25hCollecting matplotlib
  Downloading matplotlib-3.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting joblib>=1.1.1 (from scikit-learn)
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Downloading threadpoolctl-3.2.0-py3-none-any.whl.metadata (10.0 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-