In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load dataset
product_data = pd.read_csv('/content/product_data.csv')
user_preferences = pd.read_csv('/content/user_preferences.csv')

In [3]:
# Merge product_data and user_preferences
# data_merge = pd.merge(product_data, user_preferences, on=['Product_Colour', 'Product_Quality', 'Product_Size', 'Product_Thickness'])

In [4]:
# data_merge combines product_data and user_preferences
# but we use a dummy dataset that has already adjusted its attributes because when combining product_data and user_preferences there are missing values.
data_merge = pd.read_csv('/content/dummy.csv')
data_merge = data_merge.drop(columns=['Product_Price', 'Product_Image'])

In [5]:
data_merge.head()

Unnamed: 0,Product_ID,Product_Name,Product_Colour,Product_Size,Product_Quality,Product_Thickness,Customer_ID
0,1,[Cotton Combed] Black T-Shirt S 20s,Black,S,Cotton Combed,20s,CS036
1,1,[Cotton Combed] Black T-Shirt S 20s,Black,S,Cotton Combed,0,CS014
2,2,[Cotton Combed] Grey T-Shirt S 20s,Grey,S,Cotton Combed,20s,CS031
3,3,[Cotton Combed] Maroon T-Shirt S 20s,Maroon,S,Cotton Combed,20s,CS036
4,3,[Cotton Combed] Maroon T-Shirt S 20s,Maroon,S,Cotton Combed,0,CS028


In [6]:
data_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 451 entries, 0 to 450
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Product_ID         451 non-null    int64 
 1   Product_Name       451 non-null    object
 2   Product_Colour     451 non-null    object
 3   Product_Size       451 non-null    object
 4   Product_Quality    451 non-null    object
 5   Product_Thickness  451 non-null    object
 6   Customer_ID        451 non-null    object
dtypes: int64(1), object(6)
memory usage: 24.8+ KB


In [7]:
# Encoding labels on category features
encoder = LabelEncoder()
data_merge['Product_Colour'] = encoder.fit_transform(data_merge['Product_Colour'])
data_merge['Product_Quality'] = encoder.fit_transform(data_merge['Product_Quality'])
data_merge['Product_Size'] = encoder.fit_transform(data_merge['Product_Size'])
data_merge['Product_Thickness'] = encoder.fit_transform(data_merge['Product_Thickness'])

In [8]:
data_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 451 entries, 0 to 450
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Product_ID         451 non-null    int64 
 1   Product_Name       451 non-null    object
 2   Product_Colour     451 non-null    int64 
 3   Product_Size       451 non-null    int64 
 4   Product_Quality    451 non-null    int64 
 5   Product_Thickness  451 non-null    int64 
 6   Customer_ID        451 non-null    object
dtypes: int64(5), object(2)
memory usage: 24.8+ KB


In [9]:
data_merge.head()

Unnamed: 0,Product_ID,Product_Name,Product_Colour,Product_Size,Product_Quality,Product_Thickness,Customer_ID
0,1,[Cotton Combed] Black T-Shirt S 20s,0,2,0,1,CS036
1,1,[Cotton Combed] Black T-Shirt S 20s,0,2,0,0,CS014
2,2,[Cotton Combed] Grey T-Shirt S 20s,1,2,0,1,CS031
3,3,[Cotton Combed] Maroon T-Shirt S 20s,2,2,0,1,CS036
4,3,[Cotton Combed] Maroon T-Shirt S 20s,2,2,0,0,CS028


In [10]:
# Split features and targets
features = data_merge[['Product_Colour', 'Product_Quality', 'Product_Size', 'Product_Thickness']]
target = encoder.fit_transform(data_merge['Product_Name'])

In [11]:
# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [12]:
# Building a content-based recommendation model
input_colour = tf.keras.Input(shape=(1,))
input_quality = tf.keras.Input(shape=(1,))
input_size = tf.keras.Input(shape=(1,))
input_thickness = tf.keras.Input(shape=(1,))

embedding_dim = 10

embedding_colour = tf.keras.layers.Embedding(len(encoder.classes_), embedding_dim)(input_colour)
embedding_quality = tf.keras.layers.Embedding(len(encoder.classes_), embedding_dim)(input_quality)
embedding_size = tf.keras.layers.Embedding(len(encoder.classes_), embedding_dim)(input_size)
embedding_thickness = tf.keras.layers.Embedding(len(encoder.classes_), embedding_dim)(input_thickness)

flatten_colour = tf.keras.layers.Flatten()(embedding_colour)
flatten_quality = tf.keras.layers.Flatten()(embedding_quality)
flatten_size = tf.keras.layers.Flatten()(embedding_size)
flatten_thickness = tf.keras.layers.Flatten()(embedding_thickness)

concatenated = tf.keras.layers.Concatenate()([flatten_colour, flatten_quality, flatten_size, flatten_thickness])

dense1 = tf.keras.layers.Dense(64, activation='relu')(concatenated)
dense2 = tf.keras.layers.Dense(32, activation='relu')(dense1)
output = tf.keras.layers.Dense(len(encoder.classes_), activation='softmax')(dense2)

model = tf.keras.Model(inputs=[input_colour, input_quality, input_size, input_thickness], outputs=output)

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [13]:
# Training the model
model.fit([X_train['Product_Colour'], X_train['Product_Quality'], X_train['Product_Size'], X_train['Product_Thickness']], y_train, epochs=50, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fc4dc6f1b10>

In [14]:
desired_user_id = 'CS006'  # Replace with desired user ID

for i, user_pref in data_merge.iterrows():
    user_id = user_pref['Customer_ID']

    if user_id == desired_user_id:
        user_features = user_pref[['Product_Colour', 'Product_Quality', 'Product_Size', 'Product_Thickness']]

        # Encode user features
        encoded_features = encoder.fit_transform(user_features)

        # Expand dimensions to match the model input shape
        user_input = [np.expand_dims(encoded_features[j], axis=0) for j in range(len(encoded_features))]

        # Predict the target probabilities for the user
        user_probabilities = model.predict(user_input)

        # Compute cosine similarity between user preferences and all product features
        similarities = cosine_similarity(np.mean(user_probabilities, axis=0).reshape(1, -1), model.predict([X_test['Product_Colour'], X_test['Product_Quality'], X_test['Product_Size'], X_test['Product_Thickness']]))

        # Get index of the most similar product
        top_index = similarities[0].argsort()[-3:][::-1]

        # Get the top product name
        top_products = data_merge.loc[top_index, 'Product_Name']

        print(f"Top product for {user_id}: {top_products}")
        break

Top product for CS006: 62            [Spandex] Mustard T-Shirt S
83     [Cotton Combed] Grey T-Shirt M 20s
0     [Cotton Combed] Black T-Shirt S 20s
Name: Product_Name, dtype: object


In [15]:
keras_file = "model.h5"
tf.keras.models.save_model(model, keras_file)
converter = tf.lite.TFLiteConverter.from_keras_model(model)

tflite_model = converter.convert()
open("model.tflite", "wb").write(tflite_model)



67644