# Approach :
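The CSV file is loaded and an embedding is computed for each article description using a sentence transformer. I am using this because sentence transformers can be multi-modal; image-capable checkpoints work for both images and text. The incoming image is also put through the embedding model, and the cosine similarity between each description and the image is calculated. The article id and description of the garment with the highest cosine similarity are passed to the chat model, which then structures its response to the user around them. Note: in the code below the encoder is given the image's file path as a string, so the match is driven by the text of that path; matching on the image content itself would require loading the image and using an image-capable checkpoint for both the descriptions and the query.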
The GPT-4 model is capable of ingesting images directly, but I have assumed a simple input process here to keep things straightforward.

# Packages

In [1]:
!pip -q install langchain openai sentence_transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.3/220.3 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.1/46.1 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.9/76.9 kB[0m [31m5.9 MB/s[0m e

# Libraries

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.schema.messages import SystemMessage
from langchain.schema import HumanMessage

# Uploading the articles file

In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV and the test image used by
# later cells are read from paths under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import pandas as pd
# Read the article catalogue from the mounted Drive into a DataFrame.
articles = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Copy of articles.csv",
)


def concat_columns(df, cols_to_concat, new_col_name, sep=","):
    """Build one column by joining several existing columns element-wise.

    The new column is written onto ``df`` itself (the frame is modified in
    place) and the same frame is returned. Values are combined with ``+``, so
    the joined columns are expected to hold strings.

    Args:
        df: DataFrame holding the columns to join.
        cols_to_concat: Column names, joined left to right; must contain at
            least one name.
        new_col_name: Name of the column that receives the joined text.
        sep: Separator placed between consecutive values.

    Returns:
        ``df``, now carrying ``new_col_name``.
    """
    leading_col = cols_to_concat[0]
    remaining_cols = cols_to_concat[1:]

    # Seed the result with the first column, then extend it one column at a time.
    df[new_col_name] = df[leading_col]
    for extra_col in remaining_cols:
        with_separator = df[new_col_name] + sep
        df[new_col_name] = with_separator + df[extra_col]
    return df

# Columns whose text is joined, in this order, into one description per article.
cols_to_concat = [
    'prod_name', 'product_type_name', 'product_group_name',
    'graphical_appearance_name', 'colour_group_name', 'garment_group_name',
    'detail_desc',
]
new_col_name = 'concatenated_desc'
articles = concat_columns(articles, cols_to_concat, new_col_name, sep=",")
print(len(articles))
print(articles.columns)

# Keep only rows that ended up with a description. The .copy() makes the
# filtered result an independent frame, so columns added to it in later cells
# are not writes onto a slice of the original (SettingWithCopyWarning).
articles = articles[articles['concatenated_desc'].notnull()].copy()
articles.head(n=2)

1000
Index(['article_id', 'prod_name', 'product_type_name', 'product_group_name',
       'graphical_appearance_name', 'colour_group_name', 'garment_group_name',
       'detail_desc', 'concatenated_desc'],
      dtype='object')


Unnamed: 0,article_id,prod_name,product_type_name,product_group_name,graphical_appearance_name,colour_group_name,garment_group_name,detail_desc,concatenated_desc
0,695255001,Siv t-shirt,T-shirt,Garment Upper body,All over pattern,Dark Blue,Jersey Fancy,Short-sleeved top in soft viscose jersey with ...,"Siv t-shirt,T-shirt,Garment Upper body,All ove..."
1,821115007,RICHIE SKIRT,Skirt,Garment Lower body,Check,Pink,Skirts,"Short, pleated skirt in woven fabric with a hi...","RICHIE SKIRT,Skirt,Garment Lower body,Check,Pi..."


## Extracting embeddings of the concatenated descriptions

In [7]:
from sentence_transformers import SentenceTransformer
import warnings
# NOTE: this silences every warning for the rest of the session, including
# pandas' own diagnostics.
warnings.filterwarnings('ignore')

model_trf = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Encode all descriptions in one batched call instead of one call per row.
desc_vectors = model_trf.encode(articles['concatenated_desc'].tolist())

# Attach one vector per row via assign(), which returns a new frame. The
# previous chained form `articles['desc_embedding'].iloc[i] = ...` writes to an
# intermediate Series and is not guaranteed to reach the frame (it does nothing
# when pandas Copy-on-Write is enabled).
articles = articles.assign(desc_embedding=list(desc_vectors))
articles.head(n=2)

.gitattributes:   0%|          | 0.00/690 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.69k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

Unnamed: 0,article_id,prod_name,product_type_name,product_group_name,graphical_appearance_name,colour_group_name,garment_group_name,detail_desc,concatenated_desc,desc_embedding
0,695255001,Siv t-shirt,T-shirt,Garment Upper body,All over pattern,Dark Blue,Jersey Fancy,Short-sleeved top in soft viscose jersey with ...,"Siv t-shirt,T-shirt,Garment Upper body,All ove...","[-0.3154046, 0.5944378, 0.27026424, 0.09982575..."
1,821115007,RICHIE SKIRT,Skirt,Garment Lower body,Check,Pink,Skirts,"Short, pleated skirt in woven fabric with a hi...","RICHIE SKIRT,Skirt,Garment Lower body,Check,Pi...","[0.45758393, 0.13024047, 0.26896846, -0.141328..."


In [9]:
"""calculating cosine similarity"""
from numpy import dot
from numpy.linalg import norm
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Args:
        a: First vector (any 1-D array-like of numbers).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. If either vector has zero norm
        the cosine is undefined; 0.0 is returned in that case instead of the
        NaN (and RuntimeWarning) the bare division would produce.
    """
    denominator = norm(a) * norm(b)
    if denominator == 0:
        # A zero vector has no direction, so treat it as "no similarity".
        return 0.0
    return dot(a, b) / denominator

In [21]:
"""Function to convert matched output to conversational tone"""
def BestMatch(articles, image_path, input_txt, model=None, llm=None):
    """Answer a shopper's question using the catalogue entry closest to a query.

    The query is embedded, scored by cosine similarity against every vector in
    ``articles['desc_embedding']``, and the id and description of the
    best-scoring article are written into a system prompt for the chat model.

    NOTE(review): ``image_path`` is passed to ``encode`` as a plain string, so
    the text of the path is what gets embedded — the image file is never opened
    here. Matching on real image content would need an image-capable encoder
    used for both the query and the descriptions — TODO confirm intent.

    Args:
        articles: DataFrame with 'article_id', 'concatenated_desc' and
            'desc_embedding' columns. It is not modified.
        image_path: Query handed to the encoder (see the note above).
        input_txt: The user's message, sent as the human turn.
        model: Optional encoder exposing ``encode``. It must be the model that
            produced ``desc_embedding``. When omitted,
            ``SentenceTransformer('paraphrase-MiniLM-L6-v2')`` is loaded, which
            repeats the load on every call.
        llm: Optional chat model, called with the message list. When omitted a
            ``ChatOpenAI()`` instance is created.

    Returns:
        The chat model's reply to the system + human message pair.
    """
    if model is None:
        model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    input_img_embedding = model.encode(image_path)

    # Score and rank on a new frame. Sorting the caller's frame in place (and
    # leaving an 'embedding_similarity' column on it) changed notebook state
    # on every call.
    similarities = articles['desc_embedding'].apply(
        lambda emb: cosine_similarity(emb, input_img_embedding)
    )
    ranked = articles.assign(embedding_similarity=similarities).sort_values(
        'embedding_similarity', ascending=False
    )
    top_match_article_id = ranked['article_id'].iloc[0]
    top_match_concatenated_desc = ranked['concatenated_desc'].iloc[0]

    if llm is None:
        llm = ChatOpenAI()
    system_prompt_3 = (
        "you work at a garment store. when the user asks you if you have an ittem then tell the user that the nearest match to the garment in the image is article id: "
        + str(top_match_article_id)
        + " that can be described as: "
        + top_match_concatenated_desc
    )

    messages = [
        SystemMessage(content=system_prompt_3),
        HumanMessage(content=input_txt),
    ]
    return llm(messages)



In [22]:
# Show only the reply text; str() on the returned message yields the
# "content='...'" wrapper rather than the answer itself.
BestMatch(articles, '/content/drive/MyDrive/test_images/jeans.jpeg', "do you have this ?").content

"content='Yes, we do have a similar item to the one in the image. The nearest match we have is article ID: 871997001. It is a pair of Bootcut HW Nova trousers in dark blue denim. These trousers have a high waist, zip fly, and button closure. The legs have wide, flared hems. Additionally, these jeans are made with stretch denim and have a 5-pocket design. They are also partly made from recycled cotton.'"