In [1]:
# !pip install llama-index

In [2]:
import warnings

# Silence every Python warning raised after this point so that warnings
# do not clutter the cell outputs of this notebook.
warnings.filterwarnings("ignore")


In [3]:
import openai
import os

In [4]:
import pandas as pd
from llama_index.core import Document,VectorStoreIndex, ServiceContext, PromptHelper

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
# from llama_index.core.llms import OpenAI
from llama_index.llms.openai import OpenAI
from llama_index.core.node_parser import TokenTextSplitter

import ast



# Read the OpenAI API key from a local text file. The `with` block guarantees
# the file handle is closed once the key has been read.
with open("OpenAISeceretKey.txt", "r") as key_file:
    openai.api_key = key_file.read().strip()

# Expose the same key through the environment variable as well.
os.environ['OPENAI_API_KEY'] = openai.api_key

In [5]:
import pandas as pd
import re

In [6]:
# Read the fashion dataset CSV and drop every row that has a missing value in
# any column (how='any'). The shapes before and after are printed so the
# number of discarded rows is visible.

df = pd.read_csv('Data/FashionDataset/FashionDatasetv2.csv')
df_cleaned = df.dropna(how='any')
print('Data before cleaning', df.shape)
print('Data after cleaning', df_cleaned.shape)

Data before cleaning (14214, 11)
Data after cleaning (6530, 11)


### Rectifying the JSON. The JSON contains details about the clothes

In [7]:
def correct_json_string(json_str):
    """
    Turn a single-quoted, dict-like string into JSON-style double-quoted text.

    Every apostrophe is replaced by a double quote, except:
      * an apostrophe sitting directly between two word characters
        (e.g. the one in "don't" or "5'8"), which is part of the text, and
      * an apostrophe preceded by a backslash (already escaped).

    The replacement is done in a single pass, so no temporary placeholder is
    needed and text that already contains "%%" is left untouched.

    Args:
        json_str: the string to convert.

    Returns:
        The converted string.
    """
    # Alternative 1: the quote is not preceded by a backslash or a word character.
    # Alternative 2: the quote is not preceded by a backslash and not followed
    #                by a word character.
    # Together they match every unescaped quote that is NOT enclosed by word
    # characters on both sides.
    return re.sub(r"(?<![\\\w])'|(?<!\\)'(?!\w)", '"', json_str)

# Store the JSON-compatible version of every attribute string in a new column.
df_cleaned['p_attribute_corrected_json'] = df_cleaned['p_attributes'].map(correct_json_string)

### Identifying if there are any common attributes about clothes

In [8]:
from collections import Counter
import json

# Count in how many products each attribute key appears.
key_counter = Counter()
for attribute_json in df_cleaned['p_attribute_corrected_json']:
    key_counter.update(json.loads(attribute_json).keys())

# Tabulate the counts, most frequent attribute first, and show the top 10.
key_count_df = (
    pd.DataFrame(key_counter.items(), columns=['Key', 'Count'])
    .sort_values(by='Count', ascending=False)
)
key_count_df.head(10)

# for _, row in key_count_df.iterrows():
#     print(row['Key'],' ',row['Count'])

Unnamed: 0,Key,Count
33,Wash Care,6232
15,Occasion,6230
23,Sustainable,5519
2,Body or Garment Size,5384
44,Pattern,5088
39,Fabric,5019
18,Print or Pattern Type,4726
52,Closure,4200
43,Length,4153
14,Number of Pockets,4069


## Observation
- No attribute is common to all the clothes, hence all the attributes have to be utilized.

The attributes below are identified for specific usage:
- image_id 
- color
- brand
- ratingCount
- price
- products
- avg_rating
- pattern
- PrintorPatternType
- fabric

### Function to create documents directly from the dataframe created from the fashion data CSV file

In [9]:
def read_csv_to_documents(df_cleaned):
    """
    Build one Document per row of the given dataframe.

    The row's 'description' becomes the document text. The metadata holds the
    product id, colour, brand, rating count, price, product type and average
    rating, plus three values looked up in the parsed attribute JSON
    ("Pattern", "Print or Pattern Type", "Fabric"; each falls back to 'NA'
    when the key is absent) and the attribute JSON string itself.

    Args:
        df_cleaned: dataframe with the columns 'description', 'p_id',
            'colour', 'brand', 'ratingCount', 'price', 'products',
            'avg_rating' and 'p_attribute_corrected_json'.

    Returns:
        A list of Document objects, in row order.
    """
    def _row_to_document(record):
        # Parse the corrected attribute string once per row.
        raw_attributes = record['p_attribute_corrected_json']
        parsed_attributes = json.loads(raw_attributes)
        return Document(
            text=record['description'],
            metadata={
                'image_id': record['p_id'],
                'color': record['colour'],
                'brand': record['brand'],
                'ratingCount': record['ratingCount'],
                'price': record['price'],
                'products': record['products'],
                'avg_rating': record['avg_rating'],
                'pattern': parsed_attributes.get("Pattern", 'NA'),
                'PrintorPatternType': parsed_attributes.get("Print or Pattern Type", 'NA'),
                'fabric': parsed_attributes.get("Fabric", 'NA'),
                'attributes': raw_attributes,
            },
        )

    return [_row_to_document(record) for _, record in df_cleaned.iterrows()]

In [10]:
# Create the list of Documents (one per dataframe row) that will be indexed
documents=read_csv_to_documents(df_cleaned)
# documents[0]  # uncomment to inspect the first document

In [11]:
# Defining the LLM.
# NOTE(review): `llm` is handed to the ReAct agent further down but is never
# assigned to `Settings.llm` — confirm whether the query engine is meant to use it too.
llm = OpenAI(model='gpt-3.5-turbo', temperature=0, max_tokens=256)

# Initialize an OpenAIEmbedding model and register it in the global Settings
embed_model = OpenAIEmbedding()
Settings.embed_model = embed_model


# Create a VectorStoreIndex from the list of documents, passing the embedding model explicitly
index = VectorStoreIndex.from_documents(documents,embed_model=embed_model)



## Defining Agent as Query Tools

In [12]:
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Defining the query engine on top of the vector index (similarity_top_k=20)
fashionEngine = index.as_query_engine(similarity_top_k=20)

# Creating the query engine tools offered to the agent
query_engine_tools = [
    QueryEngineTool(
        query_engine=fashionEngine,
        metadata=ToolMetadata(
            name="fashionEngine_Top20",
            # The two literals below are joined by implicit concatenation; the
            # leading space of the second one keeps the sentences separated.
            description="Searches information in the vectorstore with respect to user query. This agent always searches in vector store."
            " Use a detailed plain text question as input to the tool.",
        )
    ),
]

# Initialize the ReAct agent with the tools and the LLM defined earlier
agent = ReActAgent.from_tools(query_engine_tools, llm=llm, verbose=False)

### This creates an HTML dataframe which displays the data along with pictures
It contains the columns below:
- Score
- Brand
- ImageId
- Image
- Price
- Rating
- Product Description

In [13]:
from IPython.display import Image,display, HTML

def extractHTMLDataFrame(agentResponse):
    """
    Build a dataframe of search results (including an HTML image tag per row)
    from the source nodes attached to an agent response.

    Args:
        agentResponse: object with a `sources` iterable. Each source exposes
            `raw_output`; when that object has `source_nodes`, every node must
            provide `score`, `node.text` and `node.metadata` with the keys
            'brand', 'image_id', 'price' and 'avg_rating'.

    Returns:
        DataFrame with the columns Score, Brand, ImageId, Image, Price, Rating
        and Product Description, sorted by Score in descending order. The
        frame is empty when no source nodes are available.
    """
    column_names = ['Score', 'Brand', 'ImageId', 'Image', 'Price', 'Rating',
                    'Product Description']
    rows = []

    for tool_output in agentResponse.sources:
        # raw_output is not guaranteed to be a query response (presumably it can
        # be something else when the tool call did not succeed); such outputs
        # carry no source nodes and are skipped instead of raising.
        raw_output = getattr(tool_output, 'raw_output', None)
        source_nodes = getattr(raw_output, 'source_nodes', None) or []

        for scored_node in source_nodes:
            metadata = scored_node.node.metadata
            # Images are looked up on disk by product id.
            image_path = 'Data/images/' + str(metadata['image_id']) + '.jpg'
            rows.append({
                'Score': scored_node.score,
                'Brand': metadata['brand'],
                'ImageId': metadata['image_id'],
                'Image': f'<img src="{image_path}" width="350" height="350">',
                'Price': metadata['price'],
                'Rating': metadata['avg_rating'],
                'Product Description': scored_node.node.text,
            })

    # Passing the column list keeps the schema stable even when there are no rows.
    searchResults = pd.DataFrame(rows, columns=column_names)
    searchResults = searchResults.sort_values(by='Score', ascending=False)
    return searchResults


### search input verification function to identify if question asked is related to clothing or not

In [14]:
from llama_index.core.llms import ChatMessage
def isClothingQuestion(userinput):
    """
    Ask the LLM whether the user's search text is related to clothing.

    Args:
        userinput: the search text typed by the user.

    Returns:
        "yes" when the model's reply contains "yes" (case-insensitive),
        otherwise "no".
    """
    system_prompt = ChatMessage(
        role="system",
        content="You are a helpful assistant with a high understanding of language. Identify if a question is related to clothing. Respond only with 'yes' or 'no'."
    )
    user_prompt = ChatMessage(
        role="user",
        content=f"Identify if question: '{userinput}' is related to clothing search and return answer only as yes or no."
    )

    reply = OpenAI().chat([system_prompt, user_prompt])
    normalized_reply = str(reply).strip().lower()
    return "yes" if "yes" in normalized_reply else "no"
    

In [15]:
def indexSearchWithoutAgent(userinput):
    """
    Search the vector index directly, without going through the agent tools.

    Args:
        userinput: the search text typed by the user.

    Returns:
        DataFrame with the columns Score, Brand, ImageId, Image, Price, Rating
        and Product Description, one row per retrieved node (in retrieval
        order). The frame is empty when nothing is retrieved.
    """
    column_names = ['Score', 'Brand', 'ImageId', 'Image', 'Price', 'Rating',
                    'Product Description']

    retriever = index.as_retriever(similarity_top_k=20,embed_model=embed_model)
    nodes = retriever.retrieve(userinput)

    rows = []
    for scored_node in nodes:
        metadata = scored_node.node.metadata
        # Images are looked up on disk by product id.
        image_path = 'Data/images/' + str(metadata['image_id']) + '.jpg'
        rows.append({
            'Score': scored_node.score,
            'Brand': metadata['brand'],
            'ImageId': metadata['image_id'],
            'Image': f'<img src="{image_path}" width="350" height="350">',
            'Price': metadata['price'],
            'Rating': metadata['avg_rating'],
            'Product Description': scored_node.node.text,
        })

    # Build and return the frame only AFTER the loop so that every retrieved
    # node is included, and so that a DataFrame (never None) is returned even
    # when there are no nodes.
    searchResults = pd.DataFrame(rows, columns=column_names)
    return searchResults
        

### This function initiates Search conversation with user

In [16]:
def initiateSearch(userinput):
    """
    Run the interactive clothing search conversation.

    Steps:
      1. Keep asking for a new query until the text is classified as a
         clothing question.
      2. Send the query to the agent and display its results.
      3. If the agent yields no results, fall back to a direct vector index
         search and display those results instead.
      4. Otherwise ask the user, in a loop, whether to refine the search, and
         send the additional details to the agent.

    Args:
        userinput: the initial search text typed by the user.

    Returns:
        None. Results are displayed as HTML tables and messages are printed.
    """
    # Check if user has entered anything other than clothing search; re-prompt
    # until a clothing question is given.
    while isClothingQuestion(userinput) != "yes":
        print('Question: "',userinput,'" is not about clothing.')
        userinput=input(" Please enter fashion clothing queries:\n")

    agentResponse=agent.chat(userinput)
    searchResultsDf=extractHTMLDataFrame(agentResponse)

    # If agent is not able to search then direct vector index search is initiated
    if searchResultsDf.empty:
        searchResults=indexSearchWithoutAgent(userinput)
        print('No Exact match of clothes found')

        # Guard against a missing or empty fallback result before sorting.
        if searchResults is None or searchResults.empty:
            print("No results found.")
            return

        searchResults = searchResults.sort_values(by='Score', ascending=False)
        # Show the fallback results (not the empty agent frame).
        display(HTML(searchResults.to_html(escape=False)))
        return

    display(HTML(searchResultsDf.to_html(escape=False)))

    # Feedback mechanism in which user is asked to refine search if he is not happy with results
    while True:
        user_feedback = input(" Do you want to refine search ? \n Type yes or no : ").strip().lower()

        if user_feedback == "no":
            print('Thanks, Enjoy clothing!')
            break

        if user_feedback != "yes":
            print('Please enter only yes or no \n')
            continue

        additional_input = input("Please provide more details to refine the search: \n")
        refined_input = f" Additional details: {additional_input}"
        agent_response = agent.chat(refined_input)
        searchResultsDf = extractHTMLDataFrame(agent_response)
        if searchResultsDf.empty:
            print("No results found.")
            break
        display(HTML(searchResultsDf.to_html(escape=False)))

### Top 20 Tshirts

In [18]:
# Example search: the results table is displayed and the user is then asked
# whether the search should be refined.
userinput="blue T shirts"
initiateSearch(userinput)

Unnamed: 0,Score,Brand,ImageId,Image,Price,Rating,Product Description
0,0.846739,The Souled Store,18446902,,2499,4.916667,Women Blue Tie Dye Printed T-Shirt & JoggersMaterial & Care: 100% Cotton Machine wash.
1,0.841406,M&H Easy,17796294,,1799,4.357143,Blue Printed Co-ord set B;ue and white printed T-shirt comes with a round neck and short drop-down sleeves Blue Solid Short have a slip-on closure with a drawstring100% Cotton machine washThe model (height 5'8) is wearing a size S
2,0.840283,INDYA,15078386,,2500,1.666667,"This clothing set consists of Shirt and Trousers Blue striped Shirt ,has a shirt collar, three-quarter sleeves Blue striped Trousers ,has a slip-on closureTop fabric: Viscose Rayon Bottom fabric: Viscose Rayon Hand washThe model (height 5'8) is wearing a size S"
3,0.837178,Levis,14860462,,2799,4.1875,"Blue typography and floral printed sweatshirtRound neckLong sleevesPulloverRibbed hem80% Cotton, 20% Polyester Machine wash Do not bleach Tumble dry low Warm ironThe model (height 5'8) is wearing a size S"
4,0.836393,STREET 9,13041492,,1799,4.0,"Blue solid sweatshirt, has a round neck, straight hemMaterial: 100%Cotton Machine WashThe model (height 5'8"") is wearing a size S"
5,0.834766,People,15091702,,1099,4.315789,"Blue and yellow embroidered tunic ,has a round neck, three-quarter sleeves, embroidered detail,The model (height 5'8) is wearing a size SViscose rayon Machine wash"
6,0.833389,Tokyo Talkies,18379052,,799,4.375,"Make heads turn anywhere you go donning this fashionable top. It comes with a lovely checked pattern and attractive cowl neck. Blue shadeChecked patternCowl NeckSleevelessPolyester, Machine WashThe model (height 5'8) is wearing a size S95% Polyester 5% Spendex Machine wash"
7,0.832972,DressBerry,14080698,,1749,3.8,"Welcome the new season with this chic sweatshirt. The exemplary style of this sweatshirt includes a round neck and beautiful long sleeves. Stunning blue colourTypography printRound neckLong sleevesCotton, machine washTrend Alert Quirky outerwear is any outerwear that is unconventional in one or many ways. It garners interest and highlights various details such as patchwork, standout silhouettes, eccentric prints, and so on.60% Cotton & 40% Polyester Machine washThe model (height 5'8) is wearing a size S"
8,0.832395,F.R.I.E.N.D.S By Sztori,14396716,,2099,4.36,"Blue printed sweatshirt, has a round neck, long sleeves, and a ribbed hem70% polyester, 30% cotton Machine washThe model (height 5'8) is wearing a size 3XL"
9,0.832125,Saffron Threads,14259238,,1499,4.0,"Blue, Maroon and gGreen printed Tunic, has a round neck, and short sleevesThe model (height 5'8'') is wearing a size SMaterial: Cotton  Machine Wash"


 Do you want to refine search ? 
 Type yes or no :  no


Thanks, Enjoy clothing!


### Top 20  lehenga

In [28]:
# Example search for wedding lehengas.
# NOTE(review): "lehgas" looks like a misspelling of "lehengas" — confirm
# whether it is intentional.
userinput="lehgas for wedding"
initiateSearch(userinput)

Unnamed: 0,Score,Brand,ImageId,Image,Price,Rating,Product Description
0,0.857978,Chhabra 555,15344604,,9900,3.0,"Bring your dream look to life with this fashionable lehenga choli. With an embroidered lehenga and solid choli, the set lends charm and the flared bottom completes the look. Embroidered lehenga with solid choliRound neckFlared bottomSolid dupatta with taping borderTrend Alert Lehengas are full ankle-length skirts that find their roots in the Mughal era. They are pleated, embroidered, printed or embellished, and can be worn for formal or ceremonial occasions.Blouse fabric: Poly Georgette Lehenga fabric: Poly Georgette Lehenga lining fabric: Satin Dupatta fabric: Poly Georgette"
1,0.853835,Chhabra 555,15344602,,9900,4.833333,"Own the party by wearing this stylishly designed lehenga choli. Crafted with an embroidered lehenga and solid choli the set looks appealing while the flared bottom lends a feminine touch. Embroidered lehenga with solid choliRound neckFlared bottomSolid dupatta with taping borderTrend Alert Lehengas are full ankle-length skirts that find their roots in the Mughal era. They are pleated, embroidered, printed or embellished, and can be worn for formal or ceremonial occasions.Blouse fabric: Poly Georgette Lehenga fabric: Poly Georgette Lehenga lining fabric: Satin Dupatta fabric: Poly Georgette"
2,0.852792,Inddus,15463538,,8499,4.171429,"Pair festivities with this lehenga choli for a mesmerising look. Expertly crafted with woven design lehenga and woven design choli, this lehenga choli will give you a royal feel. Woven design lehenga with woven design choliFlared bottomSolid dupatta with taping borderTrend Alert A tiered garment is usually one that is completed by stitching three or more pieces of fabric one below the other. They could also have layers and ruffles to add beauty.Blouse fabric: Silk Blend Lehenga fabric: Silk Blend Lehenga lining fabric: Shantoon Dupatta fabric: Net Dry-clean"
3,0.849977,DIVASTRI,12824978,,5999,4.606061,"Purple, pink and golden floral embroidered lehenga choli with dupatta Purple, pink and golden floral embroidered unstitched blouse piece Purple solid semi-stitched tiered net lehenga, has drawstring closure, attached cancan, flared hem Purple solid net dupatta, has taping borderLehenga length: 41 inches Waist: customisable up to 38 inch Lehenga flair: 3.5 metres Blouse length: 80 centimetres Dupatta length: 2.10 metresBlouse fabric: Silk Blend Lehenga fabric: Net Lehenga lining fabric: Cotton Dupatta fabric: Net Dry-clean"
4,0.848831,Kvsfab,17405158,,8649,4.142857,"Make a lasting impression on the next special occasion by donning this graceful lehenga choli. Impeccably crafted with a boat neck, this lehenga choli also includes a beautiful flared bottom. Embroidered lehenga with solid choliBoat neckFlared bottomSolid dupatta with taping borderPoly-georgette, dry cleanAll in the detailsTrend Alert Defined by edgy details, such as cutouts, pockets, tie-ups, back detailing, wrap, which add style and drama to your apparel, all in the details trend stands out beautifully. It experiments with prints and patterns, like ombre, garden artist, embellished, polka dots, floral, porcelain blooms, and so on.Poly georgette Dry clean onlyThe model (height 5'8) is wearing a size S"
5,0.84816,LilPicks,14824280,,2799,3.675676,"Peach-coloured solid lehenga choli Green solid ready to wear blouse, short sleeves, peach-coloured and green printed ready to wear lehenga, has slip-on closure, flared hem Blouse fabric: Art Silk Lehenga fabric: Art Silk Lehenga lining fabric: Shantoon"
6,0.84782,VRSALES,14346692,,7751,1.5,"Fancy Yellow Semi Stiched Woven Jacquard Lehenga Choli Having Yellow Color A Line Lehnega Of Lehenga Stitched Upto 42 Inches , Waist: Upto 42 Inch,Hips : Upto 42 Inches. Gher Of Lehenga Is 2.5 Meter Jacquard Fabric And Blue Color Unstitched Blouse0.80 Mtr With Dupatta Of Net Material 2.25 MtrLehenga:-Jacquard(42 Inch) || Blouse:-Jacquard(0.80 Mtr) || Dupatta:-Net(2.20 Mtr)||Duaptta Widht:(42 Inch)||Lehenga Flare : 3 Mtr"
7,0.847627,Ode by House of Pataudi,15293570,,2999,4.333333,"Pair festivities with this lehenga choli for a mesmerising look. It includes a V-neck and flared bottom to enhance your allure. Maroon Flower Bazaar Lehenga choli.Printed lehenga with choliV-neckFlared bottomPolyesterTrend Alert Lehengas are full ankle-length skirts that find their roots in the Mughal era. They are pleated, embroidered, printed or embellished, and can be worn for formal or ceremonial occasions.Choli fabric: 100% Polyester Lehenga fabric: 100% Polyester Hand-washThe model (height 5'8) is wearing a size S"
8,0.847546,max,16444512,,1299,4.5,"Yellow and pink embroidered lehenga choli, Yellow and pink embroidered ready to wear blouse, has a round neck, sleeveless, zip closure Yellow and pink solid ready to wear lehenga, straight hem Yellow and pink solid dupatta, taping border100% Polyester Dry MachineLength: 1.75 m Width: 0.8 m"
9,0.847503,Chhabra 555,15552632,,12400,2.333333,"Own the party by wearing this stylishly designed lehenga choli. Expertly crafted with embroidered lehenga and embroidered choli, this lehenga choli will give you a royal feel. Embroidered lehenga with embroidered choliSquare neckFlared bottomSolid dupatta with taping borderTrend Alert Inspired by the rich heritage and legacy of India, lehangas and fusion lehanga sets are ideal for traditional weddings and festive occasions. The sets include long skirts with patterns, designs, and embroidery that are coordinated with suitable fusion tops. Right from elaborate and intricate artwork to delicate embroidery, lehenga sets offer something for everyone.Blouse fabric: Poly Georgette Lehenga fabric: Poly Georgette Lehenga lining fabric: Shantoon Dupatta fabric: Net"


 Do you want to refine search ? 
 Type yes or no :  no


Thanks, Enjoy clothing!


### Trying to enter a query that is not about clothing

In [27]:
# Example of a query that the clothing check rejects (see the printed message
# in the output); the user is then prompted for a new query.
userinput="Kill the Shirt"
initiateSearch(userinput)

Question: " Kill the Shirt " is not about clothing.
