<H1><B>Atrribute Extractor</B></H1>

### Importing Libraries

In [7]:
# Importing functions from libraries

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnableLambda
from langchain.schema.runnable import RunnablePassthrough
from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_openai import OpenAIEmbeddings
import yaml
import ast
import pandas as pd

### Approach 1: One Attribute at a time

In [8]:
# Gettiing api_key from env file
import os
from dotenv import load_dotenv
load_dotenv()

api_key = os.environ.get('API_KEY')

### Input funcions

In [9]:
def import_yaml(file_name):
    with open(file_name, 'r') as file:
        data = yaml.safe_load(file)
    return(data)

### Taking all file names

In [10]:
# File names
# Provide the product_description and attributes_description file in csv format
model_details = 'model_details.yaml'
product_description_file_name = 'source_files/p_description.csv'
atrributes_description_file_name = 'source_files/a_description.csv'
example_file_name = 'sku example.csv'

### Enable example sementic similarity option

In [11]:
# Answer Y/N
Run_semantic_similarity_option = "N"


### Function to create an OpenAI api client by taking model settings from a yaml file

In [12]:
def model_settings(file_name,api_key):

    # Load YAML data from a file
    data = import_yaml(file_name)

    # Print the loaded data
    # Setting up OpenAI client

    model = ChatOpenAI(model=data['model_name'],
                    api_key=api_key)
    return(model)


### Function to import data from product_description and attributes_description csv files and combining them together

In [13]:
# Data importing
def data_importing(product_description_file_name, atrributes_description_file_name):
    import pandas as pd
    product_description_df = pd.read_csv(product_description_file_name)
    attribute_description_df = pd.read_csv(atrributes_description_file_name)
    return(pd.merge(product_description_df, attribute_description_df, how='cross'))
    

### Function to convert the combined dataframe into list of dictionaries which would be model input

In [14]:
# Data Cleaning 
def data_cleaning(imported_data):
    list_of_dictionaries = []
    for i in range(imported_data.shape[0]):
            list_of_dictionaries.append({'attribute_name':imported_data['Atrribute Name'][i],
                                            'attribute_description':imported_data['Attribute Description'][i],
                                            'attribute_example':imported_data['Example'][i],
                                            'other_information':imported_data['Other Information'][i],
                                            'product_description':imported_data['SKU Description'][i] + " " + imported_data['SKU Description'][i]})
    return(list_of_dictionaries)

### Creating Prompt

In [15]:
# Function to create prompt
prompt_to_extract_attrbutes = ChatPromptTemplate.from_messages([
    ('system',"You are a data entry operator, only provide one word answers from the product description provided. If you don't find the answer in product description return Not available"),
    ('human','Extract the following attribute: Attribute name:{attribute_name}, Attribute description:{attribute_description}, Attribute example:{attribute_example} and Other information:{other_information} from Product description:{product_description}')
])


### Function to create a runnable chain

In [16]:
# Creating chains
def extracting_attributes_using_open_ai(prompt,model,data):
    chain_attribute_extractor = prompt | model | StrOutputParser()
    return(chain_attribute_extractor.batch(data))

### Function to remove duplicate values from the model

In [17]:
#Removing duplicates from the value column

def remove_duplicate_values(df, product_column, value_column):
    df[value_column] = df.groupby(product_column)[value_column].transform(
        lambda group: group.where(~group.duplicated(), 'Not available')
    )
    
    return df


### Function to format the output from the model

In [18]:
# Output formating
def output_formating(data,output):
    data['Value'] = output
    data = remove_duplicate_values(data,'SKU Name','Value')
    data = data[['SKU Name', 'SKU Description', 'Atrribute Name','Value']]
    data = data.pivot(index=['SKU Name','SKU Description'], columns='Atrribute Name', values='Value').reset_index()
    data.columns.name = None
    return(data)


### Example Semantic similarity

In [19]:
# Example selector by semantics
# function to create an example dictionary
import pandas as pd
def Add_Examples_for_model_col(data,example_file_name):
    data_for_example = pd.read_csv(example_file_name)
    data_for_example = data_for_example.melt(id_vars=['SKU Name','SKU Description'],var_name='Atrribute Name',value_name='Value')
    data_for_example['Examples_for_model'] = data_for_example.apply(lambda data_for_example: {'Product_description':data_for_example['SKU Description'],'Attribute_name':data_for_example['Atrribute Name'],'Value':data_for_example['Value']}, axis = 1)
    data_for_example = data_for_example.groupby('Atrribute Name')['Examples_for_model'].agg(list).reset_index()
    data = data_importing(product_description_file_name,atrributes_description_file_name)
    data = pd.merge(data,data_for_example,on='Atrribute Name')
    return(data)

In [20]:
def example_extractor(Attribute_name,Examples,api_key,number_of_examples):
    example_prompt = PromptTemplate(
        input_variables=["Product_description","Attribute_name", "Value"],
        template="Product_description: {Product_description} Attribute_name: {Attribute_name} Value:{Value}",
    )
    examples = Examples
    example_selector = SemanticSimilarityExampleSelector.from_examples(
        # The list of examples available to select from.
        examples,
        # The embedding class used to produce embeddings which are used to measure semantic similarity.
        OpenAIEmbeddings(api_key=api_key),
        # The VectorStore class that is used to store the embeddings and do a similarity search over.
        FAISS,
        # The number of examples to produce.
        k=number_of_examples,
    )
    similar_prompt = FewShotPromptTemplate(
        # We provide an ExampleSelector instead of examples.
        example_selector=example_selector,
        example_prompt=example_prompt,
        prefix="",
        suffix="",
        input_variables=["Attribute_name"],
    )
    # Input is a feeling, so should select the happy/sad example
    final_examples = similar_prompt.format(Attribute_name = Attribute_name)
    return(final_examples)

In [21]:
def similar_example(data,Attribute_name_column_name,All_examples_column_name,Relevent_example_column_name,api_key,number_of_examples):
    semantic_similar_exaples = []
    for i in range(data.shape[0]):
        semantic_similar_exaples.append(example_extractor(data[Attribute_name_column_name][i],data[All_examples_column_name][i],api_key,number_of_examples))
    data[Relevent_example_column_name] = semantic_similar_exaples
    return(data)

### Creating a semantic similarity example pipeline

In [22]:
#Setting up a pipeline
def example_semantic_similary_pipeline(product_description_file_name,atrributes_description_file_name,example_file_name,api_key,number_of_examples,Atrribute_name,Examples_for_model,Examples_selected_by_semantic_similarity):
    data = data_importing(product_description_file_name,atrributes_description_file_name)
    data = Add_Examples_for_model_col(data,example_file_name)
    data = similar_example(data,Atrribute_name,Examples_for_model,Examples_selected_by_semantic_similarity,api_key,number_of_examples)
    return(data)

### Creating a pipeline which takes file names, api_key and prompt as input to provide the output

In [23]:
# Creating a pipeline which takes both file names, model and prompt to produce the output
def pipeline(product_description_file_name, atrributes_description_file_name,model_file_name,api_key,prompt,Run_semantic_similarity_option,example_file_name,number_of_examples,Atrribute_Name,Examples_for_model,Example):
    model = model_settings(model_file_name,api_key) # Importing llm
    data = data_importing(product_description_file_name,atrributes_description_file_name) #Data importing
    if Run_semantic_similarity_option == 'Y': data = example_semantic_similary_pipeline(product_description_file_name,atrributes_description_file_name,example_file_name,api_key,number_of_examples,Atrribute_Name,Examples_for_model,Example) #Example semantic similarity
    data_cleaned = data_cleaning(data) #Formating the input data
    output = extracting_attributes_using_open_ai(prompt,model,data_cleaned) #Running the llm for answers
    output_formated = output_formating(data,output) #Fromating the output from llm
    return(output_formated) #Output of the function
    

In [24]:
output_1 = pipeline(product_description_file_name,atrributes_description_file_name,model_details,api_key,prompt_to_extract_attrbutes,Run_semantic_similarity_option,example_file_name,2,'Atrribute Name','Examples_for_model','Example')

In [25]:
output_1.drop(columns=['SKU Description'])

Unnamed: 0,SKU Name,Brand,Flavour,Type
0,0700144801 - TIP TOP NYONYA KUIH BANGKIT 60G,TIP TOP,Not available,Not available
1,0750073001 - TONG GARDEN SHRIMP COATED PEANUT ...,Tong Garden,Shrimp,Peanut
2,0750073101 - TONG GARDEN CHICKEN COATED PEANUT...,Tong Garden,Chicken,Peanut
3,0750073201 - TONG GARDEN BBQ COATED PEANUT 160G,TONG,BBQ,Peanut
4,0790080001 - OTTOGI DRIED SEAWEED 50G,Ottogi,Seaweed,Not available
...,...,...,...,...
1495,3230000301 - KUIH MASIN/MASIN WANGI 4S,Not available,Not available,Not available
1496,3230000901 - ROTI AYAM & IKAN 200G,Not available,Not available,Not available
1497,3230001101 - KACANG TUMBUK 6S,Not available,Not available,Kacang
1498,3230001201 - GULA TARIK 6S,Not available,Not available,Not available


### Approach 2: One product at a time

In [26]:
def data_cleaning_2(data):
    list_of_dictionaries = []
    for product in data['SKU Name'].unique():
        list_of_dictionaries.append({'attribute_name':list(data[data['SKU Name'] == product]['Atrribute Name']),
                                    'attribute_description':list(data[data['SKU Name'] == product]['Attribute Description']),
                                    'attribute_example':list(data[data['SKU Name'] == product]['Example']),
                                    'other_information':list(data[data['SKU Name'] == product]['Other Information']),
                                    'product_description':list(data[data['SKU Name'] == product]['SKU Description'])})
    return(list_of_dictionaries)
        

In [27]:
# Function to create prompt
prompt_to_extract_attrbutes_2 = ChatPromptTemplate.from_messages([
    ('system',"You are a data entry operator, only provide one word answers from the product description provided. If you don't find the answer in product description return Not available and make sure do not provide same answers for two diffrent attributes in case of duplicates keep the first one and make all other Not available"),
    ('human','Extract the following list of attribute: Attribute name:{attribute_name}, Attribute description:{attribute_description}, Attribute example:{attribute_example} and Other information:{other_information} from Product description:{product_description}')
])


In [28]:
def output_formating_2(data,output):
    data = data_importing(product_description_file_name,atrributes_description_file_name)
    data = data[['SKU Name','SKU Description']].drop_duplicates(subset='SKU Name')
    data = data.reset_index(drop=True)
    #dict_list = [dict(item.split(": ") for item in entry.split("\n")) for entry in output]
    dict_list =[dict(item.split(": ", 1) for item in entry.split("\n") if ": " in item) for entry in output]
    data = pd.concat([data,pd.DataFrame(dict_list)],axis=1)
    return(data)


In [29]:
# Creating a pipeline which takes both file names, model and prompt to produce the output
def pipeline_2(product_description_file_name, atrributes_description_file_name,model_file_name,api_key,prompt,Run_semantic_similarity_option,example_file_name,number_of_examples,Atrribute_Name,Examples_for_model,Example):
    model = model_settings(model_file_name,api_key)
    data = data_importing(product_description_file_name,atrributes_description_file_name) #Data importing
    if Run_semantic_similarity_option == 'Y': data = example_semantic_similary_pipeline(product_description_file_name,atrributes_description_file_name,example_file_name,api_key,number_of_examples,Atrribute_Name,Examples_for_model,Example) #Example semantic similarity
    data_cleaned = data_cleaning_2(data) #Formating the input data
    output = extracting_attributes_using_open_ai(prompt,model,data_cleaned) #Running the llm for answers
    output_formated = output_formating_2(data,output) #Fromating the output from llm
    return(output_formated) #Output of the function

In [30]:
output_2 = pipeline_2(product_description_file_name, atrributes_description_file_name,model_details,api_key,prompt_to_extract_attrbutes_2,Run_semantic_similarity_option,example_file_name,2,'Atrribute Name','Examples_for_model','Example')

### Approach 3: Each Attribute in diffrent parallel chains

In [31]:
def data_cleaning_3(data):
    test = []
    for products in data['SKU Description'].unique():
        test.append({'product_description':products})
    return(test)

In [32]:
# Generating prompt and their response
def extracting_attributes_using_open_ai_3(model,data,data_cleaned):
    runnable_parallel_dict = {}
    attribute_name_list = []
    for attribute_name in data['Atrribute Name'].unique():
        attribute_name_list.append(attribute_name)
        human_prompt = 'Extract the following attribute: Attribute name:'+attribute_name+', Attribute description:'+data[data['Atrribute Name']==attribute_name]['Attribute Description'].unique()+', Attribute example:'+data[data['Atrribute Name']==attribute_name]['Example'].unique()+' and Other information:'+data[data['Atrribute Name']==attribute_name]['Other Information'].unique()+' from Product description:{product_description}'
        prompt_atrribute_extractor = (ChatPromptTemplate.from_messages([
            ('system',"You are a data entry operator, only provide one word answers from the product description provided. If you don't find the answer in product description return Not available and make sure do not provide same answers for two diffrent attributes in case of duplicates keep the first one and make all other Not available"),
            ('human',human_prompt[0])
        ]))
        chain = prompt_atrribute_extractor | model | StrOutputParser()
        runnable_parallel_dict = runnable_parallel_dict | {attribute_name:chain}
    
    make_list_chain = RunnableLambda(lambda x: list(x['branches'].values()))
    prompt_remove_duplicates = ChatPromptTemplate.from_messages([
        ('human',"ensure length of input and output list should be same"),
        ('human',"Except 'Not avialable' replace other duplicates in:  {list_of_output} with 'Not available'"),
        ('human',"Only return the list in the output")
    ])

    chain_remove_duplicates = prompt_remove_duplicates | model | StrOutputParser()
    #chain = RunnableParallel(branches = runnable_parallel_dict) | make_list_chain | chain_remove_duplicates
    chain = RunnableParallel(branches = runnable_parallel_dict) | make_list_chain
    output = chain.batch(data_cleaned)
    return(output)

In [33]:
# Output formating
def output_formating_3(data,output):
    #data['Value'] = [item for sublist in output for item in ast.literal_eval(sublist)]
    data['Value'] = [item for sublist in output for item in sublist]
    #data['Value'] = [dict(item.split(": ", 1) for item in entry.split("\n") if ": " in item) for entry in output]
    data = remove_duplicate_values(data,'SKU Name','Value')
    data = data[['SKU Name', 'SKU Description', 'Atrribute Name','Value']]
    data = data.pivot(index=['SKU Name','SKU Description'], columns='Atrribute Name', values='Value').reset_index()
    data.columns.name = None
    return(data)

In [34]:
# Creating a pipeline which takes both file names, model and prompt to produce the output
def pipeline_3(product_description_file_name, atrributes_description_file_name,model_file_name,api_key,Run_semantic_similarity_option,example_file_name,number_of_examples,Atrribute_Name,Examples_for_model,Example):
    model = model_settings(model_file_name,api_key)
    data = data_importing(product_description_file_name,atrributes_description_file_name) #Data importing
    if Run_semantic_similarity_option == 'Y': data = example_semantic_similary_pipeline(product_description_file_name,atrributes_description_file_name,example_file_name,api_key,number_of_examples,Atrribute_Name,Examples_for_model,Example) #Example semantic similarity
    data_cleaned = data_cleaning_3(data) #Formating the input data
    output = extracting_attributes_using_open_ai_3(model,data,data_cleaned) #Running the llm for answers
    output_formated = output_formating_3(data,output) #Fromating the output from llm
    return(output_formated) #Output of the functionoutput_formating_3(data,output)

In [35]:
output_3 = pipeline_3(product_description_file_name, atrributes_description_file_name,model_details,api_key,Run_semantic_similarity_option,example_file_name,2,'Atrribute Name','Examples_for_model','Example')

### Approach 4: Simple sequential chain for all attribute connected parallely for each product

In [36]:
# Generating prompt
def extracting_attributes_using_open_ai_4(model,data,data_cleaned):
    runnable_parallel_dict = {}
    attribute_name_list = []
    for product_description in data['SKU Description'].unique():
        product_name = str(data[data['SKU Description'] == product_description]['SKU Name'].unique())
        chain_combine = RunnableLambda(lambda x:x)
        for attribute_name in data['Atrribute Name'].unique():
            attribute_name_list.append(attribute_name)
            human_prompt = 'Extract a one word respose for the following attribute: Attribute name:'+attribute_name+', Attribute description: '+data[data['Atrribute Name']==attribute_name]['Attribute Description'].unique()+', Attribute example: '+data[data['Atrribute Name']==attribute_name]['Example'].unique()+' and Other information: '+data[data['Atrribute Name']==attribute_name]['Other Information'].unique()+' from Product description: '+ product_description
            prompt_atrribute_extractor = ChatPromptTemplate.from_messages([
                ('system',"You are a data entry operator, append your answer in the folliwng list: {list_of_output} and only provide this list as output"),
                ('human',human_prompt[0]),
                ('human',"if the answer is already part of :{list_of_output}, change your answer to 'Not available'"),
                ('human',"In case the attribute is not present in the product description append 'Not available' to the list:{list_of_output}. In all other cases make your asnwer 'Not available'")
            ])
            chain = prompt_atrribute_extractor | model | StrOutputParser()
            chain_combine = chain_combine | chain
        runnable_parallel_dict = runnable_parallel_dict | {product_name:chain_combine}

    chain_final = RunnableParallel(branches = runnable_parallel_dict)
    #chain = RunnableParallel(branches = runnable_parallel_dict) | RunnableLambda(lambda x: combination_chains(x["branches"]))
    output = chain_final.invoke(data_cleaned)
    return(output)

In [37]:
# Output formating
def output_formating_4(data,output):
    import ast
    data['Value'] = [item for sublist in list(output['branches'].values()) for item in ast.literal_eval(sublist)]
    #data = remove_duplicate_values(data,'SKU Name','Value')
    data = data[['SKU Name', 'SKU Description', 'Atrribute Name','Value']]
    data = data.pivot(index=['SKU Name','SKU Description'], columns='Atrribute Name', values='Value').reset_index()
    data.columns.name = None
    return(data)

In [38]:
# Creating a pipeline which takes both file names, model and prompt to produce the output
def pipeline_4(product_description_file_name, atrributes_description_file_name,model_file_name,api_key,Run_semantic_similarity_option,example_file_name,number_of_examples,Atrribute_Name,Examples_for_model,Example):
    output = [] # Creating an outputlist
    model = model_settings(model_file_name,api_key)
    data = data_importing(product_description_file_name,atrributes_description_file_name) #Data importing
    if Run_semantic_similarity_option == 'Y': data = example_semantic_similary_pipeline(product_description_file_name,atrributes_description_file_name,example_file_name,api_key,number_of_examples,Atrribute_Name,Examples_for_model,Example) #Example semantic similarity
    data_cleaned = [] #Formating the input data
    output = extracting_attributes_using_open_ai_4(model,data,data_cleaned) #Running the llm for answers
    output_formated = output_formating_4(data,output) #Fromating the output from llm
    return(output_formated) #Output of the functionoutput_formating_3(data,output)

In [39]:
output_4 = pipeline_4(product_description_file_name, atrributes_description_file_name,model_details,api_key,Run_semantic_similarity_option,example_file_name,2,'Atrribute Name','Examples_for_model','Example')

In [40]:
source_file_data = pd.read_excel('nuts.xlsx')
source_file_data ['Type'] = source_file_data['Type'].fillna('Not available')
source_file_data ['Flavour'] = source_file_data['Flavour'].fillna('Not available')
source_file_data ['Brand'] = source_file_data['Brand'].fillna('Not available')
source_file_data.head()

Unnamed: 0,Sub-department,Category,Type,Flavour,Brand,Description,Pack size,Price tier
0,250 - SNACKS,0473 - CORN SNACKS,Not available,Not available,Not available,0821614601 - KOREAN COLOR CURRY POPCORN 80G,Small,Medium
1,250 - SNACKS,0477 - NUTS,Seeds,Not available,Tong Garden,0821620101 - TONG GARDEN SUNFLOWER SEEDS NO SH...,Medium,Low
2,250 - SNACKS,0477 - NUTS,Seeds,Not available,Unbranded,0820277101 - WHITE PUMPKIN SEED 500G,Large,Low
3,250 - SNACKS,0477 - NUTS,Mixed nuts,Not available,Taisun,0821069201 - TAISUN TREATZ VEGGIE STICKS 250G,Large,Medium
4,250 - SNACKS,0477 - NUTS,Kacang,Not available,Alwi,0820088501 - ALWI KACANG CORNFLAKES 40G,Small,Low


### Accuracy Calculation

In [41]:
def accuracy(source_file_data,output):
    source_file_data = source_file_data.rename(columns={'Type': 'Type_source', 'Flavour': 'Flavour_source', 'Brand': 'Brand_source'})
    merged_data = pd.merge(source_file_data, output, left_on='Description', right_on='SKU Name', how='inner')
    type_matches = merged_data['Type'] == merged_data['Type_source']
    brand_matches = merged_data['Brand'] == merged_data['Brand_source']
    flavour_matches = merged_data['Flavour'] == merged_data['Flavour_source']

    # Step 2: Calculate the number of matches
    total_matches = (type_matches.sum() + brand_matches.sum() + flavour_matches.sum())  # Total matching rows for columns
    total_comparisons = len(merged_data) * 3  # Total comparisons (3 per row)

    # Step 3: Calculate accuracy
    accuracy = (total_matches / total_comparisons) * 100
    accuracy_type = (type_matches.sum() / len(merged_data)) * 100
    accuracy_brand = (brand_matches.sum() / len(merged_data)) * 100
    accuracy_flavour = (flavour_matches.sum() / len(merged_data)) * 100

    print(f"Total Accuracy: {accuracy:.2f}%")
    print(f"Accuracy for Type column: {accuracy_type:.2f}%")
    print(f"Accuracy for Brand column: {accuracy_brand:.2f}%")
    print(f"Accuracy Flavour column: {accuracy_flavour:.2f}%")

In [52]:
print("Approach 1:\n")
print(accuracy(source_file_data,output_1))
print("Approach 2: \n")
print(accuracy(source_file_data,output_2))
print("Approach 3: \n")
print(accuracy(source_file_data,output_3))
print("Approach 4: \n")
print(accuracy(source_file_data,output_4))

Approach 1:

Total Accuracy: 47.36%
Accuracy for Type column: 78.67%
Accuracy for Brand column: 25.20%
Accuracy Flavour column: 38.20%
None
Approach 2: 

Total Accuracy: 30.91%
Accuracy for Type column: 50.73%
Accuracy for Brand column: 13.87%
Accuracy Flavour column: 28.13%
None
Approach 3: 

Total Accuracy: 57.31%
Accuracy for Type column: 80.60%
Accuracy for Brand column: 45.00%
Accuracy Flavour column: 46.33%
None
Approach 4: 

Total Accuracy: 74.33%
Accuracy for Type column: 77.73%
Accuracy for Brand column: 66.07%
Accuracy Flavour column: 79.20%
None


In [43]:
# data = data_importing(product_description_file_name,atrributes_description_file_name)
# model = model_settings(model_details,api_key)
# data_cleaned = []
# output = extracting_attributes_using_open_ai_4(model,data,data_cleaned)
# import ast
# flattened_list = [item for sublist in list(output['branches'].values()) for item in ast.literal_eval(sublist)]
# len(flattened_list)

In [44]:
#output = pipeline_2(product_description_file_name, atrributes_description_file_name,model_details,api_key,prompt_to_extract_attrbutes_2)
#output.to_csv('sku example.csv',index = False)

### Stress testing the pipeplines

In [45]:
# for i in range(100):
#     pipeline(product_description_file_name,atrributes_description_file_name,model_details,api_key,prompt_to_extract_attrbutes,Run_semantic_similarity_option,example_file_name,2,'Atrribute Name','Examples_for_model','Example')
#     print("Completed: ",i,"%",end='\r')

In [46]:
# for i in range(100):
#     pipeline_2(product_description_file_name, atrributes_description_file_name,model_details,api_key,prompt_to_extract_attrbutes_2,Run_semantic_similarity_option,example_file_name,2,'Atrribute Name','Examples_for_model','Example')
#     print("Completed: ",i,"%",end='\r')

In [47]:
# for i in range(100):
#     pipeline_3(product_description_file_name, atrributes_description_file_name,model_details,api_key,Run_semantic_similarity_option,example_file_name,2,'Atrribute Name','Examples_for_model','Example')
#     print("Completed: ",i,"%",end='\r')

In [48]:
# for i in range(100):
#     pipeline_4(product_description_file_name, atrributes_description_file_name,model_details,api_key,Run_semantic_similarity_option,example_file_name,2,'Atrribute Name','Examples_for_model','Example')
#     print("Completed: ",i,"%",end='\r')