In [23]:
# import lightgbm
# from ollama import chat
# from sentence_transformers import SentenceTransformer



import lightgbm
import streamlit as st
#from ollama import chat
import google.generativeai as genai
from sentence_transformers import SentenceTransformer

In [24]:
# Load the sentence-embedding model used to vectorise product descriptions.
# The checkpoint is "dunzhang/stella_en_400M_v5" (the 400M variant, not the 1.5B one).
# - device="mps" selects PyTorch's Metal backend (Apple silicon).
#   NOTE(review): on other hardware this presumably needs "cuda" or "cpu" — confirm before sharing.
# - config_kwargs switches off memory-efficient attention and input unpadding.
#   NOTE(review): presumably because those code paths are unavailable on this device — TODO confirm.
# - trust_remote_code=True allows the checkpoint's custom modelling code to run locally;
#   keep it only for model repositories you trust.
word_embedding = SentenceTransformer("dunzhang/stella_en_400M_v5", device="mps", config_kwargs={"use_memory_efficient_attention": False, "unpad_inputs": False}, trust_remote_code=True)



Some weights of the model checkpoint at dunzhang/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [25]:
class Text2Embedding():
    """
    Thin adapter that turns text into 2-D embedding rows for a downstream
    tabular model.

    The wrapped model only needs an ``encode()`` method; its result is
    assumed to be array-like with ``.ndim`` and ``.reshape`` (for example a
    NumPy array).
    """

    def __init__(self, model):
        """
        Store the text-embedding model.

        :param model: A text embedding model with a .encode() method
                      (e.g. a SentenceTransformer instance).
        """
        self.model = model

    def sentence2vector(self, sentences, name='product_description'):
        """
        Encode a text (or list of texts) with the wrapped model.

        :param sentences: The text input(s) to encode.
        :param name: Label used only in the progress message.
        :return: Whatever ``model.encode(sentences)`` returns, unmodified.
        """
        print(f"Encoding sentences for feature '{name}'...")
        return self.model.encode(sentences)

    def transform(self, input):
        """
        Encode ``input`` and return the embedding(s) as a 2-D array with one
        row per text.

        The embedding width is taken from the model output instead of being
        hard-coded, so any embedding size works. For a single text and a
        1024-dimensional model the result is still shaped (1, 1024).

        :param input: A single text, or a list of texts, to encode. (The
                      name shadows the ``input`` builtin; it is kept so
                      existing keyword callers do not break.)
        :return: A (1, dim) array for a single text, or the (n, dim) array
                 produced by the model for a list of n texts.
        """
        embedding = self.sentence2vector(input)
        if embedding.ndim == 1:
            # A single text yields a flat vector; promote it to one row.
            return embedding.reshape(1, -1)
        # Batched input is already (n, dim); return it untouched.
        return embedding


In [26]:
# Load the pre-trained LightGBM booster from 'lightgbm_model_light_we.txt'.
# The path is relative to the notebook's working directory; the booster is
# used further down to score embedding rows via gbm.predict().

gbm = lightgbm.Booster(model_file='lightgbm_model_light_we.txt')

In [46]:
# Authenticate the Gemini SDK with the key stored under ["api"]["key"] in
# Streamlit's secrets store, so no credential is hard-coded in the notebook.
# NOTE(review): outside a Streamlit app, st.secrets presumably reads
# .streamlit/secrets.toml — confirm that file exists for notebook runs.
genai.configure(api_key= st.secrets["api"]["key"])
# Module-level Gemini client ("gemini-2.0-flash") used by call_gemini().
model = genai.GenerativeModel("gemini-2.0-flash")

def call_gemini(messages, client=None):
    """
    Ask Gemini to rewrite a product description and return the plain text.

    :param messages: Prompt passed unchanged to ``generate_content``.
    :param client: Optional object exposing ``generate_content(prompt)``.
                   Defaults to the module-level ``model``; passing another
                   client allows a different Gemini model or a test stub.
    :return: The response text with surrounding whitespace removed, or an
             empty string when the reply carries no usable text, so callers
             can guard with ``if not result``.
    """
    gemini = model if client is None else client
    response = gemini.generate_content(messages)

    try:
        text = response.text
    except ValueError:
        # The SDK's `.text` accessor raises ValueError when the reply has no
        # text part (for example when it was blocked by a safety filter).
        # Report that as "no text" instead of crashing the caller's loop.
        return ""

    return text.strip() if text else ""


# Iteratively ask Gemini to rewrite a product description and keep the
# rewrite that the LightGBM model scores highest.

# Seed description to optimise.
example = "Authentic vintage Chanel made out of luxurious black lambskin. Featuring gold CC closure. Size W: 25cm H: 17cm Size D: 2cm. Shoulder height: 94cm. Inside lining has been fully replaced. Comes with ribbon."

# Wrap the embedding model so text can be turned into feature rows for the GBM.
transform_embedding = Text2Embedding(word_embedding)
example_embedding = transform_embedding.transform(example)

# Stop once the predicted score reaches the threshold, or after this many
# rewrite attempts, whichever comes first.
score_threshold = 0.9
max_iterations = 6

# Instructions sent to Gemini on every attempt. They do not change between
# iterations, so they are built once. Every sentence ends with ". " so that
# Python's implicit concatenation of adjacent literals cannot fuse two
# sentences (or the instructions and the "Score:" suffix) together.
rewrite_instructions = (
    "Your task is to improve the given product description while keeping all key details intact. "
    "You MUST maximize the 'score' by making the description more engaging, informative, and persuasive. "
    "IMPORTANT: Your response should ONLY contain the improved product description. "
    "You are NOT allowed to provide any commentary, explanations, or suggestions—just return the improved text. "
    "The response must not contain any additional information. "
    "dont hallucinate or make up things only base of known information. "
    "dont add extra features only improve on descriptiveness. "
    "Return ONLY the improved product description and nothing else. Do NOT provide any commentary, context, or services. "
)

# Score the original description; this is the baseline to beat.
best_score = float(gbm.predict(example_embedding)[0])

# Keep track of the first (original) description and its score.
first_example = example
first_score = best_score
best_text = first_example

# Iterate until the threshold is met or max_iterations is reached.
i = 1
while best_score < score_threshold and i <= max_iterations:
    # Always refine the best description found so far, so that the score
    # quoted in the prompt belongs to the text being improved. Feeding the
    # latest candidate instead would pair a worse text with the best score.
    messages = f"{rewrite_instructions}Score: {best_score:.2f} | Description: {best_text}"

    example = call_gemini(messages)

    # Bail out before printing or embedding if the API returned nothing.
    if not example:
        print("No response from Gemini API.")
        break
    print(example)

    # Embed and score the candidate description.
    example_embedding = transform_embedding.transform(example)
    new_score = float(gbm.predict(example_embedding)[0])

    # Debug output: how the candidate compares with the best so far.
    print(f"Iteration {i}: Old Score={best_score:.4f}, New Score={new_score:.4f}")

    # Keep the candidate only if it beats the current best.
    if new_score > best_score:
        best_score = new_score
        best_text = example

    i += 1  # Move to the next iteration

# Once we exit the loop, track the final score.
last_score = best_score

# Percentage improvement over the original score. A baseline of exactly 0
# has no meaningful relative change, so report NaN instead of dividing by zero.
if first_score:
    percent_of_change = ((last_score - first_score) / first_score) * 100
else:
    percent_of_change = float("nan")

# Print out the old and best descriptions with their corresponding scores.
print(f'\nOld description: {first_example}')
print(f"\nBest description: {best_text}")
print(f"\nBest score: {best_score}")
print(f"\nPercent of improvment: {percent_of_change:.3f}%")


Encoding sentences for feature 'product_description'...




Indulge in timeless elegance with this authentic vintage Chanel handbag, crafted from supple black lambskin leather. The iconic gold-tone CC turn-lock closure adds a touch of classic sophistication. Perfectly sized at W 25cm x H 17cm x D 2cm, it's ideal for day or evening. The gracefully long 94cm shoulder strap allows for comfortable over-the-shoulder wear. To ensure lasting quality, the interior lining has been expertly replaced. This exquisite piece is presented with a delicate Chanel ribbon.
Encoding sentences for feature 'product_description'...
Iteration 1: Old Score=0.3502, New Score=0.5059
Experience the allure of enduring style with this authentic vintage Chanel handbag, meticulously crafted from luxuriously soft black lambskin leather. A hallmark of Parisian chic, the iconic gold-tone CC turn-lock closure secures your essentials with a satisfying click and adds an unmistakable touch of classic Chanel sophistication. Perfectly proportioned at W 25cm x H 17cm x D 2cm, this cove

In [20]:
# Display the best score held in the kernel.
# NOTE(review): this cell's execution count (20) is lower than the optimisation
# cell's (46), so the value shown below predates that run — re-run this cell
# after the loop to see the current best score.
best_score

0.3501659505781453