In [58]:
import pandas as pd
import openai
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity

In [59]:
url = 'https://drive.google.com/uc?id=' + '1yVvjvMCBwJ4PUiqkxuzkT-Xqqxdp6wbG'
df = pd.read_csv(url)
df.head()

Unnamed: 0,product_id,product_name,brand_id,brand_name,loves_count,rating,reviews,size,variation_type,variation_value,...,online_only,out_of_stock,sephora_exclusive,highlights,primary_category,secondary_category,tertiary_category,child_count,child_max_price,child_min_price
0,P473671,Fragrance Discovery Set,6342,19-69,6320,3.6364,11.0,,,,...,1,0,0,"['Unisex/ Genderless Scent', 'Warm &Spicy Scen...",Fragrance,Value & Gift Sets,Perfume Gift Sets,0,,
1,P473668,La Habana Eau de Parfum,6342,19-69,3827,4.1538,13.0,3.4 oz/ 100 mL,Size + Concentration + Formulation,3.4 oz/ 100 mL,...,1,0,0,"['Unisex/ Genderless Scent', 'Layerable Scent'...",Fragrance,Women,Perfume,2,85.0,30.0
2,P473662,Rainbow Bar Eau de Parfum,6342,19-69,3253,4.25,16.0,3.4 oz/ 100 mL,Size + Concentration + Formulation,3.4 oz/ 100 mL,...,1,0,0,"['Unisex/ Genderless Scent', 'Layerable Scent'...",Fragrance,Women,Perfume,2,75.0,30.0
3,P473660,Kasbah Eau de Parfum,6342,19-69,3018,4.4762,21.0,3.4 oz/ 100 mL,Size + Concentration + Formulation,3.4 oz/ 100 mL,...,1,0,0,"['Unisex/ Genderless Scent', 'Layerable Scent'...",Fragrance,Women,Perfume,2,75.0,30.0
4,P473658,Purple Haze Eau de Parfum,6342,19-69,2691,3.2308,13.0,3.4 oz/ 100 mL,Size + Concentration + Formulation,3.4 oz/ 100 mL,...,1,0,0,"['Unisex/ Genderless Scent', 'Layerable Scent'...",Fragrance,Women,Perfume,2,75.0,30.0


In [60]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8494 entries, 0 to 8493
Data columns (total 27 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   product_id          8494 non-null   object 
 1   product_name        8494 non-null   object 
 2   brand_id            8494 non-null   int64  
 3   brand_name          8494 non-null   object 
 4   loves_count         8494 non-null   int64  
 5   rating              8216 non-null   float64
 6   reviews             8216 non-null   float64
 7   size                6863 non-null   object 
 8   variation_type      7050 non-null   object 
 9   variation_value     6896 non-null   object 
 10  variation_desc      1250 non-null   object 
 11  ingredients         7549 non-null   object 
 12  price_usd           8494 non-null   float64
 13  value_price_usd     451 non-null    float64
 14  sale_price_usd      270 non-null    float64
 15  limited_edition     8494 non-null   int64  
 16  new   

## Preprocessing Data

In [61]:
# Dropping the specified columns
columns_to_drop = ['product_id', 'brand_id', 'reviews', 'size', 'variation_type', 'variation_value',
                    'variation_desc', 'ingredients', 'value_price_usd', 'sale_price_usd', 'limited_edition',
                    'new', 'online_only', 'out_of_stock', 'sephora_exclusive', 'child_count', 'child_max_price',
                    'child_min_price']

beauty_product = df.drop(columns=columns_to_drop)
beauty_product = beauty_product.dropna(subset=['highlights', 'rating'])

# Checking the structure after dropping columns
beauty_product.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6054 entries, 0 to 8490
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   product_name        6054 non-null   object 
 1   brand_name          6054 non-null   object 
 2   loves_count         6054 non-null   int64  
 3   rating              6054 non-null   float64
 4   price_usd           6054 non-null   float64
 5   highlights          6054 non-null   object 
 6   primary_category    6054 non-null   object 
 7   secondary_category  6052 non-null   object 
 8   tertiary_category   5321 non-null   object 
dtypes: float64(2), int64(1), object(6)
memory usage: 473.0+ KB


In [20]:
beauty_product['price_idr'] = beauty_product['price_usd'] * 15000
beauty_product = beauty_product.drop(columns='price_usd')

# Fill empty strings for object columns (like ratings, highlights, etc.)
for column in beauty_product.select_dtypes(include=['object']).columns:
  beauty_product[column] = beauty_product[column].fillna('')

# Fill 0 for numerical columns with missing values (if appropriate)
for column in beauty_product.select_dtypes(include=['number']).columns:
  beauty_product[column] = beauty_product[column].fillna(0)

# Check for any remaining missing values:
print(beauty_product.isnull().sum())

product_name          0
brand_name            0
loves_count           0
rating                0
highlights            0
primary_category      0
secondary_category    0
tertiary_category     0
price_idr             0
dtype: int64


In [21]:
beauty_product.head(200)

Unnamed: 0,product_name,brand_name,loves_count,rating,highlights,primary_category,secondary_category,tertiary_category,price_idr
0,Fragrance Discovery Set,19-69,6320,3.6364,"['Unisex/ Genderless Scent', 'Warm &Spicy Scen...",Fragrance,Value & Gift Sets,Perfume Gift Sets,525000.0
1,La Habana Eau de Parfum,19-69,3827,4.1538,"['Unisex/ Genderless Scent', 'Layerable Scent'...",Fragrance,Women,Perfume,2925000.0
2,Rainbow Bar Eau de Parfum,19-69,3253,4.2500,"['Unisex/ Genderless Scent', 'Layerable Scent'...",Fragrance,Women,Perfume,2925000.0
3,Kasbah Eau de Parfum,19-69,3018,4.4762,"['Unisex/ Genderless Scent', 'Layerable Scent'...",Fragrance,Women,Perfume,2925000.0
4,Purple Haze Eau de Parfum,19-69,2691,3.2308,"['Unisex/ Genderless Scent', 'Layerable Scent'...",Fragrance,Women,Perfume,2925000.0
...,...,...,...,...,...,...,...,...,...
271,DIPBROW Gel - Mini,Anastasia Beverly Hills,65374,3.3403,"['Matte Finish', 'Long-wearing', 'Full Coverag...",Makeup,Eye,Eyebrow,135000.0
272,Luminous Foundation,Anastasia Beverly Hills,64069,4.1304,"['Vegan', 'Good for: Dullness/Uneven Texture',...",Makeup,Face,Foundation,570000.0
275,Norvina Pro Pigment Palette Vol. 3 for Face & ...,Anastasia Beverly Hills,51295,4.6010,"['Matte Finish', 'Shimmer Finish', 'Without Pa...",Makeup,Makeup Palettes,,900000.0
276,Superfine Micro-Stroking Detail Brow Pen,Anastasia Beverly Hills,51057,3.9654,"['Waterproof', 'Natural Finish', 'Liquid Formu...",Makeup,Eye,Eyebrow,330000.0


In [23]:
# Save the DataFrame to a JSON file
json_file_path = "beauty_products.json"
beauty_product.to_json(json_file_path, orient='records', indent=4)

print(f"Table data has been saved to {json_file_path}")

Table data has been saved to beauty_products.json


In [None]:
# import random
# import json
# import itertools
# import ast

# class BeautyDialogueGenerator:
#     def __init__(self, df, max_dialogues_per_template=None):
#         self.df = df
#         self.max_dialogues_per_template = max_dialogues_per_template
#         self.user_question_templates = [
#             "Can you give me a {category} product recommendation for {highlights} with a budget around {budget_range}?",
#             "I'm looking for a {category} product that {highlights} within my budget of {budget_range}.",
#             "Recommend a {category} product with {highlights}.",
#             "I want a {category} product that {highlights}.",
#             "What's a good {category} product for {purpose}?",
#             "I need a {category} product for {purpose}.",
#             "I'm searching for a {category} product that matches these criteria: {highlights}",
#             "Looking for a {brand} {category} that suits my style."
#         ]
        
#         self.purpose_mapping = {
#             'Fragrance': ['daily wear', 'special occasion', 'travel', 'gift', 'scent', ],
#             'Makeup': ['professional look', 'evening out', 'minimal makeup', 'bold statement'],
#             'Skincare': ['sensitive skin', 'anti-aging', 'hydration', 'acne prevention', 'dryness', 'pores', 'redness', 'dark circles', 'dullness/uneven texture'],
#             'Hair Care': ['damaged hair', 'volume', 'curly hair', 'color protection', 'scalp', 'frizz']
#         }

#     def clean_highlights(self, highlights):
#         try:
#             if isinstance(highlights, list):
#                 return [h.strip().replace('&', 'and') for h in highlights]
            
#             parsed_highlights = ast.literal_eval(highlights)
#             return [h.strip().replace('&', 'and') for h in parsed_highlights]
#         except (ValueError, SyntaxError, TypeError):
#             return []

#     def determine_budget_range(self, price):
#         if price < 500000:
#             return "under 500k IDR"
#         elif 500000 <= price < 1000000:
#             return "500k-1M IDR"
#         elif 1000000 <= price < 2000000:
#             return "1-2M IDR"
#         elif 2000000 <= price < 3000000:
#             return "2-3M IDR"
#         else:
#             return "over 3M IDR"

#     def generate_product_description(self, row):
#         highlights = self.clean_highlights(row['highlights'])
#         selected_highlights = random.sample(highlights, min(2, len(highlights))) if highlights else []

#         # Template Descriptions
#         description_templates = [
#             (
#                 f"Try {row['product_name']} from {row['brand_name']}, rated {row['rating']}/5 by users! "
#                 f"This product is great for {', '.join(selected_highlights) if selected_highlights else 'various uses'}. "
#                 f"Get it for {row['price_idr']:,} IDR."
#             ),
#             (
#                 f"{row['product_name']} by {row['brand_name']} is a popular choice! "
#                 f"It has a rating of {row['rating']}/5 and is loved for its {', '.join(selected_highlights) if selected_highlights else 'unique features'}. "
#                 f"Price: {row['price_idr']:,} IDR."
#             ),
#             (
#                 f"Looking for something special? Check out {row['product_name']} by {row['brand_name']}. "
#                 f"With a {row['rating']}/5 rating, it's perfect for {', '.join(selected_highlights) if selected_highlights else 'your daily routine'}. "
#                 f"Available at {row['price_idr']:,} IDR."
#             ),
#             (
#                 f"Introducing {row['product_name']} from {row['brand_name']}. "
#                 f"Rated {row['rating']}/5 by beauty enthusiasts, this product is a must-try. "
#                 f"Key benefits: {', '.join(selected_highlights) if selected_highlights else 'its high quality'}. "
#                 f"Price: {row['price_idr']:,} IDR."
#             ),
#             (
#                 f"Get your hands on {row['product_name']} by {row['brand_name']}! "
#                 f"This product is rated {row['rating']}/5 and works well for {', '.join(selected_highlights) if selected_highlights else 'many purposes'}. "
#                 f"Priced at just {row['price_idr']:,} IDR."
#             ),
#         ]

#         # Randomly select a template
#         return random.choice(description_templates)


#     def generate_dialogues(self):
#         all_dialogues = []
        
#         # Shuffle all products to ensure randomness
#         shuffled_products = self.df.sample(frac=1, random_state=42)
        
#         # Cycle through templates
#         template_cycle = itertools.cycle(self.user_question_templates)
        
#         # Track dialogues per template if limit is set
#         template_dialogue_counts = {template: 0 for template in self.user_question_templates}
        
#         for _, product in shuffled_products.iterrows():
#             # Select current template
#             current_template = next(template_cycle)
            
#             # Check template dialogue count limit if set
#             if (self.max_dialogues_per_template is not None and 
#                 template_dialogue_counts[current_template] >= self.max_dialogues_per_template):
#                 continue
            
#             highlights = self.clean_highlights(product['highlights'])
#             selected_highlight = random.choice(highlights) if highlights else 'versatile product'
            
#             # Determine budget range and purpose
#             budget_range = self.determine_budget_range(product['price_idr'])
#             purpose = random.choice(self.purpose_mapping.get(product['primary_category'], ['versatile use']))
            
#             dialogue = {
#                 "messages": [
#                     {
#                         "role": "system", 
#                         "content": "Belle is beauty product recommender system."
#                     },
#                     {
#                         "role": "user", 
#                         "content": current_template.format(
#                             category=product['secondary_category'].lower(),
#                             highlights=selected_highlight.lower(),
#                             brand=product['brand_name'],
#                             budget_range=budget_range,
#                             purpose=purpose
#                         )
#                     },
#                     {
#                         "role": "assistant", 
#                         "content": self.generate_product_description(product)
#                     }
#                 ]
#             }
            
#             all_dialogues.append(dialogue)
            
#             # Increment template dialogue count
#             template_dialogue_counts[current_template] += 1
        
#         return all_dialogues

#     def save_dialogues(self, filename, max_dialogues):
#         dialogues = self.generate_dialogues()
        
#         # Optional: limit number of dialogues if specified
#         if max_dialogues is not None:
#             dialogues = dialogues[:max_dialogues]
        
#         with open(filename, 'w', encoding='utf-8') as f:
#             for dialogue in dialogues:
#                 json.dump(dialogue, f, ensure_ascii=False)
#                 f.write('\n')
        
#         print(f"Saved {len(dialogues)} dialogues to {filename}")
#         return dialogues

# def prepare_dataset(df):
#     quality_df = df[
#         (df['rating'] >= 3.5)   # Rating di atas 3.5
#     ]
    
#     return quality_df


# quality_df = prepare_dataset(beauty_product)

# dialogue_generator = BeautyDialogueGenerator(quality_df)

# dialogue_generator.save_dialogues('beauty_product_dialogues1.jsonl', 200)


Saved 200 dialogues to beauty_product_dialogues1.jsonl


[{'messages': [{'role': 'system',
    'content': 'Belle is beauty product recommender system.'},
   {'role': 'user',
    'content': 'Can you give me a skincare product recommendation for vegan with a budget around under 500k IDR?'},
   {'role': 'assistant',
    'content': 'Get your hands on Mini 100 percent Pure Argan Oil by Josie Maran! This product is rated 4.3975/5 and works well for Cruelty-Free, Vegan. Priced at just 270,000.0 IDR.'}]},
 {'messages': [{'role': 'system',
    'content': 'Belle is beauty product recommender system.'},
   {'role': 'user',
    'content': "I'm looking for a masks product that without parabens within my budget of 500k-1M IDR."},
   {'role': 'assistant',
    'content': 'Umbrian Clay Pore Purifying Face Mask by fresh is a popular choice! It has a rating of 4.5/5 and is loved for its Good for: Pores, Clean at Sephora. Price: 870,000.0 IDR.'}]},
 {'messages': [{'role': 'system',
    'content': 'Belle is beauty product recommender system.'},
   {'role': 'user

In [62]:
from openai import OpenAI
client = OpenAI(api_key="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")

In [23]:
import json

# Define file path
file_path = 'training_beauty.jsonl'

def validate_jsonl_fine_tune(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            for line_number, line in enumerate(file, start=1):
                try:
                    # Parse each line into JSON
                    obj = json.loads(line.strip())
                    
                    # Check if 'messages' key exists
                    if 'messages' not in obj:
                        raise ValueError(f"Line {line_number}: Missing 'messages' key.")
                    
                    # Validate each message structure
                    for msg_index, message in enumerate(obj['messages']):
                        if 'role' not in message or 'content' not in message:
                            raise ValueError(
                                f"Line {line_number}, Message {msg_index + 1}: Missing 'role' or 'content' key."
                            )
                        
                        # Validate role
                        if message['role'] not in ['system', 'user', 'assistant']:
                            raise ValueError(
                                f"Line {line_number}, Message {msg_index + 1}: Invalid role '{message['role']}'."
                            )
                
                except json.JSONDecodeError as e:
                    raise ValueError(f"Line {line_number}: Invalid JSON. Error: {e}")
        
        print("File validated successfully for conversational fine-tuning.")

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    except Exception as e:
        print(f"Validation failed: {e}")

# Run the validation
validate_jsonl_fine_tune(file_path)


File validated successfully for conversational fine-tuning.


## Upload Files

In [24]:
training_file_name = "training_beauty.jsonl"
validation_file_name = "validation_beauty.jsonl"

def upload_file(file_name: str, purpose: str) -> str:
    with open(file_name, "rb") as file_fd:
        response = client.files.create(file=file_fd, purpose=purpose)
    return response.id


training_file_id = upload_file(training_file_name, "fine-tune")
validation_file_id = upload_file(validation_file_name, "fine-tune")

print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

2024-12-11 21:07:46,204 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/files "HTTP/1.1 200 OK"
2024-12-11 21:07:47,246 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/files "HTTP/1.1 200 OK"


Training file ID: file-RxXrn88A1C6sYD9HBRM7ur
Validation file ID: file-YWVWbB9KjaqUo5FJ8SMCTk


## Fine Tuning

In [25]:
MODEL = "gpt-4o-2024-08-06"

response = client.fine_tuning.jobs.create(
    training_file=training_file_id,
    validation_file=validation_file_id,
    model=MODEL,
    suffix="beauty-product",
)

job_id = response.id

print("Job ID:", response.id)
print("Status:", response.status)

2024-12-11 21:07:58,842 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job ID: ftjob-KZeGdJDQUVOyDwXRDpI1SMGT
Status: validating_files


In [26]:
response = client.fine_tuning.jobs.retrieve(job_id)

print("Job ID:", response.id)
print("Status:", response.status)
print("Trained Tokens:", response.trained_tokens)

2024-12-11 21:08:07,016 - httpx - INFO - HTTP Request: GET https://api.openai.com/v1/fine_tuning/jobs/ftjob-KZeGdJDQUVOyDwXRDpI1SMGT "HTTP/1.1 200 OK"


Job ID: ftjob-KZeGdJDQUVOyDwXRDpI1SMGT
Status: validating_files
Trained Tokens: None


In [27]:
response = client.fine_tuning.jobs.list_events(job_id)

events = response.data
events.reverse()

for event in events:
    print(event.message)

2024-12-11 21:08:10,475 - httpx - INFO - HTTP Request: GET https://api.openai.com/v1/fine_tuning/jobs/ftjob-KZeGdJDQUVOyDwXRDpI1SMGT/events "HTTP/1.1 200 OK"


Created fine-tuning job: ftjob-KZeGdJDQUVOyDwXRDpI1SMGT
Validating training file: file-RxXrn88A1C6sYD9HBRM7ur and validation file: file-YWVWbB9KjaqUo5FJ8SMCTk


In [28]:
response = client.fine_tuning.jobs.retrieve(job_id)
fine_tuned_model_id = response.fine_tuned_model

if fine_tuned_model_id is None:
    raise RuntimeError(
        "Fine-tuned model ID not found. Your job has likely not been completed yet."
    )

print("Fine-tuned model ID:", fine_tuned_model_id)

2024-12-11 21:08:13,837 - httpx - INFO - HTTP Request: GET https://api.openai.com/v1/fine_tuning/jobs/ftjob-KZeGdJDQUVOyDwXRDpI1SMGT "HTTP/1.1 200 OK"


RuntimeError: Fine-tuned model ID not found. Your job has likely not been completed yet.

## Application

In [11]:
from telegram import Update
from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
import logging
from typing import List, Dict
import ast
import nest_asyncio
import json
import openai
from openai import OpenAI

In [12]:
nest_asyncio.apply()

In [13]:
# Configure logging
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO
)

In [14]:
# Initialize OpenAI and Telegram credentials
TELEGRAM_TOKEN = "7743797514:AAHxYnY_4i93ubtK-yBcDU3WI1cTw0A-6Nc"
openai.api_key = "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"
FINE_TUNED_MODEL_NAME = "ft:gpt-4o-2024-08-06:project-ta:beauty-product:AWJoVA78"
PRE_FINE_TUNE_MODEL_NAME = "gpt-4o-2024-08-06"
client = OpenAI(api_key="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")

In [15]:
user_contexts = {}

## HELLO

In [19]:
# FOR FINE TUNED MODEL
class BelleRecommenderBot:
    def __init__(self):
        self.products = self.load_products()
        self.system_prompt = """You are Belle, a friendly and knowledgeable beauty advisor! Your personality traits are:
1. Enthusiastic and passionate about beauty products
2. Empathetic to users' beauty concerns
3. Well-versed in luxury and drugstore brands
4. Professional but warm and approachable
5. Always explains the "why" behind recommendations

When recommending products:
- Consider the user's specific needs and preferences
- Highlight product benefits and unique features
- Share relevant tips for product usage
- Consider the price point and value for money
- Reference the love count and ratings to build trust
- Organize recommendations by category when giving multiple options
- Always explains the "why" behind recommendations

Remember to maintain a conversational tone while providing expert advice!"""

    def load_products(self) -> List[Dict]:
        try:
            with open('beauty_products.json', 'r', encoding='utf-8') as f:
                products = json.load(f)
            logging.info(f"Successfully loaded {len(products)} products")
            return products
        except FileNotFoundError:
            logging.error("Products database file not found")
            return []
        except json.JSONDecodeError:
            logging.error("Error decoding JSON file")
            return []

    def format_price(self, price_idr: float) -> str:
        """Format price in IDR with thousand separator"""
        return f"IDR {price_idr:,.0f}"

    def parse_highlights(self, highlights_str: str) -> List[str]:
        """Parse highlights string into list"""
        try:
            return ast.literal_eval(highlights_str)
        except:
            return []

    def format_product_recommendation(self, product: Dict) -> str:
        """Format a product recommendation in an engaging way"""
        # Parse highlights
        highlights = self.parse_highlights(product.get('highlights', '[]'))
        highlights_text = '\n• '.join(highlights) if highlights else 'Not specified'
        
        # Format love count
        loves = product.get('loves_count', 0)
        if loves >= 1000:
            loves_text = f"{loves/1000:.1f}K"
        else:
            loves_text = str(loves)
        
        # Create recommendation text
        return f"""
✨ {product['brand_name']} ✨
💄 {product['product_name']}

📝 Product Details:
• Category: {product['primary_category']} > {product['secondary_category']} > {product['tertiary_category']}
• Rating: {'⭐' * int(product.get('rating', 0))} ({product.get('rating', 'N/A')}/5)
• Loved by: {loves_text} beauty enthusiasts

🌟 Key Highlights:
• {highlights_text}

💰 Price: {self.format_price(product['price_idr'])}

"""
    def get_product_by_name(self, product_name: str) -> Dict:
        """Find product by name from the list of products."""
        # for product in self.products:
        #     if product_name.lower() in product['product_name'].lower():
        #         return product
        # return None  # Return None if product is not found
        try:
            # Split the recommendation into lines to handle multi-line responses
            product_lines = product_name.lower().split('\n')
            
            # Look for product names in each line
            for line in product_lines:
                # Clean up the line
                line = line.strip()
                # Skip empty lines
                if not line:
                    continue
                    
                # Remove common prefixes and decorators
                line = line.replace('**', '')
                line = line.replace('*', '')
                for prefix in ['1.', '2.', '3.', '-', '•']:
                    line = line.replace(prefix, '')
                line = line.strip()
                
                # Try to find matching product
                for product in self.products:
                    # Check both product name and brand name
                    if (line in product['product_name'].lower() or 
                        line in product['brand_name'].lower() or
                        any(word in product['product_name'].lower() 
                            for word in line.split() if len(word) > 3)):
                        return product
                            
        except Exception as e:
            logging.error(f"Error in product matching: {str(e)}")
            logging.error(f"Input text was: {product_name}")
        return None

    async def get_recommendation(self, user_input: str, user_id: int) -> str:
        context = user_contexts.get(user_id, [])
        
        # Prepare conversation context
        messages = [
            {"role": "system", "content": self.system_prompt}
        ]
        
        # Add relevant product context
        product_context = f"""
You have access to a beauty product database with the following structure:
- Product name and brand name
- Category hierarchy (Primary > Secondary > Tertiary)
- Price in IDR
- Ratings and love counts from real users
- Product highlights and features

Base your recommendations on these real products and their actual data.
"""
        messages.append({"role": "system", "content": product_context})
        
        # Add conversation history
        for msg in context:
            messages.append(msg)
        
        # Add current user input
        messages.append({"role": "user", "content": user_input})
        
        try:
            response = client.chat.completions.create(
                model=FINE_TUNED_MODEL_NAME,
                messages=messages,
                max_tokens=400,
                temperature=0.7
            )
            
            recommendation = response.choices[0].message.content

            recommended_product = self.get_product_by_name(recommendation)
        
            # Use the `format_product_recommendation` function to format the recommendation
            if recommended_product:
                formatted_recommendation = self.format_product_recommendation(recommended_product)
            else:
                formatted_recommendation = "I couldn't find a product based on your request, could you clarify or try another one?"

            
            # Update conversation history
            context.extend([
                {"role": "user", "content": user_input},
                {"role": "assistant", "content": recommendation}
            ])
            
            # Manage context length
            if len(context) > 10:
                context = context[-10:]
            
            user_contexts[user_id] = context
            
            return recommendation
            
        except Exception as e:
            logging.error(f"Error getting recommendation: {str(e)}")
            return "I'm having a moment! 😅 Could you please try asking me again? I want to make sure I give you the perfect recommendation!"

In [206]:
# FOR PRE FINE TUNED MODEL
class BelleRecommenderBot:
    def __init__(self):
        self.products = self.load_products()
        self.system_prompt = """You are Belle, a friendly and knowledgeable beauty advisor! Your personality traits are:
1. Enthusiastic and passionate about beauty products
2. Empathetic to users' beauty concerns
3. Well-versed in luxury and drugstore brands
4. Professional but warm and approachable
5. Always explains the "why" behind recommendations

When recommending products:
- Consider the user's specific needs and preferences
- Highlight product benefits and unique features
- Share relevant tips for product usage
- Consider the price point and value for money
- Reference the love count and ratings to build trust
- Organize recommendations by category when giving multiple options
- Always explains the "why" behind recommendations

Remember to maintain a conversational tone while providing expert advice!"""

    def load_products(self) -> List[Dict]:
        try:
            with open('beauty_products.json', 'r', encoding='utf-8') as f:
                products = json.load(f)
            logging.info(f"Successfully loaded {len(products)} products")
            return products
        except FileNotFoundError:
            logging.error("Products database file not found")
            return []
        except json.JSONDecodeError:
            logging.error("Error decoding JSON file")
            return []

    def format_price(self, price_idr: float) -> str:
        """Format price in IDR with thousand separator"""
        return f"IDR {price_idr:,.0f}"

    def parse_highlights(self, highlights_str: str) -> List[str]:
        """Parse highlights string into list"""
        try:
            return ast.literal_eval(highlights_str)
        except:
            return []

    def format_product_recommendation(self, product: Dict) -> str:
        """Format a product recommendation in an engaging way"""
        # Parse highlights
        highlights = self.parse_highlights(product.get('highlights', '[]'))
        highlights_text = '\n• '.join(highlights) if highlights else 'Not specified'
        
        # Format love count
        loves = product.get('loves_count', 0)
        if loves >= 1000:
            loves_text = f"{loves/1000:.1f}K"
        else:
            loves_text = str(loves)
        
        # Create recommendation text
        return f"""
✨ {product['brand_name']} ✨
💄 {product['product_name']}

📝 Product Details:
• Category: {product['primary_category']} > {product['secondary_category']} > {product['tertiary_category']}
• Rating: {'⭐' * int(product.get('rating', 0))} ({product.get('rating', 'N/A')}/5)
• Loved by: {loves_text} beauty enthusiasts

🌟 Key Highlights:
• {highlights_text}

💰 Price: {self.format_price(product['price_idr'])}

"""
    def get_product_by_name(self, product_name: str) -> Dict:
        """Find product by name from the list of products."""
        # for product in self.products:
        #     if product_name.lower() in product['product_name'].lower():
        #         return product
        # return None  # Return None if product is not found
        try:
            # Split the recommendation into lines to handle multi-line responses
            product_lines = product_name.lower().split('\n')
            
            # Look for product names in each line
            for line in product_lines:
                # Clean up the line
                line = line.strip()
                # Skip empty lines
                if not line:
                    continue
                    
                # Remove common prefixes and decorators
                line = line.replace('**', '')
                line = line.replace('*', '')
                for prefix in ['1.', '2.', '3.', '-', '•']:
                    line = line.replace(prefix, '')
                line = line.strip()
                
                # Try to find matching product
                for product in self.products:
                    # Check both product name and brand name
                    if (line in product['product_name'].lower() or 
                        line in product['brand_name'].lower() or
                        any(word in product['product_name'].lower() 
                            for word in line.split() if len(word) > 3)):
                        return product
                            
        except Exception as e:
            logging.error(f"Error in product matching: {str(e)}")
            logging.error(f"Input text was: {product_name}")
        return None

    async def get_recommendation(self, user_input: str, user_id: int) -> str:
        context = user_contexts.get(user_id, [])
        
        # Prepare conversation context
        messages = [
            {"role": "system", "content": self.system_prompt}
        ]
        
        # Add relevant product context
        product_context = f"""
You have access to a beauty product database with the following structure:
- Product name and brand name
- Category hierarchy (Primary > Secondary > Tertiary)
- Price in IDR
- Ratings and love counts from real users
- Product highlights and features

Base your recommendations on these real products and their actual data.
"""
        messages.append({"role": "system", "content": product_context})
        
        # Add conversation history
        for msg in context:
            messages.append(msg)
        
        # Add current user input
        messages.append({"role": "user", "content": user_input})
        
        try:
            response = await client.chat.completions.create(
                model=PRE_FINE_TUNE_MODEL_NAME,
                messages=messages,
                max_tokens=400,
                temperature=0.7
            )
            
            recommendation = response.choices[0].message.content

            recommended_product = self.get_product_by_name(recommendation)
        
            # Use the `format_product_recommendation` function to format the recommendation
            if recommended_product:
                formatted_recommendation = self.format_product_recommendation(recommended_product)
            else:
                formatted_recommendation = "I couldn't find a product based on your request, could you clarify or try another one?"

            
            # Update conversation history
            context.extend([
                {"role": "user", "content": user_input},
                {"role": "assistant", "content": recommendation}
            ])
            
            # Manage context length
            if len(context) > 10:
                context = context[-10:]
            
            user_contexts[user_id] = context
            
            return recommendation
            
        except Exception as e:
            logging.error(f"Error getting recommendation: {str(e)}")
            return "I'm having a moment! 😅 Could you please try asking me again? I want to make sure I give you the perfect recommendation!"

In [17]:
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    user_first_name = update.message.from_user.first_name
    welcome_message = f"""
Hi {user_first_name}! 🌟 I'm Belle, your personal beauty bestie! 

I'm here to help you discover amazing beauty products that are perfect for YOU! I know all about:
🎨 Makeup treasures
✨ Skincare gems
💅 Beauty tools
... and so much more!

I can help you find products based on:
• Your unique beauty needs
• Specific concerns or goals
• Price range that works for you
• Preferred brands or ingredients
• Product categories you're interested in

Plus, I'll share real ratings and love counts from our beauty community! 

What kind of beauty recommendation can I help you with today? 💖
    """
    await update.message.reply_text(welcome_message)

async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    user_id = update.message.from_user.id
    user_input = update.message.text
    
    await update.message.chat.send_action(action="typing")
    
    recommender = BelleRecommenderBot()
    response = await recommender.get_recommendation(user_input, user_id)
    
    await update.message.reply_text(response)

async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    help_text = """
✨ Let me help you find your perfect beauty matches! ✨

Try asking me things like:
🎯 "I need a color corrector for dark circles"
💫 "What's a good face product from YSL?"
💝 "Show me luxury makeup under 600k IDR"
✨ "What are the most loved makeup products?"
🌟 "Recommend products for natural finish makeup"

Pro Tips:
• Tell me your skin concerns or beauty goals
• Mention your budget range
• Let me know if you prefer specific brands
• Ask about products with high ratings
• Tell me if you want products without certain ingredients

Want to start fresh? Type /reset to clear our chat history!

Ready to discover your new favorite beauty products? Ask away! 💄✨
    """
    await update.message.reply_text(help_text)

async def reset_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    user_id = update.message.from_user.id
    if user_id in user_contexts:
        del user_contexts[user_id]
    await update.message.reply_text("Fresh start! ✨ What beauty recommendations can I help you with?")


In [21]:
import asyncio
import json

async def main():
    application = Application.builder().token(TELEGRAM_TOKEN).build()

    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    application.add_handler(CommandHandler("reset", reset_command))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))

    print("💄 Belle Beauty Recommender Bot is ready to help!")
    application.run_polling()

if __name__ == '__main__':
    asyncio.run(main())

💄 Belle Beauty Recommender Bot is ready to help!


2024-12-11 01:28:32,214 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7743797514:AAHxYnY_4i93ubtK-yBcDU3WI1cTw0A-6Nc/getMe "HTTP/1.1 200 OK"
2024-12-11 01:28:32,424 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7743797514:AAHxYnY_4i93ubtK-yBcDU3WI1cTw0A-6Nc/deleteWebhook "HTTP/1.1 200 OK"
2024-12-11 01:28:32,424 - telegram.ext.Application - INFO - Application started
2024-12-11 01:28:37,997 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7743797514:AAHxYnY_4i93ubtK-yBcDU3WI1cTw0A-6Nc/getUpdates "HTTP/1.1 200 OK"
2024-12-11 01:28:38,816 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7743797514:AAHxYnY_4i93ubtK-yBcDU3WI1cTw0A-6Nc/sendChatAction "HTTP/1.1 200 OK"
2024-12-11 01:28:38,856 - root - INFO - Successfully loaded 6054 products
2024-12-11 01:28:43,948 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-11 01:28:44,766 - httpx - INFO - HTTP Request: POST https:/

RuntimeError: Cannot close a running event loop