In [None]:
import os
import pandas as pd
import json
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Filepaths
DATASET_FILE = "/content/temp_cleaned_laptops.csv"  # laptop catalogue CSV passed to preprocess_data by main()
JSON_FILE = "/content/merged_file.json"  # intents JSON passed to preprocess_data by main()
MODEL_DIR = "./laptop_recommendation_model"  # Trainer output_dir; the final model and tokenizer are saved here

# Ensure model directory exists
os.makedirs(MODEL_DIR, exist_ok=True)

def preprocess_data(dataset_file, json_file):
    """
    Preprocess dataset and intents for model training

    Args:
        dataset_file (str): Path to laptop dataset CSV; the "Category" and
            "Price" columns are read
        json_file (str): Path to intents JSON file with a top-level "intents"
            list of {"tag": ..., "patterns": [...]} objects

    Returns:
        tuple: (training dataframe with "text" and "label" columns,
            intent -> label mapping, label -> intent mapping,
            original laptop dataframe)
    """
    print("\n--- Preprocessing Data ---")

    # Load laptop dataset
    df = pd.read_csv(dataset_file)
    print(f"Dataset loaded with {len(df)} rows.")

    # Load intents; the explicit encoding keeps non-ASCII patterns identical
    # across platforms instead of depending on the locale default
    with open(json_file, 'r', encoding='utf-8') as file:
        intents = json.load(file)["intents"]
    print(f"Loaded intents with {len(intents)} tags.")

    # Map intents to numeric labels (the position in the JSON list is the label)
    intent_to_label = {intent["tag"]: idx for idx, intent in enumerate(intents)}
    label_to_intent = {idx: intent["tag"] for idx, intent in enumerate(intents)}

    # One training sample per intent pattern
    training_data = [
        {"text": pattern, "label": idx}
        for idx, intent in enumerate(intents)
        for pattern in intent["patterns"]
    ]

    # Add laptop category and price hints as training data. All synthetic
    # samples share the "recommendation" label (label 0 if that tag is absent).
    recommendation_label = intent_to_label.get("recommendation", 0)
    for category, price in zip(df["Category"], df["Price"]):
        # Rows with a missing category or price would otherwise become
        # "nan laptop recommendation under $nan" noise samples
        if pd.isna(category) or pd.isna(price):
            continue
        training_data.append({
            "text": f"{category} laptop recommendation under ${price}",
            "label": recommendation_label
        })

    # Explicit columns keep the frame well-formed even when no sample exists
    training_df = pd.DataFrame(training_data, columns=["text", "label"])
    return training_df, intent_to_label, label_to_intent, df

def compute_metrics(pred):
    """
    Compute evaluation metrics for the model

    Args:
        pred (EvalPrediction): Predictions and labels

    Returns:
        dict: accuracy plus weighted-average f1, precision and recall
    """
    labels = pred.label_ids

    # Some models hand back a tuple whose first element holds the logits
    logits = pred.predictions[0] if isinstance(pred.predictions, tuple) else pred.predictions
    preds = logits.argmax(-1)

    # zero_division=0 yields the same numbers as the default but without an
    # UndefinedMetricWarning for every class the model never predicts, which
    # is common on the small evaluation split used here
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='weighted', zero_division=0
    )
    accuracy = accuracy_score(labels, preds)

    return {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

def train_intent_model(training_df, num_labels):
    """
    Fine-tune bert-base-uncased as an intent classifier and save it to MODEL_DIR

    Args:
        training_df (pd.DataFrame): Training data with "text" and "label" columns
        num_labels (int): Number of intent labels

    Returns:
        tuple: Trained model, tokenizer, and trainer
    """
    checkpoint = "bert-base-uncased"
    tensor_columns = ["input_ids", "attention_mask", "label"]

    tokenizer = BertTokenizer.from_pretrained(checkpoint)

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    # Tokenize everything, then hold out 20% of the samples for evaluation
    tokenized = Dataset.from_pandas(training_df).map(tokenize_function, batched=True)
    dataset = tokenized.train_test_split(test_size=0.2)

    # Expose only the model inputs and the label as torch tensors
    for split_name in ("train", "test"):
        dataset[split_name].set_format(type="torch", columns=tensor_columns)

    model = BertForSequenceClassification.from_pretrained(
        checkpoint,
        num_labels=num_labels
    )

    # Evaluate and checkpoint once per epoch; the checkpoint with the best
    # f1 is reloaded when training ends
    training_args = TrainingArguments(
        output_dir=MODEL_DIR,
        num_train_epochs=5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir=f"{MODEL_DIR}/logs",
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="f1"
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        compute_metrics=compute_metrics
    )
    trainer.train()

    # Persist the weights and the tokenizer side by side
    trainer.save_model(MODEL_DIR)
    tokenizer.save_pretrained(MODEL_DIR)

    return model, tokenizer, trainer

def recommend_laptop(prompt, model, tokenizer, intent_to_label, df):
    """
    Generate laptop recommendation based on user prompt

    Args:
        prompt (str): User query
        model (BertForSequenceClassification): Trained intent classification model
        tokenizer (BertTokenizer): Tokenizer used for encoding
        intent_to_label (dict): Mapping of intents to numeric labels
        df (pd.DataFrame): Laptop dataset with "Category", "Price" and "Product" columns

    Returns:
        str: Product name of the first matching laptop, or
            "No matching laptops found." when nothing matches
    """
    import re  # local import: this cell's import block does not provide `re`

    # Encode input
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)

    # After Trainer.train() the model may live on the GPU; the inputs have to
    # be on the same device or the forward pass fails
    device = getattr(model, "device", None)
    if device is not None:
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Predict intent (eval mode switches dropout off for a deterministic result)
    model.eval()
    with torch.no_grad():
        intent_logits = model(**inputs).logits
    intent_idx = int(torch.argmax(intent_logits, dim=-1)[0])

    # Resolve the label through the mapping's values rather than relying on
    # the position of the key inside the dict
    label_to_intent = {label: tag for tag, label in intent_to_label.items()}
    intent = label_to_intent.get(intent_idx)
    if intent is None:
        return "No matching laptops found."

    # Extract budget if mentioned, e.g. "under $1000" or "under $1,000"
    budget = None
    budget_match = re.search(r"under\s*\$\s*(\d[\d,]*(?:\.\d+)?)", prompt, re.IGNORECASE)
    if budget_match:
        try:
            budget = float(budget_match.group(1).replace(",", ""))
        except ValueError:
            budget = None

    # Filter recommendations. The mask is built step by step: the original
    # `(a & b) if budget else True` indexed the frame with the literal True
    # whenever no budget was given.
    keyword = intent.split("_")[0]
    mask = df["Category"].str.contains(keyword, case=False, regex=False, na=False)
    if budget is not None:
        mask = mask & (df["Price"] <= budget)
    recommendations = df[mask]

    if recommendations.empty:
        return "No matching laptops found."
    return recommendations.iloc[0]["Product"]

def main():
    """
    Run the whole pipeline: preprocess, train, then print sample recommendations
    """
    # Sample queries answered once training has finished
    test_prompts = [
        "Recommend a gaming laptop under $1000",
        "Professional laptop for creative work",
        "Affordable laptop for students"
    ]

    # Build the training frame and the label mappings from the files on disk
    training_df, intent_to_label, label_to_intent, df = preprocess_data(
        DATASET_FILE, JSON_FILE
    )

    # One output class per intent tag
    num_labels = len(intent_to_label)
    model, tokenizer, trainer = train_intent_model(training_df, num_labels)

    print("\n--- Laptop Recommendations ---")
    for prompt in test_prompts:
        recommendation = recommend_laptop(
            prompt, model, tokenizer, intent_to_label, df
        )
        print(f"Prompt: {prompt}")
        print(f"Recommendation: {recommendation}\n")

if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'datasets'

In [None]:
# Corrected commands for installing dependencies and downloading the spaCy model
!pip install nltk spacy torch transformers sentence-transformers pandas numpy scikit-learn optuna tqdm
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m75.4 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
import os
import re
import json
import numpy as np
import pandas as pd
import torch
import logging
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback,
    AutoConfig
)
from torch.utils.data import Dataset as TorchDataset, DataLoader
import torch.nn.functional as F
import random
from typing import List, Tuple, Dict, Any, Optional, Union
import warnings
from tqdm import tqdm
from dataclasses import dataclass
from torch import nn
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer, util
import spacy
from sklearn.metrics import precision_recall_fscore_support
import optuna

# Set up paths for Colab
DATASET_FILE = "/content/temp_cleaned_laptops.csv"
JSON_FILE = "/content/merged_file.json"
MODEL_DIR = "/content/laptop_recommendation_model"
LOG_DIR = "/content/logs"

# Create the data, model and log directories up front (no-op when they exist)
for directory in (os.path.dirname(DATASET_FILE), MODEL_DIR, LOG_DIR):
    os.makedirs(directory, exist_ok=True)

# Install required packages and download resources
def setup_environment():
    """Install the required packages and download the NLTK / spaCy resources.

    Every step is checked: a failing pip or spaCy command, or an NLTK resource
    that cannot be downloaded, is reported instead of being followed by the
    success message. Errors are printed, not raised, so the function stays
    best-effort for callers.
    """
    # Local imports: the cell's import block provides neither module
    import subprocess
    import sys

    packages = [
        'transformers', 'nltk', 'spacy', 'pandas',
        'scikit-learn', 'sentence-transformers', 'torch'
    ]
    try:
        # Install required packages. sys.executable targets the interpreter
        # that is running this code; check=True turns a non-zero exit status
        # into an exception (os.system only returned the status).
        subprocess.run(
            [sys.executable, '-m', 'pip', 'install', '-q', *packages],
            check=True
        )

        # Download NLTK resources; nltk.download returns False on failure
        for resource in ('punkt', 'wordnet', 'averaged_perceptron_tagger'):
            if not nltk.download(resource):
                raise RuntimeError(f"NLTK resource '{resource}' could not be downloaded")

        # Download spaCy model
        subprocess.run(
            [sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm'],
            check=True
        )

        print("Environment setup completed successfully!")
    except Exception as e:
        print(f"Error setting up environment: {e}")

# Set up logging with proper error handling
try:
    # Send records to a file under LOG_DIR and to a stream handler
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(os.path.join(LOG_DIR, "chatbot.log")),
            logging.StreamHandler()
        ]
    )
except Exception as e:
    # Fall back to the default configuration when the handlers cannot be created
    print(f"Error setting up logging: {e}")
    logging.basicConfig(level=logging.INFO)

# Module-level logger used by the functions and classes below
logger = logging.getLogger(__name__)

# Suppress warnings and logging
warnings.filterwarnings('ignore')  # silences every Python warning for the whole process
os.environ["WANDB_DISABLED"] = "true"
logging.getLogger("transformers").setLevel(logging.ERROR)  # only errors from transformers are shown
# Configuration optimized for Colab
CONFIG = {
    'max_length': 512,  # tokenizer truncation length (create_dataset, recommend_laptop)
    'batch_size': 16,  # Increased for Colab's GPU
    'num_epochs': 5,   # Increased for better training
    'warmup_ratio': 0.1,  # passed to TrainingArguments
    'weight_decay': 0.01,  # passed to TrainingArguments
    'learning_rate': 2e-5,  # passed to TrainingArguments
    'dropout': 0.1,  # dropout probability of CustomModel's classification head
    'focal_loss_alpha': 0.25,  # NOTE(review): not referenced by the code visible here
    'focal_loss_gamma': 2.0  # NOTE(review): not referenced by the code visible here
}

def setup_nltk():
    """Download required NLTK resources.

    Failures are reported on stdout and never raised, so calling this at
    import time cannot abort the module.
    """
    resources = ['wordnet', 'averaged_perceptron_tagger', 'omw-1.4']
    for resource in resources:
        try:
            downloaded = nltk.download(resource, quiet=True)
        except Exception as e:
            print(f"Error downloading NLTK resource {resource}: {e}")
            continue
        # nltk.download signals most failures (no network, unknown id) by
        # returning False rather than raising, so the result is checked too
        if not downloaded:
            print(f"Error downloading NLTK resource {resource}: download failed")

# Call setup at import time
setup_nltk()

# Try loading spaCy model; download it once and retry when it is missing.
# spacy.load raises OSError for a model that is not installed. Catching only
# that keeps KeyboardInterrupt and unrelated bugs from triggering a download.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    os.system('python -m spacy download en_core_web_sm')
    nlp = spacy.load('en_core_web_sm')

@dataclass
class LaptopSpecs:
    """Specifications parsed from a free-text feature string.

    Every field defaults to None, meaning "not found in the text".
    """
    ram: Optional[int] = None  # GB
    gpu: Optional[str] = None  # matched GPU text, e.g. "NVIDIA RTX 3060"
    storage: Optional[str] = None  # comma-separated "<size>GB <TYPE>" entries
    refresh_rate: Optional[int] = None  # Hz
    processor: Optional[str] = None  # matched processor text
    screen_size: Optional[float] = None  # inches
    battery_life: Optional[int] = None  # hours
    weight: Optional[float] = None  # kg (pounds are converted)

def extract_specs(features_str: str) -> LaptopSpecs:
    """Extract laptop specifications from a free-text feature description.

    Args:
        features_str: Feature text such as '16GB DDR4 RAM, 1TB SSD, 144Hz'.
            Anything that is not a string (for example the NaN pandas uses
            for a missing cell) yields an empty LaptopSpecs.

    Returns:
        LaptopSpecs with each field set when its pattern is found, else None.
    """
    specs = LaptopSpecs()
    if not isinstance(features_str, str):
        return specs

    # RAM: "<n>GB [DDRx] RAM"
    ram_match = re.search(r'(\d+)\s*GB\s*(?:DDR\d)?\s*RAM', features_str, re.IGNORECASE)
    if ram_match:
        specs.ram = int(ram_match.group(1))

    # GPU: the first pattern that matches wins. "GeForce" is optional so that
    # both "NVIDIA RTX 3060" and "NVIDIA GeForce RTX 3060" are recognised.
    gpu_patterns = [
        r'(NVIDIA|AMD)\s+(?:GeForce\s+)?(RTX|GTX|Radeon)\s*\d*\s*\w*\s*(?:Ti|Super)?',
        r'Intel\s+(?:UHD|Iris)\s+Graphics\s+\w*'
    ]
    for pattern in gpu_patterns:
        gpu_match = re.search(pattern, features_str, re.IGNORECASE)
        if gpu_match:
            specs.gpu = gpu_match.group(0).strip()
            break

    # Storage: every "<n>GB|TB <type>" entry, normalised to GB (1 TB = 1000 GB)
    storage_matches = re.findall(r'(\d+)\s*(GB|TB)\s*(SSD|HDD|NVMe|PCIe)', features_str, re.IGNORECASE)
    if storage_matches:
        storage_list = []
        for size, unit, type_ in storage_matches:
            size = int(size)
            if unit.upper() == 'TB':
                size *= 1000
            storage_list.append(f"{size}GB {type_.upper()}")
        specs.storage = ', '.join(storage_list)

    # Refresh rate: "<n>Hz"
    refresh_match = re.search(r'(\d+)\s*Hz', features_str, re.IGNORECASE)
    if refresh_match:
        specs.refresh_rate = int(refresh_match.group(1))

    # Processor: vendor followed by two words, e.g. "Intel Core i7-1165G7"
    processor_match = re.search(r'(Intel|AMD)\s+\w+\s+\w+(?:-\w+)?', features_str)
    if processor_match:
        specs.processor = processor_match.group(0)

    # Screen size: 15.6" / 15.6-inch / 15.6 inch. The decimal part takes any
    # number of digits; the former `\d+\.?\d?` read 15.75" as 75.0.
    screen_match = re.search(r'(\d+(?:\.\d+)?)\s*(?:"|-?\s*inch)', features_str, re.IGNORECASE)
    if screen_match:
        specs.screen_size = float(screen_match.group(1))

    # Battery life: "<n> hr|hrs|hour"
    battery_match = re.search(r'(\d+)\s*(?:hr|hour|hrs)', features_str, re.IGNORECASE)
    if battery_match:
        specs.battery_life = int(battery_match.group(1))

    # Weight: kg or lb/lbs, stored in kg. Same decimal fix as the screen size
    # (2.25 kg used to be read as 25.0).
    weight_match = re.search(r'(\d+(?:\.\d+)?)\s*(kgs?|lbs?)\b', features_str, re.IGNORECASE)
    if weight_match:
        weight = float(weight_match.group(1))
        if weight_match.group(2).lower().startswith('lb'):
            weight *= 0.453592  # Convert to kg
        specs.weight = weight

    return specs

class EnhancedScoring:
    """Heuristic scoring of a laptop row against a free-text requirement."""

    @staticmethod
    def compute_laptop_score(row: pd.Series, requirements: str, required_specs: Dict) -> float:
        """Score one laptop; a higher score means a better match.

        Args:
            row: Laptop record. 'Features' is parsed with extract_specs;
                'Amazon Rating' and 'Price' are used when present and not NaN.
            requirements: Free-text requirement. The keywords gaming,
                business/professional/work, student and video editing/content
                creation/creative each enable a scoring section.
            required_specs: Optional hard requirements keyed by 'ram' (GB),
                'gpu' (substring of the GPU name) or 'storage' (GB).
                None is treated as "no requirements".

        Returns:
            The accumulated score as a float.
        """
        score = 0.0
        required_specs = required_specs or {}

        # A missing Features cell (NaN) is scored as "no known specs"
        features_text = row['Features'] if isinstance(row['Features'], str) else ''
        features = extract_specs(features_text)

        # Base score components
        if 'Amazon Rating' in row and not pd.isna(row['Amazon Rating']):
            score += float(row['Amazon Rating']) * 15  # Increased weight for ratings

        # Parse the price once so every section below works on the same
        # float; the student section used to compare the raw cell value
        price = None
        if 'Price' in row and not pd.isna(row['Price']):
            price = float(row['Price'])
            if price > 0:
                # Price efficiency bonus: cheaper laptops earn more points
                score += (1000 / price) * 10

        # Dynamic requirement scoring
        req_lower = (requirements or '').lower()

        # Gaming laptop scoring with enhanced GPU considerations
        if 'gaming' in req_lower:
            if features.gpu:
                gpu_upper = features.gpu.upper()
                if 'RTX 30' in gpu_upper or 'RTX 40' in gpu_upper:
                    score += 50
                elif 'RTX 20' in gpu_upper:
                    score += 40
                elif 'RTX' in gpu_upper:
                    score += 30
                elif 'GTX' in gpu_upper:
                    score += 20

            if features.refresh_rate:
                if features.refresh_rate >= 240:
                    score += 25
                elif features.refresh_rate >= 144:
                    score += 20
                elif features.refresh_rate >= 120:
                    score += 15

            if features.ram:
                if features.ram >= 32:
                    score += 30
                elif features.ram >= 16:
                    score += 20

        # Professional/Business laptop scoring with enhanced criteria
        if any(x in req_lower for x in ['business', 'professional', 'work']):
            if features.weight and features.weight < 1.5:
                score += 25
            elif features.weight and features.weight < 2.0:
                score += 15

            if features.battery_life:
                if features.battery_life >= 10:
                    score += 25
                elif features.battery_life >= 8:
                    score += 15

            if features.processor and ('Intel Core i7' in features.processor or 'Ryzen 7' in features.processor):
                score += 20

        # Student laptop scoring
        if 'student' in req_lower:
            if features.weight and features.weight < 2.0:
                score += 20
            if features.battery_life and features.battery_life >= 8:
                score += 20
            if price is not None and price < 800:
                score += 25

        # Content creation scoring
        if any(x in req_lower for x in ['video editing', 'content creation', 'creative']):
            if features.ram and features.ram >= 32:
                score += 30
            if features.gpu and 'RTX' in features.gpu.upper():
                score += 25
            if features.processor and any(x in features.processor for x in ['i7', 'i9', 'Ryzen 7', 'Ryzen 9']):
                score += 25

        # Required specs matching with weighted scoring
        for spec, required_value in required_specs.items():
            if spec == 'ram' and features.ram:
                if features.ram >= required_value:
                    score += 30
                elif features.ram >= required_value * 0.75:
                    # Partial credit when within 25% of the requested RAM
                    score += 15
            elif spec == 'gpu' and features.gpu:
                # str() tolerates a non-string requirement such as 3060
                if str(required_value).upper() in features.gpu.upper():
                    score += 30
            elif spec == 'storage' and features.storage:
                # features.storage looks like "512GB SSD, 1000GB HDD"
                storage_sizes = [int(s.split('GB')[0]) for s in features.storage.split(',')]
                if any(size >= required_value for size in storage_sizes):
                    score += 20

        return score

class CustomModel(nn.Module):
    """Encoder followed by dropout and a linear classification head."""

    def __init__(self, base_model, num_labels: int):
        super().__init__()
        hidden_size = base_model.config.hidden_size
        self.base_model = base_model
        self.dropout = nn.Dropout(CONFIG['dropout'])
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, **inputs):
        """Return logits computed from the hidden state at sequence position 0."""
        encoder_outputs = self.base_model(**inputs)
        first_token_state = encoder_outputs.last_hidden_state[:, 0]
        return self.classifier(self.dropout(first_token_state))

    def gradient_checkpointing_enable(self, **kwargs):
        """Forward the call, with all keyword arguments, to the wrapped base model."""
        self.base_model.gradient_checkpointing_enable(**kwargs)

class LaptopDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels."""

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # One tensor per encoding field, plus the label under the 'labels' key
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

class EnhancedLaptopRecommendationModel:
    def __init__(self, model_name: str = 'microsoft/deberta-v3-base'):
        """Set up devices, helper models and the fixed intent -> label table.

        The classifier and its tokenizer stay None until train_model runs.
        NOTE(review): model_name is stored but train_model loads
        'bert-base-uncased' regardless.
        """
        self.model_name = model_name
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Filled in by train_model
        self.tokenizer = None
        self.model = None

        self.label_encoder = LabelEncoder()
        self.sentence_transformer = SentenceTransformer('all-MiniLM-L6-v2')

        # The position in this tuple is the numeric class id
        intent_tags = (
            'greeting',
            'gaming_laptop',
            'business_laptop',
            'programming_laptop',
            'compare_laptops',
            'high_performance_laptop',
        )
        self.intent_labels = {tag: label_id for label_id, tag in enumerate(intent_tags)}

    def prepare_training_data(self) -> Tuple[List[str], List[int]]:
        """Collect (pattern, label id) pairs from JSON_FILE.

        Intents whose tag is not listed in self.intent_labels are skipped.
        Raises ValueError when nothing usable is found; every error is logged
        and re-raised.
        """
        try:
            with open(JSON_FILE, 'r') as f:
                data = json.load(f)

            texts, labels = [], []
            for intent in data['intents']:
                tag = intent['tag']
                if tag not in self.intent_labels:
                    continue

                label_id = self.intent_labels[tag]
                patterns = intent['patterns']
                texts.extend(patterns)
                # Every pattern of an intent shares that intent's label
                labels.extend(label_id for _ in range(len(patterns)))

            if not (texts and labels):
                raise ValueError("No training data found in JSON file")

            return texts, labels

        except Exception as e:
            logger.error(f"Error preparing training data: {e}")
            raise

    def train_model(self):
        """Train the laptop recommendation model

        Fine-tunes 'bert-base-uncased' on the intent patterns returned by
        prepare_training_data, saves the model, the tokenizer and the intent
        label mapping to MODEL_DIR, and logs the final evaluation results.

        Returns:
            pd.DataFrame: The laptop dataset read from DATASET_FILE. It is not
            used for training; it is only loaded, checked for emptiness and
            returned.

        Raises:
            ValueError: If the dataset is empty or no training data exists.
            Any other error is logged and re-raised unchanged.
        """
        try:
            logger.info("Starting model training...")

            # Load laptop dataset
            laptop_df = pd.read_csv(DATASET_FILE)
            if laptop_df.empty:
                raise ValueError("Empty dataset loaded")

            logger.info(f"Loaded dataset with {len(laptop_df)} rows")

            # Load and preprocess intent data
            texts, labels = self.prepare_training_data()
            logger.info(f"Prepared {len(texts)} training examples with {len(set(labels))} unique intents")

            if len(texts) == 0 or len(labels) == 0:
                raise ValueError("No training data available")

            # Split data into train and validation sets
            # (fixed random_state keeps the 80/20 split reproducible)
            train_texts, val_texts, train_labels, val_labels = train_test_split(
                texts, labels, test_size=0.2, random_state=42
            )

            # Initialize tokenizer and model
            # NOTE(review): the checkpoint is hard-coded; self.model_name from
            # __init__ is not used here - confirm which one is intended.
            self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
            self.model = AutoModelForSequenceClassification.from_pretrained(
                'bert-base-uncased',
                num_labels=len(self.intent_labels)
            )

            # Move model to GPU if available
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            self.model.to(device)
            logger.info(f"Using device: {device}")

            # Create datasets
            train_dataset = self.create_dataset(train_texts, train_labels)
            val_dataset = self.create_dataset(val_texts, val_labels)

            logger.info(f"Train dataset size: {len(train_dataset)}")
            logger.info(f"Validation dataset size: {len(val_dataset)}")

            # Set up training arguments
            # NOTE(review): newer transformers releases spell
            # `evaluation_strategy` as `eval_strategy` - confirm against the
            # installed version.
            training_args = TrainingArguments(
                output_dir=MODEL_DIR,
                num_train_epochs=CONFIG['num_epochs'],
                per_device_train_batch_size=CONFIG['batch_size'],
                per_device_eval_batch_size=CONFIG['batch_size'],
                warmup_ratio=CONFIG['warmup_ratio'],
                weight_decay=CONFIG['weight_decay'],
                learning_rate=CONFIG['learning_rate'],
                evaluation_strategy="epoch",
                save_strategy="epoch",
                load_best_model_at_end=True,
                push_to_hub=False,
                fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
                report_to="none",
                logging_dir=os.path.join(LOG_DIR, 'runs'),
                logging_steps=10
            )

            # Initialize trainer
            # No compute_metrics is passed - presumably only the evaluation
            # loss is reported; TODO confirm.
            trainer = Trainer(
                model=self.model,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=val_dataset
            )

            # Train model
            logger.info("Training started...")
            trainer.train()
            logger.info("Training completed!")

            # Save model and tokenizer
            self.model.save_pretrained(MODEL_DIR)
            self.tokenizer.save_pretrained(MODEL_DIR)
            logger.info(f"Model and tokenizer saved to {MODEL_DIR}")

            # Save intent labels mapping
            with open(os.path.join(MODEL_DIR, 'intent_labels.json'), 'w') as f:
                json.dump(self.intent_labels, f)

            # Evaluate model
            eval_results = trainer.evaluate()
            logger.info(f"Evaluation results: {eval_results}")

            return laptop_df

        except Exception as e:
            logger.error(f"Error during model training: {e}")
            raise

    def create_dataset(self, texts: List[str], labels: List[int]) -> "LaptopDataset":
        """Tokenize texts and wrap them, with their labels, in a LaptopDataset.

        The return annotation is a string on purpose: the bare name `Dataset`
        is not imported by this cell (only `Dataset as TorchDataset` is), so
        evaluating it while the class is defined raised NameError on a fresh
        kernel.

        Args:
            texts: Input sentences.
            labels: One integer class id per sentence.

        Returns:
            LaptopDataset holding the padded encodings as numpy arrays.

        Raises:
            ValueError: If texts and labels differ in length.
        """
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"({len(texts)} != {len(labels)})"
            )

        # padding=True pads every sequence to the longest one in this call
        encodings = self.tokenizer(
            texts,
            truncation=True,
            padding=True,
            max_length=CONFIG['max_length'],
            return_tensors='pt'
        )

        return LaptopDataset(
            {k: v.numpy() for k, v in encodings.items()},
            labels
        )

    def advanced_text_augmentation(self, texts: List[str], labels: List[str]) -> Tuple[List[str], List[str]]:
        """Augment texts by synonym replacement, word dropout and reordering.

        For each (text, label) pair the original is kept and, in addition:
        one variant per word that has a WordNet synonym, one variant with
        roughly 10% of the words dropped (texts longer than 3 words) and one
        variant with the interior words shuffled (texts longer than 4 words).
        Every variant keeps the label of its source text.

        Args:
            texts: Source sentences.
            labels: One label per sentence.

        Returns:
            (augmented_texts, augmented_labels) as two lists of equal length;
            two empty lists when there is nothing to augment.
        """
        augmented_data = []

        for text, label in tqdm(zip(texts, labels), desc="Augmenting data"):
            try:
                augmented_data.append((text, label))

                # Simple word-based augmentation
                words = text.split()

                # Synonym replacement
                for i, word in enumerate(words):
                    # WordNet joins multi-word lemmas with "_" ("video_game");
                    # restore the spaces, ignore case when comparing with the
                    # source word, and drop duplicates while keeping order
                    synonyms = list(dict.fromkeys(
                        lemma.name().replace('_', ' ')
                        for syn in wordnet.synsets(word)
                        for lemma in syn.lemmas()
                        if lemma.name().lower() != word.lower()
                    ))
                    if synonyms:
                        new_words = words.copy()
                        new_words[i] = random.choice(synonyms)
                        augmented_data.append((' '.join(new_words), label))

                # Word dropout (randomly remove words)
                if len(words) > 3:
                    dropped = [w for w in words if random.random() > 0.1]
                    if dropped:
                        augmented_data.append((' '.join(dropped), label))

                # Word order perturbation (first and last word stay in place)
                if len(words) > 4:
                    mid_words = words[1:-1]
                    random.shuffle(mid_words)
                    perturbed = [words[0]] + mid_words + [words[-1]]
                    augmented_data.append((' '.join(perturbed), label))

            except Exception as e:
                logger.warning(f"Error augmenting text '{text}': {e}")
                continue

        # zip(*[]) yields nothing, so unpacking it into two names would raise
        if not augmented_data:
            return [], []

        augmented_texts, augmented_labels = zip(*augmented_data)
        return list(augmented_texts), list(augmented_labels)

    def recommend_laptop(self, prompt: str, laptop_df: pd.DataFrame) -> str:
        """
        Classify the prompt's intent and return a formatted list of laptops.

        Raises ValueError when no model/tokenizer is available. Any failure
        after that check is logged and returned as an apology string.
        """
        if not (self.model and self.tokenizer):
            raise ValueError("Model not trained or loaded.")

        def describe(laptop):
            # Render one laptop row as a multi-line text entry
            if isinstance(laptop['Features'], str):
                feature_items = laptop['Features'].split(',')
            else:
                feature_items = []
            features_formatted = "\n  • ".join(feature_items)

            entry = (
                f"Product: {laptop['Product']}\n"
                f"Category: {laptop['Category']}\n"
                f"Price: ${laptop['Price']:,.2f}\n"
                f"Features:\n  • {features_formatted}\n"
            )

            # The rating line is optional
            if 'Amazon Rating' in laptop and pd.notnull(laptop['Amazon Rating']):
                entry += f"Rating: {laptop['Amazon Rating']}/5.0\n"
            return entry

        try:
            # Tokenize the prompt and put the tensors on the model's device
            encoded = self.tokenizer(
                prompt,
                truncation=True,
                padding=True,
                max_length=CONFIG['max_length'],
                return_tensors='pt'
            )
            inputs = {k: v.to(self.model.device) for k, v in encoded.items()}

            with torch.no_grad():
                outputs = self.model(**inputs)
                predicted_intent = torch.argmax(outputs.logits, dim=1).item()

            # Reverse lookup: numeric class id -> intent tag
            tags = list(self.intent_labels.keys())
            label_ids = list(self.intent_labels.values())
            intent_label = tags[label_ids.index(predicted_intent)]

            filtered_df = self.filter_laptops(laptop_df, intent_label, prompt)
            if filtered_df.empty:
                return "Sorry, I couldn't find any laptops matching your criteria."

            recommendations = [describe(laptop) for _, laptop in filtered_df.iterrows()]

            # Intent-specific header, with a generic fallback
            headers = {
                'gaming_laptop': "🎮 Gaming Laptop Recommendations:",
                'business_laptop': "💼 Professional Laptop Recommendations:",
                'programming_laptop': "💻 Programming Laptop Recommendations:",
                'high_performance_laptop': "⚡ High-Performance Laptop Recommendations:"
            }
            header = headers.get(intent_label, "🔍 Laptop Recommendations:")

            return f"{header}\n\n" + "\n---\n".join(recommendations)

        except Exception as e:
            logger.error(f"Error generating recommendation: {e}")
            return f"Sorry, an error occurred while generating recommendations: {str(e)}"

    def filter_laptops(self, df, intent, prompt):
        """Filter laptops based on intent and user prompt"""
        filtered_df = df.copy()

        # Convert price to numeric, removing currency symbols and commas
        filtered_df['Price'] = pd.to_numeric(filtered_df['Price'].astype(str).str.replace('$', '').str.replace(',', ''), errors='coerce')

        # Extract budget from prompt if present
        budget_match = re.search(r'(?:under|below|less than|up to)?\s*\$?(\d+(?:,\d{3})*(?:\.\d{2})?)', prompt, re.IGNORECASE)
        budget = float(budget_match.group(1).replace(',', '')) if budget_match else float('inf')

        if budget != float('inf'):
            filtered_df = filtered_df[filtered_df['Price'] <= budget]

        # Video editing specific filters
        if 'video' in intent.lower() or 'creative' in intent.lower():
            filtered_df = filtered_df[
                # Require dedicated GPU
                (filtered_df['Features'].str.contains('RTX 30|RTX 40|RX 6800', case=False, regex=True)) &
                # Prefer higher resolution displays
                (filtered_df['Features'].str.contains('QHD|4K|2K|1440p', case=False, regex=True))
            ]
            # Extract RAM size and filter for 32GB or higher
            filtered_df['RAM_Size'] = filtered_df['Features'].str.extract('(\d+)\s*GB').astype(float)
            filtered_df = filtered_df[filtered_df['RAM_Size'] >= 32]

        # High performance specific filters
        elif 'high' in intent.lower() and 'performance' in intent.lower():
            filtered_df = filtered_df[
                # Premium price range
                (filtered_df['Price'] >= 2000) &
                # High-end processors
                (filtered_df['Features'].str.contains('i9|Ryzen 9|M2 Max', case=False, regex=True)) &
                # Dedicated GPU
                (filtered_df['Features'].str.contains('RTX|RX', case=False, regex=True))
            ]
            # Extract RAM size and filter for 32GB or higher
            filtered_df['RAM_Size'] = filtered_df['Features'].str.extract('(\d+)\s*GB').astype(float)
            filtered_df = filtered_df[filtered_df['RAM_Size'] >= 32]

        # Gaming specific filters
        elif 'gaming' in intent.lower():
            filtered_df = filtered_df[
                filtered_df['Features'].str.contains('RTX 30|RTX 40|RX 6800', case=False, regex=True)
            ]
            # High refresh rate for gaming
            filtered_df['Refresh_Rate'] = filtered_df['Features'].str.extract('(\d+)\s*Hz').astype(float)
            filtered_df = filtered_df[filtered_df['Refresh_Rate'] >= 144]

        # Student/Budget specific filters
        elif 'student' in intent.lower() or 'budget' in intent.lower():
            max_price = min(float(budget if budget != float('inf') else 1000), 1000)
            filtered_df = filtered_df[filtered_df['Price'] <= max_price]

        return filtered_df.sort_values(['Amazon Rating', 'Price'], ascending=[False, True]).head(5)

    def score_laptop(self, row, prompt):
        """Score a laptop based on its features and the user's prompt"""
        score = float(row['Amazon Rating']) * 10  # Base score from rating

        features = str(row['Features']).lower()
        prompt = prompt.lower()

        # Extract key specifications
        ram_match = re.search(r'(\d+)\s*GB', features)
        ram_size = int(ram_match.group(1)) if ram_match else 8

        refresh_match = re.search(r'(\d+)\s*Hz', features)
        refresh_rate = int(refresh_match.group(1)) if refresh_match else 60

        # GPU scoring
        if 'RTX 40' in features:
            score += 40  # Latest gen GPU
        elif 'RTX 30' in features:
            score += 30  # Previous gen GPU
        elif 'RX 6800' in features:
            score += 25  # AMD high-end GPU

        # RAM scoring (exponential up to 64GB)
        score += min(ram_size / 8, 8) * 5

        # Display scoring
        if 'QHD' in features or '2K' in features or '1440p' in features:
            score += 15
        elif '4K' in features:
            score += 20

        # Refresh rate scoring
        score += min(refresh_rate / 60, 4) * 5

        # CPU scoring
        if 'i9' in features or 'ryzen 9' in features:
            score += 25
        elif 'i7' in features or 'ryzen 7' in features:
            score += 20

        # Storage scoring
        if 'PCIe' in features or 'NVMe' in features:
            score += 15
        if '2 TB' in features:
            score += 20
        elif '1 TB' in features:
            score += 15

        # Task-specific scoring
        if 'video' in prompt or 'creative' in prompt:
            if ram_size >= 32:
                score += 30
            if 'RTX' in features:  # NVIDIA GPUs better for creative work
                score += 20
        elif 'gaming' in prompt:
            if refresh_rate >= 144:
                score += 25
            if ram_size >= 16:
                score += 15
        elif 'high' in prompt and 'performance' in prompt:
            if ram_size >= 32:
                score += 30
            if 'i9' in features or 'ryzen 9' in features:
                score += 25

        return score

def main():
    """Train the recommendation model and print answers for sample prompts.

    Any failure is logged and re-raised.
    """
    # Sample queries answered once training has finished
    test_prompts = [
        "Recommend a gaming laptop under $1000",
        "Professional laptop for creative work",
        "Budget laptop for students",
        "High-performance laptop for video editing"
    ]

    try:
        recommendation_model = EnhancedLaptopRecommendationModel()

        # train_model hands back the laptop catalogue it loaded
        laptop_df = recommendation_model.train_model()

        for prompt in test_prompts:
            recommendation = recommendation_model.recommend_laptop(prompt, laptop_df)
            print(f"\nPrompt: {prompt}")
            print(f"Recommendation:\n{recommendation}")

    except Exception as e:
        logger.error(f"Error in main execution: {e}")
        raise

if __name__ == "__main__":
    main()

{'eval_loss': 1.726220726966858, 'eval_runtime': 0.0676, 'eval_samples_per_second': 295.924, 'eval_steps_per_second': 29.592, 'epoch': 1.0}
{'loss': 1.7411, 'grad_norm': 6.103487014770508, 'learning_rate': 1.3636363636363637e-05, 'epoch': 2.0}
{'eval_loss': 1.6626465320587158, 'eval_runtime': 0.0287, 'eval_samples_per_second': 696.092, 'eval_steps_per_second': 69.609, 'epoch': 2.0}
{'eval_loss': 1.5907714366912842, 'eval_runtime': 0.0701, 'eval_samples_per_second': 285.394, 'eval_steps_per_second': 28.539, 'epoch': 3.0}
{'loss': 1.5119, 'grad_norm': 6.250085353851318, 'learning_rate': 5.4545454545454545e-06, 'epoch': 4.0}
{'eval_loss': 1.510888695716858, 'eval_runtime': 0.039, 'eval_samples_per_second': 512.557, 'eval_steps_per_second': 51.256, 'epoch': 4.0}
{'eval_loss': 1.489843726158142, 'eval_runtime': 0.0344, 'eval_samples_per_second': 581.275, 'eval_steps_per_second': 58.127, 'epoch': 5.0}
{'train_runtime': 163.1476, 'train_samples_per_second': 2.452, 'train_steps_per_second': 0.