# In [2]:
import spacy
import pandas as pd
import difflib

# Load laptop data from CSV
laptops_df = pd.read_csv("laptops_cleaned.csv")
nlp = spacy.load("en_core_web_lg")

def _parse_number(text, cast):
    """Convert a number-like token text with *cast*; return None if it cannot be converted."""
    try:
        # Thousands separators ("1,200") are number-like for spaCy but rejected by int()/float().
        return cast(text.replace(",", ""))
    except ValueError:
        # Spelled-out numbers, fractions, decimals passed to int(), etc.
        return None


def preprocess_text(sentence):
    """Extract the price, RAM size and GPU name mentioned in a free-text request.

    The sentence is tokenized with the module-level spaCy pipeline ``nlp``.

    Args:
        sentence: The user's request as a string.

    Returns:
        A ``(price, ram, gpu)`` tuple:

        * ``price`` -- float taken from the number-like token that follows a
          ``$`` token, or ``0.0`` when none was found.
        * ``ram`` -- int taken from the number-like token that precedes a
          ``gb`` token (case-insensitive), or ``0`` when none was found.
        * ``gpu`` -- the words starting at a vendor name (nvidia/amd/intel)
          joined by spaces, or the string ``"None"`` when no vendor token
          was found.

        When several price or RAM mentions occur, the last convertible one wins.
    """
    doc = nlp(sentence)
    word_list = sentence.split()

    vendors = ('nvidia', 'amd', 'intel')
    # Stop words for the whitespace-split scan (punctuation stays glued to words there).
    word_stops = (".", "and", "gpu", ",", "gpu,", "cpu,", "gpu.", "cpu.")
    # Stop words for the spaCy-token scan.
    token_stops = (".", "and", "gpu", ",", "cpu")

    price = None
    ram = None
    # Words collected from the vendor name up to the next stop word.
    words_between = []

    for i, token in enumerate(doc):
        lowered = token.text.lower()

        if lowered == '$':
            # The amount is the token right after the currency sign.
            if i + 1 < len(doc) and doc[i + 1].like_num:
                parsed = _parse_number(doc[i + 1].text, float)
                if parsed is not None:
                    price = parsed
        elif lowered == 'gb':
            # The amount is the token right before the unit, so the guard must
            # be on i - 1 (a trailing "GB" is valid; a leading one has no number).
            if i > 0 and doc[i - 1].like_num:
                parsed = _parse_number(doc[i - 1].text, int)
                if parsed is not None:
                    ram = parsed

        if lowered in vendors:
            # First attempt: take the phrase after the first vendor word in the
            # whitespace-split sentence.
            # NOTE(review): words_between is not reset here, so a sentence with
            # several vendor tokens appends the same phrase again for each one.
            for j, word in enumerate(word_list):
                if word.lower() in vendors:
                    words_between.append(word)
                    for k in range(j + 1, len(word_list)):
                        if word_list[k].lower() in word_stops:
                            break
                        words_between.append(word_list[k])
                    break
            # Six or more collected words: discard them and rebuild the phrase
            # from the spaCy tokens that follow the current vendor token.
            if len(words_between) >= 6:
                words_between.clear()
                words_between.append(token.text)
                for j in range(i + 1, len(doc)):
                    if doc[j].text.lower() in token_stops:
                        break
                    words_between.append(doc[j].text)

    # The string "None" (not the None object) marks "no GPU found".
    gpu = ' '.join(words_between) if words_between else "None"

    if price is None:
        price = 0.0
    if ram is None:
        ram = 0
    return price, ram, gpu

def recommend_laptop_nlp(price, ram, gpu, similar_gpus,
                         laptops=None, gpu_candidates=None):
    """Recommend the cheapest laptop matching the price, RAM and GPU constraints.

    Args:
        price: Upper bound compared against the ``Price_euros`` column; a
            falsy value (``0``, ``0.0``, ``None``) disables the price filter.
        ram: Lower bound compared against the ``Ram`` column; a falsy value
            disables the RAM filter.
        gpu: Requested GPU name. A falsy or blank value, or the placeholder
            string ``"None"``, disables the GPU filter.
        similar_gpus: GPU names accepted as matches for ``gpu``. When empty,
            candidates are instead selected by fuzzy-matching the first word
            of ``gpu`` against every known GPU name (ratio >= 0.4).
        laptops: Optional DataFrame to search; defaults to the module-level
            ``laptops_df``.
        gpu_candidates: Optional iterable of known GPU names used by the
            fuzzy fallback; defaults to the module-level ``gpus_list``.

    Returns:
        The matching row with the lowest ``Price_euros`` (a pandas Series), or
        the string ``"No laptops match the specified criteria."`` when nothing
        matches.
    """
    filtered_laptops = laptops_df if laptops is None else laptops

    if price:
        filtered_laptops = filtered_laptops[filtered_laptops['Price_euros'] <= price]
    if ram:
        filtered_laptops = filtered_laptops[filtered_laptops['Ram'] >= ram]

    # "None" is a non-empty (truthy) string used as a "no GPU" placeholder; it
    # must not be fuzzy-matched as if it were a GPU name.
    requested_gpu = str(gpu).strip() if gpu else ""
    if requested_gpu and requested_gpu != "None":
        if similar_gpus is not None and len(similar_gpus) > 0:
            accepted_gpus = similar_gpus
        else:
            # Fallback: compare only the first word of the request (typically
            # the vendor) with each known GPU name.
            first_word = requested_gpu.split()[0].lower()
            known_gpus = gpus_list if gpu_candidates is None else gpu_candidates
            accepted_gpus = [
                name for name in known_gpus
                if difflib.SequenceMatcher(None, first_word, name.lower()).ratio() >= 0.4
            ]
        filtered_laptops = filtered_laptops[filtered_laptops['Gpu'].isin(accepted_gpus)]

    # Number of laptops left after filtering (kept from the original output).
    print(len(filtered_laptops))

    if filtered_laptops.empty:
        return "No laptops match the specified criteria."
    # Cheapest remaining laptop.
    return filtered_laptops.sort_values(by='Price_euros').iloc[0]

# Example request used to exercise the pipeline end to end.
sample_text = (
    "I want a laptop with a ram of 1GB, around $1200 "
    "and Intel HD Graphics 6000 gpu"
)
# Pull the structured constraints out of the free-text request.
sample_price, sample_ram, sample_gpu = preprocess_text(sample_text)

def find_similar_gpu(user_gpu, gpus_list, threshold=0.8):
    """Return the entries of ``gpus_list`` that resemble ``user_gpu``.

    Similarity is the case-insensitive ``difflib.SequenceMatcher`` ratio
    between ``user_gpu`` and each candidate. The minimum ratio depends on how
    many whitespace-separated words ``user_gpu`` has: 0.3 for one word, 0.5
    for two, 0.8 for three and 0.9 otherwise.

    Args:
        user_gpu: GPU name requested by the user.
        gpus_list: Iterable of known GPU names.
        threshold: Accepted for compatibility; the function body never reads it.

    Returns:
        A list of the matching candidates, in the order they appear in
        ``gpus_list``.
    """
    # Shorter queries carry less information, so they get a looser cut-off.
    cutoff_by_word_count = {1: 0.3, 2: 0.5, 3: 0.8}
    cutoff = cutoff_by_word_count.get(len(user_gpu.split()), 0.9)

    return [
        candidate
        for candidate in gpus_list
        if difflib.SequenceMatcher(
            None, user_gpu.lower(), candidate.lower()
        ).ratio() >= cutoff
    ]

# Every distinct GPU name present in the dataset.
gpus_list = laptops_df['Gpu'].unique().tolist()
print(sample_gpu)

# An exact name match is used as-is; otherwise fall back to fuzzy matching.
similar_gpus = (
    [sample_gpu]
    if sample_gpu in gpus_list
    else find_similar_gpu(sample_gpu, gpus_list)
)
print("\n", similar_gpus)

# Recommend a laptop for the sample request and show the result.
recommendation = recommend_laptop_nlp(
    sample_price, sample_ram, sample_gpu, similar_gpus
)
print(recommendation)

# Output of the cell above:
# Intel HD Graphics 6000
#
#  ['Intel HD Graphics 6000']
# 0
# No laptops match the specified criteria.
