In [1]:
from google.colab import drive
drive.mount("/content/drive")

ModuleNotFoundError: No module named 'google.colab'

In [12]:
pip install transformers



In [13]:
!python -m spacy download en_core_web_md

Collecting en-core-web-md==3.6.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.6.0/en_core_web_md-3.6.0-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')


In [14]:
pip install sentence-transformers



In [15]:
import time
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util
import numpy as np
from transformers import GPT2Tokenizer, GPT2LMHeadModel

In [16]:
import torch

In [17]:
if torch.cuda.is_available():
    device = "cuda"
    print("Using GPU!")
else:
    device = "cpu"
    print("Using CPU!")

Using CPU!


In [None]:
df = pd.read_csv("/content/drive/MyDrive/hnm_dataset/articles.csv")
df['detail_desc'].fillna('', inplace=True)
df['combined_text'] = df['prod_name'] + ' ' + df['product_type_name'] + ' ' + df['product_group_name'] + ' ' + df['department_name'] + ' ' + df['index_group_name'] + ' ' + df['colour_group_name'] + ' ' + df['detail_desc']

model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device) 

product_texts = df['combined_text'].tolist()
product_embeddings = model.encode(product_texts, convert_to_tensor=True).to(device)

disliked_items = set()

def get_top_recommendations(query, top_k=5, exclude_indices=[]):
    query_embedding = model.encode(query, convert_to_tensor=True).to(device)

    similarities = util.pytorch_cos_sim(query_embedding, product_embeddings)[0]

    top_indices = torch.topk(similarities, k=top_k + len(exclude_indices)).indices.cpu().numpy()

    top_indices = [idx for idx in top_indices if idx not in exclude_indices][:top_k]
    top_products = df.iloc[top_indices]

    return top_products

def display_image(article_id, prod_name, ax):
    article_id_str = str(article_id)
    subfolder = '0' + article_id_str[:2]
    picture_name = '0' + article_id_str
    path = f'/content/drive/MyDrive/hnm_dataset/images/{subfolder}/{picture_name}.jpg'

    if os.path.exists(path):
        img = plt.imread(path)
        ax.imshow(img)
        ax.set_title(f"{article_id} - {prod_name}")
        ax.axis('off')
    else:
        ax.imshow(np.zeros((200, 200, 3)))
        ax.set_title(f"Error: {prod_name} ({article_id}) missing!")
        ax.axis('off')

def get_closest_match(name, top_matches):
    name_embedding = model.encode(name, convert_to_tensor=True).to(device)

    top_names = top_matches['prod_name'].tolist()
    top_name_embeddings = model.encode(top_names, convert_to_tensor=True).to(device)

    similarities = util.pytorch_cos_sim(name_embedding, top_name_embeddings)[0]

    top_index = torch.argmax(similarities).item()

    return top_matches.iloc[top_index]['article_id']

In [19]:
model_name = "gpt2-medium"  
gpt_model = GPT2LMHeadModel.from_pretrained(model_name)
gpt_tokenizer = GPT2Tokenizer.from_pretrained(model_name)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

In [20]:
def generate_fashion_suggestion(prompt, max_length=100):
    """Generate a fashion suggestion using GPT model."""

    input_ids = gpt_tokenizer.encode(prompt, return_tensors="pt")
    output = gpt_model.generate(input_ids, max_length=max_length, num_return_sequences=1, no_repeat_ngram_size=2)
    generated_text = gpt_tokenizer.decode(output[0], skip_special_tokens=True)

    suggestion = generated_text[len(prompt):]

    return suggestion.strip()

In [21]:
def fashion_bot():
    print("Welcome to the fashion recommendation bot! Ask me for outfit suggestions.")
    while True:
        user_query = input("\nYou: ")
        if user_query.lower() in ["quit", "exit"]:
            print("Fashion bot: Goodbye!")
            break

        exclude_indices = [] 
        top_matches = get_top_recommendations(user_query)


        prompt = f"Suggest an outfit based on: '{user_query}'"
        suggestion = generate_fashion_suggestion(prompt)
        print(f"\nFashion bot: {suggestion}")

        print("\nFashion bot: Here are some suggestions from our wadrobe:\n")
        fig, axes = plt.subplots(1, len(top_matches), figsize=(15, 5))
        for i, (index, row) in enumerate(top_matches.iterrows()):
            display_image(row['article_id'], row['prod_name'], axes[i])
        plt.tight_layout()
        plt.show()
        time.sleep(2)

        while True:
            feedback = input("Would you like to provide feedback on the items? (yes/no) ")
            if feedback.lower() != 'yes':
                break

            disliked_name = input("Type the name of the item you dislike: ")
            closest_match_idx = df.index[df['prod_name'].str.contains(disliked_name, case=False)].tolist()[0]
            exclude_indices.append(closest_match_idx)

            top_matches = get_top_recommendations(user_query, top_k=5, exclude_indices=exclude_indices)

            print("\nFashion bot: Here are new suggestions based on your feedback:\n")
            fig, axes = plt.subplots(1, len(top_matches), figsize=(15, 5))
            for i, (index, row) in enumerate(top_matches.iterrows()):
                display_image(row['article_id'], row['prod_name'], axes[i])
            plt.tight_layout()
            plt.show()
            time.sleep(2)

In [None]:
fashion_bot()