## INSTRUCTION:

Please download and unzip all files into the same folder as this notebook.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import nltk
import string
import scipy
from fuzzywuzzy import fuzz 
from fuzzywuzzy import process
import spacy
nlp = spacy.load('en_core_web_md')

import pickle
from keras.models import load_model

def save_obj(obj, name):
    """Serialize ``obj`` with pickle into the file ``name + '.pkl'``.

    Parameters
    ----------
    obj : object
        Any picklable Python object.
    name : str
        Path of the target file without the ``.pkl`` extension.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        # Always write with the newest pickle protocol available.
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Return the object unpickled from the file ``name + '.pkl'``.

    Parameters
    ----------
    name : str
        Path of the pickle file without the ``.pkl`` extension.

    Returns
    -------
    object
        Whatever object was stored in the file.
    """
    source_path = name + '.pkl'
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(source_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

Using TensorFlow backend.


In [2]:
# Load the pre-computed artifacts that the functions below rely on.
# load_obj appends '.pkl', so these read tfidf_vectorizer.pkl and SVD.pkl.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
vectorizer = load_obj("tfidf_vectorizer")  # used via .transform() in similarproduct
SVD = load_obj("SVD")  # applied via .transform() to the vectorizer output
# The first CSV column becomes the index; similarproduct treats the index as the
# product ID and the columns from position 4 onward as the product vector.
merge = pd.read_csv("vectors.csv",index_col=0)
# Outfit table; recommendbyID reads the columns product_id, outfit_id,
# outfit_item_type and product_full_name.
outfit = pd.read_csv("outfit_combinations.csv")

In [3]:
def recommendbyID(id):
    """Print the items of one outfit containing the product that best matches ``id``.

    The input is fuzzy-matched (``fuzz.token_set_ratio``) against the product
    IDs in the module-level ``outfit`` table. The first outfit that contains
    the matched product is selected, and every item of that outfit is printed
    as ``<item type>: <full name> (<product id>)``.

    Parameters
    ----------
    id : str
        Product ID to look up (it does not have to be an exact match).

    Returns
    -------
    None
        The recommendation is printed, not returned.

    Raises
    ------
    ValueError
        If the outfit table offers no product ID to match against.
    """
    # A product ID is repeated once per outfit it belongs to; score each
    # distinct ID only once. unique() keeps first-occurrence order, so the
    # winner among equally scored IDs is the same as when scanning every row.
    candidate_ids = outfit['product_id'].dropna().unique().tolist()

    # Use fuzzy-matching to find the product_id most similar to the input id.
    # extractOne returns None when there are no candidates at all.
    match = process.extractOne(id, candidate_ids, scorer=fuzz.token_set_ratio)
    if match is None:
        raise ValueError("No product IDs available to match against {!r}.".format(id))
    foundID = match[0]

    # Get every outfit_id that involves the matched product and keep the first one.
    outfit_ids = outfit.loc[outfit['product_id'] == foundID, 'outfit_id']
    first_outfit_id = outfit_ids.iloc[0]
    products = outfit[outfit['outfit_id'] == first_outfit_id]

    # Format the output. str.format also copes with non-string cells
    # (e.g. a missing name read as NaN), which '+' concatenation does not.
    columns = ["outfit_item_type", "product_full_name", "product_id"]
    for item_type, full_name, prod_id in products[columns].itertuples(index=False, name=None):
        print("{}: {} ({})".format(item_type, full_name, prod_id))
        
        

def preprocess_text(sen):
    """Clean ``sen`` and return its non-stop-word lemmas joined by single spaces.

    Steps: replace every character that is not an ASCII letter with a space,
    drop single letters that are surrounded by whitespace, collapse runs of
    whitespace, then run the text through the module-level spaCy pipeline
    ``nlp`` and keep the lemma of each token that is not a stop word.

    Parameters
    ----------
    sen : str
        Raw input text.

    Returns
    -------
    str
        Space-separated lemmas.
    """
    # Punctuation, digits and any other non-letter become a space.
    cleaned = re.sub('[^a-zA-Z]', ' ', sen)
    # Remove isolated single letters (whitespace on both sides).
    cleaned = re.sub(r"\s+[a-zA-Z]\s+", ' ', cleaned)
    # Squash consecutive whitespace into one space.
    cleaned = re.sub(r'\s+', ' ', cleaned)

    # Lemmatize and skip stop words.
    lemmas = []
    for token in nlp(cleaned):
        if token.is_stop:
            continue
        lemmas.append(token.lemma_)

    return " ".join(lemmas)



def similarproduct(query):
    """Recommend an outfit for the product whose vector is most similar to ``query``.

    The text is cleaned with ``preprocess_text``, turned into a vector with the
    module-level ``vectorizer`` and ``SVD`` objects, and compared by cosine
    similarity with every row of ``merge`` (columns from position 4 onward).
    The index label of the best row is handed to ``recommendbyID``.

    Parameters
    ----------
    query : str
        Free text describing the product (description, brand, details, ...).

    Returns
    -------
    Whatever ``recommendbyID`` returns for the best-matching product ID.

    Raises
    ------
    ValueError
        If no product has a strictly positive cosine similarity to the query,
        for example when the query vector is all zeros or ``merge`` is empty.
    """
    query = preprocess_text(query)
    query_vector = vectorizer.transform([query])
    query_vector = SVD.transform(query_vector)

    # transform() yields a (1, k) array; flatten it so it is a plain vector.
    query_flat = np.asarray(query_vector, dtype=float).ravel()
    product_vectors = np.asarray(merge.iloc[:, 4:], dtype=float)

    # Cosine similarity of the query against every product row at once.
    dots = product_vectors.dot(query_flat)
    norms = np.linalg.norm(product_vectors, axis=1) * np.linalg.norm(query_flat)
    with np.errstate(divide='ignore', invalid='ignore'):
        similarities = dots / norms
    # A zero-length vector gives 0/0 = NaN; such rows can never be the best match.
    similarities = np.where(np.isfinite(similarities), similarities, -np.inf)

    # As before, only a strictly positive similarity counts as a match, and
    # argmax keeps the first row in case of ties.
    if similarities.size == 0 or not similarities.max() > 0:
        raise ValueError("No product with a positive similarity to the query was found.")
    productid = merge.index[int(np.argmax(similarities))]
    return recommendbyID(productid)

## User Input

In [4]:
# Test Example: Input ID
# recommendbyID fuzzy-matches this ID against outfit['product_id'] and prints
# the items of the first outfit that contains the matched product.

inputID = "01DMBRYVA2ZFDYRYY5TRQZJTBD"
recommendbyID(inputID)

bottom: Slim Knit Skirt (01DMBRYVA2P5H24WK0HTK4R0A1)
top: Rib Mock Neck Tank (01DMBRYVA2PEPWFTT7RMP5AA1T)
accessory1: medium margaux leather satchel (01DMBRYVA2S5T9W793F4CY41HE)
shoe: Penelope Mid Cap Toe Pump (01DMBRYVA2ZFDYRYY5TRQZJTBD)


In [5]:
# Test Example: Input Description/Brand/Brand Category/Details
# Any of the four fields may be left as an empty string.

inputDescription = "slim fitting, straight leg pant with a center back zipper and slightly cropped leg"
inputBrand = "Reformation"
inputBrandCategory = ""
inputDetails = ""

# Combine the four fields into one space-separated query string.
input_text = " ".join([inputDescription, inputBrand, inputBrandCategory, inputDetails])

similarproduct(input_text)

accessory1: Cassi Belt Bag (01DPEHS0XH9PDD1GH5ZE4P43A2)
bottom: Marlon Pant (01DPKMH0D252JKMAA27MFCT5GM)
top: Jane Sweater (01DPKN20Q3J0BE3CS896DQB6ER)
shoe: Giulia Satin Heel (01DPKNHQDG6GPTKV97CFQRJDHE)
