In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS 
from sklearn.metrics.pairwise import cosine_similarity,linear_kernel

In [2]:
# Notebook-wide debug switch; the default "N" keeps the debug cell silent.
DEBUG_MODE = "N"

In [28]:
# Final simplified version (Oct 2021).

# User preferences that drive the recommendation. They are hard-coded here as
# a stand-in for real user input.
number_of_recommendations = 5
color_choice, fabric_choice, pattern_choice = "Red", "Silk", "Solid"


In [29]:
# Score the catalogue against the user's preferences.
result = getRecommendation(color_choice, fabric_choice, pattern_choice, number_of_recommendations)

# A saree that appears more than once should be recommended once, not dropped
# altogether: keep=False discarded every copy of a repeated id, whereas
# keep="first" retains a single copy. Deduplicate before sorting so the
# surviving row is the best-ranked one (getRecommendation returns rows in
# descending score order), then present the rows ordered by id.
result = result.drop_duplicates(subset="id", keep="first").sort_values("id")

['Red Green Gold Silver Blue White Green Gold Red silk Plain Floral Geometric Sequins Embroidered Zari Gotta work Mirror']


In [30]:
# Show each recommendation: id and description as text, followed by its image.
# `Image` and `display` come from the public IPython.display module; importing
# `display` from IPython.core.display is a deprecated path.
from IPython.display import Image, display

# To hand the data to the lambda function instead, the same frame can be
# serialised with result.to_json(orient='records').

for row in result.itertuples(index=False):
    print(row.id, row.desc)
    display(Image(url=row.img_url, width=300, unconfined=True))

148 Red with Red and Zari brocade on silk


163 Midnight Blue with white and pink bhandhni and gold flower embroidery on silk


165 Navy blue with white and pink bandhani and gold long motif embroidery on silk


214 Fuchia pink with all over gold, purple, green and red motif brocade silk


222 Teal blue with a gold and green wreath motif brocade on silk


In [10]:
# Optional debug dump of the fitted vocabularies and the similarity scores.
# DEBUG_MODE is a string flag ("N" by default), so the former `== True`
# comparison could never succeed. Accept the usual "on" spellings as well as
# a real boolean.
if str(DEBUG_MODE).strip().lower() in ("y", "yes", "true", "1"):
    # vect, user_vect and scores are created inside getRecommendation. Only
    # print them when they have been exposed at notebook level, rather than
    # failing with a NameError.
    _unavailable = [name for name in ("vect", "user_vect", "scores") if name not in globals()]
    if _unavailable:
        print("Debug output unavailable, not defined: {}".format(", ".join(_unavailable)))
    else:
        print("Vocabulary size: {}".format(len(vect.vocabulary_)))
        print("Vocabulary content:\n {}".format(vect.vocabulary_))
        print("User Vocabulary size: {}".format(len(user_vect.vocabulary_)))
        print("User Vocabulary content:\n {}".format(user_vect.vocabulary_))
        print(scores)

In [23]:
def getRecommendation(color_choice, fabric_choice, pattern_choice, number_of_recommendations=2,
                      data_path='./data/updated_saree_data_v2.csv'):
    """Recommend catalogue items whose description best matches the user's choices.

    The colour choice is expanded with its complementary, contrast and
    monochromatic colours, and the fabric and pattern choices are mapped through
    their helper functions. The resulting text is compared with every catalogue
    description using TF-IDF vectors and cosine similarity.

    Parameters
    ----------
    color_choice, fabric_choice, pattern_choice : str
        The user's preferences.
    number_of_recommendations : int, default 2
        Maximum number of rows to return.
    data_path : str, default './data/updated_saree_data_v2.csv'
        CSV file to read; it must provide the 'id', 'desc' and 'img_url' columns.

    Returns
    -------
    pandas.DataFrame
        Object-dtype frame with columns 'id', 'desc', 'score', 'img_url', ordered
        from best to worst match. 'score' is the cosine similarity formatted as a
        string with three decimals.
    """
    # Build the query text: the chosen colour plus everything that goes with it.
    query_terms = [
        color_choice,
        getComplimentaryColor(color_choice),
        getContrastColor(color_choice),
        getMonoChromaticColor(color_choice),
        getFabricType(fabric_choice),
        getPatternType(pattern_choice),
    ]
    user_choice = [" ".join(query_terms)]
    print(user_choice)

    # Read the catalogue.
    df = pd.read_csv(data_path)

    # Fit the vocabulary on the catalogue descriptions and vectorise them.
    vect = TfidfVectorizer(min_df=2, ngram_range=(1, 2), stop_words='english')
    org = vect.fit_transform(df['desc'])

    # The query has to be projected into the catalogue's vocabulary, so it is
    # transformed with the same vectoriser. The separate vectoriser previously
    # fitted on the query alone was never used and has been removed.
    user = vect.transform(user_choice)

    # One vectorised call gives a (1, n_items) matrix. Flatten it to plain
    # floats so that sorting and formatting work on scalars rather than on
    # 1x1 arrays.
    scores = cosine_similarity(user, org).ravel().tolist()

    # Highest score first. sorted() is stable, so ties keep catalogue order.
    top = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:number_of_recommendations]

    # Assemble all rows at once instead of growing the frame cell by cell.
    rows = [
        {
            'id': df.at[i, 'id'],
            'desc': df.at[i, 'desc'],
            'score': "{:.3f}".format(scores[i]),
            'img_url': df.at[i, 'img_url'],
        }
        for i in top
    ]
    return pd.DataFrame(rows, columns=['id', 'desc', 'score', 'img_url'], dtype=object)

In [24]:
def getComplimentaryColor(color_name):
    """Return the space-separated complementary colours for ``color_name``.

    The lookup ignores case; a colour without an entry yields 'White'.
    """
    complements = {
        "red": 'Green Gold Silver Blue',
        "yellow": 'Violet',
        "blue": 'Orange',
        "green": 'Red Yellow Blue',
        "orange": 'Red Yellow Blue',
        "violet": 'Yellow Orange Green',
        "pink": 'Yellow Green',
        "white": 'Black Red Blue',
        "black": 'White Red',
        "gray": 'Red White Black',
    }
    return complements.get(color_name.lower(), 'White')

def getContrastColor(color_name):
    """Return the space-separated contrasting colours for ``color_name``.

    The lookup ignores case; a colour without an entry yields 'White'.
    """
    contrasts = {
        "red": 'White Green Gold',
        "yellow": 'Red Blue Green',
        "blue": 'Orange Pink White Silver',
        "green": 'Red Silver Gold White',
        "orange": 'Blue Yellow Purple Pink',
        "violet": 'White Pink Red',
        "pink": 'Red White Blue Gray',
        "white": 'Red Gray Black Pink Blue Green Yellow Silver',
        "black": 'White Gold Red Yellow',
        "gray": 'Silver White Black Pink Yellow Blue',
    }
    return contrasts.get(color_name.lower(), 'White')

def getMonoChromaticColor(color_name):
    """Return the monochromatic match for ``color_name``: the colour itself, unchanged."""
    monochrome = color_name
    return monochrome



def getFabricType(fabric_choice):
    """Return the lower-case fabric keyword for the user's fabric choice.

    Recognised fabrics are Silk, Cotton, Linen, Satin and Georgette, matched
    case-insensitively. The choice used to be ignored and "silk" returned for
    every input. "silk" is now only the fallback for an empty, missing or
    unrecognised choice, so existing "Silk" callers see no change.
    """
    known_fabrics = ("silk", "cotton", "linen", "satin", "georgette")
    fabric = str(fabric_choice or "").strip().lower()
    return fabric if fabric in known_fabrics else "silk"

def getPatternType(pattern_choice):
    """Return the space-separated pattern keywords for ``pattern_choice``.

    "solid" (in any letter case) maps to the full list of pattern keywords;
    every other choice maps to "Solid".
    """
    if pattern_choice.lower() != "solid":
        return "Solid"
    return "Plain Floral Geometric Sequins Embroidered Zari Gotta work Mirror"