In [235]:
#importing all the required libraries
import pickle
import re
from textwrap import wrap

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
from gensim.models.doc2vec import Doc2Vec
from gensim.test.utils import get_tmpfile
from nltk.corpus import stopwords
# VADER analyzer used as `sia()` below; needs nltk.download('vader_lexicon') once.
from nltk.sentiment.vader import SentimentIntensityAnalyzer as sia
from nltk.stem import SnowballStemmer
from nltk.tokenize import RegexpTokenizer
from scipy import sparse
from skimage import io
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# NOTE: this shadows the built-in open() for the rest of the notebook.
from smart_open import open

In [236]:
#Loading all the artifacts that were dumped while training
def _load_pickle(path):
    """Unpickle the object stored at ``path`` and close the file afterwards.

    NOTE(review): unpickling can execute arbitrary code, so only point this
    at artifacts produced by this project's own training run.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


doc2Vec = Doc2Vec.load("NewModels/doc2vecModel")
tfidf = _load_pickle("NewModels/tfidf_model.pkl")
svd = _load_pickle("NewModels/svd_model.pkl")
doc2VecFeatureMatrix = _load_pickle("NewModels/doc2vecEmbeddings.pkl")
df = pd.read_pickle("MusicData.pkl")
svdFeatureMatrix = _load_pickle("NewModels/lsa_embedding.pkl")
# `sia` is the sentiment analyzer class; its instance is queried through
# hal.polarity_scores(...) when splitting a query into liked/disliked parts.
hal = sia()

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [237]:
#For preprocessing the text query through chatbot
def clean_message(message):
    """Normalise a raw chatbot query before it is embedded.

    The steps run in a fixed order: lower-casing, stop-word removal,
    punctuation stripping and finally stemming. The cleaned string is
    returned.
    """
    pipeline = (make_lower_case, remove_stop_words, remove_punctuation, stem_words)
    for step in pipeline:
        message = step(message)
    return message

def stem_words(text):
    """Stem every whitespace-separated token with the English Snowball stemmer.

    Returns the stemmed tokens joined back together with single spaces.
    """
    tokens = text.split()
    english_stemmer = SnowballStemmer('english')
    return " ".join([english_stemmer.stem(token) for token in tokens])


def make_lower_case(text):
    """Return ``text`` converted to lower case."""
    lowered = text.lower()
    return lowered


def remove_stop_words(text):
    """Drop NLTK's English stop words from ``text``.

    Tokens are obtained by whitespace splitting and compared exactly as they
    appear (so matching is case-sensitive and punctuation-sensitive). The
    surviving tokens are returned joined by single spaces.
    """
    tokens = text.split()
    english_stops = set(stopwords.words("english"))
    kept = []
    for token in tokens:
        if token in english_stops:
            continue
        kept.append(token)
    return " ".join(kept)


def remove_punctuation(text):
    """Keep only the runs of word characters found in ``text``.

    Everything that does not match ``\\w+`` is discarded; the matched tokens
    are returned joined by single spaces.
    """
    word_tokenizer = RegexpTokenizer(r'\w+')
    return " ".join(word_tokenizer.tokenize(text))

In [238]:
#To get sentiments in the search query
def get_message_sentiment(message, analyzer=None):
    """Split a query into the parts the user likes and the parts they dislike.

    The message is cut at every full stop and at every standalone word
    "but" (any letter case). Each non-empty fragment is scored with
    ``analyzer.polarity_scores``; a fragment whose ``'neg'`` score is above
    zero counts as disliked, every other fragment as liked.

    Parameters
    ----------
    message : str
        Raw text typed by the user.
    analyzer : object, optional
        Anything exposing ``polarity_scores(text)`` that returns a mapping
        with a ``'neg'`` entry. Defaults to the notebook-level ``hal``.

    Returns
    -------
    tuple of (str, str)
        ``(love_message, hate_message)``; fragments inside each string are
        separated by a single space so neighbouring words never fuse.
    """
    if analyzer is None:
        analyzer = hal

    # Raw string on purpose: in a normal string literal "\b" is a backspace
    # character, which meant the text was never split on the word "but".
    fragments = re.split(r'\.|\bbut\b', message, flags=re.IGNORECASE)

    love_parts = []
    hate_parts = []
    for fragment in fragments:
        fragment = fragment.strip()
        if not fragment:
            # Skip empty and whitespace-only pieces left behind by the split.
            continue
        if analyzer.polarity_scores(fragment)['neg'] > 0:
            hate_parts.append(fragment)
        else:
            love_parts.append(fragment)
    return (" ".join(love_parts), " ".join(hate_parts))

In [239]:
#Getting tfidf of the message
def get_message_tfidf_embedding_vector(message):
    """Embed ``message`` with the notebook-level ``tfidf`` and ``svd`` objects.

    The message is vectorised by ``tfidf``, converted to a dense array and
    projected by ``svd``. Only the first 25 columns of the projection are
    kept, and the result is returned as a single-row 2-D array.

    NOTE(review): ``tfidf`` and ``svd`` are presumably the fitted
    TfidfVectorizer / TruncatedSVD loaded earlier in the notebook - confirm.
    """
    dense_tfidf = tfidf.transform([message]).toarray()
    reduced = svd.transform(dense_tfidf)
    leading_components = reduced[:, 0:25]
    return leading_components.reshape(1, -1)




In [240]:
#infer vector for the search query against the trained doc2vec model
def get_message_doctovec_embedding_vector(message):
    """Infer an embedding for ``message`` from the notebook-level ``doc2Vec``.

    The message is split on single spaces, ``infer_vector`` is run for 100
    epochs, and the result is returned reshaped to a single-row 2-D array.
    """
    tokens = message.split(" ")
    inferred = doc2Vec.infer_vector(doc_words=tokens, epochs=100)
    return inferred.reshape(1, -1)
#semantic_message_array = get_message_doctovec_embedding_vector(message)

In [241]:
#getting Similarity scores based on the search query and document embedding
def get_similarity_scores(message_array, embeddings):
    """Cosine similarity between every row of ``embeddings`` and the query.

    Returns a DataFrame that carries the index of ``embeddings`` and a single
    column named ``cosine_similarity``.
    """
    scores = cosine_similarity(X=embeddings, Y=message_array, dense_output=True)
    return pd.DataFrame(scores, index=embeddings.index, columns=["cosine_similarity"])

In [242]:
#calculating the ensemble similarity scores

def get_ensemble_similarity_scores(message):
    """Rank the catalogue by the average of semantic and bag-of-words similarity.

    The message is cleaned, embedded both ways, and each embedding is
    compared with its feature matrix (``doc2VecFeatureMatrix`` and
    ``svdFeatureMatrix``). The two score frames are merged on their index and
    their mean is stored in ``ensemble_similarity``.

    Returns a DataFrame with the columns ``semantic_similarity``,
    ``bow_similarity`` and ``ensemble_similarity``, sorted by the last one in
    descending order.
    """
    cleaned = clean_message(message)
    bow_vector = get_message_tfidf_embedding_vector(cleaned)
    semantic_vector = get_message_doctovec_embedding_vector(cleaned)

    bow_scores = get_similarity_scores(bow_vector, svdFeatureMatrix)
    semantic_scores = get_similarity_scores(semantic_vector, doc2VecFeatureMatrix)

    combined = semantic_scores.merge(bow_scores, left_index=True, right_index=True)
    combined.columns = ["semantic_similarity", "bow_similarity"]
    combined['ensemble_similarity'] = (
        combined["semantic_similarity"] + combined["bow_similarity"]
    ) / 2
    return combined.sort_values(by="ensemble_similarity", ascending=False)

In [244]:
#calculating the dissimilarity scores for the search query
def get_dissimilarity_scores(message):
    """Score how closely each catalogue entry matches text the user dislikes.

    The message is cleaned, embedded with the TF-IDF/SVD pipeline and
    compared with ``svdFeatureMatrix``. Only the bag-of-words embedding is
    used, so no doc2vec vector is inferred here (the original computed one
    for 100 epochs and then discarded it).

    Returns a single-column DataFrame named ``dissimilarity`` holding the
    cosine similarity to the disliked text, sorted in descending order, so a
    high value means "close to what the user does not want".
    """
    cleaned = clean_message(message)
    bow_vector = get_message_tfidf_embedding_vector(cleaned)

    scores = get_similarity_scores(bow_vector, svdFeatureMatrix)
    scores.columns = ["dissimilarity"]
    return scores.sort_values(by="dissimilarity", ascending=False)

In [261]:
#function that returns similar instruments based on the search query
def similar_instruments(message, n, dissimilarity_threshold=.3):
    """Return the ``n`` best-matching instruments for a free-text query.

    The query is split into liked and disliked text. Instruments are ranked
    against the liked text, and every instrument whose similarity to the
    disliked text exceeds ``dissimilarity_threshold`` is removed from that
    ranking.

    Parameters
    ----------
    message : str
        Raw text typed by the user.
    n : int
        Number of rows to return.
    dissimilarity_threshold : float, optional
        Similarity to the disliked text above which an instrument is
        excluded. Defaults to ``.3``.

    Returns
    -------
    pandas.DataFrame
        The first ``n`` rows of the filtered ensemble ranking.
    """
    liked_text, disliked_text = get_message_sentiment(message)

    ranked = get_ensemble_similarity_scores(liked_text)
    disliked = get_dissimilarity_scores(disliked_text)

    too_close = disliked[disliked["dissimilarity"] > dissimilarity_threshold]
    # errors="ignore": the ensemble ranking is an inner merge of two score
    # frames, so it may lack labels that the dissimilarity frame still has.
    ranked = ranked.drop(too_close.index, errors="ignore")
    return ranked.head(n)



'similar_instruments("Beginners and young learners alike will appreciate the quality found in this Yamaha C series classical guitar."\n                    +"This quality instrument delivers outstanding cost performance with exceptional playability and tone."\n "The C40 is a full-size nylon-string guitar.", 3)'

In [256]:
#function to plot the  top 3 recommendations as output
def recommendations(similar):
    """Plot the recommended instruments side by side, one image per panel.

    Parameters
    ----------
    similar : pandas.DataFrame
        Frame whose index holds the instrument titles to show. Each title is
        looked up in the notebook-level ``df`` through its ``Title`` column,
        and the image is read from the matching ``Url`` value.

    The figure always has at least three panels (the original layout) and
    grows when more rows are passed; panels without a recommendation are
    hidden. Nothing is returned.
    """
    titles = similar.index.tolist()
    # Never fewer than three panels, so the usual top-3 figure is unchanged,
    # but wide enough that indexing the axes cannot run out of range.
    n_panels = max(3, len(titles))
    fig, axes = plt.subplots(nrows=1, ncols=n_panels, figsize=(5 * n_panels, 5))
    ax = axes.ravel()

    for panel, single_title in zip(ax, titles):
        single_instrument = df.query('Title==@single_title')

        name = single_instrument.Title.values[0]
        instrument_image = single_instrument.Url.values[0]

        image = io.imread(instrument_image)
        panel.imshow(image)
        # Wrap long titles at 20 characters so they stay inside the panel.
        panel.set_title("\n".join(wrap(name, 20)))
        panel.axis('off')

    # Hide any panel that did not receive an image.
    for panel in ax[len(titles):]:
        panel.axis('off')

# Chatbot Interface

In [1]:

from ipywidgets import widgets
from IPython.display import clear_output


def _show_prompt():
    """Print the instructions, then render the text box and restart button."""
    print("Describe the Music instrument  you are interested in. You can give more detailed description of it! ")
    display(text)
    display(button)


def handle_submit(sender):
    """Text-box submit callback: echo the query and plot three recommendations."""
    query = text.value
    display(query)
    print("Please wait till i find relevant recommendations for you!")
    top_matches = similar_instruments(query, 3)
    recommendations(top_matches)


def button_click(b):
    """Restart-button callback: clear the cell output and show the prompt again."""
    clear_output()
    _show_prompt()


text = widgets.Text()
button = widgets.Button(description="Click here to restart!")
_show_prompt()

text.on_submit(handle_submit)
button.on_click(button_click)

Describe the Music instrument  you are interested in. You can give more detailed description of it! 


Text(value='')

Button(description='Click here to restart!', style=ButtonStyle())