# Fipkart-products Recommendation Systems
## Content-Based Recommender Systems

In [1]:
import numpy as np
import pandas as pd
import sklearn

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

products = pd.read_csv('data/flipkart_com-ecommerce_sample.csv')
products = products[['uniq_id', 'description', 'retail_price']].dropna()


target = products.iloc[:,0].values

count_vector = CountVectorizer()
count_array = count_vector.fit_transform(products['description'])

tfidf        = TfidfTransformer()
tfidf_vector = tfidf.fit_transform(count_array)
tfitf_array  = tfidf_vector.toarray()

from sklearn import preprocessing
scaler = preprocessing.Normalizer().fit(products['retail_price'])
new_retail_price = scaler.transform(products['retail_price'])

new_feature = np.concatenate([tfitf_array, np.reshape(new_retail_price, [-1, 1])], axis=1)



In [2]:
new_feature.shape

(19920, 26387)

In [3]:
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(n_neighbors=3)
neigh.fit(new_feature) 

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=1, n_neighbors=3, p=2, radius=1.0)

## Predict with keyword + price

In [4]:
keyword = 'Women\'s Cycling'
price = 1000

In [5]:
count_array = count_vector.transform([keyword])
p_tfidf_vector = tfidf.transform(count_array)
p_tfitf_array  = p_tfidf_vector.toarray()

new_price = scaler.transform([price])
p_new_feature = np.concatenate([p_tfitf_array, np.reshape([new_price], [-1, 1])], axis=1)



In [6]:
p_data = scaler.transform(p_new_feature) 

In [7]:
distance, best_n = neigh.kneighbors(p_data, return_distance=True) 
distance

array([[ 1.16571646,  1.16717183,  1.16737749]])

In [8]:
best_target = []
for n in best_n:
    best_target.append(target[n])

best_target = list(best_target)
best_target

[array(['0973b37acd0c664e3de26e97e5571454',
        '9aacdecceb404c74abddc513fd2756a8',
        'd95b0456a0350bc42f2393c6e84b0f09'], dtype=object)]

In [9]:
products[products['uniq_id'] == best_target[0][2]]

Unnamed: 0,uniq_id,description,retail_price
15,d95b0456a0350bc42f2393c6e84b0f09,Key Features of Alisha Solid Women's Cycling S...,999.0


In [10]:
products[products['uniq_id'] == best_target[0][1]]

Unnamed: 0,uniq_id,description,retail_price
13,9aacdecceb404c74abddc513fd2756a8,Key Features of Alisha Solid Women's Cycling S...,999.0


In [11]:
products[products['uniq_id'] == best_target[0][0]]

Unnamed: 0,uniq_id,description,retail_price
3,0973b37acd0c664e3de26e97e5571454,Key Features of Alisha Solid Women's Cycling S...,699.0
