# Recommendation

## Import all necessary libraries

In [1]:
# Optional helper, left disabled: uncomment to run the `fc-list` shell command with a
# `:lang=zh` filter and print the matching font family names.
# NOTE(review): presumably used to pick the CJK font configured for matplotlib below — confirm.
#!fc-list :lang=zh family

In [2]:
import os
import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# Optional: uncomment to cap pandas' 'display.max_rows' option at 10.
#pd.set_option('display.max_rows', 10)
# Put a CJK font first in the sans-serif list; the dataset's main categories are Chinese
# strings (e.g. 書籍, 家居生活), which need such a font to render in plots.
plt.rcParams['font.sans-serif'] = ['Noto Sans Mono CJK TC', 'sans-serif'] 
# Disable the Unicode minus glyph for axis labels.
# NOTE(review): presumably because the CJK font above may not provide that glyph — confirm.
plt.rcParams['axes.unicode_minus'] = False

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Load Data

In [3]:
# Decide where the CSV files live: on Google Drive when the notebook runs on
# Colab, otherwise in the repository's local data folder.
try:
    from google.colab import drive

    # Mount Google Drive at /content/drive inside the Colab runtime
    drive.mount('/content/drive')
except ModuleNotFoundError:
    # google.colab cannot be imported -> the notebook is not running on Colab
    running_on_colab = False
else:
    running_on_colab = True

if running_on_colab:
    path = '/content/drive/My Drive/wids-taipei/2020-WiDS-Taipei-MLCC-Workshop/dataset/*.csv'
else:
    path = '../data/*.csv'

### Read data

In [4]:
# Read every CSV matched by `path` into a dict of DataFrames keyed by the
# file's base name (text before the first "."), then pick the purchase table.
filenames = sorted(glob.glob(path))  # sorted -> deterministic load order
if not filenames:
    # Fail early with a clear message instead of an opaque KeyError further down
    raise FileNotFoundError('No CSV files found for pattern: {0}'.format(path))

pd_dict = {}
for filename in filenames:
    # os.path.basename strips the directory using the platform's separator rules,
    # so the key is correct on Windows too (glob returns backslash paths there).
    name = os.path.basename(filename).split(".")[0]
    pd_dict[name] = pd.read_csv(filename)

purchase_data = pd_dict['customer_purchase_dataset']

## Item-based Recommendation

In [5]:
# Preview the first five rows of the purchase table
purchase_data.head(5)

Unnamed: 0,customer_unique_id,product_sub_category,product_main_category,price,count
0,7ad04c71bfca958e6f2ec44bce34e2da,books_technical,書籍,24.0,1
1,4e58455924b97da4c44477abb0b030a5,cool_stuff,休閒生活,44.9,1
2,1f44054faaecb5ba43ca49625fb81767,auto,3C,24.8,1
3,d8764626d0d43e3f4fb34db9021a46ef,kitchen_dining_laundry_garden_furniture,家居生活,9.6,1
4,960438e3b93de6c449d45491534855a2,computers_accessories,3C,89.8,1


In [6]:
# Keep only the columns needed for the item-based recommender:
# who bought, which main category, and how many.
selected_columns = ['customer_unique_id', 'product_main_category', 'count']
customer_product_data = purchase_data[selected_columns]
customer_product_data

Unnamed: 0,customer_unique_id,product_main_category,count
0,7ad04c71bfca958e6f2ec44bce34e2da,書籍,1
1,4e58455924b97da4c44477abb0b030a5,休閒生活,1
2,1f44054faaecb5ba43ca49625fb81767,3C,1
3,d8764626d0d43e3f4fb34db9021a46ef,家居生活,1
4,960438e3b93de6c449d45491534855a2,3C,1
...,...,...,...
236830,305adb7f869f2d07fa2170b042abefdf,家居生活,1
236831,49edfe73ea287d715eebfced06b0bea9,3C,1
236832,d87cc9520e3fd47ef88f7098e51afe8a,保健,1
236833,738ffcf1017b584e9d2684b36e07469c,服飾/配件,1


### Pivot table

In [7]:
# Total quantity bought per (customer, main category).
# Use sum() rather than count(): count() only counts rows in each group and
# ignores the values stored in the 'count' column, so any row with a purchase
# quantity greater than 1 would be under-weighted.
purchase_data_sum = customer_product_data.groupby(
    ['customer_unique_id', 'product_main_category'],
    as_index=False)['count'].sum()
purchase_data_sum

Unnamed: 0,customer_unique_id,product_main_category,count
0,0000366f3b9a7992bf8c76cfdf3221e2,家居生活,1
1,0000b849f77a49e4a4ce2b2a4ca5be3f,保健,1
2,0000b849f77a49e4a4ce2b2a4ca5be3f,書籍,1
3,0000f46a3911fa3c0805444483337064,保健,1
4,0000f46a3911fa3c0805444483337064,文具,1
...,...,...,...
212524,ffff371b4d645b6ecea244b27531430a,美食,1
212525,ffff5962728ec6157033ef9805bacc48,休閒生活,1
212526,ffffd2657e2aad2907e67c3e9daecbeb,3C,1
212527,ffffd2657e2aad2907e67c3e9daecbeb,休閒生活,1


In [8]:
# Reshape to an item x customer matrix: one row per main category, one column
# per customer, cell = that customer's count for the category (0 if none).
customer_product_pivot = (
    purchase_data_sum
    .pivot(index='product_main_category',
           columns='customer_unique_id',
           values='count')
    .fillna(0)
)
customer_product_pivot

customer_unique_id,0000366f3b9a7992bf8c76cfdf3221e2,0000b849f77a49e4a4ce2b2a4ca5be3f,0000f46a3911fa3c0805444483337064,0000f6ccb0745a6a4b88665a16c9f078,0004aac84e0df4da2b147fca70cf8255,0004bd2a26a76fe21f786e4fbd80607f,00050ab1314c0e55a6ca13cf7181fecf,00053a61a98854899e70ed204dd4bafe,0005e1862207bf6ccc02e4228effd9a0,0005ef4cd20d2893f0d9fbd94d3c0d97,...,fff7219c86179ca6441b8f37823ba3d3,fff96bc586f78b1f070da28c4977e810,fffa431dd3fcdefea4b1777d114144f2,fffb09418989a0dbff854a28163e47c6,fffbf87b7a1a6fa8b03f081c5f51a201,fffcf5a5ff07b0908bd4e2dbc735a684,fffea47cd6d3cc0a88bd621562a9d061,ffff371b4d645b6ecea244b27531430a,ffff5962728ec6157033ef9805bacc48,ffffd2657e2aad2907e67c3e9daecbeb
product_main_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
休閒生活,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
保健,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0
其他,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
商業用途,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
嬰兒用品,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
安全配件,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
家居生活,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
家電,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
文具,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Sparse copy of the pivot table (same shape) used to fit the neighbour model
customer_product_matrix = csr_matrix(customer_product_pivot.to_numpy())

# Brute-force nearest neighbours over the category rows, compared by cosine distance
model_knn = NearestNeighbors(
    metric='cosine',
    algorithm='brute',
    n_neighbors=5,
).fit(customer_product_matrix)
model_knn

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [10]:
# Select the item (row position in the pivot table) whose neighbours we want.
# It represents the scenario "suppose this item is bought, which items are most similar?"
#query_index = np.random.choice(customer_product_pivot.shape[0])
query_index = 0
n_recommendations = 3  # number of similar items to print

# Ask for one extra neighbour because the query row itself is normally returned
# as well (distance 0); never request more neighbours than there are rows.
n_query_neighbors = min(n_recommendations + 1, customer_product_pivot.shape[0])
distances, indices = model_knn.kneighbors(
    customer_product_pivot.iloc[[query_index], :].values,  # 2-D array with a single row
    n_neighbors=n_query_neighbors)

# Exclude the query item by index rather than assuming it sits at position 0:
# when distances tie (e.g. another row is identical) the query is not guaranteed
# to come first, and it would otherwise be recommended to itself.
recommendations = [
    (neighbor_index, neighbor_distance)
    for neighbor_index, neighbor_distance in zip(indices.flatten(), distances.flatten())
    if neighbor_index != query_index
][:n_recommendations]

print('Recommendations for {0}:'.format(customer_product_pivot.index[query_index]))
for rank, (neighbor_index, neighbor_distance) in enumerate(recommendations, start=1):
    print('{0}: {1}, with distance of {2}:'.format(
        rank, customer_product_pivot.index[neighbor_index], neighbor_distance))

Recommendations for 3C:
1: 休閒生活, with distance of 0.401609152919837:
2: 服飾/配件, with distance of 0.6214577231454532:
3: 書籍, with distance of 0.7380923681040784:
