## Importing packages

In [7]:
import os
import csv

import re
import pymysql
import numpy

from surprise import Dataset
from surprise import Reader
from surprise import SVD

from collections import defaultdict

### Function to get top n recommendations

In [8]:
def get_top_n(predictions, n=10):
    '''Group predictions by user and keep each user's n best-rated items.

    Args:
        predictions(iterable of 5-field records): Each record unpacks as
            (user id, item id, true rating, estimated rating, details).
            Only the user id, item id and estimated rating are used.
        n(int): Maximum number of items kept per user. Default is 10.

    Returns:
    A defaultdict(list) mapping each user id to a list of
        (item id, estimated rating) tuples, ordered from the highest
        estimate to the lowest and truncated to n entries. Items with equal
        estimates keep the order in which they appeared in predictions.
    '''

    # Bucket every (item, estimate) pair under the user it belongs to.
    ranked_by_user = defaultdict(list)
    for user, item, _actual, estimate, _details in predictions:
        ranked_by_user[user].append((item, estimate))

    # Replace each bucket with its n highest-estimate entries. Iterating over
    # a snapshot of the keys keeps the reassignment below straightforward.
    for user in list(ranked_by_user):
        best_first = sorted(ranked_by_user[user],
                            key=lambda pair: pair[1],
                            reverse=True)
        ranked_by_user[user] = best_first[:n]

    return ranked_by_user

### Retrieving data from table

In [9]:
# Base directory (inside the Tomcat web app) where the CSV files are written.
pr_file_path="C:/apache-tomcat-7.0.34/webapps/HW5_Arockiasamy_Vishal/"

# Connection settings may be overridden through environment variables so that
# real credentials do not have to be stored in the notebook. The fallbacks are
# the original local-development values.
cnx = pymysql.connect(user=os.environ.get('HOMEHUB_DB_USER', 'root'),
                      password=os.environ.get('HOMEHUB_DB_PASSWORD', '12345'),
                      host=os.environ.get('HOMEHUB_DB_HOST', '127.0.0.1'),
                      database=os.environ.get('HOMEHUB_DB_NAME', 'homehub'))

# One row per approved, non-returned transaction: (login, product, rating).
query = ("SELECT Login_ID, Product_ID, Review_Rating FROM transactions where Transaction_Status = 'Approved' and Order_Returned = 0")

try:
    # The cursor is closed when the with-block exits and the connection is
    # closed in the finally clause, even if the query raises.
    with cnx.cursor() as cursor:
        cursor.execute(query)
        # Each fetched row is a tuple; store it as a list so it can be passed
        # straight to csv.writer.writerows later on.
        array_list = [list(cus) for cus in cursor]
finally:
    cnx.close()

array_list

[['avishal', 'J1Pa1E-G9HB7a-FEY0AjjA', '5.0'],
 ['avishal', '38Vtawjq7QjTMvJrKecQ9Q', '4.5'],
 ['avishal', '7a5-JFxoAEOZHuVqHMC3qQ', '4.5'],
 ['avishal', 'AkJVp9tEHwmIIB4uE7OndA', '5.0'],
 ['avishal', 'r5Db5G35arj7MkoleDyJCQ143', '5.0'],
 ['manish', 'iGqjf0SUKGrwVPrbMkNB-w130', '5.0'],
 ['manish', 'Bn-__FnS0rq0xXamhtuelQ', '3.5'],
 ['manish', 'U1WVMLpkFl2DcyKnnMorJQ', '2.0'],
 ['manish', 'Rlh9ZxBJhcOrnUZjTGWSvQ180', '5.0'],
 ['manish', '9uizR5S6-w4b5zhOugjL_A', '5.0'],
 ['manish', 'oLeRKgLaHE4km3c8QIfNrg7', '5.0'],
 ['hari', 'XZwRGkwGwG61S9o5oYZnSA', '5.0'],
 ['hari', 'xHnw7rFlQI2OcLUgL8IgGg', '1.5'],
 ['hari', 'OATENevK0cexjZ5qC1q2-A67', '4.5'],
 ['hari', 'C0OO1iYxHZiaGsVeh8dF9Q30', '4.5']]

### Writing the data to a CSV file

In [10]:
# Column headers written as the first row of the CSV file.
fields = ['userName', 'productName', 'reviewRating']

# Destination file, built from the shared base directory so the location is
# defined in one place only.
filename = os.path.join(pr_file_path, "train_data.csv")

# newline='' hands line-ending control to the csv module. Without it, Windows
# text mode turns the writer's "\r\n" into "\r\r\n", which shows up as an
# empty row after every record.
with open(filename, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(fields)        # header row
    csvwriter.writerows(array_list)   # one row per [user, product, rating]

In [11]:
# Build both paths once from the shared base directory. os.path.join avoids
# the doubled "/" that pr_file_path + "/name" produces, because pr_file_path
# already ends with a separator.
train_path = os.path.expanduser(os.path.join(pr_file_path, 'train_data.csv'))
file_path = os.path.expanduser(os.path.join(pr_file_path, 'test_data.csv'))

# Copy the data rows of train_data.csv into test_data.csv. DictReader consumes
# the first row as field names and writeheader() is never called, so the copy
# contains the ratings without the header row.
# newline='' is the csv module's recommended mode for both reading and writing.
with open(train_path, "r", newline='') as f:
    # Named csv_reader so it is not confused with the Surprise Reader below.
    csv_reader = csv.DictReader(f, delimiter=',')
    with open(file_path, "w", newline='') as f_out:
        writer = csv.DictWriter(f_out, fieldnames=csv_reader.fieldnames, delimiter=",")
        for row in csv_reader:
            writer.writerow(row)

# As we're loading a custom dataset, we need to define a reader. Each line of
# test_data.csv has the format 'user item rating', separated by ',' characters.
reader = Reader(line_format='user item rating', sep=',')

### Retrieving the top five recommendations for each user

In [13]:
# First train an SVD algorithm on the full set of ratings loaded from file_path.
# NOTE(review): SVD() is created without a fixed random_state, so the
# recommendations may differ between runs - confirm whether that is acceptable.
data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()
algo = SVD()
algo.fit(trainset)

# Then predict ratings for all pairs (u, i) that are NOT in the training set.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

top_n = get_top_n(predictions, n=5)

# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

# Write one row per user: the user id followed by the list of item ids.
# csv.writer stringifies the list with str(), so the second column holds the
# Python list representation; that format is kept unchanged here.
# NOTE(review): presumably another component reads this file - confirm before
# changing the layout.
# The with-block guarantees the file is closed even if a write fails.
with open(os.path.join(pr_file_path, 'MatrixFactorization.csv'), 'w', newline='') as out:
    output = csv.writer(out)
    for uid, user_ratings in top_n.items():
        output.writerow([uid, [iid for (iid, _) in user_ratings]])

avishal ['Rlh9ZxBJhcOrnUZjTGWSvQ180', 'iGqjf0SUKGrwVPrbMkNB-w130', 'oLeRKgLaHE4km3c8QIfNrg7', 'XZwRGkwGwG61S9o5oYZnSA', '9uizR5S6-w4b5zhOugjL_A']
manish ['7a5-JFxoAEOZHuVqHMC3qQ', 'C0OO1iYxHZiaGsVeh8dF9Q30', 'XZwRGkwGwG61S9o5oYZnSA', 'r5Db5G35arj7MkoleDyJCQ143', 'AkJVp9tEHwmIIB4uE7OndA']
hari ['iGqjf0SUKGrwVPrbMkNB-w130', '9uizR5S6-w4b5zhOugjL_A', 'r5Db5G35arj7MkoleDyJCQ143', 'oLeRKgLaHE4km3c8QIfNrg7', 'Rlh9ZxBJhcOrnUZjTGWSvQ180']


In [22]:
# Display the path that was passed to Dataset.load_from_file, as a quick check.
file_path

'C:/apache-tomcat-7.0.34/webapps/HW5_Arockiasamy_Vishal/test_data.csv'