# Recommender System with Scikit Surprise SVD Model (Option 1)

In [3]:
import pandas as pd
from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split
import csv

In [None]:
# Unzip json data.
def unzip_json(filename):
    """Read a gzip-compressed JSON-lines file into a pandas DataFrame.

    Args:
        filename: Path of the gzip file. Each line of the decompressed
            content is parsed as one JSON record (``lines=True``).

    Returns:
        The DataFrame produced by ``pd.read_json``.
    """
    # Imported locally: the notebook's import cell does not import gzip.
    import gzip

    print('Unzipping json file...')

    # The context manager closes the gzip handle even if parsing fails.
    with gzip.open(filename) as gz_file:
        unzipped_data = pd.read_json(gz_file, lines=True)

    return unzipped_data

In [None]:
# Load the gzipped training reviews. The returned DataFrame is not assigned
# to a name, so it is only available as this cell's output.
unzip_json('reviews.training.json.gz')

In [None]:
# Output json training data as a Pandas dataframe.
def json_to_df(file_name):
    """Load a JSON-lines file into a pandas DataFrame.

    Args:
        file_name: Path of the file to read; each line is parsed as one
            JSON record (``lines=True``).

    Returns:
        The loaded DataFrame, or ``None`` (after printing a hint) when the
        file cannot be opened or parsed.
    """
    print('Converting json file to dataframe...')

    try:
        return pd.read_json(file_name, lines=True)
    except (ValueError, OSError):
        # ValueError: content is not valid JSON lines.
        # OSError: the file is missing or unreadable.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        print('Please try another file name.')
        return None

In [None]:
# Load the uncompressed training reviews. json_to_df returns None when the
# file cannot be loaded, so training_df may be None here.
training_df = json_to_df('reviews.training.json')

In [None]:
def convert_to_csv(dataframe, desired_filename):
    """Write a DataFrame to a CSV file without the index column.

    Args:
        dataframe: The pandas DataFrame to write.
        desired_filename: Destination path (a string or path-like object).

    Returns:
        Always ``None``. If writing fails, a hint is printed instead of
        raising.
    """
    # An f-string formats path-like objects too; '+' concatenation raised
    # TypeError for anything that was not a str.
    print(f'Converting dataframe to csv: {desired_filename}...')

    try:
        dataframe.to_csv(desired_filename, index=False)
    except (AttributeError, OSError):
        # AttributeError: the object has no to_csv (not a DataFrame).
        # OSError: the destination cannot be opened for writing.
        print('Please try another dataframe or file name.')

    return None

In [None]:
# Training file to CSV: keep only the reviewer id, item id (asin) and rating
# ("overall") columns before writing.
convert_to_csv(training_df[['reviewerID', 'asin', 'overall']], 'reviews.training.shortened.csv')

In [4]:
# Load the ratings table used to fit the model.
# NOTE(review): this reads 'reviews.training.csv', while the earlier cell
# writes 'reviews.training.shortened.csv' - confirm this is the intended file.
product_info_df = pd.read_csv('reviews.training.csv')

In [5]:
# Preview the first rows to check the loaded columns.
product_info_df.head()

Unnamed: 0,reviewerID,asin,overall
0,AMFIPCYDYWGVT,B0090SI56Y,4
1,A3G602Z4DWDZKS,B00005JL99,5
2,A33BOYMVG3U58Y,B00109KN0M,5
3,ANEDXRFDZDL18,B00005JMPT,5
4,A1VN7IS16PY024,B00005AAA9,4


In [11]:
# Surprise Reader object; ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1,5))

In [13]:
# Load data from dataframe.
# NOTE(review): Surprise's load_from_df presumably expects exactly the user,
# item and rating columns, in that order - confirm product_info_df has no
# extra columns.
data = Dataset.load_from_df(product_info_df, reader)

In [14]:
# Split the training data into a training set and a test set, holding out
# 25% of the ratings for testing.
trainset, testset = train_test_split(data, test_size=.25)

In [15]:
# Build an SVD model with 100 latent factors and fit it on the training split.
model = SVD(n_factors=100)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x10707dbe0>

In [18]:
# Shape of the fitted item-factor matrix model.qi; presumably
# (number of items in the trainset, n_factors).
model.qi.shape

(49958, 100)

In [19]:
# Map each raw item id (the asin string) to its row index in model.qi.
# NOTE: _raw2inner_id_items is a private Surprise attribute and may change
# between library versions.
item_to_row_idx: dict = model.trainset._raw2inner_id_items

In [20]:
# display(item_to_row_idx)

In [21]:
# Example product: look up the row index of item 'B00FZM8Z7I' in the
# item-factor matrix.
B00FZM8Z7I_row_idx: int = item_to_row_idx['B00FZM8Z7I']

In [22]:
# Retrieve the latent factor vector of the example product.
model.qi[B00FZM8Z7I_row_idx]

array([ 1.41288895e-02, -2.35135568e-01,  3.69059200e-02,  1.76900962e-01,
        8.42053233e-02,  5.04901483e-02, -2.03473186e-01,  3.04355057e-01,
        2.81804427e-01,  3.13461231e-01, -1.73493454e-01, -2.41344800e-01,
       -2.78478097e-02,  2.41105261e-01,  3.08652211e-01, -2.39874775e-01,
        2.31400998e-01, -2.58180490e-01,  1.79929186e-01,  1.71072044e-01,
       -4.11862476e-01,  2.00915120e-01,  9.51611720e-02, -1.25761975e-02,
       -2.25683871e-01, -1.41079589e-01,  1.37814979e-01, -2.95477558e-01,
        2.52248801e-02, -1.22171242e-01, -1.00206810e-01, -1.04153978e-01,
        1.61086384e-01,  2.07255157e-01,  2.39464245e-01,  4.75921273e-02,
       -1.86969807e-01,  3.77990759e-02,  2.28628087e-03,  1.33699366e-01,
        9.24115763e-02, -1.89383260e-01,  9.94011410e-02,  3.19276582e-01,
        1.92084208e-01,  2.68771853e-01, -8.57109902e-03, -1.28815001e-02,
       -1.20220783e-01, -2.15844853e-01, -2.15876901e-01,  2.28814891e-05,
       -4.90951881e-01,  

In [24]:
# Example user: a raw reviewerID that appears in the training data preview.
a_user = 'AMFIPCYDYWGVT'

In [25]:
# Example product: a raw item id (asin) that appears in the training data preview.
a_product = 'B0090SI56Y'

In [26]:
# Test prediction: estimate the rating the example user would give the
# example product.
prediction = model.predict(a_user, a_product)
# `est` is the estimated rating; the named field replaces the positional
# index 3 of the Prediction tuple.
prediction.est

3.9732198756480486

In [27]:
# Predict a rating for every row of the unlabeled test file and write
# "datapointID,overall" rows to the labeled output file.
# Both files are opened with newline='' as the csv module requires; without
# it, extra blank rows can appear on platforms that translate line endings.
with open('reviews.test.unlabeled.csv', 'r', newline='') as test_file, \
        open('reviews.test.labeled.csv', 'w', newline='') as outfile:
    test_reader = csv.reader(test_file, delimiter=',')
    next(test_reader, None)  # skip the first row (presumably the header)

    # A csv writer, despite the name; the name is kept for later cells.
    outfile_reader = csv.writer(outfile, delimiter=',')
    outfile_reader.writerow(['datapointID', 'overall'])

    # row[0] is echoed as the datapoint id; row[1] and row[2] are passed to
    # predict() as the user id and item id.
    for row in test_reader:
        prediction = model.predict(row[1], row[2])
        outfile_reader.writerow([row[0], prediction.est])

In [38]:
import random

In [45]:
# Print ten random integers drawn from randrange(1, 5).
# NOTE(review): the stop value is exclusive, so only 1-4 are ever drawn;
# confirm whether 5 was meant to be included for a 1-5 rating scale.
for _ in range(10):
    rand = random.randrange(1, 5)
    print(rand)

3
1
4
3
1
4
4
4
3
2
