# Recommender System with Scikit Surprise SVD Model (Option 1)

In [2]:
import pandas as pd
from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split
import csv
import gzip

In [None]:
# Unzip json data.
def unzip_json(filename):
    """Read a gzip-compressed, line-delimited JSON file into a DataFrame.

    Args:
        filename: Path of the ``.gz`` file; each line is parsed as one JSON
            record.

    Returns:
        A pandas DataFrame with one row per JSON line.
    """
    print('Unzipping json file...')

    # The context manager closes the gzip handle as soon as parsing is done
    # (or fails), instead of leaving it open until garbage collection.
    with gzip.open(filename) as gz_file:
        unzipped_data = pd.read_json(gz_file, lines=True)

    return unzipped_data

In [None]:
# Read the gzipped training reviews. The returned DataFrame is not assigned to
# a name here, so it is only available as this cell's displayed result.
unzip_json('reviews.training.json.gz')

In [None]:
# Output json training data as a Pandas dataframe.
def json_to_df(file_name):
    """Load a line-delimited JSON file into a pandas DataFrame.

    Args:
        file_name: Path of the JSON file; each line is parsed as one record.

    Returns:
        The parsed DataFrame, or None when the file cannot be opened or its
        contents cannot be parsed (a hint is printed in that case).
    """
    print('Converting json file to dataframe...')

    try:
        return pd.read_json(file_name, lines=True)
    except (OSError, ValueError):
        # OSError covers missing/unreadable files; ValueError covers malformed
        # JSON. Anything else (e.g. KeyboardInterrupt) is allowed to propagate
        # rather than being silently swallowed.
        print('Please try another file name.')
        return None

In [None]:
# Load the uncompressed training reviews. json_to_df returns None when the
# read fails, in which case later indexing into training_df will raise.
training_df = json_to_df('reviews.training.json')

In [None]:
def convert_to_csv(dataframe, desired_filename):
    """Write a DataFrame to a CSV file without its index column.

    Args:
        dataframe: The pandas DataFrame to export.
        desired_filename: Path of the CSV file to create (a string).

    Returns:
        None. If the export fails, a hint is printed instead of raising.
    """
    print('Converting dataframe to csv: ' + desired_filename + '...')

    try:
        return dataframe.to_csv(desired_filename, index=False)
    except (AttributeError, OSError, TypeError, ValueError):
        # AttributeError/TypeError/ValueError: the object passed in is not a
        # usable DataFrame; OSError: the target path cannot be written.
        # Other exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
        print('Please try another dataframe or file name.')

    return None

In [None]:
# Training file to CSV
# Only the reviewer id, product id and rating columns are exported.
convert_to_csv(training_df[['reviewerID', 'asin', 'overall']], 'reviews.training.shortened.csv')

In [3]:
# Load the ratings table from CSV.
# NOTE(review): this reads 'reviews.training.csv', whereas the export cell
# earlier in the notebook writes 'reviews.training.shortened.csv' — confirm
# which file is intended.
product_info_df = pd.read_csv('reviews.training.csv')

In [4]:
# Preview the first rows to check which columns were loaded.
product_info_df.head()

Unnamed: 0,reviewerID,asin,overall
0,AMFIPCYDYWGVT,B0090SI56Y,4
1,A3G602Z4DWDZKS,B00005JL99,5
2,A33BOYMVG3U58Y,B00109KN0M,5
3,ANEDXRFDZDL18,B00005JMPT,5
4,A1VN7IS16PY024,B00005AAA9,4


In [5]:
# Surprise Reader object.
# Declares that ratings lie on a 1-to-5 scale.
reader = Reader(rating_scale=(1,5))

In [6]:
# Load data from dataframe.
# Surprise expects exactly three columns in the order user, item, rating, so
# select them explicitly rather than relying on the CSV's column layout (an
# extra column such as a saved index would otherwise break the load).
data = Dataset.load_from_df(product_info_df[['reviewerID', 'asin', 'overall']], reader)

In [7]:
# Split the training data into a training and test set.
# 25% of the ratings are held out. No random_state is passed, so presumably the
# split differs from run to run — TODO confirm if reproducibility matters.
trainset, testset = train_test_split(data, test_size=.25)

In [8]:
# Build an SVD model.
# n_factors=150 sets the number of latent factors; fit() trains on trainset
# only (testset is not used in this cell).
model = SVD(n_factors=150)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x110449e48>

In [None]:
# Shape of the learned item-factor matrix (`qi` in Surprise's SVD); presumably
# (number of items in trainset, n_factors) — confirm against the output.
model.qi.shape

In [None]:
# Map raw item ids (product ASINs) to the row index used to look them up in
# model.qi.
# NOTE(review): _raw2inner_id_items is a private Trainset attribute and may
# change between Surprise versions.
item_to_row_idx: dict = model.trainset._raw2inner_id_items

In [None]:
# display(item_to_row_idx)

In [None]:
# Example product
# NOTE(review): this lookup raises KeyError if the ASIN is missing from the
# mapping — presumably the case when the product did not land in trainset.
B00FZM8Z7I_row_idx: int = item_to_row_idx['B00FZM8Z7I']

In [None]:
# Retrieve the learned factor vector (one row of model.qi) for the example
# product.
model.qi[B00FZM8Z7I_row_idx]

In [None]:
# Example user (the reviewerID in the first row of the head() preview).
a_user = 'AMFIPCYDYWGVT'

In [None]:
# Example product (the asin in the first row of the head() preview, where it
# carries a rating of 4).
a_product = 'B0090SI56Y'

In [None]:
# Sanity check: predict the rating for the example (user, product) pair and
# show the estimated value via the Prediction tuple's named field.
prediction = model.predict(a_user, a_product)
prediction.est

In [None]:
# Predict a rating for every row of the unlabeled test file and write the
# results as (datapointID, overall) rows.
# newline='' is what the csv module documents for files handed to csv.reader /
# csv.writer; without it the writer emits an extra blank line per row on
# Windows.
with open('reviews.test.unlabeled.csv', 'r', newline='') as test_file, \
        open('reviews.test.labeled.csv', 'w', newline='') as outfile:
    test_reader = csv.reader(test_file, delimiter=',')
    next(test_reader, None)  # skip the header row (no error if the file is empty)

    outfile_writer = csv.writer(outfile, delimiter=',')
    outfile_writer.writerow(['datapointID', 'overall'])

    for row in test_reader:
        # Columns are read by position: row[0] is echoed back as the datapoint
        # id, row[1] and row[2] are passed to predict() as user and item.
        prediction = model.predict(row[1], row[2])
        outfile_writer.writerow([row[0], prediction.est])

In [None]:
import random

In [None]:
# Print ten random integers. randrange excludes its stop value, so every
# number printed is one of 1, 2, 3 or 4.
# NOTE(review): if these are meant to be ratings on the 1-5 scale, 5 is never
# produced — confirm the intended upper bound.
for _ in range(10):
    rand = random.randrange(1, 5)
    print(rand)