In [None]:
# Package Imports
import pandas as pd
import numpy as np
import scipy as sp
from scipy.sparse import coo_matrix
import numpy_indexed as npi

In [None]:
# Load the book catalogue. Column names are supplied here, so every line of the
# file (including the first) is parsed as a data row.
colnames = ['book_id', 'title', 'avg_rating', 'description']
books = pd.read_csv('books.csv', header=None, names=colnames)
books

In [None]:
# Load the review set. Column names are supplied here, so every line of the
# file (including the first) is parsed as a data row.
colnames = ['user_id', 'book_id', 'review_text', 'rating']
reviews = (
    pd.read_csv('reviews.csv', header=None, names=colnames)
    # Keep only rated reviews: 0 was not a selectable rating, so a 0 here
    # means the reviewer gave no rating at all.
    .loc[lambda frame: frame['rating'].ne(0)]
)
reviews

In [None]:
# Map each distinct reviewer id to a dense 0-based row index
# (ids are numbered in the order they first appear in `reviews`).
unique_users = np.array(reviews.user_id.unique())
user_index = np.arange(len(unique_users))
user_map = dict(zip(unique_users, user_index))

# Map each catalogue book_id to its 0-based position in `books` (the column index).
# NOTE(review): if `books` ever contains a repeated book_id, the dict keeps the
# last position for that id -- confirm book_id is unique in books.csv.
book_ids = np.array(books.book_id)
book_index = np.arange(len(book_ids))
book_map = dict(zip(book_ids, book_index))

# Translate the ids in the review set into matrix coordinates.
# missing='raise' makes an unknown id fail here with a KeyError. With the default
# ('ignore') an id that is absent from the map is left in place as-is, so a review
# of a book missing from books.csv would carry its raw book_id as a column index.
row = npi.remap(reviews.user_id, list(user_map.keys()), list(user_map.values()),
                missing='raise')
col = npi.remap(reviews.book_id, list(book_map.keys()), list(book_map.values()),
                missing='raise')
dat = np.array(reviews.rating)

In [None]:
# Assemble the ratings matrix in COO (coordinate) format: one row per entry of
# `unique_users`, one column per entry of `book_ids`, values taken from `dat`.
users_size = len(unique_users)
books_size = len(book_ids)
users = coo_matrix((dat, (row, col)), shape=(users_size, books_size))

In [None]:
# Gather the (row index, column index, value) triplets into a three-column dataframe
users_sparse_df = pd.DataFrame(
    data=list(zip(row, col, dat)),
    columns=['r_index', 'c_index', 'data'],
)

In [None]:
# Write the triplet table to disk; index=False leaves the dataframe index out of the file
users_sparse_df.to_csv(path_or_buf='users_sparse.csv', index=False)