In [None]:
# Package Imports and set platform
from io import StringIO
import numpy as np
import numpy_indexed as npi
import pandas as pd
import platform
import requests
import scipy as sp
from scipy.sparse import coo_matrix

# platform.node() returns this computer's network name (an empty string if it
# cannot be determined). Later cells compare curr_comp against known hostnames
# to decide where each collaborator's copy of the data lives.
curr_comp = platform.node()
# Display the detected name so it can be copied into the hostname checks below.
curr_comp

In [None]:
# Read in book data
# The CSV is read with explicit column names (names=colnames), so the first
# line of the file is treated as data rather than as a header row.
colnames = ['book_id','title','avg_rating','description']

if curr_comp == 'DESKTOP-ARTEMI5' or curr_comp == 'MANDEY-COMP-NAME':
    # On these machines the CSV is downloaded from Google Drive.
    orig_url='https://drive.google.com/file/d/15DvRQIdkXVg3qXVkyDm2GsgXVcTmgzYj/view?usp=sharing'
    # Share links look like .../d/<file_id>/view?..., so the file id is the
    # second-to-last '/'-separated segment.
    file_id = orig_url.split('/')[-2]
    dwn_url='https://drive.google.com/uc?export=download&id=' + file_id
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(dwn_url, timeout=60)
    # Fail loudly on an HTTP error instead of parsing an error page as CSV.
    response.raise_for_status()
    url = response.text
    books_raw = StringIO(url)
    books = pd.read_csv(books_raw,names=colnames)
elif curr_comp == 'sfort-laptop' or curr_comp == 'sfort-desktop':
    # On these machines the CSV is read from a path relative to the notebook.
    books = pd.read_csv('toobig/books.csv',names=colnames)
else:
    # Without this branch an unknown machine would leave `books` undefined and
    # the cell would fail below with an unhelpful NameError.
    raise RuntimeError(
        f"No book data source configured for machine {curr_comp!r}; "
        "add its hostname to one of the branches above."
    )

books

In [None]:
# Read in interactions data
# The CSV is read with explicit column names (names=colnames), so the first
# line of the file is treated as data rather than as a header row.
colnames = ['user_id','book_id','is_read','rating']

if curr_comp == 'DESKTOP-ARTEMI5':
    # NOTE(review): 'PATH-TO-INTERACTIONS' looks like a placeholder - TODO set the real path.
    interactions = pd.read_csv('PATH-TO-INTERACTIONS',names=colnames)
elif curr_comp == 'MANDEY-COMP-NAME':
    # NOTE(review): 'PATH-TO-INTERACTIONS' looks like a placeholder - TODO set the real path.
    interactions = pd.read_csv('PATH-TO-INTERACTIONS',names=colnames)
elif curr_comp == 'sfort-laptop' or curr_comp == 'sfort-desktop':
    interactions = pd.read_csv('toobig/interactions.csv',names=colnames)
else:
    # Without this branch an unknown machine would leave `interactions`
    # undefined and the filter below would fail with an unhelpful NameError.
    raise RuntimeError(
        f"No interactions data source configured for machine {curr_comp!r}; "
        "add its hostname to one of the branches above."
    )

# Isolate interactions with non-zero rating and is_read status
# Both conditions are evaluated on the freshly loaded frame and combined into
# one mask, which keeps the same rows as filtering on each in turn.
was_read = interactions['is_read']==True
has_rating = interactions['rating']!=0
interactions = interactions[was_read & has_rating]
interactions

In [None]:
# Map between user_ids and index
# Users are numbered 0..n-1 in order of first appearance in `interactions`.
unique_users = np.array(interactions.user_id.unique())
user_index = np.arange(len(unique_users))
user_map = dict(zip(unique_users,user_index))

# Map between book_ids and index
# Books are numbered 0..m-1 by their row position in `books`.
book_ids = np.array(books.book_id)
book_index = np.arange(len(book_ids))
book_map = dict(zip(book_ids,book_index))

# Apply maps to interactions set
# Index.get_indexer returns each id's position in the lookup, or -1 when the id
# is absent. Unlike a remap that leaves unmatched ids untouched, this can never
# let a raw book_id slip through and be mistaken for a column index.
user_lookup = pd.Index(unique_users)
book_lookup = pd.Index(book_ids)
if not book_lookup.is_unique:
    # A duplicated book_id has no single column position, so the mapping
    # would be ambiguous.
    raise ValueError('books.book_id contains duplicate ids; cannot build a one-to-one book index')

row = user_lookup.get_indexer(interactions.user_id)
col = book_lookup.get_indexer(interactions.book_id)

unknown_books = col < 0
if unknown_books.any():
    examples = interactions.book_id[unknown_books].unique()[:5].tolist()
    raise ValueError(
        f"{int(unknown_books.sum())} interactions reference book_ids that are "
        f"missing from `books` (e.g. {examples}); filter them out or reload the book data."
    )

dat = np.array(interactions.rating)

In [None]:
# Assemble the ratings as a sparse COO matrix: one row per user index,
# one column per book index, with the rating as the stored value.
users_size, books_size = unique_users.size, book_ids.size
matrix_shape = (users_size, books_size)
users = coo_matrix((dat, (row, col)), shape=matrix_shape)

In [None]:
# Make dataframe of col, row, dat
# Build the frame straight from the three arrays. This avoids creating one
# Python tuple per interaction and keeps each array's dtype as-is.
users_sparse_df = pd.DataFrame({'r_index': row,
                                'c_index': col,
                                'data': dat})

In [None]:
# Export to CSV
# Writes the sparse triplets to 'users_sparse.csv', resolved relative to the
# current working directory. index=False leaves the DataFrame's row labels out
# of the file, so it contains only the frame's own columns.
users_sparse_df.to_csv('users_sparse.csv',index=False)