In [1]:
import pandas as pd
import numpy as np

from surprise import Reader, Dataset, KNNBasic,model_selection

In [2]:
def load_train_test_data(file_name,test_fold,nfolds=5):
    """
    Load the fold files 'data//<file_name><fold>.csv' ('|'-delimited) into a
    train dataframe and a test dataframe.

    Arguments:
        file_name: file to load the data from. Must not contain the fold number
            or the extension of the file on disk.
        test_fold: an int giving the fold number to use as test data. Any value
            that matches no fold number (e.g. None) holds nothing out: every
            fold goes to train and test stays empty.
        nfolds: an int specifying the number of fold files on disk (folds are
            numbered 1..nfolds).
    Returns:
        Two pandas dataframes.
        Train: data from all fold files except test_fold, with a fresh 0..n-1 index.
        Test: data from the test_fold file only (an empty dataframe when no
            fold matched).
    """
    test = pd.DataFrame()
    train_parts = []
    for fold in range(1, nfolds + 1):
        path = 'data//{}{}.csv'.format(file_name, fold)
        if fold == test_fold:
            test = pd.read_csv(path, delimiter='|')
        else:
            # The first column is read as the index; reset_index() below turns
            # it back into a regular first column once the folds are stacked.
            train_parts.append(pd.read_csv(path, delimiter='|', index_col=0))

    # Concatenate once instead of re-copying the growing frame on every fold.
    # With no train folds, fall back to an empty frame as before.
    train = pd.concat(train_parts, axis=0) if train_parts else pd.DataFrame()
    train = train.reset_index()
    return train, test

In [17]:
# test_fold=None matches no fold number, so every 'movies' fold is stacked into
# `train` and `test` stays an empty DataFrame.
train, test = load_train_test_data(file_name='movies', test_fold=None)

In [18]:
train.shape

(102152, 3)

In [15]:
train.shape

(191552, 3)

In [16]:
print(train.user_id.nunique())
print(train.item_id.nunique())

13368
5701


In [3]:
train,test = load_train_test_data('games',1,nfolds=5)

In [4]:
train.shape

(153241, 3)

In [5]:
test.shape

(38311, 3)

##### Load train, test data as folds

In [57]:
# Collect (and echo as they are found) the testset entries whose first field
# is the user 'doodlerman'.
a = []
for item in testset:
    if item[0] != 'doodlerman':
        continue
    print(item)
    a.append(item)
    

('doodlerman', 'playstation/tony-hawks-pro-skater-2', 7.3086841899193447)
('doodlerman', 'playstation-3/grand-theft-auto-iv', 7.3086841899193447)
('doodlerman', 'dreamcast/soulcalibur', 7.3086841899193447)
('doodlerman', 'xbox-360/grand-theft-auto-iv', 7.3086841899193447)
('doodlerman', 'wii/super-mario-galaxy', 7.3086841899193447)
('doodlerman', 'wii/super-mario-galaxy-2', 7.3086841899193447)
('doodlerman', 'xbox-one/grand-theft-auto-v', 7.3086841899193447)
('doodlerman', 'playstation-3/grand-theft-auto-v', 7.3086841899193447)
('doodlerman', 'xbox-360/grand-theft-auto-v', 7.3086841899193447)
('doodlerman', 'switch/the-legend-of-zelda-breath-of-the-wild', 7.3086841899193447)
('doodlerman', 'playstation-2/tony-hawks-pro-skater-3', 7.3086841899193447)
('doodlerman', 'nintendo-64/perfect-dark', 7.3086841899193447)
('doodlerman', 'playstation-4/grand-theft-auto-v', 7.3086841899193447)
('doodlerman', 'gamecube/metroid-prime', 7.3086841899193447)
('doodlerman', 'playstation-2/grand-theft-aut

('doodlerman', 'playstation-3/braid', 7.3086841899193447)
('doodlerman', 'playstation-3/mass-effect-3', 7.3086841899193447)
('doodlerman', 'nintendo-64/paper-mario', 7.3086841899193447)
('doodlerman', 'xbox/grand-theft-auto-san-andreas', 7.3086841899193447)
('doodlerman', 'xbox-one/inside', 7.3086841899193447)
('doodlerman', 'pc/grand-theft-auto-san-andreas', 7.3086841899193447)
('doodlerman', 'playstation-2/world-soccer-winning-eleven-7-international', 7.3086841899193447)
('doodlerman', 'playstation-2/soulcalibur-ii', 7.3086841899193447)
('doodlerman', 'playstation-3/the-elder-scrolls-v-skyrim', 7.3086841899193447)
('doodlerman', 'switch/celeste', 7.3086841899193447)
('doodlerman', 'xbox/prince-of-persia-the-sands-of-time', 7.3086841899193447)
('doodlerman', 'pc/call-of-duty-4-modern-warfare', 7.3086841899193447)
('doodlerman', 'playstation-3/god-of-war-iii', 7.3086841899193447)
('doodlerman', 'playstation-2/ssx-tricky', 7.3086841899193447)
('doodlerman', 'playstation-2/nhl-2002', 7.3

('doodlerman', 'pc/enter-the-gungeon', 7.3086841899193447)
('doodlerman', 'xbox-360/worms-2-armageddon', 7.3086841899193447)
('doodlerman', 'playstation-3/skate-2', 7.3086841899193447)
('doodlerman', 'pc/hitman---episode-2-sapienza', 7.3086841899193447)
('doodlerman', 'wii-u/child-of-light', 7.3086841899193447)
('doodlerman', 'game-boy-advance/classic-nes-series-super-mario-bros', 7.3086841899193447)
('doodlerman', 'pc/age-of-mythology-the-titans', 7.3086841899193447)
('doodlerman', 'pc/simcity-4', 7.3086841899193447)
('doodlerman', 'dreamcast/san-francisco-rush-2049', 7.3086841899193447)
('doodlerman', 'playstation-3/splitsecond', 7.3086841899193447)
('doodlerman', 'playstation-3/the-walking-dead-episode-1---a-new-day', 7.3086841899193447)
('doodlerman', '3ds/etrian-odyssey-iv-legends-of-the-titan', 7.3086841899193447)
('doodlerman', 'pc/final-fantasy-ix', 7.3086841899193447)
('doodlerman', 'pc/rise-of-nations-rise-of-legends', 7.3086841899193447)
('doodlerman', 'xbox-360/bulletstorm'

('doodlerman', 'playstation-4/nhl-16', 7.3086841899193447)
('doodlerman', 'playstation-4/rock-band-4', 7.3086841899193447)
('doodlerman', 'playstation-4/dragons-dogma-dark-arisen', 7.3086841899193447)
('doodlerman', 'playstation-vita/killzone-mercenary', 7.3086841899193447)
('doodlerman', 'playstation-3/dragons-dogma', 7.3086841899193447)
('doodlerman', 'pc/tropico-4-modern-times', 7.3086841899193447)
('doodlerman', 'playstation-4/dont-starve-console-edition', 7.3086841899193447)
('doodlerman', 'ds/mega-man-zx-advent', 7.3086841899193447)
('doodlerman', 'gamecube/burnout', 7.3086841899193447)
('doodlerman', 'playstation-2/drakan-the-ancients-gates', 7.3086841899193447)
('doodlerman', 'dreamcast/ncaa-college-football-2k2', 7.3086841899193447)
('doodlerman', 'pc/hand-of-fate', 7.3086841899193447)
('doodlerman', 'playstation-4/heavy-rain-beyond-two-souls-collection', 7.3086841899193447)
('doodlerman', 'pc/call-of-duty-modern-warfare-3', 7.3086841899193447)
('doodlerman', 'playstation-4/ri

('doodlerman', 'pc/blocks-that-matter', 7.3086841899193447)
('doodlerman', 'pc/dc-universe-online', 7.3086841899193447)
('doodlerman', 'playstation-2/the-getaway', 7.3086841899193447)
('doodlerman', 'xbox-360/omega-five', 7.3086841899193447)
('doodlerman', 'pc/anno-2205', 7.3086841899193447)
('doodlerman', 'pc/loadout', 7.3086841899193447)
('doodlerman', 'pc/ridge-racer-unbounded', 7.3086841899193447)
('doodlerman', 'pc/stubbs-the-zombie-in-rebel-without-a-pulse', 7.3086841899193447)
('doodlerman', 'xbox-360/tony-hawks-proving-ground', 7.3086841899193447)
('doodlerman', 'playstation-4/tales-of-zestiria', 7.3086841899193447)
('doodlerman', 'pc/shattered-horizon', 7.3086841899193447)
('doodlerman', 'pc/aarklash-legacy', 7.3086841899193447)
('doodlerman', 'wii/dragon-ball-z-budokai-tenkaichi-2', 7.3086841899193447)
('doodlerman', 'pc/mount-blade', 7.3086841899193447)
('doodlerman', 'pc/mass-effect-andromeda', 7.3086841899193447)
('doodlerman', 'pc/steep', 7.3086841899193447)
('doodlerman'

('doodlerman', 'pc/octodad-dadliest-catch', 7.3086841899193447)
('doodlerman', '3ds/heroes-of-ruin', 7.3086841899193447)
('doodlerman', 'playstation-2/crash-nitro-kart', 7.3086841899193447)
('doodlerman', 'playstation-2/clock-tower-3', 7.3086841899193447)
('doodlerman', 'pc/state-of-decay-2', 7.3086841899193447)
('doodlerman', 'xbox-360/silent-hill-hd-collection', 7.3086841899193447)
('doodlerman', 'playstation-4/resident-evil-5', 7.3086841899193447)
('doodlerman', 'pc/the-matrix-online', 7.3086841899193447)
('doodlerman', 'playstation-4/assassins-creed-chronicles-china', 7.3086841899193447)
('doodlerman', 'psp/medal-of-honor-heroes-2', 7.3086841899193447)
('doodlerman', 'playstation-4/game-of-thrones-a-telltale-games-series', 7.3086841899193447)
('doodlerman', 'pc/lichdom-battlemage', 7.3086841899193447)
('doodlerman', 'xbox-one/the-lego-movie-videogame', 7.3086841899193447)
('doodlerman', 'playstation-2/from-russia-with-love', 7.3086841899193447)
('doodlerman', 'playstation-3/conan',

('doodlerman', 'playstation-4/the-technomancer', 7.3086841899193447)
('doodlerman', 'ds/pokemon-mystery-dungeon-explorers-of-time', 7.3086841899193447)
('doodlerman', 'playstation-4/deadpool', 7.3086841899193447)
('doodlerman', 'playstation-vita/resistance-burning-skies', 7.3086841899193447)
('doodlerman', 'gamecube/gauntlet-dark-legacy', 7.3086841899193447)
('doodlerman', 'pc/carrier-command-gaea-mission', 7.3086841899193447)
('doodlerman', 'playstation-2/spyro-a-heros-tail', 7.3086841899193447)
('doodlerman', 'playstation-3/time-crisis-4', 7.3086841899193447)
('doodlerman', 'pc/zombie-driver', 7.3086841899193447)
('doodlerman', 'playstation-3/clive-barkers-jericho', 7.3086841899193447)
('doodlerman', 'playstation-3/dragon-ball-raging-blast-2', 7.3086841899193447)
('doodlerman', 'playstation-3/spider-man-3', 7.3086841899193447)
('doodlerman', 'game-boy-advance/yu-gi-oh!-the-sacred-cards', 7.3086841899193447)
('doodlerman', 'playstation-3/need-for-speed-undercover', 7.3086841899193447)

In [62]:
# Gather every testset entry belonging to user 'ChrnoTodd' into `b`,
# printing each match as it is encountered.
b = []
for item in testset:
    if 'ChrnoTodd' == item[0]:
        print(item)
        b += [item]
    

('ChrnoTodd', 'playstation/tony-hawks-pro-skater-2', 7.3086841899193447)
('ChrnoTodd', 'playstation-3/grand-theft-auto-iv', 7.3086841899193447)
('ChrnoTodd', 'dreamcast/soulcalibur', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/grand-theft-auto-iv', 7.3086841899193447)
('ChrnoTodd', 'wii/super-mario-galaxy', 7.3086841899193447)
('ChrnoTodd', 'wii/super-mario-galaxy-2', 7.3086841899193447)
('ChrnoTodd', 'xbox-one/grand-theft-auto-v', 7.3086841899193447)
('ChrnoTodd', 'playstation-3/grand-theft-auto-v', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/grand-theft-auto-v', 7.3086841899193447)
('ChrnoTodd', 'switch/the-legend-of-zelda-breath-of-the-wild', 7.3086841899193447)
('ChrnoTodd', 'playstation-2/tony-hawks-pro-skater-3', 7.3086841899193447)
('ChrnoTodd', 'nintendo-64/perfect-dark', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/grand-theft-auto-v', 7.3086841899193447)
('ChrnoTodd', 'gamecube/metroid-prime', 7.3086841899193447)
('ChrnoTodd', 'playstation-2/grand-theft-auto-iii', 7.30868

('ChrnoTodd', 'playstation-3/wipeout-hd', 7.3086841899193447)
('ChrnoTodd', 'pc/divinity-original-sin', 7.3086841899193447)
('ChrnoTodd', 'pc/company-of-heroes-opposing-fronts', 7.3086841899193447)
('ChrnoTodd', 'xbox-one/destiny-2', 7.3086841899193447)
('ChrnoTodd', 'switch/owlboy', 7.3086841899193447)
('ChrnoTodd', 'pc/psychonauts', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/dirt-2', 7.3086841899193447)
('ChrnoTodd', 'pc/gears-of-war', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/rayman-origins', 7.3086841899193447)
('ChrnoTodd', 'wii/sin-punishment-star-successor', 7.3086841899193447)
('ChrnoTodd', 'gamecube/super-monkey-ball', 7.3086841899193447)
('ChrnoTodd', 'pc/hollow-knight', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/spelunky', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/dark-souls-ii-scholar-of-the-first-sin', 7.3086841899193447)
('ChrnoTodd', 'playstation-3/resistance-2', 7.3086841899193447)
('ChrnoTodd', 'pc/inside', 7.3086841899193447)
('ChrnoTodd', 'playstation-3/fl

('ChrnoTodd', 'playstation-4/the-banner-saga', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/trials-fusion', 7.3086841899193447)
('ChrnoTodd', 'playstation-3/prototype-2', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/final-fantasy-xiii-2', 7.3086841899193447)
('ChrnoTodd', 'pc/fallout-4-far-harbor', 7.3086841899193447)
('ChrnoTodd', 'pc/dying-light-the-following', 7.3086841899193447)
('ChrnoTodd', 'xbox/gladius', 7.3086841899193447)
('ChrnoTodd', 'pc/day-of-defeat', 7.3086841899193447)
('ChrnoTodd', 'playstation-2/tekken-4', 7.3086841899193447)
('ChrnoTodd', 'pc/space-empires-iv', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/mirrors-edge', 7.3086841899193447)
('ChrnoTodd', 'pc/tom-clancys-the-division', 7.3086841899193447)
('ChrnoTodd', 'pc/resident-evil-4-ultimate-hd-edition', 7.3086841899193447)
('ChrnoTodd', 'ds/mario-vs-donkey-kong-mini-land-mayhem', 7.3086841899193447)
('ChrnoTodd', 'ds/metroid-prime-pinball', 7.3086841899193447)
('ChrnoTodd', 'game-boy-advance/mega-man-bass', 7.

('ChrnoTodd', 'xbox/from-russia-with-love', 7.3086841899193447)
('ChrnoTodd', 'pc/transport-fever', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/gotham-city-impostors', 7.3086841899193447)
('ChrnoTodd', 'pc/minecraft-story-mode---episode-1-the-order-of-the-stone', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/the-simpsons-game', 7.3086841899193447)
('ChrnoTodd', '3ds/pilotwings-resort', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/flow', 7.3086841899193447)
('ChrnoTodd', 'playstation-2/harry-potter-and-the-chamber-of-secrets', 7.3086841899193447)
('ChrnoTodd', 'xbox/blinx-the-time-sweeper', 7.3086841899193447)
('ChrnoTodd', 'psp/the-3rd-birthday', 7.3086841899193447)
('ChrnoTodd', 'ds/resident-evil-deadly-silence', 7.3086841899193447)
('ChrnoTodd', 'pc/men-of-valor', 7.3086841899193447)
('ChrnoTodd', 'pc/sniper-elite-iii', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/chromehounds', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/gauntlet-slayer-edition', 7.3086841899193447)
('ChrnoTod

('ChrnoTodd', 'gamecube/tom-clancys-ghost-recon', 7.3086841899193447)
('ChrnoTodd', 'pc/007-nightfire', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/contrast', 7.3086841899193447)
('ChrnoTodd', 'pc/mars-war-logs', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/the-inpatient', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/loadout', 7.3086841899193447)
('ChrnoTodd', 'playstation-3/dust-514', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/nba-live-15', 7.3086841899193447)
('ChrnoTodd', 'playstation-4/nba-live-16', 7.3086841899193447)
('ChrnoTodd', 'pc/postal-2-share-the-pain', 7.3086841899193447)
('ChrnoTodd', 'xbox-360/green-lantern-rise-of-the-manhunters', 7.3086841899193447)
('ChrnoTodd', 'psp/coded-arms', 7.3086841899193447)
('ChrnoTodd', 'xbox-one/nhl-15', 7.3086841899193447)
('ChrnoTodd', 'pc/lifeless-planet', 7.3086841899193447)
('ChrnoTodd', 'playstation-3/defiance', 7.3086841899193447)
('ChrnoTodd', 'playstation-3/dark-void', 7.3086841899193447)
('ChrnoTodd', 'xbox-360

In [63]:
len(b)

5700

In [68]:
ts.ur

defaultdict(list,
            {0: [(0, 10.0),
              (16, 10.0),
              (150, 5.0),
              (383, 9.0),
              (2164, 9.0),
              (3294, 8.0),
              (4102, 7.0),
              (4167, 8.0),
              (4181, 6.0),
              (4459, 6.0)],
             1: [(0, 10.0)],
             2: [(0, 10.0), (674, 8.0), (2044, 10.0)],
             3: [(0, 10.0)],
             4: [(0, 10.0), (94, 1.0), (1270, 1.0), (1367, 4.0)],
             5: [(0, 10.0), (764, 9.0), (2373, 9.0), (5189, 9.0)],
             6: [(0, 10.0),
              (60, 10.0),
              (265, 4.0),
              (469, 0.0),
              (687, 10.0),
              (875, 10.0),
              (1060, 10.0),
              (2442, 9.0),
              (2464, 4.0),
              (2780, 4.0),
              (2912, 8.0),
              (3199, 8.0),
              (3559, 10.0),
              (4452, 6.0),
              (5489, 3.0)],
             7: [(0, 10.0)],
             8: [(0, 10.0),
    

In [66]:
print(ts.all_users())
print(ts.all_items())

range(0, 11574)
range(0, 5701)


In [34]:
#games test set with all non-existing u-i ratings
print(len(tset))
print(len(testset))

38311
65945235


In [28]:
print(len(mtestset))
print(len(mtset))

20431
10157918


In [None]:
predictions = algo.test(testset)


### Load saved predictions

In [42]:
from surprise import dump

In [6]:
p,_ = dump.load('data/dumps/ubcf_tv_fold1_knn2')

In [8]:
pg,_ = dump.load('data/dumps/test_games_pkl2')

In [22]:
# Peek at the first loaded prediction and its Python type, then stop.
for item in pg:
    print(item, type(item), sep='\n')
    break

user: 676        item: 2          r_ui = 7.31   est = 4.50   {'actual_k': 2, 'was_impossible': False}
<class 'surprise.prediction_algorithms.predictions.Prediction'>


In [28]:
# Show only the first few predictions: printing the whole list floods the
# notebook output (the saved output of this cell was cut off mid-entry).
print(pg[:10])

[Prediction(uid=676, iid=2, r_ui=7.308684189919345, est=4.5, details={'actual_k': 2, 'was_impossible': False}), Prediction(uid=676, iid=3, r_ui=7.308684189919345, est=5.0, details={'actual_k': 2, 'was_impossible': False}), Prediction(uid=676, iid=4, r_ui=7.308684189919345, est=10, details={'actual_k': 2, 'was_impossible': False}), Prediction(uid=676, iid=5, r_ui=7.308684189919345, est=7.0, details={'actual_k': 2, 'was_impossible': False}), Prediction(uid=676, iid=6, r_ui=7.308684189919345, est=10, details={'actual_k': 2, 'was_impossible': False}), Prediction(uid=676, iid=7, r_ui=7.308684189919345, est=10, details={'actual_k': 2, 'was_impossible': False}), Prediction(uid=676, iid=8, r_ui=7.308684189919345, est=5.0, details={'actual_k': 2, 'was_impossible': False}), Prediction(uid=676, iid=9, r_ui=7.308684189919345, est=5.5, details={'actual_k': 2, 'was_impossible': False}), Prediction(uid=676, iid=10, r_ui=7.308684189919345, est=10, details={'actual_k': 2, 'was_impossible': False}), Pre

In [43]:
import _pickle as pickle

In [35]:

# Write pg[0:3], pg[3:6], pg[6:9] and pg[9:12] to 'test_pkl' as four
# consecutive pickle records in one file.
with open('test_pkl', 'wb') as f:
    for i in (0, 3, 6, 9):
        data = pg[i:i + 3]
        pickle.dump(data, f)
    

In [36]:
# A single pickle.load() call returns one pickled object, i.e. the first record
# in 'test_pkl'. The with-block closes the file handle, which the previous
# open(...) call left dangling.
with open('test_pkl', 'rb') as f:
    data = pickle.load(f)

In [37]:
data

[Prediction(uid=676, iid=2, r_ui=7.308684189919345, est=4.5, details={'actual_k': 2, 'was_impossible': False}),
 Prediction(uid=676, iid=3, r_ui=7.308684189919345, est=5.0, details={'actual_k': 2, 'was_impossible': False}),
 Prediction(uid=676, iid=4, r_ui=7.308684189919345, est=10, details={'actual_k': 2, 'was_impossible': False})]

### The only solution is to hash the games data (replace user and item names with integer IDs) to reduce the file size as much as possible.
   This will not change the number of recommendations that we are creating; it only lowers memory usage and makes predicting recommendations faster.

In [4]:
train,test = load_train_test_data('games',1,5)

In [28]:
# Distinct user names found in the training frame, plus two empty lookup
# tables (name -> id and id -> name) that are filled in afterwards.
unique_user = train['user_id'].unique()
user_to_uid, uid_to_user = {}, {}

In [29]:
#assign ids to users and items
for ID,user in enumerate(unique_user):
    user_to_uid[user] = ID+1
    uid_to_user[ID+1] = user

In [37]:
# Distinct item names found in the training frame, plus two empty lookup
# tables (name -> id and id -> name) that are filled in afterwards.
unique_item = train['item_id'].unique()
item_to_iid, iid_to_item = {}, {}

In [40]:
#assign ids to items
for ID,item in enumerate(unique_item):
    item_to_iid[item] = ID+1
    iid_to_item[ID+1] = item

In [25]:
len(user_to_uid)

13368

In [45]:
len(item_to_iid)

5701

### Save user-item mapping dictionaries with pickle

In [49]:
import pickle

In [93]:
def save_user_item_mappings(user_to_uid,uid_to_user,item_to_iid,iid_to_item):
    """
    Pickle the four user/item lookup dictionaries to disk.
    Arguments:
        user_to_uid: mapping written to 'data//user_to_uid.pkl'.
        uid_to_user: mapping written to 'data//uid_to_user.pkl'.
        item_to_iid: mapping written to 'data//item_to_iid.pkl'.
        iid_to_item: mapping written to 'data//iid_to_item.pkl'.
    Returns:
        None. Existing files at those paths are overwritten.
    """
    targets = (
        ('data//user_to_uid.pkl', user_to_uid),
        ('data//uid_to_user.pkl', uid_to_user),
        ('data//item_to_iid.pkl', item_to_iid),
        ('data//iid_to_item.pkl', iid_to_item),
    )
    for path, mapping in targets:
        # The with-block guarantees each file is flushed and closed before the
        # next one is opened; the bare open() calls never closed their handles.
        with open(path, 'wb') as handle:
            pickle.dump(mapping, handle)


In [50]:
def load_user_item_mappings():
    """
    Load the four pickled user/item lookup dictionaries from the 'data' folder.
    Returns:
        A tuple (user_to_uid, uid_to_user, item_to_iid, iid_to_item) read from
        'data//user_to_uid.pkl', 'data//uid_to_user.pkl',
        'data//item_to_iid.pkl' and 'data//iid_to_item.pkl' respectively.
    """
    paths = (
        'data//user_to_uid.pkl',
        'data//uid_to_user.pkl',
        'data//item_to_iid.pkl',
        'data//iid_to_item.pkl',
    )
    loaded = []
    for path in paths:
        # NOTE: unpickling can execute arbitrary code; only load files that
        # were written by this project.
        # The with-block closes each handle, unlike the bare open() calls.
        with open(path, 'rb') as handle:
            loaded.append(pickle.load(handle))
    user_to_uid, uid_to_user, item_to_iid, iid_to_item = loaded
    return user_to_uid,uid_to_user,item_to_iid,iid_to_item

In [95]:
save_user_item_mappings(user_to_uid,uid_to_user,item_to_iid,iid_to_item)

In [51]:
user_to_uid,uid_to_user,item_to_iid,iid_to_item = load_user_item_mappings()

### Convert the test set values using hash table dictionaries

In [44]:
def transform_ids(df):
    """
    Replace the raw user and item identifiers of a dataframe with their ids.
    Arguments:
        df: dataframe with 'user_id' and 'item_id' columns whose values are
            keys of the module-level user_to_uid / item_to_iid dictionaries.
    Returns:
        The same dataframe object; its 'user_id' and 'item_id' columns are
        overwritten in place. An identifier missing from the dictionaries
        raises KeyError.
    """
    # Users are converted before items, one dictionary lookup per row.
    df['user_id'] = list(map(user_to_uid.__getitem__, df.user_id))
    df['item_id'] = list(map(item_to_iid.__getitem__, df.item_id))
    return df

In [65]:
# Convert the raw user/item names in `test` to integer ids through the shared
# helper instead of repeating its two lookups inline. transform_ids assigns the
# converted columns on the frame it receives and returns that same frame.
test = transform_ids(test)

### Transform Games data files

In [57]:
# NOTE(review): every data/games<i>.csv is overwritten in place with its
# id-mapped version. Re-running this cell would feed already-converted ids to
# the name-keyed dictionaries (most likely a KeyError), and later cells that
# read games<i>.csv would no longer see the raw names. Confirm this is intended.
loc = 'data/'
for i in range(1,6):
    dfname = 'games{}.csv'.format(i)
    #load dataset
    df = pd.read_csv(loc+dfname,delimiter='|')
    #transform user-item data (transform_ids converts df's id columns and returns df)
    df_transformed = transform_ids(df)
    #save data on disk
    df_transformed.to_csv(loc+dfname,sep='|',index=False)
    

In [45]:
games1 = pd.read_csv('data/games1.csv',delimiter='|')

In [47]:
print(games1.shape)
games1.head()

(38311, 3)


Unnamed: 0,user_id,item_id,u_rating
0,doodlerman,nintendo-64/the-legend-of-zelda-ocarina-of-time,10.0
1,ChrnoTodd,nintendo-64/the-legend-of-zelda-ocarina-of-time,10.0
2,Wpnfire,nintendo-64/the-legend-of-zelda-ocarina-of-time,10.0
3,takepantsoff,nintendo-64/the-legend-of-zelda-ocarina-of-time,10.0
4,CarlosS.,nintendo-64/the-legend-of-zelda-ocarina-of-time,10.0


In [52]:
games1_transformed = transform_ids(games1)

In [53]:
print(games1_transformed.shape)
games1_transformed.head()

(38311, 3)


Unnamed: 0,user_id,item_id,u_rating
0,676,1,10.0
1,1828,1,10.0
2,3205,1,10.0
3,1732,1,10.0
4,792,1,10.0


In [56]:
games1_transformed.to_csv('abc',sep = '|',index = False)

##### Test the robustness

In [71]:
test.dtypes

user_id       int64
item_id       int64
u_rating    float64
dtype: object

In [75]:
test[test.user_id == 2]

Unnamed: 0,user_id,item_id,u_rating
2303,2,54,10.0
7319,2,304,10.0
7878,2,348,10.0


In [76]:
uid_to_user[2]

'ja52ng74'

In [81]:
iid_to_item[348]

'pc/portal'

In [69]:
test.head()

Unnamed: 0,user_id,item_id,u_rating
0,676,1,10.0
1,1828,1,10.0
2,3205,1,10.0
3,1732,1,10.0
4,792,1,10.0


In [78]:
t2 = pd.read_csv('data//games1.csv',delimiter = '|')

In [79]:
t2[t2.user_id=='ja52ng74']

Unnamed: 0,user_id,item_id,u_rating
2303,ja52ng74,pc/portal-2,10.0
7319,ja52ng74,3ds/the-legend-of-zelda-a-link-between-worlds,10.0
7878,ja52ng74,pc/portal,10.0


### Transform train data

In [85]:
train2 = transform_ids(train)

In [86]:
train2.head()

Unnamed: 0,user_id,item_id,u_rating
0,1,1,10.0
1,2,1,10.0
2,3,1,10.0
3,4,1,10.0
4,5,1,10.0


In [87]:
#save transformed data
train.to_csv('data//games_train_transformed.csv',index=False,sep='|')
test.to_csv('data//games_test_transformed.csv',index=False,sep='|')

In [89]:
train2,test2 = load_train_test_data('games',1,5)

In [90]:
#save untransformed data
train2.to_csv('data//games_train.csv',index=False,sep='|')
test2.to_csv('data//games_test.csv',index=False,sep='|')