作业一:练习UserCF和ItemCF代码

In [1]:
# Toy ratings data set used by the collaborative-filtering examples below.
# Structure: {critic name: {movie title: rating}}. The ratings that appear here
# are floats between 1.0 and 5.0, and a critic need not have rated every movie
# (e.g. 'Toby' has only three ratings).
critics={'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
      'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5,
      'The Night Listener': 3.0},
     'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5,
      'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0,
      'You, Me and Dupree': 3.5},
     'Michael Phillips': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0,
      'Superman Returns': 3.5, 'The Night Listener': 4.0},
     'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0,
      'The Night Listener': 4.5, 'Superman Returns': 4.0,
      'You, Me and Dupree': 2.5},
     'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
      'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0,
      'You, Me and Dupree': 2.0},
     'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
      'The Night Listener': 3.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5},
     'Toby': {'Snakes on a Plane':4.5,'You, Me and Dupree':1.0,'Superman Returns':4.0}}
In [2]:
critics['Lisa Rose']['Lady in the Water']
Out[2]:
2.5
In [3]:
critics['Toby']['Snakes on a Plane']
Out[3]:
4.5
In [4]:
critics['Toby']
Out[4]:
{'Snakes on a Plane': 4.5, 'You, Me and Dupree': 1.0, 'Superman Returns': 4.0}
In [ ]:
#找到相似的users
In [5]:
# 欧几里得距离
import numpy as np
np.sqrt(np.power(5-4, 2) + np.power(4-1, 2))
Out[5]:
3.1622776601683795
In [6]:
1.0 /(1 + np.sqrt(np.power(5-4, 2) + np.power(4-1, 2)) )
Out[6]:
0.2402530733520421
In [8]:
def sim_distance(prefs,person1,person2):
    """Euclidean-distance based similarity between two entries of ``prefs``.

    Args:
        prefs: nested mapping ``{name: {item: rating}}``.
        person1, person2: keys of ``prefs`` to compare.

    Returns:
        ``1 / (1 + d)`` where ``d`` is the Euclidean distance computed over the
        items rated by both entries, so identical ratings give 1 and larger
        distances approach 0. Returns 0 when the two share no items.
    """
    # Items present on both sides, visited in person1's order.
    common = [title for title in prefs[person1] if title in prefs[person2]]
    if not common:
        return 0
    squared_gaps = [np.power(prefs[person1][title] - prefs[person2][title], 2)
                    for title in common]
    distance = np.sqrt(np.sum(squared_gaps))
    return 1 / (1 + distance)
In [9]:
sim_distance(critics, 'Lisa Rose','Toby')
Out[9]:
0.3483314773547883
In [ ]:
#Pearson相关系数
In [10]:
def sim_pearson(prefs,p1,p2):
    """Pearson correlation between the ratings of two entries of ``prefs``.

    Only items present under both ``prefs[p1]`` and ``prefs[p2]`` are used.

    Args:
        prefs: nested mapping ``{name: {item: rating}}``.
        p1, p2: keys of ``prefs`` to compare.

    Returns:
        The correlation coefficient, or 0 when the two share no items or when
        the coefficient is undefined because either side has no spread over
        the shared items.
    """
    # Mutually rated items, in p1's order.
    shared=[it for it in prefs[p1] if it in prefs[p2]]
    n=len(shared)
    # No ratings in common: nothing to correlate.
    if n==0: return 0
    ratings1=[prefs[p1][it] for it in shared]
    ratings2=[prefs[p2][it] for it in shared]
    # Plain sums, sums of squares and sum of products.
    sum1=np.sum(ratings1)
    sum2=np.sum(ratings2)
    sum1Sq=np.sum([np.power(r,2) for r in ratings1])
    sum2Sq=np.sum([np.power(r,2) for r in ratings2])
    pSum=np.sum([a*b for a,b in zip(ratings1,ratings2)])
    num=pSum-(sum1*sum2/n)
    # Each spread term is mathematically >= 0, but floating-point cancellation
    # can leave it slightly negative for (near-)constant ratings. Taking the
    # square root of a negative product would yield NaN, which a plain
    # ``den==0`` test does not catch and which breaks sorting of the scores.
    spread1=sum1Sq-np.power(sum1,2)/n
    spread2=sum2Sq-np.power(sum2,2)/n
    if not (spread1>0 and spread2>0): return 0
    den=np.sqrt(spread1*spread2)
    if den==0: return 0
    return num/den
In [11]:
sim_pearson(critics, 'Lisa Rose','Toby')
Out[11]:
0.9912407071619299
In [12]:
def topMatches(prefs,person,n=5,similarity=sim_pearson):
    """Return the ``n`` entries of ``prefs`` most similar to ``person``.

    Args:
        prefs: nested mapping ``{name: {item: rating}}``.
        person: key of ``prefs`` to find matches for; it is never matched
            against itself.
        n: maximum number of matches to return.
        similarity: callable ``similarity(prefs, a, b)`` returning a score.

    Returns:
        A list of ``(score, name)`` tuples, highest score first.
    """
    scored = []
    for candidate in prefs:
        if candidate != person:
            scored.append((similarity(prefs, person, candidate), candidate))
    # Ascending tuple sort, then flipped so the best matches lead the list.
    best_first = sorted(scored)[::-1]
    return best_first[:n]
In [13]:
topMatches(critics,'Toby',n=3) # topN
Out[13]:
[(0.9912407071619299, 'Lisa Rose'),
 (0.9244734516419049, 'Mick LaSalle'),
 (0.8934051474415647, 'Claudia Puig')]
In [ ]:
#推荐项目
In [19]:
def getRecommendations(prefs,person,similarity=sim_pearson):
    """Predict scores for the items ``person`` has not rated yet.

    Each prediction is the similarity-weighted average of the ratings given by
    the other entries of ``prefs``; entries whose similarity to ``person`` is
    zero or negative are ignored.

    Args:
        prefs: nested mapping ``{name: {item: rating}}``.
        person: key of ``prefs`` to produce recommendations for.
        similarity: callable ``similarity(prefs, a, b)`` returning a score.

    Returns:
        A list of ``(predicted score, item)`` tuples, highest score first.
    """
    weighted_totals = {}
    weight_sums = {}
    for other in prefs:
        # Never compare the person with themselves.
        if other == person:
            continue
        weight = similarity(prefs, person, other)
        if weight <= 0:
            continue
        for item in prefs[other]:
            # Only score items the person has not rated.
            if item in prefs[person]:
                continue
            weighted_totals[item] = weighted_totals.get(item, 0) + prefs[other][item] * weight
            weight_sums[item] = weight_sums.get(item, 0) + weight
    # Normalise by the summed weights, then order from best to worst.
    ordered = sorted((total / weight_sums[item], item)
                     for item, total in weighted_totals.items())
    return ordered[::-1]
In [20]:
getRecommendations(critics,'Toby')
Out[20]:
[(3.3477895267131017, 'The Night Listener'),
 (2.8325499182641614, 'Lady in the Water'),
 (2.530980703765565, 'Just My Luck')]
In [21]:
getRecommendations(critics,'Toby',similarity=sim_distance)
Out[21]:
[(3.457128694491423, 'The Night Listener'),
 (2.778584003814924, 'Lady in the Water'),
 (2.422482042361917, 'Just My Luck')]
In [ ]:
#将user-item字典翻转为item-user字典
In [24]:
def transformPrefs(prefs):
    """Swap the two key levels of a nested ratings mapping.

    Args:
        prefs: nested mapping ``{outer: {inner: value}}``.

    Returns:
        A new dict ``{inner: {outer: value}}`` holding the same values.
    """
    flipped = {}
    for outer, ratings in prefs.items():
        for inner, value in ratings.items():
            flipped.setdefault(inner, {})[outer] = value
    return flipped

# Item-centric view of the ratings: {movie title: {critic name: rating}}.
movies = transformPrefs(critics)
In [ ]:
#计算item的相似性
In [23]:
topMatches(movies,'Superman Returns')
Out[23]:
[(0.6579516949597695, 'You, Me and Dupree'),
 (0.4879500364742689, 'Lady in the Water'),
 (0.11180339887498941, 'Snakes on a Plane'),
 (-0.1798471947990544, 'The Night Listener'),
 (-0.42289003161103106, 'Just My Luck')]
In [ ]:
#给item推荐user
In [25]:
def calculateSimilarItems(prefs,n=10):
    """Build a lookup of the most similar items for every item in ``prefs``.

    Args:
        prefs: user-centric mapping ``{user: {item: rating}}``.
        n: maximum number of neighbours to keep per item.

    Returns:
        A dict mapping each item to its ``topMatches`` result, i.e. a list of
        ``(similarity, other item)`` tuples computed with ``sim_distance``.
    """
    # Work on the item-centric view of the ratings.
    itemPrefs = transformPrefs(prefs)
    total = len(itemPrefs)
    neighbours = {}
    for position, item in enumerate(itemPrefs, start=1):
        # Progress report every 100 items, useful on large data sets.
        if position % 100 == 0:
            print("%d / %d" % (position, total))
        neighbours[item] = topMatches(itemPrefs, item, n=n, similarity=sim_distance)
    return neighbours

# Precompute, for every movie, its nearest movies and their similarity scores.
itemsim=calculateSimilarItems(critics) 
# Show the (similarity, title) neighbours of one movie.
itemsim['Superman Returns']
Out[25]:
[(0.3090169943749474, 'Snakes on a Plane'),
 (0.252650308587072, 'The Night Listener'),
 (0.2402530733520421, 'Lady in the Water'),
 (0.20799159651347807, 'Just My Luck'),
 (0.1918253663634734, 'You, Me and Dupree')]
In [26]:
def getRecommendedItems(prefs,itemMatch,user):
    """Item-based recommendations for ``user`` from precomputed item neighbours.

    For every item the user has not rated, the prediction is the average of the
    user's own ratings, weighted by how similar each rated item is to it.

    Args:
        prefs: user-centric mapping ``{user: {item: rating}}``.
        itemMatch: mapping ``{item: [(similarity, other item), ...]}``, as
            produced by ``calculateSimilarItems``.
        user: key of ``prefs`` to produce recommendations for.

    Returns:
        A list of ``(predicted score, item)`` tuples, highest score first.
        Candidates whose similarities sum to 0 are left out, because their
        weighted average is undefined.

    Raises:
        KeyError: if ``user`` is not in ``prefs`` or a rated item is missing
            from ``itemMatch``.
    """
    userRatings=prefs[user]
    scores={}
    totalSim={}
    # Loop over items rated by this user
    for item,rating in userRatings.items():
        # Loop over items similar to this one
        for similarity,item2 in itemMatch[item]:
            # Ignore if this user has already rated this item
            if item2 in userRatings: continue
            # Weighted sum of rating times similarity
            scores[item2]=scores.get(item2,0)+similarity*rating
            # Sum of all the similarities
            totalSim[item2]=totalSim.get(item2,0)+similarity
    # Divide each total score by total weighting to get an average. A total
    # weight of 0 (e.g. neighbours with no overlap score 0) would otherwise
    # raise ZeroDivisionError, so those candidates are skipped.
    rankings=[(score/totalSim[item],item)
              for item,score in scores.items() if totalSim[item]!=0]
    # Return the rankings from highest to lowest
    rankings.sort(reverse=True)
    return rankings

getRecommendedItems(critics,itemsim,'Toby')
Out[26]:
[(3.1667425234070894, 'The Night Listener'),
 (2.9366294028444346, 'Just My Luck'),
 (2.868767392626467, 'Lady in the Water')]
In [27]:
getRecommendations(movies,'Just My Luck')
Out[27]:
[(4.0, 'Michael Phillips'), (3.0, 'Jack Matthews')]
In [28]:
getRecommendations(movies, 'You, Me and Dupree')
Out[28]:
[(3.1637361366111816, 'Michael Phillips')]

作业二:使用GraphLab(Turi Create)对音乐数据进行音乐推荐

In [ ]:
import turicreate as tc
In [32]:
# Load the tab-separated, header-less ratings file into an SFrame.
# NOTE(review): absolute, machine-specific path; this cell only runs where that file exists.
train_file = '/Users/apple/Desktop/10000.txt'
sf = tc.SFrame.read_csv(train_file, header=False, delimiter='\t', verbose=False)
# The header-less columns come in as X1..X3; give them meaningful names.
# NOTE(review): 'rating' looks unbounded (per-user RMSE in the hundreds further down),
# so it is presumably a play count rather than a 1-5 rating; confirm against the data source.
sf=sf.rename({'X1':'user_id', 'X2':'music_id', 'X3':'rating'})
sf
Out[32]:
user_id music_id rating
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOAKIMP12A8C130995 1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOBBMDR12A8C13253B 2
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOBXHDL12A81C204C0 1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOBYHAJ12A6701BF1D 1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SODACBL12A8C13C273 1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SODDNQT12A6D4F5F7E 5
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SODXRTY12AB0180F3B 1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOFGUAY12AB017B0A8 1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOFRQTD12A81C233C0 1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOHQWYZ12A6D4FA701 1
[2000000 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
In [33]:
# Random train/test split with a fixed seed; the logs below report 1,599,753
# training observations out of the 2,000,000 rows, i.e. roughly 80%.
train_set, test_set = sf.random_split(0.8, seed=1)
In [35]:
# Model 1: popularity recommender trained on the training split, with
# 'rating' as the target column.
popularity_model = tc.popularity_recommender.create(train_set, 
                                                    'user_id', 'music_id', 
                                                    target = 'rating')
Preparing data set.
    Data has 1599753 observations with 76085 users and 10000 items.
    Data prepared in: 1.13403s
1599753 observations to process; with 10000 unique items.
In [37]:
# Model 2: item-similarity recommender using cosine similarity between items,
# trained on the same training split and target column.
item_sim_model = tc.item_similarity_recommender.create(train_set, 
                                                       'user_id', 'music_id', 
                                                       target = 'rating', 
                                                       similarity_type='cosine')
Preparing data set.
    Data has 1599753 observations with 76085 users and 10000 items.
    Data prepared in: 1.08712s
Training model from provided data.
Gathering per-item and per-user statistics.
+--------------------------------+------------+
| Elapsed Time (Item Statistics) | % Complete |
+--------------------------------+------------+
| 5.665ms                        | 1.25       |
| 36.965ms                       | 100        |
+--------------------------------+------------+
Setting up lookup tables.
Processing data in one pass using dense lookup tables.
+-------------------------------------+------------------+-----------------+
| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |
+-------------------------------------+------------------+-----------------+
| 257.677ms                           | 0                | 0               |
| 1.17s                               | 100              | 10000           |
+-------------------------------------+------------------+-----------------+
Finalizing lookup tables.
Generating candidate set for working with new users.
Finished training in 2.239s
In [38]:
# Model 3: factorization recommender with library defaults; the training log
# below shows 8 factors, the SGD solver and at most 50 iterations.
factorization_machine_model = tc.recommender.factorization_recommender.create(train_set, 
                                                                              'user_id', 'music_id',
                                                                              target='rating')
Preparing data set.
    Data has 1599753 observations with 76085 users and 10000 items.
    Data prepared in: 1.11337s
Training factorization_recommender for recommendations.
+--------------------------------+--------------------------------------------------+----------+
| Parameter                      | Description                                      | Value    |
+--------------------------------+--------------------------------------------------+----------+
| num_factors                    | Factor Dimension                                 | 8        |
| regularization                 | L2 Regularization on Factors                     | 1e-08    |
| solver                         | Solver used for training                         | sgd      |
| linear_regularization          | L2 Regularization on Linear Coefficients         | 1e-10    |
| max_iterations                 | Maximum Number of Iterations                     | 50       |
+--------------------------------+--------------------------------------------------+----------+
  Optimizing model using SGD; tuning step size.
  Using 199969 / 1599753 points for tuning the step size.
+---------+-------------------+------------------------------------------+
| Attempt | Initial Step Size | Estimated Objective Value                |
+---------+-------------------+------------------------------------------+
| 0       | 25                | No Decrease (229.1 >= 38.7339)           |
| 1       | 6.25              | No Decrease (217.519 >= 38.7339)         |
| 2       | 1.5625            | No Decrease (187.08 >= 38.7339)          |
| 3       | 0.390625          | No Decrease (86.1325 >= 38.7339)         |
| 4       | 0.0976562         | 13.457                                   |
| 5       | 0.0488281         | 9.51623                                  |
| 6       | 0.0244141         | 22.2178                                  |
+---------+-------------------+------------------------------------------+
| Final   | 0.0488281         | 9.51623                                  |
+---------+-------------------+------------------------------------------+
Starting Optimization.
+---------+--------------+-------------------+-----------------------+-------------+
| Iter.   | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size   |
+---------+--------------+-------------------+-----------------------+-------------+
| Initial | 136us        | 43.795            | 6.61778               |             |
+---------+--------------+-------------------+-----------------------+-------------+
| 1       | 127.336ms    | 43.5109           | 6.59588               | 0.0488281   |
| 2       | 231.79ms     | 40.9494           | 6.39881               | 0.0290334   |
| 3       | 335.701ms    | 37.9295           | 6.15833               | 0.0214205   |
| 4       | 435.863ms    | 35.3075           | 5.94162               | 0.0172633   |
| 5       | 534.614ms    | 32.7871           | 5.72556               | 0.014603    |
| 10      | 1.02s        | 24.8325           | 4.98258               | 0.008683    |
| 50      | 5.13s        | 11.2202           | 3.34825               | 0.00259682  |
+---------+--------------+-------------------+-----------------------+-------------+
Optimization Complete: Maximum number of passes through the data reached.
Computing final objective value and training RMSE.
       Final objective value: 10.2398
       Final training RMSE: 3.19849
In [39]:
len(train_set)
Out[39]:
1599753
In [40]:
# Evaluate the three models on the held-out test set; the log labels them
# M0, M1, M2 in the order they are listed here.
# NOTE(review): user_sample=.5 presumably evaluates on a random half of the test
# users (the log reports 34354), and skip_set=train_set presumably keeps items a
# user already has in training out of the recommendations; confirm in the Turi Create docs.
result = tc.recommender.util.compare_models(test_set, 
                                            [popularity_model, item_sim_model, factorization_machine_model],
                                            user_sample=.5, skip_set=train_set)
compare_models: using 34354 users to estimate model performance
PROGRESS: Evaluate model M0
recommendations finished on 1000/34354 queries. users per second: 16456.3
recommendations finished on 2000/34354 queries. users per second: 16355.6
recommendations finished on 3000/34354 queries. users per second: 18092.8
recommendations finished on 4000/34354 queries. users per second: 18939.5
recommendations finished on 5000/34354 queries. users per second: 19829.4
recommendations finished on 6000/34354 queries. users per second: 20176.2
recommendations finished on 7000/34354 queries. users per second: 20522.5
recommendations finished on 8000/34354 queries. users per second: 21016.3
recommendations finished on 9000/34354 queries. users per second: 21626.8
recommendations finished on 10000/34354 queries. users per second: 21989
recommendations finished on 11000/34354 queries. users per second: 21986.6
recommendations finished on 12000/34354 queries. users per second: 21788.6
recommendations finished on 13000/34354 queries. users per second: 22128.4
recommendations finished on 14000/34354 queries. users per second: 22428
recommendations finished on 15000/34354 queries. users per second: 22549.8
recommendations finished on 16000/34354 queries. users per second: 22912.7
recommendations finished on 17000/34354 queries. users per second: 23212.2
recommendations finished on 18000/34354 queries. users per second: 23492.5
recommendations finished on 19000/34354 queries. users per second: 23769
recommendations finished on 20000/34354 queries. users per second: 24025.9
recommendations finished on 21000/34354 queries. users per second: 24257.1
recommendations finished on 22000/34354 queries. users per second: 24389.6
recommendations finished on 23000/34354 queries. users per second: 24392.2
recommendations finished on 24000/34354 queries. users per second: 24503.5
recommendations finished on 25000/34354 queries. users per second: 24446.2
recommendations finished on 26000/34354 queries. users per second: 24516.4
recommendations finished on 27000/34354 queries. users per second: 24605.9
recommendations finished on 28000/34354 queries. users per second: 24757.6
recommendations finished on 29000/34354 queries. users per second: 24912.8
recommendations finished on 30000/34354 queries. users per second: 25052.5
recommendations finished on 31000/34354 queries. users per second: 25192.1
recommendations finished on 32000/34354 queries. users per second: 24953.1
recommendations finished on 33000/34354 queries. users per second: 25076
recommendations finished on 34000/34354 queries. users per second: 25214.9
/Users/apple/anaconda3/lib/python3.7/site-packages/turicreate/aggregate.py:91: UserWarning: Passing parameter(s) to COUNT is deprecated. This functionality will be removed in the next major release. Any passed parameters are ignored.
  warnings.warn('Passing parameter(s) to COUNT is deprecated. This functionality will be removed in '
Precision and recall summary statistics by cutoff
+--------+------------------------+------------------------+
| cutoff |     mean_precision     |      mean_recall       |
+--------+------------------------+------------------------+
|   1    | 0.00046573906968620777 | 6.073283914950574e-05  |
|   2    | 0.0004366303778308205  | 0.00013132948789798516 |
|   3    | 0.0003881158914051731  | 0.00018963677607661288 |
|   4    | 0.00037841299412004465 | 0.0002703494425001777  |
|   5    | 0.0004832042847994397  | 0.0003970501053453089  |
|   6    | 0.0004414818264733849  | 0.0004303003801977263  |
|   7    | 0.0004283136087292791  | 0.0005218315696360003  |
|   8    | 0.0003966059265296618  |  0.000558450677178437  |
|   9    | 0.0004430989760209053  | 0.0007178822866413372  |
|   10   | 0.00042207603190312545 | 0.0007359954116706883  |
+--------+------------------------+------------------------+
[10 rows x 3 columns]

/Users/apple/anaconda3/lib/python3.7/site-packages/turicreate/aggregate.py:91: UserWarning: Passing parameter(s) to COUNT is deprecated. This functionality will be removed in the next major release. Any passed parameters are ignored.
  warnings.warn('Passing parameter(s) to COUNT is deprecated. This functionality will be removed in '
Overall RMSE: 6.587984865588465

Per User RMSE (best)
+-------------------------------+------+-------+
|            user_id            | rmse | count |
+-------------------------------+------+-------+
| 134813f29bd2a9a7ea3d2f615c... | 0.0  |   1   |
+-------------------------------+------+-------+
[1 rows x 3 columns]


Per User RMSE (worst)
+-------------------------------+-------------------+-------+
|            user_id            |        rmse       | count |
+-------------------------------+-------------------+-------+
| 50996bbabb6f7857bf0c801943... | 647.0133119244094 |   2   |
+-------------------------------+-------------------+-------+
[1 rows x 3 columns]


Per Item RMSE (best)
+--------------------+---------------------+-------+
|      music_id      |         rmse        | count |
+--------------------+---------------------+-------+
| SOGMORP12A8C13EF63 | 0.03846153846153855 |   1   |
+--------------------+---------------------+-------+
[1 rows x 3 columns]


Per Item RMSE (worst)
+--------------------+-------------------+-------+
|      music_id      |        rmse       | count |
+--------------------+-------------------+-------+
| SOUAGPQ12A8AE47B3A | 323.5179299034838 |   8   |
+--------------------+-------------------+-------+
[1 rows x 3 columns]

PROGRESS: Evaluate model M1
recommendations finished on 1000/34354 queries. users per second: 16587.3
recommendations finished on 2000/34354 queries. users per second: 20493.5
recommendations finished on 3000/34354 queries. users per second: 22104.5
recommendations finished on 4000/34354 queries. users per second: 22838
recommendations finished on 5000/34354 queries. users per second: 23641.3
recommendations finished on 6000/34354 queries. users per second: 23881.2
recommendations finished on 7000/34354 queries. users per second: 24343.6
recommendations finished on 8000/34354 queries. users per second: 24527.3
recommendations finished on 9000/34354 queries. users per second: 24747.3
recommendations finished on 10000/34354 queries. users per second: 24850.7
recommendations finished on 11000/34354 queries. users per second: 24651.6
recommendations finished on 12000/34354 queries. users per second: 24649.8
recommendations finished on 13000/34354 queries. users per second: 24867.8
recommendations finished on 14000/34354 queries. users per second: 24964.9
recommendations finished on 15000/34354 queries. users per second: 24924.6
recommendations finished on 16000/34354 queries. users per second: 25122
recommendations finished on 17000/34354 queries. users per second: 25192.6
recommendations finished on 18000/34354 queries. users per second: 24785.2
recommendations finished on 19000/34354 queries. users per second: 24835.6
recommendations finished on 20000/34354 queries. users per second: 24917.7
recommendations finished on 21000/34354 queries. users per second: 24889.9
recommendations finished on 22000/34354 queries. users per second: 24986.1
recommendations finished on 23000/34354 queries. users per second: 24892.5
recommendations finished on 24000/34354 queries. users per second: 24943.8
recommendations finished on 25000/34354 queries. users per second: 24840.5
recommendations finished on 26000/34354 queries. users per second: 24871.6
recommendations finished on 27000/34354 queries. users per second: 24934.6
recommendations finished on 28000/34354 queries. users per second: 24996.5
recommendations finished on 29000/34354 queries. users per second: 24958.5
recommendations finished on 30000/34354 queries. users per second: 24761.3
recommendations finished on 31000/34354 queries. users per second: 24817.1
recommendations finished on 32000/34354 queries. users per second: 24762.5
recommendations finished on 33000/34354 queries. users per second: 24839.3
recommendations finished on 34000/34354 queries. users per second: 24860.5
Precision and recall summary statistics by cutoff
+--------+---------------------+----------------------+
| cutoff |    mean_precision   |     mean_recall      |
+--------+---------------------+----------------------+
|   1    | 0.05082377597950747 | 0.014664961274351634 |
|   2    | 0.06267101356465044 |  0.0332196439784909  |
|   3    | 0.07420775843666945 | 0.05379188118026604  |
|   4    | 0.07667957152005575 | 0.07043384589695527  |
|   5    | 0.07579903359143023 |  0.0847957639719977  |
|   6    | 0.07374201936698313 | 0.09666948377263444  |
|   7    | 0.07152837265778957 | 0.10717233448407068  |
|   8    | 0.06934781975898001 |  0.1168652508571854  |
|   9    | 0.06686266519182545 |  0.1254361512632653  |
|   10   | 0.06470280025615649 | 0.13342951056671634  |
+--------+---------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 7.258436009629355

Per User RMSE (best)
+-------------------------------+-----------------------+-------+
|            user_id            |          rmse         | count |
+-------------------------------+-----------------------+-------+
| dad5cd4678a6f6df34932432bc... | 0.0009171451841081257 |   1   |
+-------------------------------+-----------------------+-------+
[1 rows x 3 columns]


Per User RMSE (worst)
+-------------------------------+-------------------+-------+
|            user_id            |        rmse       | count |
+-------------------------------+-------------------+-------+
| 50996bbabb6f7857bf0c801943... | 650.1213670045946 |   2   |
+-------------------------------+-------------------+-------+
[1 rows x 3 columns]


Per Item RMSE (best)
+--------------------+--------------------+-------+
|      music_id      |        rmse        | count |
+--------------------+--------------------+-------+
| SOTCYEF12A8C1430DA | 0.7949437343691702 |   4   |
+--------------------+--------------------+-------+
[1 rows x 3 columns]


Per Item RMSE (worst)
+--------------------+--------------------+-------+
|      music_id      |        rmse        | count |
+--------------------+--------------------+-------+
| SOUAGPQ12A8AE47B3A | 325.07273212538087 |   8   |
+--------------------+--------------------+-------+
[1 rows x 3 columns]

PROGRESS: Evaluate model M2
recommendations finished on 1000/34354 queries. users per second: 13921
recommendations finished on 2000/34354 queries. users per second: 16214.2
recommendations finished on 3000/34354 queries. users per second: 16993.4
recommendations finished on 4000/34354 queries. users per second: 17369.1
recommendations finished on 5000/34354 queries. users per second: 17815.1
recommendations finished on 6000/34354 queries. users per second: 17970.3
recommendations finished on 7000/34354 queries. users per second: 18229
recommendations finished on 8000/34354 queries. users per second: 18397.7
recommendations finished on 9000/34354 queries. users per second: 18556.9
recommendations finished on 10000/34354 queries. users per second: 18726.4
recommendations finished on 11000/34354 queries. users per second: 18525.3
recommendations finished on 12000/34354 queries. users per second: 18560.3
recommendations finished on 13000/34354 queries. users per second: 18603.7
recommendations finished on 14000/34354 queries. users per second: 18608.9
recommendations finished on 15000/34354 queries. users per second: 18477.6
recommendations finished on 16000/34354 queries. users per second: 18379.5
recommendations finished on 17000/34354 queries. users per second: 18372.6
recommendations finished on 18000/34354 queries. users per second: 18399.4
recommendations finished on 19000/34354 queries. users per second: 18392.2
recommendations finished on 20000/34354 queries. users per second: 18404.9
recommendations finished on 21000/34354 queries. users per second: 18324.1
recommendations finished on 22000/34354 queries. users per second: 18317.5
recommendations finished on 23000/34354 queries. users per second: 18306.8
recommendations finished on 24000/34354 queries. users per second: 18301.4
recommendations finished on 25000/34354 queries. users per second: 18254.2
recommendations finished on 26000/34354 queries. users per second: 18271.3
recommendations finished on 27000/34354 queries. users per second: 18271.5
recommendations finished on 28000/34354 queries. users per second: 18321.5
recommendations finished on 29000/34354 queries. users per second: 18336.9
recommendations finished on 30000/34354 queries. users per second: 18365.6
recommendations finished on 31000/34354 queries. users per second: 18411
recommendations finished on 32000/34354 queries. users per second: 18351.3
recommendations finished on 33000/34354 queries. users per second: 18356.4
recommendations finished on 34000/34354 queries. users per second: 18350.5
Precision and recall summary statistics by cutoff
+--------+------------------------+------------------------+
| cutoff |     mean_precision     |      mean_recall       |
+--------+------------------------+------------------------+
|   1    | 0.0004075216859754317  | 8.884752468688035e-05  |
|   2    | 0.00040752168597543286 | 0.0001700872374105547  |
|   3    | 0.00046573906968620744 | 0.00026034232999434304 |
|   4    | 0.00045846189672235995 | 0.00034613281485795046 |
|   5    | 0.00048320428479944454 | 0.00048576402547070624 |
|   6    |  0.000509402107469289  | 0.0006471998899906366  |
|   7    |  0.000532273222498523  | 0.0008130749221225232  |
|   8    | 0.0005239564533969851  | 0.0008809637002051636  |
|   9    | 0.0005207221543019401  | 0.0009721268306068117  |
|   10   | 0.0005530651452523705  | 0.0011327732773084823  |
+--------+------------------------+------------------------+
[10 rows x 3 columns]


Overall RMSE: 8.158710705782875

Per User RMSE (best)
+-------------------------------+-----------------------+-------+
|            user_id            |          rmse         | count |
+-------------------------------+-----------------------+-------+
| ab7dc54016a0de3b07c6f0aadd... | 0.0006656301678318322 |   1   |
+-------------------------------+-----------------------+-------+
[1 rows x 3 columns]


Per User RMSE (worst)
+-------------------------------+-------------------+-------+
|            user_id            |        rmse       | count |
+-------------------------------+-------------------+-------+
| 50996bbabb6f7857bf0c801943... | 598.2569120703264 |   2   |
+-------------------------------+-------------------+-------+
[1 rows x 3 columns]


Per Item RMSE (best)
+--------------------+----------------------+-------+
|      music_id      |         rmse         | count |
+--------------------+----------------------+-------+
| SOKPBQS12AF72A0D53 | 0.060478116339554244 |   1   |
+--------------------+----------------------+-------+
[1 rows x 3 columns]


Per Item RMSE (worst)
+--------------------+--------------------+-------+
|      music_id      |        rmse        | count |
+--------------------+--------------------+-------+
| SOUAGPQ12A8AE47B3A | 299.12647022877536 |   8   |
+--------------------+--------------------+-------+
[1 rows x 3 columns]

In [42]:
# Number of recommendations requested per user.
K = 10
# Up to 100 distinct user ids taken from the full data set.
users = tc.SArray(sf['user_id'].unique().head(100))
In [43]:
# Top-K recommendations for the selected users from the item-similarity model;
# the result has one row per (user_id, music_id) with its score and rank.
recs = item_sim_model.recommend(users=users, k=K)
recs.head()
Out[43]:
user_id music_id score rank
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOXUQNR12AF72A69D6 3.022422651449839 1
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOUFAZA12AC3DFAB20 1.3368427753448486 2
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOSFSTC12A8C141219 1.091982126235962 3
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOVIWFP12A58A7D1BD 1.045163869857788 4
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOBMTQD12AB01833D0 1.0294516881306965 5
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOCMNRG12AB0189D3F 0.9756437937418619 6
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOXOHUM12A67ADC826 0.9506873289744059 7
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOWBFVW12A6D4F612B 0.9092370669047037 8
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOXFYTY127E9433E7D 0.8977278073628744 9
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOYBLYP12A58A79D32 0.8970928192138672 10
[10 rows x 4 columns]
In [ ]:
 
In [ ]: