In [1]:
#
#  FILTERINGDATA.py
#
#  Code file for the book Programmer's Guide to Data Mining
#  http://guidetodatamining.com
#  Ron Zacharski
#

from math import sqrt

In [2]:
# Sample ratings data: maps each user name to a dictionary of
# {artist name: rating}. Every rating is stored as a float so that values
# taken from this table display uniformly (e.g. 5.0 rather than 5).
users = {
    "Angelica": {
        "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
        "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
        "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
        "Norah Jones": 3.0, "Phoenix": 5.0, "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
        "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
        "The Strokes": 4.0, "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
        "Slightly Stoopid": 2.5, "The Strokes": 3.0,
    },
}

In [3]:
def manhattan(rating1, rating2):
    """Return the Manhattan distance between two rating dictionaries.

    rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only items that appear
    in both dictionaries are compared; the result is the sum of the
    absolute differences of their ratings.

    Returns -1 when the two dictionaries share no item.
    """
    shared = [item for item in rating1 if item in rating2]
    if not shared:
        return -1  # Indicates no ratings in common
    return sum(abs(rating1[item] - rating2[item]) for item in shared)

In [4]:
def minkowski(rating1, rating2, r):
    """Computes the Minkowski distance of order r between two rating dictionaries.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only items present in
    both dictionaries contribute: the absolute rating differences are raised
    to the power r, summed, and the r-th root of the sum is returned.
    With r = 1 this is the Manhattan distance, with r = 2 the Euclidean
    distance.

    Returns float('inf') when the dictionaries have no item in common.
    A distance of 0 is reserved for ratings that agree exactly on every
    shared item, so "nothing to compare" can never be mistaken for a
    perfect match when distances are sorted.
    """
    shared = [item for item in rating1 if item in rating2]
    if not shared:
        # No overlap means no evidence of similarity: treat as infinitely far.
        return float("inf")
    total = sum(pow(abs(rating1[item] - rating2[item]), r) for item in shared)
    return pow(total, 1 / r)

In [5]:
def computeNearestNeighbor(username, users):
    """Creates a sorted list of the other users based on their distance to username.

    users maps each user name to a dictionary of {item: rating}. The
    distance is the Minkowski distance with r = 2 computed over the items
    both users have rated.

    Users who share no rated item with username are left out of the result:
    there is nothing to compare, so they must not be ranked as neighbors.

    Returns a list of (distance, user) tuples, closest first. Ties on
    distance are ordered by user name because the tuples are sorted as a
    whole.
    """
    distances = []
    for user, ratings in users.items():
        if user == username:
            continue
        targetRatings = users[username]
        # Skip users with no item in common with the target user.
        if not any(item in targetRatings for item in ratings):
            continue
        distances.append((minkowski(ratings, targetRatings, 2), user))
    # sort based on distance -- closest first
    distances.sort()
    return distances

In [6]:
def recommend(username, users):
    """Give list of recommendations for username.

    Takes the first entry of computeNearestNeighbor(username, users) as the
    nearest neighbor and recommends every artist that neighbor has rated
    and username has not.

    Returns a list of (artist, rating) tuples, where rating is the
    neighbor's rating, sorted from highest to lowest rating. Returns an
    empty list when no neighbor is available (for example when username is
    the only user).
    """
    # first find nearest neighbor
    neighbors = computeNearestNeighbor(username, users)
    if not neighbors:
        return []
    nearest = neighbors[0][1]

    # now find bands neighbor rated that user didn't
    neighborRatings = users[nearest]
    userRatings = users[username]
    recommendations = [(artist, rating)
                       for artist, rating in neighborRatings.items()
                       if artist not in userRatings]
    # highest-rated artists first
    recommendations.sort(key=lambda artistTuple: artistTuple[1], reverse=True)
    return recommendations

In [7]:
users["Veronica"]

{'Blues Traveler': 3.0,
 'Norah Jones': 5.0,
 'Phoenix': 4.0,
 'Slightly Stoopid': 2.5,
 'The Strokes': 3.0}

In [8]:
manhattan(users['Hailey'], users['Veronica']), manhattan(users['Hailey'], users['Jordyn'])

(2.0, 7.5)

In [9]:
computeNearestNeighbor("Hailey", users)

[(1.4142135623730951, 'Veronica'),
 (2.449489742783178, 'Sam'),
 (2.7386127875258306, 'Angelica'),
 (3.1622776601683795, 'Chan'),
 (3.640054944640259, 'Bill'),
 (3.640054944640259, 'Dan'),
 (4.387482193696061, 'Jordyn')]

In [10]:
recommend('Hailey', users)

[('Phoenix', 4.0), ('Blues Traveler', 3.0), ('Slightly Stoopid', 2.5)]

In [11]:
recommend('Chan', users)

[('The Strokes', 2.5), ('Vampire Weekend', 2.0)]

In [12]:
recommend('Sam', users)

[('Deadmau5', 1.0), ('Vampire Weekend', 1.0)]