In [172]:
import numpy as np
import matplotlib.pyplot as plt

In [178]:
def loadExData():
    return[[0, 0, 0, 2, 2],
           [0, 0, 0, 3, 3],
           [0, 0, 0, 1, 1],
           [1, 1, 1, 0, 0],
           [2, 2, 2, 0, 0],
           [5, 5, 5, 0, 0],
           [1, 1, 1, 0, 0]]
    
def loadExData2():
    return[[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
           [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
           [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
           [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
           [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
           [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
           [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
           [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
           [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
           [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
           [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]

def euclideanSim(inA, inB): #similarity
    return 1.0 / (1.0 + np.linalg.norm(inA - inB))
def pearsonSim(inA, inB):
    return 0.5 + 0.5 * np.corrcoef(inA, inB, rowvar=0)[0][1]
def cosineSim(inA, inB):
    num = float(inA.T * inB)
    return 0.5 + 0.5 * (num / (np.linalg.norm(inA) * np.linalg.norm(inB)))

def standEst(dataMat, user, simMeans, item):
    m, n = dataMat.shape
    simTotal = 0
    ratSimTotal = 0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0:
            continue
        overLap = np.nonzero(np.logical_and(dataMat[:, item].A > 0, dataMat[:, j].A > 0))[0]
        if len(overLap) == 0:
            similarity = 0
        else:
            similarity = simMeans(dataMat[overLap, item], dataMat[overLap, j])
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    else:
        return ratSimTotal / simTotal

def getKSigma(sigma, percentage = 0.9):
    total = np.sum(sigma ** 2)
    accumulate = 0
    for i in range(len(sigma)):
        accumulate += sigma[i] ** 2
        if accumulate / total >= percentage:
            return i + 1
    return len(sigma)

def svdEst(dataMat, user, simMeans, item):
    m, n = dataMat.shape
    simTotal = 0
    ratSimTotal = 0
    U, Sigma, VT = np.linalg.svd(dataMat)
    k = getKSigma(Sigma)
    SigmaK = np.mat(np.eye(k) * Sigma[:k])
    xformedItems = dataMat.T * U[:, :k] * SigmaK.I
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeans(xformedItems[item, :].T, xformedItems[j, :].T)
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    else:
        return ratSimTotal/simTotal

def recommend(dataMat, user, N = 3, simMeans = cosineSim, estMethod = standEst):
    unratedItems = np.nonzero(dataMat[user, :].A == 0)[1]
    if len(unratedItems) == 0:
        return 'All is rated'
    itemScore = []
    for item in unratedItems:
        estimatedScore = estMethod(dataMat, user, simMeans, item)
        itemScore.append((item, estimatedScore))
    return sorted(itemScore, key = lambda x:x[1], reverse = True)

def printMat(inMat, thresh=0.8):
    for i in range(32):
        value = '0' * 32
        for k in range(32):
            if float(inMat[i,k]) > thresh:
                value = value[:k] + '1' + value[k+1:]
        print(value)

def imgCompress(numSV=3, thresh=0.8):
    myl = []
    for line in open('0_5.txt').readlines():
        newRow = []
        for i in range(32):
            newRow.append(int(line[i]))
        myl.append(newRow)
    myMat = np.mat(myl)
    print ("****original matrix******")
    printMat(myMat, thresh)
    U, Sigma, VT = np.linalg.svd(myMat)
    SigRecon = np.mat(np.zeros((numSV, numSV)))
    for k in range(numSV):#construct diagonal matrix from vector
        SigRecon[k,k] = Sigma[k]
    reconMat = U[:,:numSV] * SigRecon *VT[:numSV,:]
    print ("****reconstructed matrix using {} singular values******".format(numSV))
    printMat(reconMat, thresh)

In [174]:
dataMat = np.mat(loadExData2())
U, sigma, VT = np.linalg.svd(dataMat)

In [175]:
recommend(dataMat, 1)

[(6, 3.3333333333333335),
 (9, 3.3333333333333335),
 (0, 3.0),
 (1, 3.0),
 (2, 3.0),
 (7, 3.0),
 (8, 3.0),
 (4, 0)]

In [176]:
recommend(dataMat, 1, estMethod=svdEst)

[(6, 3.332949990145985),
 (9, 3.331544717872839),
 (4, 3.331447487712862),
 (8, 3.3268848098453243),
 (0, 3.326828341851847),
 (2, 3.326817579698422),
 (1, 3.326809078158717),
 (7, 3.3267024074544325)]

In [179]:
imgCompress()

****original matrix******
00000000000000110000000000000000
00000000000011111100000000000000
00000000000111111110000000000000
00000000001111111111000000000000
00000000111111111111100000000000
00000001111111111111110000000000
00000000111111111111111000000000
00000000111111100001111100000000
00000001111111000001111100000000
00000011111100000000111100000000
00000011111100000000111110000000
00000011111100000000011110000000
00000011111100000000011110000000
00000001111110000000001111000000
00000011111110000000001111000000
00000011111100000000001111000000
00000001111100000000001111000000
00000011111100000000001111000000
00000001111100000000001111000000
00000001111100000000011111000000
00000000111110000000001111100000
00000000111110000000001111100000
00000000111110000000001111100000
00000000111110000000011111000000
00000000111110000000111111000000
00000000111111000001111110000000
00000000011111111111111110000000
00000000001111111111111110000000
00000000001111111111111110000000
00000000000111111

In [23]:
np.corrcoef(np.mat([[11, 25, 36], [10, 26, 67]]).T, rowvar = 1)

array([[ 1., -1., -1.],
       [-1.,  1.,  1.],
       [-1.,  1.,  1.]])

In [164]:
line = ' ' * 32