In [1]:
import pandas as pd
import numpy as np

from collections import Counter

In [2]:
def read_file(file):
    """Load the sessions that contain at least one purchase.

    Every non-blank line is expected to look like ``v1,v2,...;b1,b2,...``:
    the viewed item ids, a semicolon, then the bought item ids. Sessions whose
    purchase part is empty are dropped.

    Parameters
    ----------
    file : str
        Path of the sessions file.

    Returns
    -------
    list of (list of str, list of str)
        One ``(viewed_ids, bought_ids)`` pair per kept session, in file order.

    Raises
    ------
    ValueError
        If a non-blank line does not contain exactly one ``;``.
    """
    data = []
    with open(file, 'rt') as f:
        # Iterate the file lazily instead of materialising it with readlines().
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline at EOF) holds no session
                # and would otherwise break the two-way unpacking below.
                continue
            view, buy = line.split(';')
            if buy:
                data.append((view.split(','), buy.split(',')))
    return data

In [3]:
# Load the train/test sessions; read_file keeps only sessions that contain a purchase.
train_data = read_file('coursera_sessions_train.txt')
test_data = read_file('coursera_sessions_test.txt')

In [4]:
def freq_file(file):
    """Count how often each item id is viewed and bought across all sessions.

    Every non-blank line is expected to look like ``v1,v2,...;b1,b2,...``.
    Unlike ``read_file``, sessions without purchases are included in the view
    counts. Repeated ids inside one session are counted once per occurrence.

    Parameters
    ----------
    file : str
        Path of the sessions file.

    Returns
    -------
    (collections.Counter, collections.Counter)
        ``(view_count, buy_count)`` mapping item id -> number of occurrences.

    Raises
    ------
    ValueError
        If a non-blank line does not contain exactly one ``;``.
    """
    view_count = Counter()
    buy_count = Counter()
    with open(file, 'rt') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no session and would break the unpacking.
                continue
            view, buy = line.split(';')
            # ''.split(',') is [''], so empty tokens must be dropped explicitly;
            # otherwise every purchase-free session would bump a bogus '' item.
            view_count.update(item for item in view.split(',') if item)
            buy_count.update(item for item in buy.split(',') if item)
    return view_count, buy_count

In [5]:
# Item frequencies come from the training file only; predict() reads these two
# Counters as module-level globals when ranking.
view_count, buy_count = freq_file('coursera_sessions_train.txt')

In [6]:
def precision(predict, buy, k):
    """Precision@k: bought items found among the recommendations, divided by ``k``.

    Parameters
    ----------
    predict : sequence
        Recommended item ids.
    buy : sequence
        Item ids bought in the session. Every entry is tested on its own, so
        an id repeated in ``buy`` contributes once per repetition.
    k : int
        Denominator (the nominal number of recommendations).

    Returns
    -------
    float
        ``hits / k``.
    """
    hits = 0
    for bought_item in buy:
        if bought_item in predict:
            hits += 1
    return hits / float(k)

In [7]:
def recall(predict, buy):
    """Recall: share of the entries of ``buy`` that appear in ``predict``.

    Parameters
    ----------
    predict : sequence
        Recommended item ids.
    buy : sequence
        Item ids bought in the session; must be non-empty. Repeated ids count
        once per repetition in both numerator and denominator.

    Returns
    -------
    float
        ``hits / len(buy)``.

    Raises
    ------
    ZeroDivisionError
        If ``buy`` is empty.
    """
    hits = sum(1 for bought_item in buy if bought_item in predict)
    total_bought = float(len(buy))
    return hits / total_bought

In [8]:
def predict(viewed, k, method='pop'):
    """Recommend up to ``k`` of the session's viewed items, most frequent first.

    The distinct items of ``viewed`` are ranked by descending global frequency,
    read from the module-level ``view_count`` / ``buy_count`` counters. Items
    with equal frequency keep the order in which they were first viewed.

    Parameters
    ----------
    viewed : sequence
        Item ids viewed in the session, in viewing order (may repeat).
    k : int
        Maximum number of recommendations to return.
    method : str, optional
        ``'view'`` ranks by ``view_count``; any other value (including the
        default ``'pop'``) ranks by ``buy_count``.

    Returns
    -------
    list
        At most ``k`` distinct items; the elements are NumPy scalars taken from
        the array built from ``viewed``.
    """
    counts = view_count if method == 'view' else buy_count
    items = np.array(viewed)
    # np.unique sorts by value, so recover first-appearance order from the
    # indices of each value's first occurrence.
    first_seen = np.sort(np.unique(items, return_index=True)[1])
    unique = items[first_seen]
    scores = [-counts[item] for item in unique]
    # The default argsort kind is not stable; a stable sort is required so that
    # ties are broken by first-viewed order rather than arbitrarily.
    order = np.argsort(scores, kind='stable')
    # Slicing already caps the result at the number of distinct items.
    return list(unique[order])[:k]

In [9]:
def get_stats(data, method):
    """Average recall and precision at 1 and 5 recommendations over all sessions.

    Parameters
    ----------
    data : sequence
        Sessions; element ``[0]`` of each is the viewed ids and element ``[1]``
        the bought ids.
    method : str
        Ranking method forwarded to ``predict``.

    Returns
    -------
    tuple
        ``(recall@1, precision@1, recall@5, precision@5)``, each the mean over
        ``data`` rounded to two decimals.
    """
    def averaged(score):
        # Mean of a per-session score over every session, rounded to 2 decimals.
        return round(np.mean([score(session) for session in data]), 2)

    def recall_at(k):
        return averaged(lambda s: recall(predict(s[0], k, method), s[1]))

    def precision_at(k):
        return averaged(lambda s: precision(predict(s[0], k, method), s[1], k))

    return recall_at(1), precision_at(1), recall_at(5), precision_at(5)

In [10]:
# Print the get_stats tuple for each ranking method: train sessions first, then test.
for method in ('view', 'buy'):
    for sessions in (train_data, test_data):
        print(get_stats(sessions, method))

(0.44, 0.51, 0.82, 0.21)
(0.42, 0.48, 0.8, 0.2)
(0.69, 0.8, 0.93, 0.25)
(0.46, 0.53, 0.82, 0.21)
