In [58]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
import matplotlib as plt
import numpy as np
import random
import starter
import importlib
importlib.reload(starter)

<module 'starter' from '/Users/chelseytao/CS349---Group8/HW2/starter.py'>

## Part I

In [9]:
# dimensionality reduction
def reduce(train, valid, test, method, var):
    """Run the selected preprocessing step from ``starter`` on all three splits.

    Args:
        train, valid, test: Data splits, forwarded unchanged to the chosen
            ``starter`` helper.
        method: Name of the preprocessing step: ``'pca'``, ``'bin'`` or
            ``'none'``.
        var: Extra argument forwarded to ``starter.pca`` only (the cells in
            this notebook pass the retained variance); ignored otherwise.

    Returns:
        Whatever the chosen ``starter`` helper returns; callers in this
        notebook unpack it as ``train, valid, test``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == 'pca':
        return starter.pca(train, valid, test, var)
    if method == 'bin':
        return starter.binarize(train, valid, test)
    if method == 'none':
        return starter.make_float(train, valid, test)
    # An unrecognised method used to fall through and return None, which only
    # surfaced later as an unrelated unpacking error in the calling cell.
    raise ValueError(
        f"Unknown dimensionality reduction method {method!r}; "
        "expected 'pca', 'bin' or 'none'"
    )

In [10]:
# Load the three MNIST splits (train, validation, test) in that order.
mnist_train, mnist_valid, mnist_test = (
    starter.read_data(csv_path)
    for csv_path in ('mnist_train.csv', 'mnist_valid.csv', 'mnist_test.csv')
)

### KNN

In [14]:
# reduce dimensionality before running KNN
dimensionality_reduction, retained_variance = 'pca', 0.90  # retained_variance is only used for pca
train, valid, test = reduce(
    mnist_train,
    mnist_valid,
    mnist_test,
    dimensionality_reduction,
    retained_variance,
)
# Report how many feature columns the reduction removed from the first training example.
print(
    'Reduced dimensionality by',
    len(mnist_train[0][1]) - len(train[0][1]),
    'columns',
)

Reduced dimensionality by 631 columns


In [15]:
# evaluate the knn
def evaluate_knn(train, query, distance_metric):
    """Label ``query`` with ``starter.knn`` and score the predictions.

    Args:
        train: Training examples, passed straight through to ``starter.knn``.
        query: Examples to classify; element ``[0]`` of each is used as the
            true label and element ``[1]`` as the data handed to the classifier.
        distance_metric: Metric name forwarded to ``starter.knn``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, confusion)``. Precision,
        recall and F1 are macro-averaged (arithmetic mean of the per-class
        scores); the confusion matrix is ordered by the digit labels
        '0' through '9'.
    """
    digit_names = [str(digit) for digit in range(10)]
    expected = [example[0] for example in query]
    features = [example[1] for example in query]
    predicted = starter.knn(train, features, distance_metric)

    def macro(scorer):
        # average='macro' takes the arithmetic mean of individual classes' scores
        return scorer(expected, predicted, average='macro')

    return (
        accuracy_score(expected, predicted),
        macro(precision_score),
        macro(recall_score),
        macro(f1_score),
        confusion_matrix(expected, predicted, labels=digit_names),
    )

In [16]:
# score KNN on the validation split and print each metric on its own line
(valid_accuracy, valid_precision, valid_recall,
 valid_f1, confusion) = evaluate_knn(train, valid, 'euclidean')
print('Validation Set Performance')
for metric_caption, metric_value in (
    ('Accuracy:', valid_accuracy),
    ('Precision:', valid_precision),
    ('Recall:', valid_recall),
    ('F1:', valid_f1),
):
    print(metric_caption, metric_value)

Validation Set Performance
Accuracy: 0.83
Precision: 0.8305262487770229
Recall: 0.8328349488675576
F1: 0.8262261449538295


In [17]:
# score KNN on the test split, then show the confusion matrix
(test_accuracy, test_precision, test_recall,
 test_f1, confusion) = evaluate_knn(train, test, 'euclidean')
print('Test Set Performance')
for metric_caption, metric_value in (
    ('Accuracy:', test_accuracy),
    ('Precision:', test_precision),
    ('Recall:', test_recall),
    ('F1:', test_f1),
):
    print(metric_caption, metric_value)

print(confusion)

Test Set Performance
Accuracy: 0.885
Precision: 0.892714608371754
Recall: 0.8834487559224401
F1: 0.8857523531600611
[[17  0  1  0  0  0  0  0  0  0]
 [ 0 27  0  0  0  0  0  0  0  0]
 [ 0  1 15  0  1  0  0  1  1  0]
 [ 0  0  2 15  0  0  0  0  1  0]
 [ 0  0  0  0 23  0  0  1  0  1]
 [ 0  0  0  1  0 11  0  0  1  0]
 [ 0  0  0  0  0  0 13  0  0  0]
 [ 0  1  0  0  1  0  0 22  0  0]
 [ 1  0  0  1  0  1  0  0 18  0]
 [ 1  2  0  0  1  0  0  2  0 16]]


### K-Means

In [18]:
# reduce dimensionality before running k-means
dimensionality_reduction, retained_variance = 'pca', 0.995  # retained_variance is only used for pca
train, valid, test = reduce(
    mnist_train,
    mnist_valid,
    mnist_test,
    dimensionality_reduction,
    retained_variance,
)
# Report how many feature columns the reduction removed from the first training example.
print(
    'Reduced dimensionality by',
    len(mnist_train[0][1]) - len(train[0][1]),
    'columns',
)

Reduced dimensionality by 331 columns


In [19]:
# train association of k-means clusters with digits
def associate_clusters(result_labels, labels, n_classes=10):
    """Map every cluster label to the class that occurs most often inside it.

    Args:
        result_labels: Cluster label assigned to each example.
        labels: True class of each example, paired positionally with
            ``result_labels``; each value must convert with ``int()`` to an
            index in ``range(n_classes)``.
        n_classes: Number of distinct classes to tally. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        Dict from cluster label to the winning class index as a string.
        Ties go to the lowest class index, because ``np.argmax`` returns the
        first maximum.
    """
    # Start every observed cluster with an all-zero tally so that each one
    # appears in the returned mapping.
    counts = {cluster: [0] * n_classes for cluster in set(result_labels)}
    for cluster, label in zip(result_labels, labels):
        counts[cluster][int(label)] += 1

    return {cluster: str(np.argmax(tally)) for cluster, tally in counts.items()}

def performance_kmeans(result_labels, labels):
    """Score predicted labels against the true labels.

    Args:
        result_labels: Predicted label for each example.
        labels: True label for each example.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, confusion)``. Precision,
        recall and F1 are macro-averaged; the confusion matrix is ordered by
        the digit labels '0' through '9'.
    """
    accuracy = accuracy_score(labels, result_labels)
    # average='macro' takes the arithmetic mean of individual classes' scores
    precision, recall, f1 = (
        scorer(labels, result_labels, average='macro')
        for scorer in (precision_score, recall_score, f1_score)
    )
    digit_names = [str(digit) for digit in range(10)]
    confusion = confusion_matrix(labels, result_labels, labels=digit_names)

    return accuracy, precision, recall, f1, confusion

In [20]:
# train k-means
train_data = [example[1] for example in train]
train_labels = [example[0] for example in train]
k = 30
kmeans_seed = 0  # fixed seed so the same initial means are drawn on every run
# One single-character name per cluster, counting up from 'a'
# (for i >= 26 the characters run past 'z' but remain unique).
cluster_labels = [chr(ord('a') + i) for i in range(k)]
# Draw the initial means from a dedicated seeded generator: the unseeded
# module-level random.sample picked different starting points on each run,
# so the reported metrics could not be reproduced.
rng = random.Random(kmeans_seed)
random_cluster_means = rng.sample(train_data, k)  # k distinct training points as initial means
initial_guess = [[cluster_labels[i], random_cluster_means[i]] for i in range(k)]
trained_means = starter.kmeans_train(train_data, 'euclidean', initial_guess)

# evaluate k-means train data
train_result_labels = starter.kmeans_evaluate(train_data, trained_means, 'euclidean')

# train association function (cluster label -> most frequent digit in that cluster)
mapping = associate_clusters(train_result_labels, train_labels)

In [64]:
# evaluate k-means on validation set
valid_data = [example[1] for example in valid]
valid_labels = [example[0] for example in valid]
valid_result_labels = starter.kmeans_evaluate(valid_data, trained_means, 'euclidean')
# translate each assigned cluster label into the digit associated with it during training
valid_result_labels_mapped = [mapping[cluster] for cluster in valid_result_labels]
# check performance
(valid_accuracy, valid_precision, valid_recall,
 valid_f1, confusion) = performance_kmeans(valid_result_labels_mapped, valid_labels)
print('Validation Set Performance')
for metric_caption, metric_value in (
    ('Accuracy:', valid_accuracy),
    ('Precision:', valid_precision),
    ('Recall:', valid_recall),
    ('F1:', valid_f1),
):
    print(metric_caption, metric_value)
print('\n')

Validation Set Performance
Accuracy: 0.595
Precision: 0.6249397365891792
Recall: 0.6019263285024155
F1: 0.5929581992541278




In [65]:
# evaluate k-means on test set
test_data = [example[1] for example in test]
test_labels = [example[0] for example in test]
test_result_labels = starter.kmeans_evaluate(test_data, trained_means, 'euclidean')
# translate each assigned cluster label into the digit associated with it during training
test_result_labels_mapped = [mapping[cluster] for cluster in test_result_labels]
# check performance
(test_accuracy, test_precision, test_recall,
 test_f1, confusion) = performance_kmeans(test_result_labels_mapped, test_labels)
print('Test Set Performance')
for metric_caption, metric_value in (
    ('Accuracy:', test_accuracy),
    ('Precision:', test_precision),
    ('Recall:', test_recall),
    ('F1:', test_f1),
):
    print(metric_caption, metric_value)
print(confusion)

Test Set Performance
Accuracy: 0.675
Precision: 0.6881093857832987
Recall: 0.6625137425926899
F1: 0.6576570280127599
[[14  0  1  1  1  0  1  0  0  0]
 [ 0 23  1  0  0  0  1  0  2  0]
 [ 0  0 12  2  0  0  1  1  3  0]
 [ 0  1  1 13  1  0  0  0  2  0]
 [ 0  0  2  0 16  0  0  2  0  5]
 [ 0  0  0  5  1  3  0  0  3  1]
 [ 0  0  2  0  0  0 11  0  0  0]
 [ 1  0  0  1  0  1  0 17  2  2]
 [ 1  0  0  1  1  0  1  1 16  0]
 [ 0  1  1  0  8  0  0  2  0 10]]


## Part II

### Q1: Movie rating collaborative filter

In [38]:
# Load the full ratings file plus the train/validation/test files for user a.
movielens_ratings, train_ratings_a, valid_ratings_a, test_ratings_a = (
    starter.read_ratings(ratings_path)
    for ratings_path in ('movielens.txt', 'train_a.txt', 'valid_a.txt', 'test_a.txt')
)

"""
Observations:
Higher M leads to higher recall and lower precision
Lower M leads to lower recall and higher precision
"""

K, M = 500, 100  # K: number of similar users, M: number of movies to recommend
A_userid, B_userid, C_userid = "405", "655", "13"

# Find the K most similar users for user a, then take M recommendations from them.
similar_users_a = starter.get_similar_users(
    train_ratings_a,
    movielens_ratings,
    A_userid,
    "euclidean",
    K,
)
recommendations_a = starter.recommend_movies(
    movielens_ratings,
    A_userid,
    similar_users_a,
    M,
)

print(f"Recommendations for user a: {recommendations_a}")

# Score the same recommendation list against the validation ratings...
ground_truth_valid_a = valid_ratings_a[A_userid]
print(f"Ground truth for validation (user a): {ground_truth_valid_a}")
precision, recall, f1 = starter.evaluate(
    recommendations_a,
    ground_truth_valid_a,
)
print(f'Validation for user a - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# ...and against the test ratings.
ground_truth_test_a = test_ratings_a[A_userid]
precision, recall, f1 = starter.evaluate(
    recommendations_a,
    ground_truth_test_a,
)
print(f'Test for user a - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# #for users b
# train_ratings_b = starter.read_ratings('train_b.txt')
# valid_ratings_b = starter.read_ratings('valid_b.txt')
# test_ratings_b = starter.read_ratings('test_b.txt')

# similar_users_b = starter.get_similar_users(train_ratings_b, movielens_ratings, B_userid, "euclidean", K)
# recommendations_b = starter.recommend_movies(movielens_ratings, B_userid, similar_users_b, M)

# ground_truth_valid_b = valid_ratings_b[B_userid]
# precision, recall, f1 = starter.evaluate(recommendations_b, ground_truth_valid_b)
# print(f'Validation for user b - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# ground_truth_test_b = test_ratings_b[B_userid]
# precision, recall, f1 = starter.evaluate(recommendations_b, ground_truth_test_b)
# print(f'Test for user b - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# #for users c
# train_ratings_c = starter.read_ratings('train_c.txt')
# valid_ratings_c = starter.read_ratings('valid_c.txt')
# test_ratings_c = starter.read_ratings('test_c.txt')

# similar_users_c = starter.get_similar_users(train_ratings_c, movielens_ratings, C_userid, "euclidean", K)
# recommendations_c = starter.recommend_movies(movielens_ratings, C_userid, similar_users_c, M)

# ground_truth_valid_c = valid_ratings_c[C_userid]
# precision, recall, f1 = starter.evaluate(recommendations_c, ground_truth_valid_c)
# print(f'Validation for user c - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# ground_truth_test_c = test_ratings_c[C_userid]
# precision, recall, f1 = starter.evaluate(recommendations_c, ground_truth_test_c)
# print(f'Test for user c - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

Recommendations for user a: ['50', '258', '286', '100', '300', '288', '181', '127', '313', '302', '294', '269', '1', '237', '117', '748', '7', '328', '121', '276', '257', '268', '333', '222', '275', '15', '98', '9', '56', '174', '301', '272', '289', '245', '340', '405', '475', '318', '323', '307', '742', '327', '25', '111', '151', '172', '315', '282', '322', '118', '298', '326', '515', '271', '508', '285', '690', '273', '79', '124', '678', '471', '64', '751', '321', '12', '332', '303', '14', '255', '173', '125', '310', '304', '137', '750', '319', '242', '22', '270', '250', '357', '293', '591', '346', '597', '879', '483', '347', '292', '210', '274', '283', '96', '331', '150', '168', '845', '204', '69']
Ground truth for validation (user a): {'184': 1.0, '1193': 1.0, '1560': 1.0, '1206': 1.0, '802': 1.0, '1471': 1.0, '939': 5.0, '1063': 5.0, '537': 1.0, '520': 2.0, '1553': 1.0, '1517': 1.0, '199': 1.0, '1559': 1.0, '1032': 1.0, '415': 2.0, '645': 1.0, '461': 3.0, '807': 1.0, '82': 4.0, '7

### Q2: Improved collaborative filter using additional attributes (age and gender), with normalization

In [59]:
# Each read_more_data call is unpacked into four values:
# ratings, user data, original ratings and genres.
(movielens_ratings, movielens_userdata,
 movielens_original, movielens_genres) = starter.read_more_data('movielens.txt')
(train_ratings_a, train_userdata_a,
 train_original_a, train_genres_a) = starter.read_more_data('train_a.txt')
(valid_ratings_a, valid_userdata_a,
 valid_original_a, valid_genres_a) = starter.read_more_data('valid_a.txt')
(test_ratings_a, test_userdata_a,
 test_original_a, test_genres_a) = starter.read_more_data('test_a.txt')

"""
Observations:
Higher M leads to higher recall and lower precision
Lower M leads to lower recall and higher precision

with improved similarity function, the precision is higher
"""

K, M = 500, 100  # K: number of similar users, M: number of movies to recommend
A_userid, B_userid, C_userid = "405", "655", "13"

# Similar users now come from the improved function, which also receives
# the user data and genres.
similar_users_a = starter.get_similar_users_improved(
    train_ratings_a,
    movielens_ratings,
    train_userdata_a,
    movielens_userdata,
    train_genres_a,
    movielens_genres,
    A_userid,
    "euclidean",
    K,
)
recommendations_a = starter.recommend_movies(
    movielens_ratings,
    A_userid,
    similar_users_a,
    M,
)

print(f"Recommendations for user a: {recommendations_a}")

# Score the same recommendation list against the validation ground truth...
ground_truth_valid_a = valid_original_a[A_userid]
print(f"Ground truth for validation (user a): {ground_truth_valid_a}")
precision, recall, f1 = starter.evaluate(
    recommendations_a,
    ground_truth_valid_a,
)
print(f'Validation for user a - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# ...and against the test ground truth.
ground_truth_test_a = test_original_a[A_userid]
precision, recall, f1 = starter.evaluate(
    recommendations_a,
    ground_truth_test_a,
)
print(f'Test for user a - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

Recommendations for user a: ['258', '286', '300', '50', '313', '100', '288', '294', '302', '269', '181', '1', '127', '237', '328', '117', '748', '276', '333', '268', '121', '7', '272', '257', '9', '222', '15', '275', '301', '245', '475', '340', '307', '315', '289', '405', '151', '323', '322', '327', '742', '25', '326', '508', '690', '271', '118', '285', '124', '282', '471', '14', '298', '111', '310', '98', '515', '332', '304', '125', '321', '750', '751', '56', '303', '273', '242', '270', '319', '678', '292', '174', '318', '137', '316', '346', '347', '879', '147', '293', '331', '150', '255', '250', '172', '591', '546', '64', '283', '597', '306', '246', '259', '79', '12', '305', '845', '324', '274', '628']
Ground truth for validation (user a): {'184': 1.0, '1193': 1.0, '1560': 1.0, '1206': 1.0, '802': 1.0, '1471': 1.0, '939': 5.0, '1063': 5.0, '537': 1.0, '520': 2.0, '1553': 1.0, '1517': 1.0, '199': 1.0, '1559': 1.0, '1032': 1.0, '415': 2.0, '645': 1.0, '461': 3.0, '807': 1.0, '82': 4.0,