In [63]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
import matplotlib as plt
import numpy as np
import random
import starter
import importlib
importlib.reload(starter)

<module 'starter' from '/Users/minghui/Documents/COMP_SCI 349/CS349---Group8/HW2/starter.py'>

## Part I

In [9]:
# dimensionality reduction
def reduce(train, valid, test, method, var):
    """Apply the selected dimensionality-reduction step to all three splits.

    Dispatches to the matching helper in the `starter` module and returns
    whatever that helper returns (the notebook unpacks the result as
    `train, valid, test`).

    Args:
        train, valid, test: the data splits, forwarded unchanged to the helper.
        method: one of 'pca', 'bin' or 'none'.
        var: forwarded to `starter.pca` only; ignored by the other methods.

    Returns:
        The return value of `starter.pca`, `starter.binarize` or
        `starter.make_float`, depending on `method`.

    Raises:
        ValueError: if `method` is not one of the supported names.
    """
    if method == 'pca':
        return starter.pca(train, valid, test, var)
    if method == 'bin':
        return starter.binarize(train, valid, test)
    if method == 'none':
        return starter.make_float(train, valid, test)
    # Fail loudly here: silently returning None only surfaces later as an
    # unrelated "cannot unpack NoneType" error at the call site.
    raise ValueError(
        f"unknown dimensionality reduction method {method!r}; "
        "expected 'pca', 'bin' or 'none'"
    )

In [10]:
# Read the train / validation / test MNIST files, in that order.
mnist_train, mnist_valid, mnist_test = map(
    starter.read_data,
    ('mnist_train.csv', 'mnist_valid.csv', 'mnist_test.csv'),
)

### KNN

In [14]:
# Reduce all three splits with the chosen method before running KNN.
dimensionality_reduction, retained_variance = 'pca', 0.90  # variance is only used for pca
train, valid, test = reduce(
    train=mnist_train,
    valid=mnist_valid,
    test=mnist_test,
    method=dimensionality_reduction,
    var=retained_variance,
)
# Element [1] of the first example is measured before and after the reduction
# (presumably the feature vector -- confirm against starter.read_data).
print(
    'Reduced dimensionality by',
    len(mnist_train[0][1]) - len(train[0][1]),
    'columns',
)

Reduced dimensionality by 631 columns


In [15]:
# evaluate the knn
def evaluate_knn(train, query, distance_metric):
    """Classify `query` with `starter.knn` and score the predictions.

    Args:
        train: training examples, passed straight to `starter.knn`.
        query: examples to classify; element [0] of each is used as the true
            label and element [1] as the data handed to `starter.knn`.
        distance_metric: metric name forwarded to `starter.knn`.

    Returns:
        Tuple `(accuracy, precision, recall, f1, confusion)`. Precision, recall
        and F1 are macro-averaged (arithmetic mean of the per-class scores);
        the confusion matrix uses the class order '0'..'9'.
    """
    digit_classes = [str(digit) for digit in range(10)]
    true_labels = [example[0] for example in query]
    features = [example[1] for example in query]
    predicted = starter.knn(train, features, distance_metric)

    # average='macro' takes the arithmetic mean of individual classes' scores
    macro = {'average': 'macro'}
    return (
        accuracy_score(true_labels, predicted),
        precision_score(true_labels, predicted, **macro),
        recall_score(true_labels, predicted, **macro),
        f1_score(true_labels, predicted, **macro),
        confusion_matrix(true_labels, predicted, labels=digit_classes),
    )

In [16]:
# Score KNN (euclidean distance) on the validation split and print the metrics.
(valid_accuracy,
 valid_precision,
 valid_recall,
 valid_f1,
 confusion) = evaluate_knn(train, valid, 'euclidean')
print(
    'Validation Set Performance',
    f'Accuracy: {valid_accuracy!s}',
    f'Precision: {valid_precision!s}',
    f'Recall: {valid_recall!s}',
    f'F1: {valid_f1!s}',
    sep='\n',
)

Validation Set Performance
Accuracy: 0.83
Precision: 0.8305262487770229
Recall: 0.8328349488675576
F1: 0.8262261449538295


In [17]:
# Score KNN (euclidean distance) on the test split and print the metrics.
(test_accuracy,
 test_precision,
 test_recall,
 test_f1,
 confusion) = evaluate_knn(train, test, 'euclidean')
print(
    'Test Set Performance',
    f'Accuracy: {test_accuracy!s}',
    f'Precision: {test_precision!s}',
    f'Recall: {test_recall!s}',
    f'F1: {test_f1!s}',
    sep='\n',
)

# Confusion matrix over the classes '0'..'9' (sklearn convention: rows are the
# true labels, columns the predicted labels).
print(confusion)

Test Set Performance
Accuracy: 0.885
Precision: 0.892714608371754
Recall: 0.8834487559224401
F1: 0.8857523531600611
[[17  0  1  0  0  0  0  0  0  0]
 [ 0 27  0  0  0  0  0  0  0  0]
 [ 0  1 15  0  1  0  0  1  1  0]
 [ 0  0  2 15  0  0  0  0  1  0]
 [ 0  0  0  0 23  0  0  1  0  1]
 [ 0  0  0  1  0 11  0  0  1  0]
 [ 0  0  0  0  0  0 13  0  0  0]
 [ 0  1  0  0  1  0  0 22  0  0]
 [ 1  0  0  1  0  1  0  0 18  0]
 [ 1  2  0  0  1  0  0  2  0 16]]


### K-Means

In [18]:
# Reduce all three splits again for k-means, this time with a higher
# retained-variance setting than the KNN section used.
dimensionality_reduction, retained_variance = 'pca', 0.995  # variance is only used for pca
train, valid, test = reduce(
    train=mnist_train,
    valid=mnist_valid,
    test=mnist_test,
    method=dimensionality_reduction,
    var=retained_variance,
)
print(
    'Reduced dimensionality by',
    len(mnist_train[0][1]) - len(train[0][1]),
    'columns',
)

Reduced dimensionality by 331 columns


In [19]:
# train association of k-means clusters with digits
def associate_clusters(result_labels, labels):
    """Map every cluster id to the digit seen most often among its members.

    Args:
        result_labels: cluster id assigned to each example.
        labels: true digit of each example, aligned with `result_labels`;
            each value must be convertible with `int()` to an index 0..9.

    Returns:
        Dict from cluster id to the winning digit as a string. On a tie
        `np.argmax` keeps the first maximum, i.e. the lowest digit.
    """
    # One 10-slot tally (digits 0..9) per distinct cluster id.
    tallies = {}
    for cluster_id in set(result_labels):
        tallies[cluster_id] = [0] * 10

    for cluster_id, digit in zip(result_labels, labels):
        tallies[cluster_id][int(digit)] += 1

    mapping = {}
    for cluster_id, tally in tallies.items():
        mapping[cluster_id] = str(np.argmax(tally))
    return mapping

def performance_kmeans(result_labels, labels):
    """Score predicted digit labels against the true labels.

    Args:
        result_labels: predicted labels (used as `y_pred`).
        labels: true labels (used as `y_true`).

    Returns:
        Tuple `(accuracy, precision, recall, f1, confusion)`. Precision, recall
        and F1 are macro-averaged (arithmetic mean of the per-class scores);
        the confusion matrix uses the class order '0'..'9'.
    """
    digit_classes = [str(digit) for digit in range(10)]

    accuracy = accuracy_score(labels, result_labels)
    # average='macro' takes the arithmetic mean of individual classes' scores
    precision, recall, f1 = (
        scorer(labels, result_labels, average='macro')
        for scorer in (precision_score, recall_score, f1_score)
    )
    confusion = confusion_matrix(labels, result_labels, labels=digit_classes)

    return accuracy, precision, recall, f1, confusion

In [20]:
# train k-means
train_data = [i[1] for i in train]
train_labels = [i[0] for i in train]
k = 30
# Cluster ids only need to be unique tokens: chr(ord('a') + i) yields 'a'..'z'
# and then continues with '{', '|', ... once i reaches 26.
cluster_labels = [chr(ord('a') + i) for i in range(k)]
# Seed the RNG so the randomly drawn initial means are identical on every run.
# Without this the metrics printed below change with each execution
# (assuming starter.kmeans_train is deterministic given its initial guess).
kmeans_seed = 0
random.seed(kmeans_seed)
random_cluster_means = random.sample(train_data, k) # initialize initial means randomly
initial_guess = [[cluster_labels[i], random_cluster_means[i]] for i in range(k)]
trained_means = starter.kmeans_train(train_data, 'euclidean', initial_guess)

# evaluate k-means train data
train_result_labels = starter.kmeans_evaluate(train_data, trained_means, 'euclidean')

# train association function: learn which digit each cluster stands for,
# using the true labels of the training examples assigned to it
mapping = associate_clusters(train_result_labels, train_labels)

In [64]:
# Assign every validation example to a trained cluster, translate the cluster
# ids into digits with `mapping`, then score against the true labels.
valid_labels = [example[0] for example in valid]
valid_data = [example[1] for example in valid]
valid_result_labels = starter.kmeans_evaluate(valid_data, trained_means, 'euclidean')
valid_result_labels_mapped = [mapping[cluster_id] for cluster_id in valid_result_labels]

(valid_accuracy,
 valid_precision,
 valid_recall,
 valid_f1,
 confusion) = performance_kmeans(
    result_labels=valid_result_labels_mapped,
    labels=valid_labels,
)
print(
    'Validation Set Performance',
    f'Accuracy: {valid_accuracy!s}',
    f'Precision: {valid_precision!s}',
    f'Recall: {valid_recall!s}',
    f'F1: {valid_f1!s}',
    sep='\n',
)
print('\n')

Validation Set Performance
Accuracy: 0.595
Precision: 0.6249397365891792
Recall: 0.6019263285024155
F1: 0.5929581992541278




In [65]:
# Assign every test example to a trained cluster, translate the cluster ids
# into digits with `mapping`, then score against the true labels.
test_labels = [example[0] for example in test]
test_data = [example[1] for example in test]
test_result_labels = starter.kmeans_evaluate(test_data, trained_means, 'euclidean')
test_result_labels_mapped = [mapping[cluster_id] for cluster_id in test_result_labels]

(test_accuracy,
 test_precision,
 test_recall,
 test_f1,
 confusion) = performance_kmeans(
    result_labels=test_result_labels_mapped,
    labels=test_labels,
)
print(
    'Test Set Performance',
    f'Accuracy: {test_accuracy!s}',
    f'Precision: {test_precision!s}',
    f'Recall: {test_recall!s}',
    f'F1: {test_f1!s}',
    sep='\n',
)
print(confusion)

Test Set Performance
Accuracy: 0.675
Precision: 0.6881093857832987
Recall: 0.6625137425926899
F1: 0.6576570280127599
[[14  0  1  1  1  0  1  0  0  0]
 [ 0 23  1  0  0  0  1  0  2  0]
 [ 0  0 12  2  0  0  1  1  3  0]
 [ 0  1  1 13  1  0  0  0  2  0]
 [ 0  0  2  0 16  0  0  2  0  5]
 [ 0  0  0  5  1  3  0  0  3  1]
 [ 0  0  2  0  0  0 11  0  0  0]
 [ 1  0  0  1  0  1  0 17  2  2]
 [ 1  0  0  1  1  0  1  1 16  0]
 [ 0  1  1  0  8  0  0  2  0 10]]


## Part II

## Q1 movie rating collaborative filter

In [62]:
# Observations:
#   - Higher M leads to higher recall and lower precision
#   - Lower M leads to lower recall and higher precision

# K: number of similar users; M: number of movies to recommend
K, M = 80, 180

# Ids of the three users (a, b, c) evaluated below
A_userid, B_userid, C_userid = "405", "655", "13"

# ---- User a (A_userid) ----
print("User a\n")
# Read movielens.txt together with the train / valid / test files for user a.
movielens_ratings = starter.read_ratings('movielens.txt')
train_ratings_a = starter.read_ratings('train_a.txt')
valid_ratings_a = starter.read_ratings('valid_a.txt')
test_ratings_a = starter.read_ratings('test_a.txt')


# K and M are the notebook-level settings for the number of similar users and
# the number of movies to recommend.
similar_users_a = starter.get_similar_users(train_ratings_a, movielens_ratings, A_userid, "euclidean", K)
recommendations_a = starter.recommend_movies(movielens_ratings, A_userid, similar_users_a, M)
# The recommendations are computed once and scored twice: first against the
# user's validation entry, then against the test entry.
user_preference_valid_a = valid_ratings_a[A_userid]
# precision / recall / f1 are rebound by every evaluation, so each print must
# directly follow its own call.
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_a, user_preference_valid_a)
print(f'Validation for user a - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

user_preference_test_a = test_ratings_a[A_userid]
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_a, user_preference_test_a)
print(f'Test for user a - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# ---- User b (B_userid) ----
print("\nUser b\n")
# Read movielens.txt together with the train / valid / test files for user b.
movielens_ratings = starter.read_ratings('movielens.txt')
train_ratings_b = starter.read_ratings('train_b.txt')
valid_ratings_b = starter.read_ratings('valid_b.txt')
test_ratings_b = starter.read_ratings('test_b.txt')

# K and M are the notebook-level settings for the number of similar users and
# the number of movies to recommend.
similar_users_b = starter.get_similar_users(train_ratings_b, movielens_ratings, B_userid, "euclidean", K)
recommendations_b = starter.recommend_movies(movielens_ratings, B_userid, similar_users_b, M)

# The recommendations are computed once and scored twice: first against the
# user's validation entry, then against the test entry.
user_preference_valid_b = valid_ratings_b[B_userid]
# precision / recall / f1 are rebound by every evaluation, so each print must
# directly follow its own call.
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_b, user_preference_valid_b)
print(f'Validation for user b - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

user_preference_test_b = test_ratings_b[B_userid]
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_b, user_preference_test_b)
print(f'Test for user b - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# ---- User c (C_userid) ----
print("\nUser c\n")
# Read movielens.txt together with the train / valid / test files for user c.
movielens_ratings = starter.read_ratings('movielens.txt')
train_ratings_c = starter.read_ratings('train_c.txt')
valid_ratings_c = starter.read_ratings('valid_c.txt')
test_ratings_c = starter.read_ratings('test_c.txt')

# K and M are the notebook-level settings for the number of similar users and
# the number of movies to recommend.
similar_users_c = starter.get_similar_users(train_ratings_c, movielens_ratings, C_userid, "euclidean", K)
recommendations_c = starter.recommend_movies(movielens_ratings, C_userid, similar_users_c, M)

# The recommendations are computed once and scored twice: first against the
# user's validation entry, then against the test entry.
user_preference_valid_c = valid_ratings_c[C_userid]
# precision / recall / f1 are rebound by every evaluation, so each print must
# directly follow its own call.
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_c, user_preference_valid_c)
print(f'Validation for user c - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

user_preference_test_c = test_ratings_c[C_userid]
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_c, user_preference_test_c)
print(f'Test for user c - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

User a

Validation for user a - Precision: 0.5, Recall: 0.023255813953488372, F1-score: 0.04444444444444444
Test for user a - Precision: 0.75, Recall: 0.1111111111111111, F1-score: 0.19354838709677416

User b

Validation for user b - Precision: 0.4166666666666667, Recall: 0.2777777777777778, F1-score: 0.33333333333333337
Test for user b - Precision: 0.34375, Recall: 0.4230769230769231, F1-score: 0.3793103448275862

User c

Validation for user c - Precision: 0.5806451612903226, Recall: 0.20930232558139536, F1-score: 0.3076923076923077
Test for user c - Precision: 0.4864864864864865, Recall: 0.21428571428571427, F1-score: 0.2975206611570248


## Q2 improved collaborative filter with other attributes (age and gender) (with normalization)

In [65]:
# Observations:
#   - Higher M leads to higher recall and lower precision
#   - Lower M leads to lower recall and higher precision
#
#   - with improved similarity function, the precision is higher
#
# NOTE(review): the outputs recorded in this notebook for K = 80, M = 180 show
# LOWER precision here than in Q1 for all three users on both the validation
# and the test split -- re-verify the last observation before relying on it.

# K: number of similar users; M: number of movies to recommend
K, M = 80, 180

# Ids of the three users (a, b, c) evaluated below
A_userid, B_userid, C_userid = "405", "655", "13"

# ---- User a (A_userid), improved similarity ----
print("User a\n")
# read_more_data returns four values per file, unpacked here as
# (ratings, userdata, original, genres).
movielens_ratings, movielens_userdata, movielens_original, movielens_genres = starter.read_more_data('movielens.txt')
train_ratings_a, train_userdata_a, train_original_a, train_genres_a = starter.read_more_data('train_a.txt')
valid_ratings_a, valid_userdata_a, valid_original_a, valid_genres_a = starter.read_more_data('valid_a.txt')
test_ratings_a, test_userdata_a, test_original_a, test_genres_a = starter.read_more_data('test_a.txt')

# The improved similarity search also receives the userdata and genres
# structures of both the train file and movielens.txt.
similar_users_a = starter.get_similar_users_improved(train_ratings_a, movielens_ratings, train_userdata_a, movielens_userdata, train_genres_a, movielens_genres, A_userid, "euclidean", K)
recommendations_a = starter.recommend_movies(movielens_ratings, A_userid, similar_users_a, M)

# Scoring uses the `*_original` structures rather than `*_ratings`.
# NOTE(review): presumably the un-normalized ratings -- confirm in starter.read_more_data.
user_preference_valid_a = valid_original_a[A_userid]
# precision / recall / f1 are rebound by every evaluation, so each print must
# directly follow its own call.
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_a, user_preference_valid_a)
print(f'Validation for user a - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

user_preference_test_a = test_original_a[A_userid]
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_a, user_preference_test_a)
print(f'Test for user a - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# ---- User b (B_userid), improved similarity ----
print("\nUser b\n")
# read_more_data returns four values per file, unpacked here as
# (ratings, userdata, original, genres).
movielens_ratings, movielens_userdata, movielens_original, movielens_genres = starter.read_more_data('movielens.txt')
train_ratings_b, train_userdata_b, train_original_b, train_genres_b = starter.read_more_data('train_b.txt')
valid_ratings_b, valid_userdata_b, valid_original_b, valid_genres_b = starter.read_more_data('valid_b.txt')
test_ratings_b, test_userdata_b, test_original_b, test_genres_b = starter.read_more_data('test_b.txt')

# The improved similarity search also receives the userdata and genres
# structures of both the train file and movielens.txt.
similar_users_b = starter.get_similar_users_improved(train_ratings_b, movielens_ratings, train_userdata_b, movielens_userdata, train_genres_b, movielens_genres, B_userid, "euclidean", K)
recommendations_b = starter.recommend_movies(movielens_ratings, B_userid, similar_users_b, M)

# Scoring uses the `*_original` structures rather than `*_ratings`.
# NOTE(review): presumably the un-normalized ratings -- confirm in starter.read_more_data.
user_preference_valid_b = valid_original_b[B_userid]
# precision / recall / f1 are rebound by every evaluation, so each print must
# directly follow its own call.
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_b, user_preference_valid_b)
print(f'Validation for user b - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

user_preference_test_b = test_original_b[B_userid]
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_b, user_preference_test_b)
print(f'Test for user b - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

# ---- User c (C_userid), improved similarity ----
print("\nUser c\n")
# read_more_data returns four values per file, unpacked here as
# (ratings, userdata, original, genres).
movielens_ratings, movielens_userdata, movielens_original, movielens_genres = starter.read_more_data('movielens.txt')
train_ratings_c, train_userdata_c, train_original_c, train_genres_c = starter.read_more_data('train_c.txt')
valid_ratings_c, valid_userdata_c, valid_original_c, valid_genres_c = starter.read_more_data('valid_c.txt')
test_ratings_c, test_userdata_c, test_original_c, test_genres_c = starter.read_more_data('test_c.txt')

# The improved similarity search also receives the userdata and genres
# structures of both the train file and movielens.txt.
similar_users_c = starter.get_similar_users_improved(train_ratings_c, movielens_ratings, train_userdata_c, movielens_userdata, train_genres_c, movielens_genres, C_userid, "euclidean", K)
recommendations_c = starter.recommend_movies(movielens_ratings, C_userid, similar_users_c, M)

# Scoring uses the `*_original` structures rather than `*_ratings`.
# NOTE(review): presumably the un-normalized ratings -- confirm in starter.read_more_data.
user_preference_valid_c = valid_original_c[C_userid]
# precision / recall / f1 are rebound by every evaluation, so each print must
# directly follow its own call.
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_c, user_preference_valid_c)
print(f'Validation for user c - Precision: {precision}, Recall: {recall}, F1-score: {f1}')

user_preference_test_c = test_original_c[C_userid]
precision, recall, f1 = starter.evaluate_collaborative_filter(recommendations_c, user_preference_test_c)
print(f'Test for user c - Precision: {precision}, Recall: {recall}, F1-score: {f1}')


User a

Validation for user a - Precision: 0.0, Recall: 0.0, F1-score: 0.0
Test for user a - Precision: 0.6, Recall: 0.1111111111111111, F1-score: 0.18750000000000003

User b

Validation for user b - Precision: 0.37037037037037035, Recall: 0.2777777777777778, F1-score: 0.3174603174603175
Test for user b - Precision: 0.25, Recall: 0.3076923076923077, F1-score: 0.27586206896551724

User c

Validation for user c - Precision: 0.53125, Recall: 0.19767441860465115, F1-score: 0.28813559322033894
Test for user c - Precision: 0.45714285714285713, Recall: 0.19047619047619047, F1-score: 0.2689075630252101
