In [81]:
import numpy as np
import bokeh.plotting as plt
import sklearn.manifold as manifold
import ujson
import time
from sklearn.metrics import mean_squared_error
import pickle
np.seterr(divide='ignore', invalid='ignore')
from bokeh.plotting import figure, show, output_file

__author__ = 'amanda'


def load_data(path='tmp/clean_rating_data.npz'):
    """Load the full rating matrix from the cleaned-ratings archive.

    Parameters
    ----------
    path : str, optional
        Location of the ``.npz`` archive. Defaults to the path this notebook
        has always read from, so existing ``load_data()`` calls are unchanged.

    Returns
    -------
    numpy.ndarray
        The array stored under the ``'full_rating'`` key. (The original
        comment describes it as 1540 * 200; that shape is not checked here.)
    """
    # np.load on an .npz returns a lazy NpzFile that keeps the archive open.
    # The context manager closes it as soon as the array has been read into
    # memory, instead of leaking the file handle for the kernel's lifetime.
    with np.load(path) as rating_data:
        return rating_data['full_rating']

def load_rating_data(path='tmp/clean_rating_data.npz'):
    """Return the column-wise mean of the full rating matrix as a plain list.

    Parameters
    ----------
    path : str, optional
        Location of the ``.npz`` archive. Defaults to the path this notebook
        has always read from, so existing ``load_rating_data()`` calls are
        unchanged.

    Returns
    -------
    list of float
        ``mean(full_rating, axis=0)`` converted with ``tolist()`` -- one value
        per column of the rating matrix (the original comment notes a length
        of 200; not checked here).
    """
    # Close the NpzFile deterministically; np.load would otherwise leave the
    # archive's file handle open.
    with np.load(path) as rating_data:
        orig_rating = rating_data['full_rating']
    # Average over axis 0 (rows), leaving one mean per column.
    return np.mean(orig_rating, axis=0).tolist()

def svd_reconstruct(num_dim):
    """Factor the rating matrix with a thin SVD and keep the leading components.

    Parameters
    ----------
    num_dim : int
        Number of leading singular components to retain.

    Returns
    -------
    tuple of numpy.ndarray
        ``(user, face)`` where ``user`` holds the first ``num_dim`` left
        singular vectors (one row per row of the rating matrix) and ``face``
        holds the first ``num_dim`` right singular vectors scaled by their
        singular values, transposed so there is one row per column of the
        rating matrix.
    """
    ratings = load_data()

    # Thin SVD: ratings == left @ diag(singular_values) @ right_t.
    # Note that numpy returns the right factor already transposed.
    left, singular_values, right_t = np.linalg.svd(ratings, full_matrices=False)

    # Rater-side factors: leading columns of the left singular vectors.
    user = left[:, :num_dim]

    # Face-side factors: leading rows of V^T, each scaled by its singular value.
    scale = np.diag(singular_values[:num_dim])
    face = np.dot(scale, right_t[:num_dim, :])

    return user, face.T


def tsne_on_user(u, random_state=None):
    """Embed the rows of ``u`` with t-SNE using scikit-learn's default settings.

    Parameters
    ----------
    u : array-like
        Matrix whose rows are the points to embed. Despite the function name,
        ``main1`` also passes the face factor matrix here.
    random_state : int, RandomState instance or None, optional
        Seed forwarded to ``sklearn.manifold.TSNE``. t-SNE is stochastic, so
        pass a fixed integer to get the same layout on every run. The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    numpy.ndarray
        The embedding returned by ``TSNE.fit_transform``, one row per input row.
    """
    model = manifold.TSNE(random_state=random_state)
    return model.fit_transform(u)


def color_coding(num_users=1540, cluster_file=r"clusterDict.txt"):
    """Assign a plot colour to every user index from its cluster membership.

    Parameters
    ----------
    num_users : int, optional
        Number of user indices (``0 .. num_users - 1``) to colour. Defaults to
        1540, the value previously hard-coded here.
    cluster_file : str, optional
        Path to the pickled cluster mapping. Defaults to the file the notebook
        has always read.

    Returns
    -------
    list of str
        One colour name per user index: ``'red'`` if the index is in cluster
        6, ``'blue'`` if it is in cluster 7, ``'green'`` otherwise.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only point this at cluster files you produced yourself.
    with open(cluster_file, "rb") as input_file:
        clusters = pickle.load(input_file)

    color_map_list = []
    for user_idx in range(num_users):
        if user_idx in clusters[6]:
            cur_color = 'red'  # biggest cluster
        elif user_idx in clusters[7]:
            cur_color = 'blue'  # second biggest cluster
        else:
            cur_color = 'green'  # others
        color_map_list.append(cur_color)

    return color_map_list


def face_scatter_colored_by_attra(mapper):
    """Scatter-plot 2-D points, tinting each one by its mean rating.

    The first two columns of ``mapper`` are used as x/y coordinates. Colours
    come from ``load_rating_data()``: each mean rating is truncated to an int
    and used as the red channel, with green and blue fixed at 255. The figure
    is written to ``user_scatter.html`` and opened with bokeh's ``show``.

    NOTE(review): the hex format pads to two digits but does not clamp, so a
    mean rating above 255 would produce a malformed colour string. Small
    ratings all map to nearly the same colour -- confirm the rating scale.
    """
    xs = mapper[:, 0]
    ys = mapper[:, 1]

    # One "#rrggbb" string per mean rating; only the red channel varies.
    mean_ratings = load_rating_data()
    point_colors = []
    for rating in mean_ratings:
        point_colors.append("#{:02x}{:02x}{:02x}".format(int(rating), 255, 255))

    # Toolbar specification handed to bokeh as a single comma-separated string.
    toolbar = (
        "resize,crosshair,pan,wheel_zoom,box_zoom,reset,tap,previewsave,"
        "box_select,poly_select,lasso_select"
    )

    # `plt` is bokeh.plotting in this notebook, not matplotlib.
    fig = plt.figure(tools=toolbar)
    fig.scatter(xs, ys, fill_color=point_colors, fill_alpha=0.6, line_color=None)
    plt.output_file('user_scatter.html', title='user scatter plot example')
    plt.show(fig)




def scatter_user_show(mapper, colors):
    """Scatter-plot 2-D points with caller-supplied fill colours.

    The first two columns of ``mapper`` are used as x/y coordinates and
    ``colors`` is passed straight through as the per-point fill colour. The
    figure is written to ``user_scatter.html`` and opened with bokeh's
    ``show``.
    """
    xs, ys = mapper[:, 0], mapper[:, 1]

    # `plt` is bokeh.plotting in this notebook, not matplotlib.
    fig = plt.figure()
    fig.scatter(
        xs,
        ys,
        fill_color=colors,
        fill_alpha=0.6,
        line_color=None,
    )
    plt.output_file('user_scatter.html', title='user scatter plot example')
    plt.show(fig)
    

def embed_show(img_list, mapper):
    """Draw each image at its 2-D embedding location and save the plot.

    Parameters
    ----------
    img_list : sequence of str
        Image URLs/paths, passed to bokeh's ``image_url`` glyph. Presumably
        one per row of ``mapper`` -- not checked here.
    mapper : array-like
        2-D coordinates; column 0 is used as x and column 1 as y.

    Side effects
    ------------
    Writes the figure to ``test.html`` and opens it via bokeh's ``show``.
    """
    # The previous version also re-read data/imagePathFile.json into a local
    # that was never used. The images to draw come solely from `img_list`, so
    # that dead read (and its spurious failure mode) has been removed.
    locs = np.asarray(mapper)
    xs, ys = locs[:, 0], locs[:, 1]

    # `plt` is bokeh.plotting in this notebook, not matplotlib.
    p = plt.figure()
    # Each image is drawn 15 x 20 data units, centred on its coordinate.
    p.image_url(x=xs, y=ys, url=img_list, w=15, h=20, anchor="center")

    # Mark the exact embedding positions as well as drawing the images.
    p.circle(x=xs, y=ys)

    plt.output_file('test.html', title="")
    plt.save(p)
    plt.show(p)
    return


def main1():
    """Embed the face factors with t-SNE and plot the face images there.

    Pipeline: rank-2 SVD of the rating matrix -> t-SNE on the face-side
    factors -> image plot via ``embed_show``, using the image paths listed in
    ``data/imagePathFile.json``.

    Earlier experiments, previously kept here as commented-out code, plotted
    the user-side factors with ``color_coding`` / ``scatter_user_show``, or
    passed the face factors to the plot directly without t-SNE.
    """
    rank = 2
    # Only the face-side factors are used; the user-side factors are discarded.
    _, face_factors = svd_reconstruct(rank)

    embedding = tsne_on_user(face_factors)

    with open('data/imagePathFile.json') as path_file:
        image_paths = ujson.load(path_file)

    embed_show(image_paths, embedding)



# def main2():
#     num_dim = 2
#     user, face = svd_reconstruct(num_dim)
     
#     # do tsne on face matrix or not 
# #      mapper = tsne_on_user(face)
#     mapper = face    
#     face_scatter_colored_by_attra(mapper)


# if __name__ == '__main__':
#     main()

# Run the face-embedding pipeline (SVD -> t-SNE -> bokeh image plot) whenever
# this cell executes. This writes test.html and opens it.
main1()

In [73]:
# Rank-2 SVD factors of the rating matrix: `user` holds the leading left
# singular vectors, `face` the singular-value-scaled right singular vectors.
# NOTE(review): this cell's execution count (73) is lower than that of the
# definition cell above (81), so it last ran before the definitions were
# re-executed. Restart and run all to confirm it still reproduces.
user, face = svd_reconstruct(2)

In [74]:
# Display the face factor matrix (one row per face, two columns).
# NOTE(review): this dumps the entire array into the notebook output; a slice
# such as face[:5] or face.shape would be easier to read.
face

array([[ -1.64455950e+02,  -3.32264259e+00],
       [ -1.62483546e+02,  -5.80665824e+00],
       [ -1.81483754e+02,  -5.98952299e+00],
       [ -1.81331271e+02,  -1.10333317e+01],
       [ -1.77310922e+02,  -1.07161263e+01],
       [ -1.68611561e+02,  -2.55214481e+00],
       [ -1.47314597e+02,   5.28302579e+00],
       [ -9.88000702e+01,   1.53835957e+01],
       [ -9.61336675e+01,   1.64840171e+01],
       [ -1.56626120e+02,  -3.80053488e+00],
       [ -1.15706521e+02,   7.74472546e+00],
       [ -1.45597819e+02,   4.72703150e+00],
       [ -9.85081096e+01,   1.68684843e+01],
       [ -1.36339496e+02,   8.36698689e+00],
       [ -1.60927557e+02,  -2.06653811e+00],
       [ -1.59008187e+02,  -2.56873336e+00],
       [ -1.89976175e+02,  -1.37167755e+01],
       [ -1.85576116e+02,  -7.35722558e+00],
       [ -1.74954487e+02,  -8.07641813e+00],
       [ -1.06008529e+02,   1.74086173e+01],
       [ -1.59812132e+02,  -4.29927985e+00],
       [ -1.56162455e+02,  -3.38765313e+00],
       [ -