In [None]:
def createDataframe(processed_subID_col, processed_stim_col, processed_bi_y, object_list, subID_list):
    """Build a subject-by-object table from row-wise ranking data.

    Parameters
    ----------
    processed_subID_col : indexable
        Subject label of each data row, read as ``processed_subID_col[i]``.
    processed_stim_col : indexable
        Object (stimulus) label of each data row, read the same way.
    processed_bi_y : array-like with ``.shape``
        Value of each data row; ``shape[0]`` sets how many rows are read.
    object_list : sequence
        Labels used as the columns of the resulting table.
    subID_list : sequence
        Labels used as the index (rows) of the resulting table.

    Returns
    -------
    pandas.DataFrame
        One row per subject and one column per object. Cells that no data
        row refers to keep their initial value of 0; if several data rows
        share a subject/object pair, the last one wins.
    """
    # Start from an all-zero grid labelled by subject (rows) and object (columns).
    zero_grid = np.zeros((len(subID_list), len(object_list)))
    table = pd.DataFrame(data=zero_grid, index=subID_list, columns=object_list)

    # Copy every data row into the cell addressed by its subject and object.
    for row in range(processed_bi_y.shape[0]):
        subject = processed_subID_col[row]
        stimulus = processed_stim_col[row]
        table.at[subject, stimulus] = processed_bi_y[row]

    return table

In [None]:
def getRanking(ranking_dataframe, subject_ID, object_name):
    """Print the ranking a single subject gave to a single object.

    Parameters
    ----------
    ranking_dataframe : pandas.DataFrame
        Table indexed by subject label with one column per object label.
    subject_ID : hashable
        Row label of the subject to look up.
    object_name : hashable
        Column label of the object to look up. It does not have to be a
        string; any label is converted with ``str()`` for printing.

    Returns
    -------
    None
        The result is only printed.

    Raises
    ------
    KeyError
        If the subject or object label is not present in the table.
    """
    ranking = ranking_dataframe.at[subject_ID, object_name]

    # ``!s`` applies str() to every value, so non-string object labels
    # (e.g. integer stimulus IDs) no longer break the message the way
    # plain ``+`` concatenation did.
    print(f"Subject {subject_ID!s} gave {object_name!s} a ranking of {ranking!s}")

    return None

In [None]:
def favoriteObjects(ranking_dataframe, object_list):
    """Print the object labels ordered by their total ranking.

    The rankings are summed over all rows of ``ranking_dataframe`` (one sum
    per column) and the labels are printed in ascending order of that sum,
    i.e. the object with the highest total comes last.

    Parameters
    ----------
    ranking_dataframe : pandas.DataFrame
        Table with one column per object.
    object_list : numpy.ndarray, list or tuple
        Object labels, one per column of ``ranking_dataframe``.
        NOTE(review): assumed to be in the same order as the columns; the
        function does not check this.

    Returns
    -------
    None
        The ordered labels are only printed.
    """
    # A plain list/tuple cannot be indexed with an index array, so convert
    # it first; arrays (and any other array-like) are used unchanged.
    if isinstance(object_list, (list, tuple)):
        object_list = np.asarray(object_list)

    totals_per_object = np.sum(ranking_dataframe.values, axis=0)
    ascending_order = np.argsort(totals_per_object)
    print(object_list[ascending_order])

    return None

In [None]:
def getClusters(ranking_dataframe, num_clusters, object_list):
    """Cluster the objects by their ranking vectors with k-means.

    The table is transposed so that every object becomes one sample whose
    features are the values found in its column. The members of each
    cluster are printed.

    Parameters
    ----------
    ranking_dataframe : pandas.DataFrame
        Table with one column per object.
    num_clusters : int
        Number of clusters passed to ``KMeans`` as ``n_clusters``.
    object_list : numpy.ndarray, list or tuple
        Object labels, one per column of ``ranking_dataframe``.
        NOTE(review): assumed to be in the same order as the columns; the
        function does not check this.

    Returns
    -------
    cluster_labels : numpy.ndarray
        Cluster label of each object (``labels_`` of the fitted model).
    t_rankings : numpy.ndarray
        The transposed ranking values that were clustered.
    """
    # A plain list/tuple cannot be indexed with the index arrays produced
    # by np.where below, so convert it first; arrays are used unchanged.
    if isinstance(object_list, (list, tuple)):
        object_list = np.asarray(object_list)

    # transpose so that each row describes one object
    t_dataframe = ranking_dataframe.T

    # get numpy array from the dataframe
    t_rankings = t_dataframe.values

    # apply kmeans to cluster the object vectors
    # NOTE(review): no random_state is set, so the assignment of cluster
    # numbers may differ between runs.
    kmeans_model = KMeans(n_clusters=num_clusters).fit(t_rankings)

    cluster_labels = kmeans_model.labels_

    # print out the members of every cluster
    for i in range(num_clusters):
        print("cluster "+str(i)+": "+str(object_list[np.where(cluster_labels==i)]))

    return cluster_labels, t_rankings

In [None]:
def visualizeClusters(cluster_labels, t_rankings):
    """Scatter-plot the clustered vectors in two PCA dimensions.

    Parameters
    ----------
    cluster_labels : array-like
        One value per row of ``t_rankings``; used as the point colors.
    t_rankings : array-like
        Vectors to project, one row per plotted point.

    Returns
    -------
    numpy.ndarray
        The 2-component PCA projection of ``t_rankings``.
    """
    # Project every vector onto its first two principal components.
    projected = PCA(n_components=2).fit_transform(t_rankings)

    # Draw one point per vector, colored by its cluster label.
    figure = plt.figure(figsize=(10, 10))
    axes = figure.add_subplot(111)
    axes.scatter(projected[:, 0], projected[:, 1], c=cluster_labels, s=100)

    return projected

In [None]:
def visualizePopularity(pca_components, ranking_dataframe):
    """Scatter-plot the objects colored by their popularity rank.

    The popularity of an object is the rank of its total ranking (the sum
    over its column of ``ranking_dataframe``) among all objects: 0 for the
    lowest total up to ``n - 1`` for the highest.

    Parameters
    ----------
    pca_components : numpy.ndarray
        2-D coordinates of the objects; columns 0 and 1 are plotted.
        NOTE(review): rows are assumed to follow the column order of
        ``ranking_dataframe`` so that colors line up with points.
    ranking_dataframe : pandas.DataFrame
        Table with one column per object.

    Returns
    -------
    numpy.ndarray
        Popularity rank of each object, in column order.
    """
    x = pca_components[:, 0]
    y = pca_components[:, 1]

    totals = np.sum(ranking_dataframe.values, axis=0)

    # A single argsort returns the sorting permutation (which object sits
    # at each sorted position), not the rank of each object. Applying
    # argsort to that permutation inverts it and yields per-object ranks.
    # The stable sort gives tied objects ranks in column order.
    sorted_positions = np.argsort(totals, kind="stable")
    popularities = np.argsort(sorted_positions)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    ax.scatter(x, y, c=popularities, s=100, cmap='Greens_r')

    return popularities

In [None]:
def displayPopularity(popularities, num_clusters, cluster_labels):
    """Print the mean popularity of the members of every cluster.

    Parameters
    ----------
    popularities : numpy.ndarray
        One popularity value per object.
    num_clusters : int
        Clusters ``0 .. num_clusters - 1`` are reported.
    cluster_labels : numpy.ndarray
        Cluster label of each object, aligned with ``popularities``.

    Returns
    -------
    None
        The means are only printed.
    """
    for cluster_id in range(num_clusters):
        # Pick the popularity values of the objects assigned to this cluster.
        member_positions = np.where(cluster_labels == cluster_id)
        cluster_mean = np.mean(popularities[member_positions])
        print(f"cluster {cluster_id!s} mean popularity is {cluster_mean!s}")

    return None