In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.metrics.pairwise import cosine_similarity
from math import nan


In [2]:
critics = pd.DataFrame({
	"Lady": [2.5, 3.0, 2.5, None, 3.0, 3.0, None],
	"Snakes": [3.5, 3.5, 3.0, 3.5, 4.0, 4.0, 4.5],
	"Luck": [3.0, 1.5, None, 3.0, 2.0, None, None],
	"Superman": [3.5, 5.0, 3.5, 4.0,3.0, 5.0, 4.0],
	"Dupree": [2.5, 3.5, None, 2.5, 2.0, 3.5, 1.0],
	"Night": [3.0, 3.0, 4.0, 4.5, 3.0, 3.0, None]
})
Name = ["Lisa Rose", "Gene Seymour", "Michael Phillips", "Claudia Puig", "Mick Lasalle", "Jack Matthews", "Toby"]
critics.index = Name

In [3]:
critics.dtypes

Lady        float64
Snakes      float64
Luck        float64
Superman    float64
Dupree      float64
Night       float64
dtype: object

In [4]:
critics.loc["Lisa Rose"]

# This is the preferred data format as 
# 1. Variables are column names, instead of constants as column names
# 2. Pandas dataframe is easy to do analysis as opposed to the dictionary recommended in the assignment

# Append Anne
Anne = pd.Series({"Lady": 1.5, "Snakes": None, "Luck": 4.0, "Superman": None, "Dupree":2.0, "Night": None}, name="Anne")
critics = critics.append(Anne)
critics

Unnamed: 0,Lady,Snakes,Luck,Superman,Dupree,Night
Lisa Rose,2.5,3.5,3.0,3.5,2.5,3.0
Gene Seymour,3.0,3.5,1.5,5.0,3.5,3.0
Michael Phillips,2.5,3.0,,3.5,,4.0
Claudia Puig,,3.5,3.0,4.0,2.5,4.5
Mick Lasalle,3.0,4.0,2.0,3.0,2.0,3.0
Jack Matthews,3.0,4.0,,5.0,3.5,3.0
Toby,,4.5,,4.0,1.0,
Anne,1.5,,4.0,,2.0,


In [5]:
# Tests
lisa = critics.loc["Lisa Rose"]
print(lisa.name)
print(lisa.to_dict())
for item in lisa.to_dict():
    print(item)

Lisa Rose
{'Lady': 2.5, 'Snakes': 3.5, 'Luck': 3.0, 'Superman': 3.5, 'Dupree': 2.5, 'Night': 3.0}
Lady
Snakes
Luck
Superman
Dupree
Night


In [6]:
def sim_distance_manhattan(person1, person2):
    """args:
           person1, person2: pandas series with movie ratings of each person
        
       returns:
           score: float which is a Manhattan distance between 2 persons"""
    scores = [abs(rating1 - rating2) if (not(np.isnan(rating1)) and not(np.isnan(rating2))) else 0 \
              for rating1, rating2 in zip(person1, person2)]
    
    return np.sum(scores)

In [7]:
#test
sim_distance_manhattan(critics.loc["Lisa Rose"], critics.loc["Gene Seymour"])

4.5

In [8]:
def sim_distance_euclidian(person1, person2):
    """args:
           person1, person2: pandas series with movie ratings of each person
        
       returns:
           score: float which is a Euclidian distance between 2 persons"""    

    scores = [np.square(rating1 - rating2) if (not(np.isnan(rating1)) and not(np.isnan(rating2))) else 0 \
              for rating1, rating2 in zip(person1, person2)]
    # print(scores)
    #remove the nan values
    return np.sqrt(np.sum(scores))

In [9]:
#test
sim_distance_euclidian(critics.loc["Lisa Rose"], critics.loc["Gene Seymour"])
sim_distance_euclidian(critics.loc["Lisa Rose"], critics.loc["Toby"])

1.8708286933869707

In [10]:
def pearson(person1, person2):
    """
    person1 and person2 are series
    """
    df = pd.concat([person1, person2], axis=1)
    df = df.dropna()
    correlation = df.corr(method='pearson')
    #print("correlation: ", correlation)
    return correlation.values[0][1]

In [11]:
#test
c = pearson(critics.loc["Toby"], critics.loc["Gene Seymour"])
print(c)

0.3812464258315117


In [12]:
def cosine(person1, person2):
    """
    person1 and person2 are series
    """
    df = pd.concat([person1, person2], axis=1)
    df = df.dropna()
    x = df[df.columns[0]].values
    y = df[df.columns[1]].values
    correlation = np.dot(x, y)/(np.linalg.norm(x)*np.linalg.norm(y))
    # print("correlation: ", correlation)
    return correlation

In [13]:
c = cosine(critics.loc["Toby"], critics.loc["Gene Seymour"])

In [14]:
def compute_nearest_neighbour(new_critic, critics, distance_measure = "manhattan"):
    """
    returning a sorted list of critics close
    to nouveaucritic.
    """
    distances=[]
    for critic, row in critics.iterrows():
        if critic!=new_critic:
            # print(critic, ",", new_critic, ": ", end="")
            if distance_measure == "manhattan":
                distance=sim_distance_manhattan(critics.loc[critic], critics.loc[new_critic])
            elif distance_measure == "euclidian":
                distance=sim_distance_euclidian(critics.loc[critic], critics.loc[new_critic])
            elif distance_measure == "pearson":
                distance=pearson(critics.loc[critic], critics.loc[new_critic])
            elif distance_measure == "cosine":
                distance=cosine(critics.loc[critic], critics.loc[new_critic])
            else:
                print("Incorrect distance metric provided")
                return
            # print(distance)
            if not(np.isnan(distance)):
                distances.append((distance,critic))
    return sorted(distances, key = lambda x: x[0])

In [15]:
#test
compute_nearest_neighbour("Lisa Rose",critics)

[(1.5, 'Michael Phillips'),
 (2.0, 'Claudia Puig'),
 (2.5, 'Anne'),
 (3.0, 'Mick Lasalle'),
 (3.0, 'Toby'),
 (3.5, 'Jack Matthews'),
 (4.5, 'Gene Seymour')]

In [16]:
#test
compute_nearest_neighbour("Toby", critics)

[(1.0, 'Anne'),
 (2.0, 'Michael Phillips'),
 (2.5, 'Claudia Puig'),
 (2.5, 'Mick Lasalle'),
 (3.0, 'Lisa Rose'),
 (4.0, 'Jack Matthews'),
 (4.5, 'Gene Seymour')]

In [17]:
def recommend_nearest_neighbour(new_critic, critics):
    distances = compute_nearest_neighbour(new_critic, critics)
    nearest_neighbour = distances[0]
    
    new_critic = critics.loc[new_critic].to_dict()
    nn = critics.loc[nearest_neighbour[1]].to_dict()
    
    recommendations = []
    for movie in new_critic:
        if np.isnan(new_critic[movie]) and not(np.isnan(nn[movie])):
            recommendations.append((movie, nn[movie]))
    
    return sorted(recommendations, key = lambda x: x[1], reverse=True)

In [18]:
#test
recommend_nearest_neighbour("Toby", critics)

[('Luck', 4.0), ('Lady', 1.5)]

In [19]:
def best_recommend(new_critic, critics, similarity_measure="manhattan", weight_measure="normal"):
    """for each movie, find the best score"""
    if similarity_measure == "manhattan":
        distances = compute_nearest_neighbour(new_critic,critics, "manhattan")
    elif similarity_measure == "euclidian":
        distances = compute_nearest_neighbour(new_critic,critics, "euclidian")
    elif similarity_measure == "pearson":
        distances = compute_nearest_neighbour(new_critic,critics, "pearson")
    elif similarity_measure == "cosine":
        distances = compute_nearest_neighbour(new_critic,critics, "cosine")
    else:
        print("Wrong Similarity measure provided")
        return None
    # print(distances)
    distances = {d[1]: d[0] for d in distances}
    movies = critics.loc[new_critic]
    movies_not_watched = movies[np.isnan(movies)].index
    # print(movies_not_watched)
    s_ = dict()
    for movie in movies_not_watched:
        total = 0 
        s = 0
        for critic in critics.index:
            movie_rating = critics.loc[critic].to_dict()[movie]
            if ~np.isnan(movie_rating) and (critic in distances): 
                if weight_measure == "normal":
                    weight = 1/(1+distances[critic])
                elif weight_measure == "exponential":
                    weight = math.exp(0-distances[critic])
                else:
                    print("wrong weight measure provided")
                    return None
                total += weight * movie_rating
                s += weight
        if s != 0:
            s_[movie] = total/s
        else:
            s_[movie] = 0
        
    print(s_)
    return sorted(s_, key=s_.get, reverse=True)[0]

In [20]:
best_recommend("Anne", critics)

{'Snakes': 3.7102754377162173, 'Superman': 3.908627564898196, 'Night': 3.6071368399770862}


'Superman'

In [21]:
# test
best_recommend("Anne", critics, weight_measure="exponential")

{'Snakes': 3.699042610620127, 'Superman': 3.821068742837592, 'Night': 3.927900678942954}


'Night'

In [22]:
best_recommend("Anne", critics, similarity_measure="pearson")

{'Snakes': 3.611325016525231, 'Superman': 4.537860505668406, 'Night': 3.0491769833757996}


'Superman'

In [23]:
best_recommend("Anne", critics, similarity_measure="cosine")

{'Snakes': 3.7124560447528605, 'Superman': 4.0053503123050636, 'Night': 3.402857861715499}


'Superman'

In [24]:
# read the music data
music_data = \
[["name","Angelica","Bill","Chan","Dan","Hailey","Jordyn","Sam","Veronica"],
["Blues Traveler",3.5,2,5,3,None,None,5,3],
["Broken Bells",2,3.5,1,4,4,4.5,2,None],
["Deadmau5",None,4,1,4.5,1,4,1,4],
["Norah Jones",4.5,None,3,None,4,5,3,5],
["Phoenix",5,2,5,3,None,5,5,4],
["Slightly Stoopid",1.5,3.5,1,4.5,None,4.5,4,2.5],
["The Strokes",2.5,None,None,4,4,4,5,3],
["Vampire Weekend",2,3,None,2,1,4,None,None]]
music_data = pd.DataFrame(np.transpose(music_data))
music_data = music_data
music_data.columns = music_data.iloc[0]
music_data = music_data.drop(0)
music_data.index = music_data.name.values
music_data = music_data.drop("name", axis=1)
music_data = music_data.astype(float)

In [25]:
music_data

Unnamed: 0,Blues Traveler,Broken Bells,Deadmau5,Norah Jones,Phoenix,Slightly Stoopid,The Strokes,Vampire Weekend
Angelica,3.5,2.0,,4.5,5.0,1.5,2.5,2.0
Bill,2.0,3.5,4.0,,2.0,3.5,,3.0
Chan,5.0,1.0,1.0,3.0,5.0,1.0,,
Dan,3.0,4.0,4.5,,3.0,4.5,4.0,2.0
Hailey,,4.0,1.0,4.0,,,4.0,1.0
Jordyn,,4.5,4.0,5.0,5.0,4.5,4.0,4.0
Sam,5.0,2.0,1.0,3.0,5.0,4.0,5.0,
Veronica,3.0,,4.0,5.0,4.0,2.5,3.0,


In [26]:
weight_measures = ["normal", "exponential"]
similarity_measures = ["manhattan", "euclidian", "pearson", "cosine"]
result = []
for w in weight_measures:
    S = []
    for s in similarity_measures:
        r = best_recommend("Veronica", music_data, similarity_measure=s, weight_measure=w)
        S.append(r)
    result.append(S)

result = pd.DataFrame(np.transpose(result), columns = weight_measures, index = similarity_measures)
print("result for veronica")
result

{'Broken Bells': 3.223210013347772, 'Vampire Weekend': 2.446409989594173}
{'Broken Bells': 3.103382168159156, 'Vampire Weekend': 2.4330477201609337}
{'Broken Bells': 2.9451583465622115, 'Vampire Weekend': 2.298630459863528}
{'Broken Bells': 2.9823516870991966, 'Vampire Weekend': 2.388165211110368}
{'Broken Bells': 3.284578069706963, 'Vampire Weekend': 2.569337175881855}
{'Broken Bells': 3.1422172416039498, 'Vampire Weekend': 2.4625959140684346}
{'Broken Bells': 2.9810900114221477, 'Vampire Weekend': 2.2841671911498596}
{'Broken Bells': 2.9661074216996925, 'Vampire Weekend': 2.3769631515605716}
result for veronica


Unnamed: 0,normal,exponential
manhattan,Broken Bells,Broken Bells
euclidian,Broken Bells,Broken Bells
pearson,Broken Bells,Broken Bells
cosine,Broken Bells,Broken Bells


In [27]:
result = []
for w in weight_measures:
    S = []
    for s in similarity_measures:
        r = best_recommend("Hailey", music_data, similarity_measure=s, weight_measure=w)
        S.append(r)
    result.append(S)

result = pd.DataFrame(np.transpose(result), columns = weight_measures, index = similarity_measures)
print("result for Haily")
result

{'Blues Traveler': 3.682402965547318, 'Phoenix': 4.16416306533244, 'Slightly Stoopid': 2.993922026299369}
{'Blues Traveler': 3.670076156996576, 'Phoenix': 4.204565095313143, 'Slightly Stoopid': 3.001090442232799}
{'Blues Traveler': 3.4000090475599793, 'Phoenix': 3.974747804267113, 'Slightly Stoopid': 3.0402550107697053}
{'Blues Traveler': 3.565280143893448, 'Phoenix': 4.122112999046882, 'Slightly Stoopid': 3.0741290215668666}
{'Blues Traveler': 4.110895740546039, 'Phoenix': 4.374146026272414, 'Slightly Stoopid': 2.813911848333065}
{'Blues Traveler': 3.933744548659873, 'Phoenix': 4.4312079088574805, 'Slightly Stoopid': 2.856389140787804}
{'Blues Traveler': 3.349053895129917, 'Phoenix': 3.9266026757524672, 'Slightly Stoopid': 3.026986566088232}
{'Blues Traveler': 3.5492979197390055, 'Phoenix': 4.103699976191357, 'Slightly Stoopid': 3.076516506322003}
result for Haily


Unnamed: 0,normal,exponential
manhattan,Phoenix,Phoenix
euclidian,Phoenix,Phoenix
pearson,Phoenix,Phoenix
cosine,Phoenix,Phoenix


In [28]:
# my own example
ratings = pd.DataFrame([[1,2,3,4,5],
[6,5,4,3,2],
[3,4,5,6,7],
[8,7,6,5,4],
[5,None,7,None,9]])
ratings.index = ["0", "1", "2", "3", "4"]
ratings

Unnamed: 0,0,1,2,3,4
0,1,2.0,3,4.0,5
1,6,5.0,4,3.0,2
2,3,4.0,5,6.0,7
3,8,7.0,6,5.0,4
4,5,,7,,9


In [29]:
# what to recommend to #4?
result = []
for w in weight_measures:
    S = []
    for s in similarity_measures:
        r = best_recommend("4", ratings, similarity_measure=s, weight_measure=w)
        S.append(r)
    result.append(S)

result = pd.DataFrame(np.transpose(result), columns = weight_measures, index = similarity_measures)
print("result for 4")
result #different for manhattan, euclidian and pearson, cosine

{1: 4.569286687869151, 3: 4.750113584734211}
{1: 4.4864539850160865, 3: 4.782748197718374}
{1: nan, 3: nan}
{1: 4.539059758462734, 3: 4.4697361511835325}
{1: 4.142720594941717, 3: 5.929217991459727}
{1: 4.185934770920111, 3: 5.829572967372933}
{1: 5.642391233933647, 3: 4.119202922022118}
{1: 4.5746545975919215, 3: 4.4422289130689006}
result for 4




Unnamed: 0,normal,exponential
manhattan,3,3
euclidian,3,3
pearson,1,1
cosine,1,1


In [40]:
# Another example: real dataset
ratings = pd.read_csv("test_data/ratings.csv")
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [42]:
#transform the data

distinct_movies = np.unique(ratings["movieId"])
distinct_users = np.unique(ratings["userId"])

#take 100 movies and 100 users from dataset and make a matrix of dimension distinct_users*distinct_movies
matrix = np.empty([100, 100])
matrix[:,:] = np.nan
for i in range(100):
    for j in range(100):
        value = ratings.query("userId == %d and movieId == %d"%(i, j))['rating'].values
        if len(value) != 0:
            matrix[i][j] = value[0]
print(matrix) #sparse matrix

[[nan nan nan ... nan nan nan]
 [nan 4.  nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan 4.5 nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]


In [43]:
df = pd.DataFrame(matrix)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,,,,,,,,,,,...,,,,,,,,,,
1,,4.0,,4.0,,,4.0,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,,,,,,,,3.0,,,...,,,,,,,,,,
96,,5.0,,,,,,,,,...,,,,,,,,,,
97,,,,,,,,,,,...,,,,,,,,,,
98,,4.5,,,,,,,,,...,,,,,,,,,,


In [44]:
# what to recommend to #99?
result = []
for w in weight_measures:
    S = []
    for s in similarity_measures:
        r = best_recommend(99, df, similarity_measure=s, weight_measure=w)
        S.append(r)
    result.append(S)

result = pd.DataFrame(np.transpose(result), columns = weight_measures, index = similarity_measures)
print("result for 99")
result #different for manhattan, euclidian and pearson, cosine

{0: 0, 1: 3.720744680851064, 2: 3.669491525423728, 3: 3.4680851063829783, 4: 3.0, 5: 3.4545454545454537, 6: 3.952797202797203, 7: 3.273972602739726, 8: 2.4545454545454546, 9: 0, 11: 3.603448275862068, 12: 1.0, 13: 3.6000000000000005, 14: 4.666666666666667, 15: 3.6000000000000005, 16: 4.257201646090535, 17: 3.407407407407407, 18: 3.5000000000000004, 19: 2.633928571428571, 20: 3.0, 21: 3.713298791018998, 24: 2.7142857142857144, 25: 3.5862068965517233, 26: 3.777777777777778, 27: 3.0, 28: 5.0, 29: 4.5, 30: 0, 31: 2.846846846846847, 32: 3.796305797845049, 33: 0, 34: 3.8850574712643686, 35: 0, 36: 4.121212121212121, 37: 0, 38: 2.5, 39: 3.394117647058824, 40: 0, 41: 3.7857142857142856, 42: 0, 43: 2.5, 44: 2.2499999999999996, 45: 3.333333333333333, 46: 3.727272727272727, 47: 3.8544758257539486, 48: 3.1475409836065578, 49: 0, 50: 4.340979782270606, 51: 0, 52: 3.75, 53: 5.0, 54: 3.0, 55: 4.0, 56: 0, 57: 4.0, 58: 4.0, 59: 0, 60: 3.7804878048780486, 61: 4.0, 62: 3.9135021097046416, 63: 0, 64: 2.0,

  if __name__ == '__main__':


{0: 0, 1: 4.041911579099224, 2: 3.5563026922784537, 3: 3.56576691558674, 4: 3.0000000000000004, 5: 3.670703851745643, 6: 4.055462163465193, 7: 3.5015188862377, 8: 3.994010828102057, 9: 0, 11: 3.9445567017753214, 12: 1.0, 13: 3.0000000000000004, 14: 4.0, 15: 3.3366841826447953, 16: 4.083123377603858, 17: 3.3021152878727684, 18: 2.0, 19: 2.3494703778151043, 20: 0, 21: 3.442016250095025, 24: 4.0, 25: 3.1662467552077165, 26: 3.5037602554383853, 27: 3.0, 28: 0, 29: 4.50076570397797, 30: 0, 31: 3.199395632036352, 32: 4.0416141115287845, 33: 0, 34: 3.7090308085097052, 35: 0, 36: 4.168766223961417, 37: 0, 38: 2.5, 39: 3.1666666666666665, 40: 0, 41: 4.331657908677602, 42: 0, 43: 4.0, 44: 2.0, 45: 3.4974868630164035, 46: 3.5037602554383853, 47: 3.843927751742736, 48: 3.167603483516704, 49: 0, 50: 4.079444253130019, 51: 0, 52: 4.0, 53: 0, 54: 3.0075205108767697, 55: 4.0, 56: 0, 57: 4.00153140795594, 58: 2.0, 59: 0, 60: 4.332337165557978, 61: 4.0, 62: 3.6675064895845666, 63: 0, 64: 2.0, 65: 1.8792

Unnamed: 0,normal,exponential
manhattan,28,28
euclidian,28,28
pearson,0,0
cosine,29,29


In [37]:
import random
#create a matrix of random ratings
ratings = np.random.rand(100,100)*5

#randomly select 10% of values in matrix to be NaN
for i in range(5000):
    row = random.randint(0,99)
    col = random.randint(0,99)
    ratings[row][col] = np.nan

ratings = pd.DataFrame(ratings)
    
ratings

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,3.523701,2.631387,0.461101,,,0.658341,0.116405,2.554733,,,...,0.833275,0.145140,2.319909,1.696017,,,3.587738,,,2.693525
1,0.939496,,,,,4.234869,,,,,...,3.485750,,2.861366,0.376020,1.394986,0.132390,4.642001,,2.041633,
2,4.649328,1.664935,2.859372,,,0.603967,,,2.118009,1.281084,...,,,,1.277176,0.930657,0.415010,,4.446615,3.150361,
3,,,1.244475,4.135594,0.865048,,,0.466837,3.423466,2.392278,...,2.946342,4.714563,,,2.938244,,2.791323,,,3.441322
4,1.301117,3.710906,4.478866,3.282938,,3.959652,,,0.648873,3.983268,...,,1.465753,1.820228,1.317111,,,0.308376,0.427613,0.483159,4.834011
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,,4.882421,2.825116,,2.047683,,0.945927,0.511328,0.469232,,...,2.854411,,,3.543414,,4.553102,1.824837,0.890054,1.732055,3.381668
96,4.924585,3.011199,,,,3.630291,3.895414,,1.526792,,...,0.915008,1.459340,,,,,2.347070,3.545407,4.176842,2.597650
97,,1.184541,,4.491366,,,,3.395970,4.116437,,...,0.999128,,3.291110,,3.957297,,,,1.067353,1.292886
98,4.512956,4.239892,1.497453,,4.814332,0.587722,3.297131,0.397752,,,...,4.211055,,,,0.865185,,0.553325,,0.430557,0.615293


In [38]:
# what to recommend to #9?
result = []
for w in weight_measures:
    S = []
    for s in similarity_measures:
        r = best_recommend(9, ratings, similarity_measure=s, weight_measure=w)
        S.append(r)
    result.append(S)

result = pd.DataFrame(np.transpose(result), columns = weight_measures, index = similarity_measures)
print("result for 8")
result #different for manhattan, euclidian and pearson, cosine

{0: 2.4870873096711352, 1: 2.589259821232642, 5: 2.4805947042991927, 10: 2.88011312391394, 12: 2.56487463313779, 16: 2.596299761777945, 19: 2.367074598628346, 20: 2.9286156273564776, 21: 2.3531111541711405, 28: 2.4040536877936014, 35: 2.317275508702669, 37: 2.577800913465379, 38: 2.461729973038076, 41: 2.41044147865136, 42: 2.4916558764685237, 43: 2.6255161234412667, 44: 2.357528524799496, 49: 2.528830774765755, 50: 2.3667596219018416, 52: 2.923371901525852, 53: 2.008407269346894, 60: 2.8039844910330647, 62: 2.3313472824119557, 65: 2.5661591738842393, 66: 2.41391059670638, 69: 2.256472830814755, 70: 2.324605840713027, 73: 2.4647334406100083, 76: 2.515434095788827, 80: 2.0960792309844445, 82: 2.546512869567525, 86: 2.290323563452628, 91: 2.513228518077591, 92: 2.649362313329904, 93: 2.620446719609898, 94: 2.467806474089379, 95: 2.301473544990077, 98: 2.4286986619893463}
{0: 2.462818078223419, 1: 2.5965833295700653, 5: 2.49604278638546, 10: 2.898407551508018, 12: 2.5666774022253502, 16: 

Unnamed: 0,normal,exponential
manhattan,20,93
euclidian,52,20
pearson,10,10
cosine,52,10
