In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# NOTE(review): with no category or module filter this silences every warning
# for the rest of the session, so any deprecation or correctness warnings
# raised by later cells will not be shown.
warnings.filterwarnings("ignore")

In [2]:
# Toy ratings in long format: one row per (movie, person) pair with its score.
data = pd.DataFrame({
    "movie": ["A", "A", "B", "B", "B", "C", "C"],
    "person": ["Mary", "Sam", "Matt", "Mary", "Jack", "Sam", "Jim"],
    "score": [4.0, 3.5, 3.0, 4.0, 2.0, 3.7, 3.0],
})

In [18]:
# Show every rating Sam has given. Grouping by the plain column name (rather
# than a one-element list) keeps the group keys scalar, so "Sam" is a valid
# key without having to wrap it in a one-element tuple.
data.groupby("person").get_group("Sam")

Unnamed: 0,movie,person,score
1,A,Sam,3.5
5,C,Sam,3.7


In [19]:
# Distinct movie and person labels, in order of first appearance in `data`.
movies = data["movie"].drop_duplicates().tolist()
people = data["person"].drop_duplicates().tolist()

In [115]:
# People-by-movies grid of floats, with every cell initialised to 0.0.
matrix = pd.DataFrame(0.0, index=people, columns=movies)

In [116]:
# Display the freshly initialised (all-zero) people x movies grid.
matrix

Unnamed: 0,A,B,C
Mary,0.0,0.0,0.0
Sam,0.0,0.0,0.0
Matt,0.0,0.0,0.0
Jack,0.0,0.0,0.0
Jim,0.0,0.0,0.0


In [117]:
# Copy each known score from the long-format `data` into the people x movies
# grid. Each cell is written through a single `.loc[row, column]` call: the
# chained form `matrix[movie][person] = ...` assigns into an intermediate
# Series and is not guaranteed to update `matrix` itself.
for movie in movies:
    for person in people:
        scores = data.loc[(data["movie"] == movie) & (data["person"] == person), "score"]
        # A (movie, person) pair with no rating is stored as 0; if a pair
        # appears more than once, the first score is used.
        matrix.loc[person, movie] = scores.iloc[0] if not scores.empty else 0

In [118]:
# Display the grid again, after the fill loop has written the scores into it.
matrix

Unnamed: 0,A,B,C
Mary,4.0,4.0,0.0
Sam,3.5,0.0,3.7
Matt,0.0,3.0,0.0
Jack,0.0,2.0,0.0
Jim,0.0,0.0,3.0


In [119]:
def similarity(p1, p2, m):
    """Return a Euclidean-distance-based similarity between two rows of ``m``.

    Args:
        p1: Index label of the first row.
        p2: Index label of the second row.
        m: DataFrame whose rows are the vectors being compared.

    Returns:
        ``1 / (1 + d)``, where ``d`` is the Euclidean distance between the two
        rows. Identical rows therefore score 1, and the score shrinks towards
        0 as the rows move apart.
    """
    rows = m.loc[[p1, p2]].values
    diff = rows[0] - rows[1]
    distance = np.sqrt(np.sum(diff * diff))
    return 1 / (1 + distance)

In [121]:
from itertools import combinations

# Print the similarity score for every unordered pair of people in the grid,
# one "name name score" line per pair.
for p1, p2 in combinations(matrix.index, 2):
    print(p1, p2, similarity(p1, p2, m=matrix))

Mary Sam 0.15451781645203114
Mary Matt 0.1951941016011038
Mary Jack 0.1827439976315568
Mary Jim 0.13507810593582123
Sam Matt 0.14469672016654864
Sam Jack 0.15451781645203114
Sam Jim 0.21885124868100078
Matt Jack 0.5
Matt Jim 0.1907435698305462
Jack Jim 0.21712927295533244


In [138]:
def recommand(data, recommand_person, similar=similarity):
    """Rank every column of ``data`` for one person by similarity-weighted score.

    Every other row is weighted by ``similar(recommand_person, row_label, data)``;
    the target person's own row gets weight 0 so it does not influence the
    result. Columns the person has already rated are not filtered out.

    Args:
        data: DataFrame of numeric ratings whose rows are the entities being
            compared (people, in this notebook) and whose columns are the
            items being ranked.
        recommand_person: Index label of the row to build the ranking for.
        similar: Callable ``(p1, p2, data) -> float`` used to weight the other
            rows. Defaults to ``similarity``.

    Returns:
        A list of ``(score, column)`` tuples sorted by score, highest first,
        where ``score`` is the weighted sum of the column divided by the sum
        of all weights.
    """
    # Use the injected `similar` callable; calling the module-level
    # `similarity` here would silently ignore the caller's argument.
    weights = np.array(
        [
            similar(recommand_person, name, data) if name != recommand_person else 0
            for name in data.index
        ],
        dtype=float,
    ).reshape(-1, 1)

    # Ratings are kept separate from the weights so that a ratings column that
    # happens to be called "similar" is not overwritten.
    ratings = data.to_numpy(dtype=float)
    scores = (ratings * weights).sum(axis=0) / weights.sum()
    return sorted(
        zip(scores.tolist(), data.columns),
        key=lambda pair: pair[0],
        reverse=True)


# Rank all movies for Matt. The result covers every column of `matrix`,
# including movies Matt has already rated.
recommand(matrix, "Matt")

[(1.7278449282514003, 'B'),
 (1.248953981626073, 'A'),
 (1.0746862157300092, 'C')]

In [147]:
# Item-based view: transpose so that movies become the rows, then rank every
# other movie by its similarity to movie "A" (most similar first).
sorted(
    (
        (similarity("A", movie, matrix.T), movie)
        for movie in matrix.columns
        if movie != "A"
    ),
    key=lambda pair: pair[0],
    reverse=True)

[(0.1665556739300633, 'C'), (0.16597681693032765, 'B')]