In [32]:
import pandas as pd


def string_to_set(string):
    """Parse the text form of a set literal into a set of stripped tokens.

    Curly braces, backslashes and both kinds of quote characters are removed,
    the remainder is stripped and split on commas, and each piece is stripped
    of surrounding whitespace.  Empty pieces are kept, so an empty input
    yields ``{''}``.
    """
    cleaned = string
    for unwanted in ('{', '}', '\\', "'", '"'):
        cleaned = cleaned.replace(unwanted, '')
    return {piece.strip() for piece in cleaned.strip().split(',')}


class BooksToFilmsMetrics:
    """Set-overlap metrics for book-to-film recommendations.

    Both CSV files must provide a ``lemmas_inter`` column holding the text
    form of a set; it is parsed with ``string_to_set`` at load time.  Books
    are addressed by the index label of the books frame (the first CSV
    column), films by the value of their ``id`` column.
    """

    def __init__(self, books_path: str, films_path: str) -> None:
        """Load both tables and turn their ``lemmas_inter`` columns into sets.

        Args:
            books_path: CSV file with the books; its first column becomes the index.
            films_path: CSV file with the films; its first column becomes the
                index and it must additionally contain an ``id`` column.
        """
        self._books = pd.read_csv(books_path, index_col=0)
        self._books['lemmas_inter'] = self._books['lemmas_inter'].apply(string_to_set)

        self._films = pd.read_csv(films_path, index_col=0)
        self._films['lemmas_inter'] = self._films['lemmas_inter'].apply(string_to_set)

    def _book_lemmas(self, book_id: int) -> set:
        """Return the lemma set of the book whose index label is ``book_id``."""
        return self._books.at[book_id, 'lemmas_inter']

    def _film_lemmas(self, film_id: int) -> set:
        """Return the lemma set of the single film whose ``id`` equals ``film_id``.

        ``Series.item()`` raises ``ValueError`` unless exactly one row matches.
        """
        return self._films.loc[self._films['id'] == film_id, 'lemmas_inter'].item()

    def accuracy(self, book_id: int, reccomended_films_ids: list) -> list:
        """Share of the book's lemmas that each recommended film also has.

        Args:
            book_id: index label of the book in the books table.
            reccomended_films_ids: values of the films' ``id`` column.

        Returns:
            One ``len(film & book) / len(book)`` ratio per film, in input order.

        Raises:
            KeyError: if ``book_id`` is not in the books index.
            ValueError: if a film id does not match exactly one row.
            ZeroDivisionError: if the book's lemma set is empty.
        """
        book = self._book_lemmas(book_id)
        return [
            len(self._film_lemmas(film_id).intersection(book)) / len(book)
            for film_id in reccomended_films_ids
        ]

    def weighted_accuracy(self, book_id: int, reccomended_films_ids: list, book_weight: float = 1, film_weight: float = 2.5) -> list:
        """Overlap size normalised by a weighted sum of both set sizes.

        Args:
            book_id: index label of the book in the books table.
            reccomended_films_ids: values of the films' ``id`` column.
            book_weight: multiplier of the book's lemma count in the denominator.
            film_weight: multiplier of the film's lemma count in the denominator.

        Returns:
            One ``len(film & book) / (book_weight*len(book) + film_weight*len(film))``
            value per film, in input order.

        Raises:
            KeyError: if ``book_id`` is not in the books index.
            ValueError: if a film id does not match exactly one row.
            ZeroDivisionError: if the weighted denominator is zero.
        """
        book = self._book_lemmas(book_id)
        scores = []
        for film_id in reccomended_films_ids:
            film = self._film_lemmas(film_id)
            denominator = book_weight * len(book) + film_weight * len(film)
            scores.append(len(film.intersection(book)) / denominator)
        return scores

    def absolute_accuracy(self, book_id: int, reccomended_films_ids: list, l1: float = 1.0) -> list:
        """Overlap size minus ``l1`` times the book's lemma count, per book lemma.

        Args:
            book_id: index label of the book in the books table.
            reccomended_films_ids: values of the films' ``id`` column.
            l1: multiplier of the book's lemma count that is subtracted from
                the overlap size before normalising.

        Returns:
            One ``(len(film & book) - l1*len(book)) / len(book)`` value per
            film, in input order.

        Raises:
            KeyError: if ``book_id`` is not in the books index.
            ValueError: if a film id does not match exactly one row.
            ZeroDivisionError: if the book's lemma set is empty.
        """
        book = self._book_lemmas(book_id)
        book_size = len(book)
        scores = []
        for film_id in reccomended_films_ids:
            overlap = len(self._film_lemmas(film_id).intersection(book))
            scores.append((overlap - l1 * book_size) / book_size)
        return scores

    def cross_accuracy(self, book_id: int, reccomended_films_ids: list) -> float:
        """Product of pairwise lemma-overlap ratios among the recommended films.

        For every pair of list positions ``j < i`` the factor is
        ``len(film_i & film_j) / len(film_i)``, i.e. the ratio is normalised by
        the film that appears later in the list.

        Args:
            book_id: not used by this metric; kept so that all metrics share
                the same call signature.
            reccomended_films_ids: values of the films' ``id`` column.

        Returns:
            The product of all pair factors; ``1.0`` when fewer than two films
            are given.

        Raises:
            ValueError: if a film id does not match exactly one row.
            ZeroDivisionError: if a film at position 1 or later has an empty
                lemma set.
        """
        # Fetch each film once instead of re-scanning the table for every pair.
        film_lemmas = [self._film_lemmas(film_id) for film_id in reccomended_films_ids]

        acc = 1.0
        # Same multiplication order as a row-by-row walk over the lower
        # triangle of the pair matrix, so floating-point results are stable.
        for i, lemmas_i in enumerate(film_lemmas):
            for lemmas_j in film_lemmas[:i]:
                acc *= len(lemmas_i.intersection(lemmas_j)) / len(lemmas_i)
        return acc

In [33]:
# Build the metrics helper from the two lemma CSV files (paths are relative to the working directory).
metrics = BooksToFilmsMetrics(books_path='books_with_lemmas.csv', films_path='films_with_lemmas.csv')

In [34]:
# Product of the pairwise lemma-overlap ratios among the three recommended films.
metrics.cross_accuracy(7847, [12168, 4190, 21165])

0.5289256198347108

In [35]:
# Per film: share of the book's lemmas that also occur in the film's lemma set.
metrics.accuracy(7847, [12168, 4190, 21165])

[0.7777777777777778, 0.7777777777777778, 0.8888888888888888]

In [36]:
# Per film: overlap size divided by a weighted sum of both set sizes (default weights: book 1, film 2.5).
metrics.weighted_accuracy(7847, [12168, 4190, 21165])

[0.2413793103448276, 0.2413793103448276, 0.2191780821917808]

In [37]:
# Per film: accuracy shifted down by l1 (default 1.0), so the values here are at most 0.
metrics.absolute_accuracy(7847, [12168, 4190, 21165])

[-0.2222222222222222, -0.2222222222222222, -0.1111111111111111]