# Imports & Data



In [1]:
from typing import Tuple, Callable, List, Optional, Dict

import numpy as np
from numpy.core.multiarray import ndarray

# Sample data set used throughout the notebook: maps each critic's name to a
# dictionary of {movie title: rating}. Not every critic has rated every movie
# (e.g. 'Toby' has only three ratings).
critics = {'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
                         'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5,
                         'The Night Listener': 3.0},
           'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5,
                            'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0,
                            'You, Me and Dupree': 3.5},
           'Michael Phillips': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0,
                                'Superman Returns': 3.5, 'The Night Listener': 4.0},
           'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0,
                            'The Night Listener': 4.5, 'Superman Returns': 4.0,
                            'You, Me and Dupree': 2.5},
           'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
                            'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0,
                            'You, Me and Dupree': 2.0},
           'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
                             'The Night Listener': 3.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5},
           'Toby': {'Snakes on a Plane': 4.5, 'You, Me and Dupree': 1.0, 'Superman Returns': 4.0}}


# Similarity

## Data view


```python
critics={'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5, 'The Night Listener': 3.0}}
```



## Formulas
### Euclidean distance: 

$\sqrt{\sum_{i=1}^{n} (x_i - y_i)^2}$

### Pearson correlation score:
$\frac{\sum_{i=1}^{n} x_i y_i - \frac{\left(\sum_{i=1}^{n} x_i\right) \left(\sum_{i=1}^{n} y_i\right)}{n}}{\sqrt{\left(\sum_{i=1}^{n} x_i^2 - \frac{\left(\sum_{i=1}^{n} x_i\right)^2}{n}\right) \left(\sum_{i=1}^{n} y_i^2 - \frac{\left(\sum_{i=1}^{n} y_i\right)^2}{n}\right)}}$  


In [2]:

def intersection_of_users_rating(user1: dict, user2: dict) -> Tuple[ndarray, ndarray]:
    """
    Find items which both users rated and return the ratings for these items.
    :param user1: Dictionary of user ratings.
    :param user2: Dictionary of compared user ratings.
    :return: Two equally long numpy arrays with the ratings of user1 and of user2
    for the shared items; position i in both arrays refers to the same item.
    Items appear in the key order of user1. Both arrays are empty if the users
    have no item in common.
    """
    # Walk user1 in its own key order instead of iterating a set intersection:
    # the iteration order of a set of strings can change between interpreter
    # runs (hash randomization), which makes downstream floating-point results
    # non-reproducible.
    shared_items = [item for item in user1 if item in user2]
    user1_ratings = np.array([user1[item] for item in shared_items])
    user2_ratings = np.array([user2[item] for item in shared_items])
    return user1_ratings, user2_ratings


def sim_distance(user1: dict, user2: dict) -> float:
    """
    Score how alike two users are, based on the Euclidean distance between
    their ratings of the items both of them rated.
    :param user1: Dictionary of user ratings.
    :param user2: Dictionary of compared user ratings.
    :return: Similarity computed as 1 / (1 + distance): 1 when the shared
    ratings are identical, approaching 0 as they drift apart.
    Return zero if users have nothing common.
    """
    ratings_a, ratings_b = intersection_of_users_rating(user1, user2)

    nothing_shared = len(ratings_a) == 0 or len(ratings_b) == 0
    if nothing_shared:
        return 0

    # The norm of the difference vector is the Euclidean distance between
    # the two users' ratings of the shared items.
    gap = np.linalg.norm(ratings_a - ratings_b)
    return 1 / (gap + 1)


def sim_pearson(user1: dict, user2: dict) -> float:
    """
    Calculate similarity for two users with using Pearson correlation.
    :param user1: User ratings.
    :param user2: Compared user ratings.
    :return: Pearson correlation score, number between -1 and 1.
    Return zero if the correlation is undefined: the users share fewer than
    two items, or one of them gave the same rating to every shared item.
    """
    user1_ratings, user2_ratings = intersection_of_users_rating(user1, user2)

    # A correlation needs at least two observations; with zero or one shared
    # item np.corrcoef would fail or yield NaN.
    if len(user1_ratings) < 2 or len(user2_ratings) < 2:
        return 0.0

    # Constant ratings have zero variance, so the correlation would be 0/0
    # (NaN plus a RuntimeWarning). NaN would then corrupt sorting and the
    # weighted sums built from this score, so report "no correlation" instead.
    if (user1_ratings.min() == user1_ratings.max()
            or user2_ratings.min() == user2_ratings.max()):
        return 0.0

    # corrcoef returns the 2x2 correlation matrix; the off-diagonal entry is
    # the correlation between the two rating vectors.
    return np.corrcoef(user1_ratings, user2_ratings)[0, 1]


## Recommendations


In [7]:
def top_matches(ratings: dict, user: str, n: Optional[int] = None,
                similarity: Callable[[dict, dict], float] = sim_pearson) -> List[Tuple[float, str]]:
    """
    Find top matches for selected user.
    :param ratings: Mapping of user name to that user's ratings dictionary.
    :param user: Name of user; must be a key of ``ratings``.
    :param n: Maximum number of matches to return. ``None`` returns all
    matches, ``0`` returns an empty list.
    :param similarity: Function for calculating similarity between two users.
    :return: List of ``(similarity score, other user name)`` tuples for every
    other user, sorted with the highest score first.
    :raises KeyError: If ``user`` is missing from ``ratings`` while other
    users are present.
    """
    scores = sorted(
        ((similarity(ratings[user], ratings[other]), other)
         for other in ratings if other != user),
        reverse=True,
    )
    # Compare against None explicitly: a plain truthiness test would treat
    # n == 0 as "no limit" and return every match instead of none.
    return scores if n is None else scores[:n]


def get_recommendations(ratings: Dict[str, dict], user: str, n: Optional[int] = None,
                        similarity: Callable[[dict, dict], float] = sim_pearson) -> List[Tuple[float, str]]:
    """
    Recommend items the user has not rated yet, predicting each rating as the
    similarity-weighted average of the ratings given by other users.
    :param ratings: Mapping of user name to that user's ``{item: rating}`` dictionary.
    :param user: Name of the user to build recommendations for.
    :param n: Number of most similar users to take into account; ``None`` uses all of them.
    :param similarity: Function for calculating similarity between two users.
    :return: List of ``(predicted rating, item)`` tuples, highest predicted rating first.
    """
    total_rating_for_item = {}
    sum_similarities = {}

    for score, compared_user in top_matches(ratings, user, n, similarity):
        # Only positively similar users contribute. `not score > 0` also
        # rejects NaN scores, which a `score <= 0` test would let through.
        if not score > 0:
            continue
        # .items() is required here: iterating the dict itself yields only
        # the item names, which cannot be unpacked into (item, rating).
        for item, rating in ratings[compared_user].items():
            # Skip items the user already rated (a missing or zero rating
            # counts as "not rated").
            if ratings[user].get(item):
                continue
            total_rating_for_item[item] = total_rating_for_item.get(item, 0) + rating * score
            # Sum of similarities, used below to normalize the weighted total.
            sum_similarities[item] = sum_similarities.get(item, 0) + score

    # Normalize each weighted total into a predicted rating and return the
    # predictions sorted from best to worst.
    return sorted(
        ((total / sum_similarities[item], item)
         for item, total in total_rating_for_item.items()),
        reverse=True,
    )


def transform_ranting(ranting: Dict[str, dict]):
    """
    Swap the two levels of a nested ratings dictionary.
    :param ranting: Mapping of person to that person's ``{item: rating}`` dictionary.
    :return: Mapping of item to ``{person: rating}`` holding the same ratings.
    """
    by_item = {}
    for reviewer, scores in ranting.items():
        for title, score in scores.items():
            # Create the per-item dictionary on first sight, then file the
            # reviewer's rating under it.
            by_item.setdefault(title, {})[reviewer] = score
    return by_item


In [10]:
# Invert the critic -> {movie: rating} data into movie -> {critic: rating}, so
# the same similarity functions can compare movies instead of critics.
movies = transform_ranting(critics)
# Rank every other movie by its similarity to 'Superman Returns', using the
# default similarity function of top_matches.
top_matches(movies, 'Superman Returns')


KeyError: 'Supermans Returns'