In [19]:
# Project-local bootstrap helper. Presumably configures Django settings so the
# ORM models imported below can be used from the notebook — confirm in
# setup_django. It has to run before any Django model import.
import setup_django
setup_django.init()

In [20]:
import numpy as np
import pandas as pd
from django.contrib.auth import get_user_model
from django.db.models import F, Count, Sum, IntegerField
from movies.models import Movie
from ratings.models import Rating
import random

# Resolve the project's active user model instead of importing a concrete class.
User = get_user_model()

In [21]:
# Seed rating for the walkthrough: the first rating with value >= 5 made by
# the user named 'cfe'.
rating_obj = (
    Rating.objects
    .filter(user__username='cfe', value__gte=5)
    .first()
)
# Display the object this rating points at.
rating_obj.content_object

<Movie: The Matrix (1999)>

In [4]:
# Pull out the three values needed to describe the seed rating.
movie_id, value, user_id = (
    rating_obj.content_object.id,
    rating_obj.value,
    rating_obj.user_id,
)

In [5]:
# One-line summary of the seed rating: which object, what value, which user id.
print(f"Movie {movie_id} was rated {value} by {user_id}")

Movie 2571 was rated 5 by 1


In [6]:
# Ids of every *other* user who rated the same object (same object_id and
# content_type) at least as highly as the seed rating.
other_user_ids = (
    Rating.objects
    .filter(
        object_id=rating_obj.object_id,
        content_type=rating_obj.content_type,
        value__gte=rating_obj.value,
    )
    .exclude(user=rating_obj.user)
    .values_list('user_id', flat=True)
)

# Everything those users rated 4 or higher.
highly_rated = Rating.objects.filter(
    user_id__in=other_user_ids,
    value__gte=4,
)

In [7]:
# Collect the distinct users and the distinct rated object ids behind
# `highly_rated`, preserving first-seen order.
#
# - The loop variable is deliberately NOT called `rating_obj`: reusing that
#   name would overwrite the seed rating selected earlier, so re-running the
#   earlier cells afterwards would silently operate on a different rating.
# - `select_related('user')` loads each rating's user in the same query
#   instead of issuing one extra query per row.
# - Dicts keyed by id give O(1) de-duplication while keeping insertion order
#   (a list membership test is O(n) per row).
users_by_id = {}
seen_object_ids = {}
for rating in highly_rated.select_related('user'):
    users_by_id.setdefault(rating.user_id, rating.user)
    seen_object_ids.setdefault(rating.object_id, None)

rec_users = list(users_by_id.values())
rec_movies = list(seen_object_ids)

In [8]:
# Number of users collected, and number of distinct rated object ids.
print(len(rec_users), len(set(rec_movies)))

142 3761


In [9]:
import numpy as np

In [10]:
# Write a list of 2 movies attributes
movie_1 = ['sci-fi', 'comedy', 'Stars latest action movie star']
movie_2 = ['historical fiction', 'drama', 'Stars latest action movie star']

# Write a list of a User's preferences
user_a = ['comedy', 'sci-fi', 'Stars latest action movie star']
print(f"User has {len(user_a)} primary preferences")

# Total number of potential features
num_unique_features = len(set(movie_1 + movie_2))
print(f"These movies have {num_unique_features} unique features.")

User has 3 primary preferences
These movies have 5 unique features.


Convert the user preferences list into a numerical score between -1 and 1

In [11]:
# The user's preference strength per attribute, one score per attribute.
my_preferences = np.array([0.98, 0.85, 0.75])

Convert movie attributes into a numerical score between -1 and 1 based on the following 3 attributes:

- Sci-Fi
- Comedy
- Stars latest action hero

In [12]:
# Per-attribute scores for the first example movie (presumably movie_1 — the
# name does not say; confirm).
movie_attribute_scores = np.array([0.96, 0.99, 0.92])

# Per-attribute scores for the second example movie.
movie_2_attribute_scores = np.array([-0.93, -0.45, 0.5])


Predict the user's likelihood of enjoying either movie using a dot product: multiply the preference and attribute scores element-wise, then sum the results.

In [13]:
# Element-wise product of preferences and movie scores, summed into one number.
np.sum(my_preferences * movie_attribute_scores)

2.4723

In [14]:
# Same scoring as above, applied to the second movie's attribute scores.
np.sum(my_preferences * movie_2_attribute_scores)

-0.9189

In [15]:
# Bounds of the score for three features: every pair fully agrees (+1 * +1)
# versus every pair fully disagrees (+1 * -1).
unit_scores = np.ones(3, dtype=int)
max_score = np.sum(unit_scores * unit_scores)
min_score = np.sum(unit_scores * -unit_scores)
print(max_score, min_score)

3 -3


This notebook was inspired by Chapter 8 in [Deep Learning for Coders](https://amzn.to/3Bs48Sz) by Jeremy Howard & Sylvain Gugger.

In [22]:
# First 15 movies from the project's `popular()` queryset method (presumably
# ordered by popularity — confirm in movies.models).
ids = Movie.objects.all().popular()[:15]

# Users ordered by how many ratings they have made, capped at 5,000.
# `values('user_id')` must come BEFORE `annotate()` so the Count is grouped
# per user. Annotating first groups per Rating row, which makes every
# user_count equal to 1 and the ordering meaningless.
most_active_users = (
    Rating.objects
    .values('user_id')
    .annotate(user_count=Count('pk'))
    .order_by('-user_count')
    .values_list('user_id', flat=True)[:5_000]
)

# Active, non-negative ratings of those movies by those users, flattened to
# plain dicts with the rated object id exposed as `movie`.
ratings_qs = Rating.objects.filter(
    object_id__in=ids,
    user_id__in=most_active_users,
    active=True, value__gte=0
).annotate(movie=F("object_id")).values('user', 'value', 'movie')

# One row per rating, with columns: user, value, movie.
df = pd.DataFrame(ratings_qs)

In [23]:
# User x movie grid of rating values; cells with no rating are shown blank.
# Only the first 15 users are displayed.
df.pivot_table(
    values='value',
    index='user',
    columns=['movie'],
    fill_value='',
).head(15)

movie,1,110,260,296,318,356,527,589,858,1196,1198,1210,1270,2571,2858
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,5.0,,,4.0,1.0,5.0,,5.0,,5.0,4.0,5.0,5.0,5.0,
2,,4.0,,4.0,,3.0,4.0,5.0,,,,,,,
3,,4.0,,5.0,5.0,5.0,3.0,,,,,3.0,,,4.0
4,,,5.0,5.0,,5.0,,5.0,5.0,5.0,5.0,5.0,5.0,,
5,,,,,,4.0,,,3.0,,,,,,
6,,,,,,,,,,,,,,1.0,
7,3.0,5.0,5.0,,5.0,3.0,,3.0,,5.0,5.0,5.0,3.0,,
8,,4.0,4.0,4.0,5.0,4.0,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,5.0
9,4.0,,,,4.0,,5.0,,,,,,,5.0,
10,,,,,4.0,,,,,4.0,4.0,4.0,,5.0,


In [18]:
# Five placeholder features, all set to 1 for both the movie and the user,
# so the summed element-wise product equals the number of features.
movie_1 = np.ones(5, dtype=int)
user_1 = np.ones(5, dtype=int)
np.sum(user_1 * movie_1)

5

```python
movie_1_features = np.array(["x", "y", "z", "xx", "yy"])
user_1_preferences = np.array(["x", "y", "z", "xx", "yy"])
assert (user_1_preferences * movie_1_features).sum() == 5
```

These are called latent features. They are hidden, and they are what the machine learning model will try to find for us.