A função "get_content_based_predictions()" pode ser criada de várias maneiras, dependendo das informações que você tem e do problema específico que está tentando resolver. Aqui está um exemplo geral de como ela poderia ser implementada:

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
#Importando Dataset
ratings = pd.read_csv("/content/drive/MyDrive/recomendacao/ratings_small.csv")

In [None]:
def Convert(string):
      li = list(string.split("|"))
      return li 

In [None]:
#Pegando as features dos filmes
filmes_avaliados = ratings["movieId"].tolist()

movies = pd.read_csv("/content/drive/MyDrive/recomendacao/movies_small.csv")

In [None]:
movies = movies.query('movieId in @filmes_avaliados')
movies['genres'] = movies['genres'].apply(Convert)
#Removendo o ano da coluna título
movies['title'] = movies.title.str.replace('(\(\d\d\d\d\))', '')
#Assegurando que removemos todos os espaços em branco da palavra
movies['title'] = movies['title'].apply(lambda x: x.strip())
#Mudando um dos generos
movies['genres'] = movies['genres'].replace(['(no genres listed)'], 'Others')
movies = movies.explode('genres')

In [None]:
movies.set_index("movieId", inplace=True, drop=True)

In [None]:
# Iremos criar vetores one-hot encoding que irão vetores para cada genero
movies_emb = pd.get_dummies(movies, columns = ['genres'])
movies_emb.columns = movies_emb.columns.str.replace("genres_", "")
movies_emb = movies_emb.groupby(['movieId', 'title'], as_index=False).sum()

In [None]:
movies_emb.head()

Unnamed: 0,movieId,title,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story,0,0,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji,0,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
4,5,Father of the Bride Part II,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
movie_genres = list(movies.genres.unique())

In [None]:
df3 = movies_emb.merge(ratings, on='movieId')
df3 = df3[["userId","movieId",'title',"rating"] + movie_genres]
df4 = df3[movie_genres].multiply(df3["rating"], axis="index")
df5 = df3[["userId","movieId",'title',"rating"]]
df_final = pd.concat([df5, df4], axis=1)

In [None]:
df_final.head()

Unnamed: 0,userId,movieId,title,rating,Adventure,Animation,Children,Comedy,Fantasy,Romance,...,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
0,1,1,Toy Story,4.0,4.0,4.0,4.0,4.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5,1,Toy Story,4.0,4.0,4.0,4.0,4.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,7,1,Toy Story,4.5,4.5,4.5,4.5,4.5,4.5,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,15,1,Toy Story,2.5,2.5,2.5,2.5,2.5,2.5,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,17,1,Toy Story,4.5,4.5,4.5,4.5,4.5,4.5,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
user_genre = df_final[["userId"] + movie_genres]
total_genre = user_genre.groupby(['userId'], as_index=False).sum()
normalaize_genre = total_genre.iloc[:,1:].div(total_genre.iloc[:,1:].sum(axis=1), axis=0).round(4)
users = total_genre["userId"]
total_genre_normalize = pd.concat([users, normalaize_genre], axis=1)

In [None]:
total_genre_normalize.head()

Unnamed: 0,userId,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,...,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
0,1,0.1231,0.0449,0.063,0.1172,0.0667,0.037,0.1017,0.1284,0.0647,...,0.0195,0.0248,0.0558,0.0327,0.034,0.0,0.0,0.0099,0.0017,0.0
1,2,0.0428,0.0,0.0,0.0959,0.0,0.0154,0.226,0.149,0.1301,...,0.0103,0.0274,0.0531,0.0154,0.0,0.0445,0.0514,0.012,0.0,0.0
2,3,0.1154,0.0077,0.0096,0.0346,0.0519,0.0096,0.0462,0.1923,0.0038,...,0.1442,0.0192,0.2423,0.0096,0.0019,0.0,0.0,0.0,0.0,0.0
3,4,0.0581,0.0132,0.0208,0.2002,0.0384,0.1075,0.2293,0.0455,0.0565,...,0.0093,0.0439,0.0187,0.0137,0.0351,0.0044,0.0016,0.0208,0.0088,0.0
4,5,0.0558,0.0558,0.0794,0.1116,0.0622,0.073,0.2039,0.0601,0.0987,...,0.0064,0.0086,0.0107,0.0215,0.0472,0.0,0.0236,0.0129,0.0,0.0


In [None]:
def recommendation(userId, movie_embedding, total_genre_normalize):
  user_1 = total_genre_normalize[total_genre_normalize["userId"]==userId].iloc[:,1:].values
  rec_user = movie_embedding.iloc[:,2:].mul(user_1 , axis=1)
  movies = movie_embedding[["movieId",'title']]
  rec_user_1 = pd.concat([movies, rec_user], axis=1)
  rec_user_1 = rec_user_1[["movieId","title"] + movie_genres]
  rec_user_1['total'] = rec_user_1[['movieId'] + movie_genres].iloc[:,2:].sum(axis=1).values
  rec_user = rec_user_1[["movieId","title", "total"]]
  user_1_movies = df_final[df_final["userId"]==userId]
  movie_watched = list(user_1_movies.title.unique())
  not_watched = rec_user[~rec_user["title"].isin(movie_watched)]
  dfsim = not_watched.sort_values("total", ascending=False).head(10)
  return dfsim

In [None]:
recommendation(1, movies_emb, total_genre_normalize)

Unnamed: 0,movieId,title,total
5543,26701,Patlabor: The Movie (Kidô keisatsu patorebâ: T...,0.3919
7450,81847,Tangled,0.3846
7164,72294,"Christmas Carol, A",0.3796
3455,4719,Osmosis Jones,0.3754
8882,134853,Inside Out,0.3608
1583,2123,All Dogs Go to Heaven,0.3608
511,594,Snow White and the Seven Dwarfs,0.3565
7343,78637,Shrek Forever After (a.k.a. Shrek: The Final C...,0.3519
7338,78499,Toy Story 3,0.3519
5805,32031,Robots,0.3519


In [None]:
!pip install surprise -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/772.0 KB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.3/772.0 KB[0m [31m6.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 KB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone


In [None]:
from surprise import SVD, Reader, Dataset, KNNWithMeans
from surprise.model_selection import train_test_split

In [None]:
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.25)

In [None]:
algo_svd = SVD()
algo_svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f2adaab7b80>

In [None]:
movies_ids = list(recommendation(1, movies_emb, total_genre_normalize)["movieId"].values)

In [None]:
movies_ids

[26701, 81847, 72294, 4719, 134853, 2123, 594, 78637, 78499, 32031]

In [None]:
user_id = 1
my_recs = []
for iid in movies_ids:
 my_recs.append((iid, algo_svd.predict(uid=1,iid=iid).est))
pd.DataFrame(my_recs, columns=['iid', 'predictions']).sort_values('predictions', ascending=False).head(10)

Unnamed: 0,iid,predictions
8,78499,4.852693
1,81847,4.727119
6,594,4.454815
4,134853,4.374111
5,2123,4.152068
2,72294,4.114762
9,32031,4.099699
0,26701,4.041749
7,78637,4.028522
3,4719,3.854358


In [None]:
def collaborative_filtering(user_id,movies_emb, total_genre_normalize):
  movies_ids = list(recommendation(user_id, movies_emb, total_genre_normalize)["movieId"].values)
  my_recs = []
  for iid in movies_ids:
    my_recs.append((iid, algo_svd.predict(uid=1,iid=iid).est))
  df = pd.DataFrame(my_recs, columns=['iid', 'predictions']).sort_values('predictions', ascending=False).head(10)
  return df

In [None]:
collaborative_filtering(4,movies_emb, total_genre_normalize)

Unnamed: 0,iid,predictions
3,1148,4.970673
0,2987,4.635786
4,72226,4.615231
5,79091,4.487994
9,55167,4.311146
8,2090,4.247312
6,26701,4.041749
2,8253,4.035374
1,4719,3.854358
7,1405,3.715516
