# Implementação do Sistema de Recomendação


## Importando Bibliotecas


In [101]:
import numpy as np
import pandas as pd
import warnings
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

warnings.simplefilter(action='ignore', category=FutureWarning)

## Carregando conjuntos de dados


In [102]:
# Ratings table; the preview below shows its first five rows.
ratings = pd.read_csv(filepath_or_buffer="ratings.csv")
print(ratings.head(n=5))

   book_id  user_id  rating
0        1      314       5
1        1      439       3
2        1      588       5
3        1     1169       4
4        1     1185       4


In [104]:
# Book metadata table; the preview below shows its first five rows.
books = pd.read_csv(filepath_or_buffer="books.csv")
print(books.head(n=5))

   book_id                      authors  \
0        1              Suzanne Collins   
1        2  J.K. Rowling, Mary GrandPré   
2        3              Stephenie Meyer   
3        4                   Harper Lee   
4        5          F. Scott Fitzgerald   

                             original_title  \
0                          The Hunger Games   
1  Harry Potter and the Philosopher's Stone   
2                                  Twilight   
3                     To Kill a Mockingbird   
4                          The Great Gatsby   

                                               title  
0            The Hunger Games (The Hunger Games, #1)  
1  Harry Potter and the Sorcerer's Stone (Harry P...  
2                            Twilight (Twilight, #1)  
3                              To Kill a Mockingbird  
4                                   The Great Gatsby  


## Análise Estatística de Classificações


In [105]:
# Overall dataset size: total rating rows and distinct books / users.
n_ratings = ratings.shape[0]
n_books = ratings['book_id'].nunique(dropna=False)
n_users = ratings['user_id'].nunique(dropna=False)

# Report the raw counts and the average number of ratings per user and book.
print(f"Number of ratings: {n_ratings}")
print(f"Number of unique book_id's: {n_books}")
print(f"Number of unique users: {n_users}")
print(f"Average ratings per user: {round(n_ratings/n_users, 2)}")
print(f"Average ratings per book: {round(n_ratings/n_books, 2)}")

Number of ratings: 981756
Number of unique book_id's: 10000
Number of unique users: 53424
Average ratings per user: 18.38
Average ratings per book: 98.18


## Frequência de avaliação do usuário


In [106]:
# Number of ratings submitted by each user, one row per user_id.
user_freq = (
    ratings.groupby('user_id')['book_id']
    .count()
    .rename('n_ratings')
    .reset_index()
)
print(user_freq.head())

   user_id  n_ratings
0        1          3
1        2          3
2        3          2
3        4          3
4        5          5


## Análise de classificação de livros


In [107]:
# Group the ratings by book once and reuse the grouping for both summaries.
ratings_by_book = ratings.groupby('book_id')[['rating']]

# Mean rating per book, used to locate the worst and best rated book ids.
mean_rating = ratings_by_book.mean()
lowest_rated = mean_rating['rating'].idxmin()
highest_rated = mean_rating['rating'].idxmax()

# Keep the lookups in named variables: a bare expression in the middle of a
# cell is evaluated and then discarded without being displayed.
lowest_rated_book = books.loc[books['book_id'] == lowest_rated]
highest_rated_book = books.loc[books['book_id'] == highest_rated]
highest_rated_ratings = ratings[ratings['book_id'] == highest_rated]
lowest_rated_ratings = ratings[ratings['book_id'] == lowest_rated]

# Per-book rating count and mean; droplevel flattens the two-level column
# index produced by agg into plain 'count' / 'mean' columns.
book_stats = ratings_by_book.agg(['count', 'mean'])
book_stats.columns = book_stats.columns.droplevel()

## Criação da matriz usuário-item


In [108]:
def create_matrix(df):
    """Build a sparse book-by-user rating matrix and its id/index lookups.

    Args:
        df: DataFrame with ``user_id``, ``book_id`` and ``rating`` columns.

    Returns:
        A 5-tuple ``(X, user_mapper, book_mapper, user_inv_mapper,
        book_inv_mapper)`` where:

        * ``X`` is a ``csr_matrix`` of shape ``(n_books, n_users)``; entry
          ``[i, j]`` holds the rating given to book ``i`` by user ``j``.
        * ``user_mapper`` / ``book_mapper`` map a raw id to its column / row
          index; ids are numbered in sorted order.
        * ``user_inv_mapper`` / ``book_inv_mapper`` map an index back to the
          raw id.

    Note:
        ``csr_matrix`` sums entries that share the same (row, column), so
        repeated (book_id, user_id) rows in ``df`` are added together.
    """
    # A single np.unique call per column gives the sorted distinct ids and,
    # through return_inverse, the position of every row's id in that sorted
    # array -- the same index a per-row dictionary lookup would produce.
    user_ids, user_index = np.unique(
        df["user_id"].to_numpy(), return_inverse=True)
    book_ids, book_index = np.unique(
        df["book_id"].to_numpy(), return_inverse=True)

    N = len(user_ids)
    M = len(book_ids)

    user_mapper = dict(zip(user_ids, range(N)))
    book_mapper = dict(zip(book_ids, range(M)))

    user_inv_mapper = dict(zip(range(N), user_ids))
    book_inv_mapper = dict(zip(range(M), book_ids))

    # Rows are books and columns are users.
    X = csr_matrix((df["rating"], (book_index, user_index)), shape=(M, N))

    return X, user_mapper, book_mapper, user_inv_mapper, book_inv_mapper


# Build the rating matrix and the id <-> index lookup tables from the full
# ratings table.
(X,
 user_mapper,
 book_mapper,
 user_inv_mapper,
 book_inv_mapper) = create_matrix(ratings)

## Análise de similaridade de livros


In [111]:
def find_similar_books(book_id, X, k, metric='cosine', show_distance=False):
    """Find the ``k`` books whose rating vectors are nearest to ``book_id``.

    Relies on the module-level ``book_mapper`` / ``book_inv_mapper`` lookup
    tables to translate between raw book ids and row indices of ``X``.

    Args:
        book_id: Raw id of the query book; must be a key of ``book_mapper``.
        X: Rating matrix with one row per book, indexed as in ``book_mapper``.
        k: Number of similar books to return.
        metric: Distance metric forwarded to ``NearestNeighbors``.
        show_distance: When true, return ``(book_id, distance)`` pairs
            instead of bare book ids.

    Returns:
        A list of at most ``k`` book ids ordered from nearest to farthest, or
        a list of ``(book_id, distance)`` tuples when ``show_distance`` is
        true.

    Raises:
        KeyError: If ``book_id`` is not present in ``book_mapper``.
    """
    book_ind = book_mapper[book_id]
    book_vec = X[book_ind].reshape(1, -1)

    # Request one extra neighbour because the query book is normally its own
    # nearest neighbour, but never more neighbours than X has rows.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(
        n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)

    # With return_distance=True kneighbors returns (distances, indices);
    # unpacking both lets show_distance work instead of failing on a tuple.
    distances, indices = kNN.kneighbors(book_vec, return_distance=True)

    # Drop the query book by index rather than by position, so books with an
    # identical rating vector cannot push it out of first place and leave it
    # in the result.
    neighbours = [
        (book_inv_mapper[int(ind)], float(dist))
        for ind, dist in zip(indices[0], distances[0])
        if ind != book_ind
    ][:k]

    if show_distance:
        return neighbours
    return [neighbour_id for neighbour_id, _ in neighbours]


# Lookup table from book_id to display title.
book_titles = books.set_index('book_id')['title'].to_dict()

# Demo: list the ten nearest neighbours of one example book.
book_id = 3
book_title = book_titles[book_id]
similar_ids = find_similar_books(book_id, X, k=10)

print(f"Since you watched {book_title}")
for similar_id in similar_ids:
    print(book_titles[similar_id])

Since you watched Twilight (Twilight, #1)
Harry Potter and the Sorcerer's Stone (Harry Potter, #1)
Pride and Prejudice
The Hunger Games (The Hunger Games, #1)
Memoirs of a Geisha
The Kite Runner
The Catcher in the Rye
The Alchemist
The Giver (The Giver, #1)
The Diary of a Young Girl
The Fellowship of the Ring (The Lord of the Rings, #1)


## Recomendação de livros com relação à preferência dos usuários


In [112]:
def recommend_books_for_user(user_id, X, k=10):
    """Print ``k`` books similar to the book that ``user_id`` rated highest.

    Reads the module-level ``ratings`` and ``books`` frames and delegates the
    similarity search to ``find_similar_books``. A message is printed and the
    function returns early when the user has no ratings or when the chosen
    book has no entry in ``books``.

    Args:
        user_id: Id of the user to recommend for.
        X: Rating matrix passed through to ``find_similar_books``.
        k: Number of recommendations to print.

    Returns:
        None. All results are printed.
    """
    df1 = ratings[ratings['user_id'] == user_id]

    if df1.empty:
        print(f"User with ID {user_id} does not exist.")
        return

    # First book (in table order) that carries the user's highest rating.
    top_rating = df1['rating'].max()
    book_id = df1.loc[df1['rating'] == top_rating, 'book_id'].iloc[0]

    book_titles = dict(zip(books['book_id'], books['title']))

    # Check for the title before searching: the neighbour search is the
    # expensive step and is pointless if we are about to bail out anyway.
    if book_id not in book_titles:
        print(f"Book with ID {book_id} not found.")
        return

    book_title = book_titles[book_id]
    similar_ids = find_similar_books(book_id, X, k)

    print(f"Since you watched {book_title}, you might also like:")
    for i in similar_ids:
        print(book_titles.get(i, "Book not found"))

## Recomendando os livros


In [116]:
# Target user for the demo; replace with the desired user ID.
user_id = 180
recommend_books_for_user(user_id=user_id, X=X, k=10)

Since you watched The Looming Tower: Al-Qaeda and the Road to 9/11, you might also like:
Ghost Wars: The Secret History of the CIA, Afghanistan, and bin Laden from the Soviet Invasion to September 10, 2001
The Big Short: Inside the Doomsday Machine
Too Big to Fail: The Inside Story of How Wall Street and Washington Fought to Save the Financial System from Crisis — and Themselves
The Making of the Atomic Bomb
Going Clear: Scientology, Hollywood, and the Prison of Belief
Dispatches
Moneyball: The Art of Winning an Unfair Game
Train Dreams
Liar's Poker
The Signal and the Noise: Why So Many Predictions Fail - But Some Don't
