# <div align="center">Book Recommender System - Reinforcement Learning Recommendation Implementation</div>
## <div align="center">CP421 Final Project: Data Mining</div>
### <div align="center">Group 4</div>
#### <div align="center">Due on 06-Dec-2023 at 11:59 PM</div>

##### Imports: #####

In [1]:
import csv
import numpy as np
import pandas as pd
import gym
from gym import spaces

##### ::: Data Preprocessing ::: #####

In [2]:
'''
Load the Users, Books and Ratings CSV files from the local "data/" directory and clean them:
  * rows containing any null value are dropped from all three datasets;
  * Books: 'Year-Of-Publication' is parsed as a number (values that cannot be parsed
    become null and are dropped together with the other null rows), then stored as Int64;
  * Books: the three image-URL columns are removed;
  * Books: every remaining column except 'Year-Of-Publication' is cast to string.
'''

# CSV file names, in the order: users, books, ratings
file_paths = ['Users.csv', 'Books.csv', 'Ratings.csv']

# Users: load and drop rows with null values
userData = pd.read_csv("data/"+file_paths[0], quoting=csv.QUOTE_MINIMAL, quotechar='"').dropna()

# Books: load, coercing unparseable publication years to null so dropna() removes those rows
booksData = pd.read_csv("data/"+file_paths[1], quoting=csv.QUOTE_MINIMAL, quotechar='"',
                        converters={'Year-Of-Publication': lambda x: pd.to_numeric(x, errors='coerce')}).dropna()

# Drop the image-URL columns. This happens after dropna() on purpose: rows with a
# missing image URL are still filtered out, exactly as before.
columns_to_drop = ['Image-URL-S', 'Image-URL-M', 'Image-URL-L']
booksData = booksData.drop(columns=columns_to_drop)
booksData['Year-Of-Publication'] = booksData['Year-Of-Publication'].astype('Int64')

# Cast every other Books column to string (previously this list was built but never applied)
columns_to_convert = [col for col in booksData.columns if col != 'Year-Of-Publication']
booksData[columns_to_convert] = booksData[columns_to_convert].astype(str)

# Ratings: load and drop rows with null values
ratingsData = pd.read_csv("data/"+file_paths[2], quoting=csv.QUOTE_MINIMAL, quotechar='"').dropna()


# Disabled debug dump, kept as a string literal; as the last expression of the cell
# its text is what the cell echoes as output.
"""# Display data  of all columns in booksData
print("user data")
print(50 * "=")
print(userData)
print("book data")
print(50 * "=")
print(booksData)
print("rating data")
print(50 * "=")
print(ratingsData)"""


'# Display data  of all columns in booksData\nprint("user data")\nprint(50 * "=")\nprint(userData)\nprint("book data")\nprint(50 * "=")\nprint(booksData)\nprint("rating data")\nprint(50 * "=")\nprint(ratingsData)'

##### ::: Getting the User and their data for the Agent ::: #####

In [3]:
# User the agent will recommend for. The ID is hard-coded (not sampled at run time).
current_user = 35953

# Profile columns (ID, location, age) of that user
user_info = userData.loc[userData['User-ID'] == current_user, ['User-ID', 'Location', 'Age']]

# Every rating row recorded for that user
user_ratings = ratingsData.loc[ratingsData['User-ID'] == current_user]

# Report the selection: one item per line, with separator rules around the ratings heading
print(
    f"Randomly selected user ID: {current_user}",
    user_info,
    "=" * 50,
    "User's ratings:",
    "=" * 50,
    user_ratings,
    sep="\n",
)

Randomly selected user ID: 35953
       User-ID                   Location   Age
35952    35953  forest grove, oregon, usa  37.0
User's ratings:
        User-ID        ISBN  Book-Rating
163290    35953  0064434796            4
163291    35953  029270822X            0
163292    35953  0312959974            7
163293    35953  034542526X            0
163294    35953  0345435923            0
163295    35953  0375412530            8
163296    35953  0380702843            0
163297    35953  039913493X            0
163298    35953  0440174643            0
163299    35953  0440226406            0
163300    35953  0449006530            0
163301    35953  0451208439            0
163302    35953  0451520459            0
163303    35953  0671002481            8
163304    35953  0671795988            0
163305    35953  0671877070            0
163306    35953  0679417796            8
163307    35953  0684872153            0
163308    35953  0758204116            9
163309    35953  078688097X        

In [4]:
# Build one table with every catalogue book, annotated with the selected user's
# rating for it (if any) and a flag saying whether the user has a rating row for it.

# Distinct ISBNs present in the cleaned book catalogue
unique_books = pd.DataFrame({'ISBN': booksData['ISBN'].unique()})

# Left join keeps books the user never rated; their 'Book-Rating' is null for now.
# 'User-ID' is dropped because every matched row belongs to the same user.
user_book_ratings = (
    pd.merge(unique_books, user_ratings, on='ISBN', how='left')
    .drop(columns='User-ID')
)

# A book counts as read when the user has a rating row for it
user_book_ratings['Read'] = user_book_ratings['Book-Rating'].notnull()

# Attach the remaining book columns from the catalogue; the join key is stated explicitly
user_book_info = pd.merge(user_book_ratings, booksData, on='ISBN')

# -1 marks "no rating from this user". The filled column is assigned back instead of
# calling fillna(inplace=True) on a column selection, which does not reliably modify
# the parent frame and would leave NaN for astype(int) to fail on.
user_book_info['Book-Rating'] = user_book_info['Book-Rating'].fillna(-1).astype(int)

print(user_book_info)


              ISBN  Book-Rating   Read  \
0       0195153448           -1  False   
1       0002005018           -1  False   
2       0060973129           -1  False   
3       0374157065           -1  False   
4       0393045218           -1  False   
...            ...          ...    ...   
271349  0440400988           -1  False   
271350  0525447644           -1  False   
271351  006008667X           -1  False   
271352  0192126040           -1  False   
271353  0767409752           -1  False   

                                               Book-Title  \
0                                     Classical Mythology   
1                                            Clara Callan   
2                                    Decision in Normandy   
3       Flu: The Story of the Great Influenza Pandemic...   
4                                  The Mummies of Urumchi   
...                                                   ...   
271349                         There's a Bat in Bunk Five   
271350 

##### ::: Reinforcement Learning Recommender ::: #####

In [5]:
# Define a Q-learning agent
# navigates an environment, learns from experiences, and improves its decision-making 
# capabilities over time by updating Q-values based on observed rewards and actions

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    The Q-table holds one row per state and one column per action and starts
    out filled with zeros.
    """

    def __init__(self, action_space_size, state_space_size, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        """Create the zero-initialised Q-table and store the hyper-parameters.

        action_space_size: number of actions (Q-table columns).
        state_space_size: number of states (Q-table rows).
        learning_rate: step size applied to each Q-value correction.
        discount_factor: weight given to the best Q-value of the next state.
        epsilon: probability of picking a random action instead of the greedy one.
        """
        self.q_table = np.zeros((state_space_size, action_space_size))
        self.action_space_size = action_space_size
        self.epsilon = epsilon
        self.discount_factor = discount_factor
        self.learning_rate = learning_rate

    def choose_action(self, state):
        """Return an action index for `state` using the epsilon-greedy rule."""
        # With probability epsilon, explore: any action, uniformly at random
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.randint(self.action_space_size)
        # Otherwise exploit: the action with the highest Q-value for this state
        return np.argmax(self.q_table[state])

    def learn(self, state, action, reward, next_state, done):
        """Move Q[state, action] towards reward + discounted best next Q-value.

        When `done` is truthy the future term is multiplied by zero, so only
        the immediate reward contributes to the target.
        """
        best_future_q = np.max(self.q_table[next_state])
        td_target = reward + self.discount_factor * best_future_q * (1 - done)
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error

# Initialize Q-learning agent
# One action per distinct catalogue ISBN. state_space_size=1 gives the Q-table a
# single row, so state index 0 is the only state the agent can look up or update.
# NOTE(review): passing any larger state index to choose_action()/learn() raises
# IndexError - confirm that a single-state agent is the intended design.
agent = QLearningAgent(action_space_size=len(unique_books), state_space_size=1)


In [18]:
# Define a custom environment for book recommendation
class BookRecommendationEnv(gym.Env):
    """Interactive book-recommendation environment.

    State:  index of the current book in `books`, returned as a 1-element array.
    Action: index into `books` of the book being recommended.
    Reward: derived from the user's rating of the recommended book
            (see `calculate_reward`).

    `step` is interactive: it prints to stdout and reads the user's answers
    from stdin.
    """

    def __init__(self, books, ratings, agent):
        """Store the catalogue, the ratings table and the agent.

        books:   sequence of ISBNs, indexed by action and by state.
        ratings: DataFrame with 'ISBN' and 'Book-Rating' columns, where a rating
                 of -1 is treated as "unread". `step` updates it in place.
        agent:   the Q-learning agent; stored but not used by the environment itself.
        """
        self.books = books
        self.ratings = ratings
        self.book_attributes = ['Book-Author', 'Publisher', 'Year-Of-Publication']
        # Actions index into `books`, so the space is sized from the list that was
        # passed in rather than from a notebook-level global.
        self.action_space = spaces.Discrete(len(books))
        # NOTE(review): reset()/step() return a book index wrapped in an array, which
        # this 4-value space does not describe - confirm the intended observation space.
        self.observation_space = spaces.Discrete(1 + len(self.book_attributes))
        self.current_book_idx = 0
        self.done = False
        self.agent = agent  # Reference to the Q-learning agent

    def reset(self):
        """Start a new episode at book index 0 and return that state as an array."""
        self.current_book_idx = 0
        self.done = False
        return np.array([self.current_book_idx])

    def step(self, action):
        """Recommend `books[action]`, collect the user's feedback and advance the state.

        Returns the tuple (next_state, reward, done, info).
        Raises ValueError if the episode is already done.
        """
        # Check first, so a finished episode never prompts the user or changes ratings
        if self.done:
            raise ValueError("Episode is done. Please call reset to start a new episode.")

        # The recommendation is the book selected by the action. Feedback is gathered
        # for this same book so that the rating entered below is the one rewarded.
        bookID = self.books[action]
        print(f"Recommended book: {bookID}")

        user_feedback = input("Have you read the recommended book? (Y/N): ")
        if user_feedback.strip().lower() == 'y':
            user_rating = self._ask_rating()
            # Record the rating so calculate_reward() picks it up
            self.ratings.loc[self.ratings['ISBN'] == bookID, 'Book-Rating'] = user_rating
            print(self.ratings[self.ratings['ISBN'] == bookID])

        reward = self.calculate_reward(action)
        print(reward)

        # The reward is used as the offset to the next book, wrapping around the catalogue
        next_book_idx = (self.current_book_idx + reward) % len(self.books)
        self.current_book_idx = next_book_idx
        return np.array([next_book_idx]), reward, self.done, {}

    def _ask_rating(self):
        """Prompt until the user enters a number from 0 to 10 and return it as a float."""
        while True:
            answer = input("What rating would you give? (0-10): ")
            try:
                rating = float(answer)
            except ValueError:
                print("Please enter a number between 0 and 10.")
                continue
            # Range check also keeps -1 (the "unread" marker) from being entered as a rating
            if 0 <= rating <= 10:
                return rating
            print("Please enter a number between 0 and 10.")

    def calculate_reward(self, action):
        """Return the integer reward for recommending `books[action]`.

        The reward is 0 when the book has no row in the ratings table or its
        rating is -1 (unread). Otherwise it is int(rating / 5 * 100), so a
        rating of 10 yields 200.
        """
        book_id = self.books[action]  # ISBN selected by the action
        user_ratings_for_book = self.ratings[self.ratings['ISBN'] == book_id]['Book-Rating']

        if user_ratings_for_book.empty:
            return 0  # No rating row for this book

        user_rating = user_ratings_for_book.values[0]

        # -1 means unread: both factors become 0, so the reward is 0
        read_status = 1 if user_rating != -1 else 0
        rating_weight = user_rating if user_rating != -1 else 0

        # rating / 5 lies between 0 and 2 for ratings between 0 and 10
        reward = (rating_weight * read_status) / 5

        return int(reward * 100)  # Scale to an integer for the agent

    def render(self):
        """Return the ISBN of the book at the environment's current index."""
        return(self.books[self.current_book_idx])


# ISBN list (indexed by action/state) and the selected user's per-book ratings
books_list = booksData['ISBN'].tolist()
# The environment writes new ratings into this frame, so give it an independent copy
# instead of a column slice of user_book_info (avoids SettingWithCopyWarning).
ratings = user_book_info[['ISBN', 'Book-Rating']].copy()

# Create the environment
env = BookRecommendationEnv(books_list, ratings, agent)

# Start a new episode
state = env.reset()

# Choose an action (recommendation) for the current state using the agent's logic
action = agent.choose_action(state)
next_state, reward, done, _ = env.step(action)

# env.render() returns the book at the environment's current index, i.e. the index
# reached by the step above.
recommended_book_isbn = env.render()
print(recommended_book_isbn)

# Look the catalogue row up once and read every field from it
recommended_book = booksData.loc[booksData["ISBN"] == recommended_book_isbn].iloc[0]
recommended_book_title = recommended_book["Book-Title"]
recommended_book_author = recommended_book["Book-Author"]
recommended_book_year_of_pub = recommended_book["Year-Of-Publication"]
recommended_book_publisher = recommended_book["Publisher"]

print()
print(f"::: Recommended Book :::\nISBN: {recommended_book_isbn}\nTitle: {recommended_book_title}\nAuthor: {recommended_book_author}\nYear of Publication: {recommended_book_year_of_pub}\nPublisher: {recommended_book_publisher}\n")
# NOTE(review): `reward` was returned by env.step(action) for the action taken above,
# while the book printed here is the environment's new current book - confirm that
# pairing the two in one report is intended.
print(f"\nReward to Agent for recommended book: {reward}")




Recommended book: 0195153448


         ISBN  Book-Rating
0  0195153448            7
140
0393020371

::: Recommended Book :::
ISBN: 0393020371
Title: Next: The Future Just Happened
Author: Michael Lewis
Year of Publication: 2001
Publisher: W.W. Norton &amp; Company


Reward to Agent for recommended book: 140
