# This notebook augments the book dataset with the help of the Cohere language model (an API key is required).

In [0]:
import pandas as pd
from langchain_community.chat_models import ChatCohere
from langchain_core.messages import HumanMessage
from secret import API_KEY
import json

In [3]:
def llm_user_profile(history, llm):
    """
    Ask the LLM to produce a user profile from a reading history.

    The history is interpolated verbatim into a fixed instruction that asks
    for age, gender, liked/disliked genres, liked authors, country and language.

    :param history: user history of books read (inserted as-is into the prompt)
    :param llm: LLM model; must expose ``invoke(messages)``
    :return: the ``content`` attribute of the LLM response
    """
    profile_prompt = f"Generate a user profile based on the provided user history and analysis, that each book with [index] title, year, author. History: {history}. Please output the following infomation of user without missing or None fields, output format: {{age: , gender: , liked genre: , disliked genre: , liked authors: , country: , language: }}. Give specific answers and no ranges, for gender give either male or female. Please don't give any 'None' answers it's important. please output only the content in format above, but no other thing else, no reasoning. Reiterating once again!! Please only output the content after \"output format: \", and do not include any other content such as introduction or acknowledgments. "
    # A single human turn carries the whole request.
    reply = llm.invoke([HumanMessage(content=profile_prompt)])
    return reply.content

In [4]:
def llm_user_item_interaction(user_history, candidates, llm):
    """
    Ask the LLM to pick one liked and one disliked book among the candidates.

    :param user_history: history of books read by the user (inserted as-is into the prompt)
    :param candidates: candidate books to choose from (inserted as-is into the prompt)
    :param llm: LLM model; must expose ``invoke(messages)``
    :return: the ``content`` attribute of the LLM response
    """
    # The prompt text, including its internal indentation, is sent to the model unchanged.
    request = HumanMessage(content=f"""The user has read the following books,that each book with [index] title, year, genres. :
            {user_history}
            Candidates:
            {candidates}
            Task:
            The recommendation system needs to predict which books the user will like and which books will dislike from the provided candidates based on the history and analysis.
            Please provide the output in the format of [index of one liked, index of one disliked] with no introductions or acknowledgments.""")
    return llm.invoke([request]).content

In [5]:
def llm_book_profile(book, llm):
    """
    Ask the LLM for the genres and language of a single book.

    :param book: book information, an iterable unpacking to exactly (title, year, author)
    :param llm: LLM model; must expose ``invoke(messages)``
    :return: the ``content`` attribute of the LLM response
    """
    book_title, pub_year, writer = book
    question = f"""Provide the inquired information of the given book title : {book_title} ({pub_year}) written by {writer}. The inquired information is: genres, language. Please provide directly the output in the format of : {{"genres" : " | " , "language" :}} with no introductions or acknowledgments. Please consider only the specific genres"""
    answer = llm.invoke([HumanMessage(content=question)])
    return answer.content

In [6]:
# Load the training interactions (JSON), the items table and the users table.
# The encoding is passed explicitly: JSON text is UTF-8, and relying on the
# platform default encoding makes the read locale-dependent.
with open("../data/books/train.json","r",encoding="utf-8") as f:
    train = json.load(f)
# The items file is semicolon-separated.
books = pd.read_csv("../data/books/items_with_attributes.csv",sep=";")
users = pd.read_csv("../data/books/users.txt")

# m = number of rows in the items table, n = number of rows in the users table
m = len(books)
n = len(users)
# print number of users and items
print(f"Number of users: {n}")
print(f"Number of items: {m}")

Number of users: 14790
Number of items: 33962


In [6]:
# Load the per-user candidate lists (JSON).
# The encoding is passed explicitly so the read does not depend on the platform default.
with open("../data/books/users_candidates.json","r",encoding="utf-8") as f:
    candidates = json.load(f)


In [7]:
# Function to generate candidates for each user
def generate_candidates(n_users, items_liste, candidates):
    """
    Build, for each user, one text string describing that user's candidate books.

    :param n_users: number of users to process; user ids ``0 .. n_users - 1`` are used
    :param items_liste: items DataFrame with at least the columns ``item_id``,
        ``Title``, ``Year`` and ``Genres``; rows are selected by index label
    :param candidates: mapping from the user id *as a string* to a list of
        index labels of ``items_liste``
    :return: dictionary that maps each (integer) user id to the concatenation of
        `` [item_id]Title (Year) genres : Genres.`` for each of his candidates
    """
    columns = ["item_id", "Title", "Year", "Genres"]
    candidates_titles = {}
    for user_id in range(n_users):
        # The candidates mapping is keyed by the user id as a string.
        rows = items_liste.loc[candidates[str(user_id)], columns]
        # Inner names instead of row["..."] lookups: reusing the enclosing quote
        # character inside an f-string is a SyntaxError before Python 3.12.
        candidates_titles[user_id] = "".join(
            f" [{item_id}]{title} ({year}) genres : {genres}."
            for item_id, title, year, genres in rows.itertuples(index=False, name=None)
        )
    return candidates_titles
# Smoke test: build the candidate strings for the first 3 users
generate_candidates(3,books,candidates)

{0: ' [9564]Tanner on Ice: An Evan Tanner Novel (Tanner Mystery Series) (1999) genres : Thriller, Mystery, Adventure. [3274]Prime Suspect (1993) genres : Crime Fiction , Thriller. [5046]Of Human Bondage (Penguin Twentieth-Century Classics) (1992) genres : Fiction , Drama ..',
 1: " [25847]Abhorsen (The Abhorsen Trilogy) (2004) genres : Fantasy , Adventure. [15802]The Giving Tree (1964) genres : Children's literature , Poetry. [1059]The Whale Rider (Movie Cover Edition) (2002) genres : Drama , Adventure.",
 2: ' [2463]Private Sector (2003) genres : Thriller, Mystery, Fiction. [19026]Slain in the Spirit (2002) genres : Fiction , Religious fiction. [5231]The Man Who Walked Through Time (1989) genres : Adventure, Memoir, Travel.'}

In [7]:
#function to get history of each user
def get_user_history(n_users, items_liste, train_data=None, max_books=2):
    """
    Build, for each user, one text string describing the first books of his history.

    :param n_users: number of users to process; user ids ``0 .. n_users - 1`` are used
    :param items_liste: items DataFrame with at least the columns ``item_id``,
        ``Title``, ``Year`` and ``Genres``; rows are selected by index label
    :param train_data: mapping from the user id *as a string* to a list of index
        labels of ``items_liste``; when omitted, the notebook-level ``train``
        mapping is used
    :param max_books: maximum number of history rows kept per user (default 2);
        ``None`` keeps the whole history
    :return: dictionary that maps each (integer) user id to the concatenation of
        `` [item_id]Title (Year) genres : Genres.`` for each kept book
    """
    if train_data is None:
        # Two-argument calls keep reading the notebook-level ``train`` mapping.
        train_data = train
    columns = ["item_id", "Title", "Year", "Genres"]
    all_history = {}
    for user_id in range(n_users):
        # Truncate after the label lookup so the kept rows are the first ones selected.
        rows = items_liste.loc[train_data[str(user_id)], columns].iloc[:max_books]
        # Inner names instead of row["..."] lookups: reusing the enclosing quote
        # character inside an f-string is a SyntaxError before Python 3.12.
        all_history[user_id] = "".join(
            f" [{item_id}]{title} ({year}) genres : {genres}."
            for item_id, title, year, genres in rows.itertuples(index=False, name=None)
        )
    return all_history
# Smoke test: build the reading-history strings for the first 2 users
get_user_history(2,books)

{0: " [14111]The Red Tent (Bestselling Backlist) (1998) genres : Historical fiction , Drama. [22601]Dude, Where's My Country? (2003) genres : Political satire, non-fiction.",
 1: " [14408]Lirael: Daughter of the Clayr (2002) genres : Fantasy , Adventure. [3163]The Giving Tree (1964) genres : Children's literature , Poetry."}

In [11]:

# NOTE(review): this whole cell is a bare string literal, so none of the statements inside it run;
# the notebook only echoes the text. Presumably it was disabled to avoid calling the paid API - confirm.
"""API = API_KEY # Paid API key
LLM = ChatCohere(cohere_api_key=API, model="chat",  max_tokens=256,temperature=0.5, connectors=[{"id": "web-search"}])
initial_context = "You are now a books recommender systems. Givent user history of read books and  a list of candidates The recommendation system needs to predict which books the user will like and which books will dislike from the provided candidates based on the history and analysis."
LLM.invoke([HumanMessage(content=initial_context)]) """

'API = API_KEY # Paid API key\nLLM = ChatCohere(cohere_api_key=API, model="chat",  max_tokens=256,temperature=0.5, connectors=[{"id": "web-search"}])\ninitial_context = "You are now a books recommender systems. Givent user history of read books and  a list of candidates The recommendation system needs to predict which books the user will like and which books will dislike from the provided candidates based on the history and analysis."\nLLM.invoke([HumanMessage(content=initial_context)]) '

In [None]:
# NOTE(review): disabled cell - the code below is wrapped in a string literal and does not execute.
# Before re-enabling, check the following against the visible part of the notebook:
#   * `users_history` is not assigned by any visible cell (the result of get_user_history is only
#     displayed), and get_user_history keys its result by int user id, not by str.
#   * `candidates[str(i)]` is the raw list loaded from users_candidates.json (used as index labels
#     by generate_candidates), not the text built by generate_candidates.
"""# user item interaction augmentation
responses = {}
API = API_KEY # Paid API key
LLM = ChatCohere(cohere_api_key=API, model="chat",  max_tokens=256,temperature=0.5, connectors=[{"id": "web-search"}])
initial_context = "You are now a books recommender systems. Givent user history of read books and  a list of candidates The recommendation system needs to predict which books the user will like and which books will dislike from the provided candidates based on the history and analysis."
LLM.invoke([HumanMessage(content=initial_context)])
for i in range(0,n) :
    user_candidate = candidates[str(i)]
    user_history = users_history[str(i)]
    user_item_interaction = llm_user_item_interaction(user_history,user_candidate,LLM)
    responses[i] = user_item_interaction
with open("../data/books/interactions_2.json","w") as f:
    json.dump(responses,f)"""

In [None]:
# NOTE(review): disabled cell - the code below is wrapped in a string literal and does not execute.
# Before re-enabling, check the following against the visible part of the notebook:
#   * `API` and `users_history` are not assigned by any active (non-string) cell visible here.
#   * this cell collects `responses` but does not write it anywhere.
#   * the start index 11118 looks like a resume point from an earlier partial run - confirm.
"""# user profile augmentation
responses = {}
for i in range(11118,n) :
    user_history = users_history[str(i)]
    LLM = ChatCohere(cohere_api_key=API, model="chat",  max_tokens=256,temperature=0.75, connectors=[{"id": "web-search"}])
    user_item_interaction = llm_user_profile(user_history,LLM)
    responses[i] = user_item_interaction
"""