In [7]:
import json
from sentence_transformers import SentenceTransformer
from typing import List

# Define the DataLoader base class
class DataLoader:
    def __init__(self, json_file_path: str):
        self.json_file_path = json_file_path
        self.data = self.load_data()
    
    def load_data(self):
        with open(self.json_file_path, 'r') as file:
            return json.load(file)
    
    def get_page_content(self, item: dict) -> str:
        raise NotImplementedError("Subclasses should implement this method")

# Define the BookDataLoader class
class BookDataLoader(DataLoader):
    def get_page_content(self, item: dict) -> str:
        return f"{item['title']} {item['author']} {item['publication_date']} {item['description']} {' '.join(item['genres'])}"

# Define the MovieDataLoader class
class MovieDataLoader(DataLoader):
    def get_page_content(self, item: dict) -> str:
        return f"{item['title']} {item['release_date']} {item['summary']} {' '.join(item['movie_genres_list'])} {' '.join(item['movie_actor_list'])}"

# Initialize the embedding model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Generate embeddings for summaries
def generate_embeddings(texts: List[str]):
    return model.encode(texts, convert_to_tensor=True)

# Load book and movie data using the defined loaders
book_loader = BookDataLoader('C:\\Users\\harsh\\OneDrive - IIT Kanpur\\Desktop\\ML Project\\Chatbot_LLM_RAG_in_Action\\data\\BookSummaries\\book.json')
movie_loader = MovieDataLoader('C:\\Users\\harsh\\OneDrive - IIT Kanpur\\Desktop\\ML Project\\Chatbot_LLM_RAG_in_Action\\data\\MovieSummaries\\movie.json')

# Extract summaries and create page content
book_contents = [book_loader.get_page_content(item) for item in book_loader.data]
movie_contents = [movie_loader.get_page_content(item) for item in movie_loader.data]

# Generate embeddings
book_embeddings = generate_embeddings(book_contents)
movie_embeddings = generate_embeddings(movie_contents)

# Define a simple retrieval function (you may need to use a more advanced method)
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def find_most_similar(query_embedding, embeddings):
    similarities = cosine_similarity(query_embedding, embeddings)
    return np.argmax(similarities)

# Example usage with a query
query = "Science fiction books"
query_embedding = generate_embeddings([query])
most_similar_book_index = find_most_similar(query_embedding, book_embeddings)
query = "Science fiction movie"
query_embedding = generate_embeddings([query])
most_similar_movie_index = find_most_similar(query_embedding, movie_embeddings)

print("Most similar book:", book_contents[most_similar_book_index])
print("Most similar movie:", movie_contents[most_similar_movie_index])




Most similar book: Article 23 William R. Forstchen 1998-09  Academy plebe Justin Bell is excited about his new career in space. Unfortunately several colonies are agitating for independence. On top of this very dangerous political situation, contact has just been made with non-human life.
 Science Fiction Speculative fiction Fiction Novel
Most similar movie: Godsend 2004-04-30 Summary not available Thriller Science Fiction Horror Sci-Fi Horror Psychological thriller Drama Cameron Bright Greg Kinnear Rebecca Romijn Robert De Niro Christopher Britton Merwin Mondesir Deborah Odell Jake Simons Jenny Levine Devon Bostick Janet Bailey


In [8]:
import streamlit as st
import google.generativeai as genai
import os
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load environment variables
os.environ['GENAI_API_KEY'] = 'AIzaSyDJuDtnzp3dEvttkr4oia3pGvmTQuDT-E0'
genai.configure(api_key=os.environ['GENAI_API_KEY'])

# Load environment variables
load_dotenv()
genai.configure(api_key=os.getenv('GENAI_API_KEY'))

# Initialize the embedding model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Define DataLoader classes as before
class DataLoader:
    def __init__(self, json_file_path: str):
        self.json_file_path = json_file_path
        self.data = self.load_data()
    
    def load_data(self):
        with open(self.json_file_path, 'r') as file:
            return json.load(file)
    
    def get_page_content(self, item: dict) -> str:
        raise NotImplementedError("Subclasses should implement this method")

class BookDataLoader(DataLoader):
    def get_page_content(self, item: dict) -> str:
        return f"{item['title']} {item['author']} {item['publication_date']} {item['description']} {' '.join(item['genres'])}"

class MovieDataLoader(DataLoader):
    def get_page_content(self, item: dict) -> str:
        return f"{item['title']} {item['release_date']} {item['summary']} {' '.join(item['movie_genres_list'])} {' '.join(item['movie_actor_list'])}"

# Load data and generate embeddings
book_loader = BookDataLoader('C:\\Users\\harsh\\OneDrive - IIT Kanpur\\Desktop\\ML Project\\Chatbot_LLM_RAG_in_Action\\data\\BookSummaries\\book.json')
movie_loader = MovieDataLoader('C:\\Users\\harsh\\OneDrive - IIT Kanpur\\Desktop\\ML Project\\Chatbot_LLM_RAG_in_Action\\data\\MovieSummaries\\movie.json')

book_contents = [book_loader.get_page_content(item) for item in book_loader.data]
movie_contents = [movie_loader.get_page_content(item) for item in movie_loader.data]

book_embeddings = generate_embeddings(book_contents)
movie_embeddings = generate_embeddings(movie_contents)

# Define retrieval function
def find_most_similar(query_embedding, embeddings):
    similarities = cosine_similarity(query_embedding, embeddings)
    return np.argmax(similarities)

# Chatbot class
class ChatAgent:
    def __init__(self, model_name: str):
        self.model = genai.GenerativeModel(model_name=model_name)

    def get_response(self, query: str) -> str:
        # Generate query embedding
        query_embedding = generate_embeddings([query])
        
        # Retrieve relevant book and movie
        most_similar_book_index = find_most_similar(query_embedding, book_embeddings)
        most_similar_movie_index = find_most_similar(query_embedding, movie_embeddings)
        
        book_info = book_contents[most_similar_book_index]
        movie_info = movie_contents[most_similar_movie_index]
        
        # Create a context for the Gemini model
        context = f"Book Info: {book_info}\nMovie Info: {movie_info}\n\nQuery: {query}"
        
        # Generate response from Gemini model
        try:
            response = self.model.generate_content(context)
            return response.text
        except Exception as e:
            return f"An error occurred: {str(e)}"






In [4]:
#dont use streamlit, simply use the gemini model to reply to a query which I can test in my system VS Code using gemini-1.5-flash
# Initialize the chat agent
chat_agent = ChatAgent(model_name='gemini-1.5-flash')

# Define a query
query = "Science fiction movie"

# Get response from the chat agent
response = chat_agent.get_response(query)
print(response)


You're looking for a science fiction movie, but the information you provided is about a science fiction book and a thriller/horror movie. 

To help you find a science fiction movie, please tell me:

* **What kind of science fiction are you interested in?**  (e.g., space opera, cyberpunk, dystopian, alien invasion, time travel, etc.)
* **Do you have any specific themes or elements you'd like to see in the movie?** (e.g., action, adventure, romance, suspense, humor, philosophical themes)
* **Do you have any favorite actors or directors?** 
* **Are you looking for a specific release year or decade?**

Once I have more information about your preferences, I can suggest some great science fiction movies for you. 



In [8]:
def main():
    model_name = "gemini-1.5-flash"
    chat_agent = ChatAgent(model_name=model_name)
    
    print("Type 'exit' or 'quit' to stop the chatbot.")
    
    try:
        while True:
            user_query = input("You: ")
            if user_query.lower() in ['exit', 'quit']:
                print("Exiting the chatbot...")
                break
            
            # Get bot response
            bot_response = chat_agent.get_response(user_query)
            
            # Print both user message and bot response
            print(f"You: {user_query}")
            print(f"Bot: {bot_response}")
    except KeyboardInterrupt:
        print("\nInterrupted by user. Exiting...")
    
if __name__ == "__main__":
    main()

Type 'exit' or 'quit' to stop the chatbot.
You: is liar game a book?
Bot: The information you provided does not indicate whether "Liar Game" is a book.  

You provided information about a book ("Lies and the Lying Liars Who Tell Them") and a movie ("Liar Game: The Final Stage").  

While "Liar Game" is the title of a popular Japanese media franchise, it started as a **manga** (Japanese comic book) series.  

Therefore, "Liar Game" is not a book in the traditional sense, but it is a **manga series** that has been adapted into a television drama and a movie. 

You: 
Bot: It looks like you're asking for a comparison between the book "King Solomon's Carpet" by Ruth Rendell and the movie "Loverboy." While both works involve interesting characters and intriguing plots, they are vastly different in terms of genre, tone, and themes. 

Here's a breakdown:

**King Solomon's Carpet**

* **Genre:** Crime fiction, mystery
* **Themes:** Social commentary, isolation, urban life, moral ambiguity
* **P

In [1]:
import json
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Load Book and Movie Data
def load_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Define Paths
book_json_file_path = 'C:\\Users\\harsh\\OneDrive - IIT Kanpur\\Desktop\\ML Project\\Chatbot_LLM_RAG_in_Action\\data\\BookSummaries\\book.json'
movie_json_file_path = 'C:\\Users\\harsh\\OneDrive - IIT Kanpur\\Desktop\\ML Project\\Chatbot_LLM_RAG_in_Action\\data\\MovieSummaries\\movie.json'

# Load data
books_data = load_data(book_json_file_path)
movies_data = load_data(movie_json_file_path)

# Processing Functions
class BookDataLoader:
    def get_page_content(self, item):
        return f"{item['title']} {item['author']} {item['publication_date']} {item['description']} {' '.join(item['genres'])}"

class MovieDataLoader:
    def get_page_content(self, item):
        return f"{item['title']} {item['release_date']} {item['summary']} {' '.join(item['movie_genres_list'])} {' '.join(item['movie_actor_list'])}"

# Prepare Book and Movie Contents
book_loader = BookDataLoader()
movie_loader = MovieDataLoader()

book_contents = [book_loader.get_page_content(item) for item in books_data]
movie_contents = [movie_loader.get_page_content(item) for item in movies_data]


In [2]:
# Initialize the HuggingFace Embedding model
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Create and store embeddings in FAISS index
book_faiss_index = FAISS.from_texts(book_contents, embedding_model)
movie_faiss_index = FAISS.from_texts(movie_contents, embedding_model)




In [3]:
def find_most_similar(query, faiss_index, embedding_model):
    # Convert query to embedding
    query_embedding = embedding_model.embed_query(query)
    
    # Perform similarity search with FAISS
    docs_and_scores = faiss_index.similarity_search_with_score(query, k=1)
    
    # Return the most similar document and its score
    return docs_and_scores[0]

# Example user query
user_query = "I'm looking for a sci-fi book about space exploration."

# Find the most similar book and movie
most_similar_book, book_score = find_most_similar(user_query, book_faiss_index, embedding_model)
most_similar_movie, movie_score = find_most_similar(user_query, movie_faiss_index, embedding_model)

# Print the results
print("Most similar book:", most_similar_book.page_content)
print("Most similar movie:", most_similar_movie.page_content)


Most similar book: Article 23 William R. Forstchen 1998-09  Academy plebe Justin Bell is excited about his new career in space. Unfortunately several colonies are agitating for independence. On top of this very dangerous political situation, contact has just been made with non-human life.
 Science Fiction Speculative fiction Fiction Novel
Most similar movie: Starflight: The Plane That Couldn't Land 1983-02-27 Summary not available Disaster Thriller Science Fiction Lee Majors Hal Linden Kirk Cameron


In [6]:
import google.generativeai as genai
from dotenv import load_dotenv
import os

# Load environment variables
os.environ['GENAI_API_KEY'] = 'AIzaSyDJuDtnzp3dEvttkr4oia3pGvmTQuDT-E0'
genai.configure(api_key=os.environ['GENAI_API_KEY'])

# Load environment variables
load_dotenv()
genai.configure(api_key=os.getenv('GENAI_API_KEY'))

class ChatAgent:
    def __init__(self, model_name: str):
        self.model = genai.GenerativeModel(model_name=model_name)

    def get_response(self, query: str) -> str:
        try:
            response = self.model.generate_content(query)
            return response.text
        except Exception as e:
            return f"An error occurred: {str(e)}"

# Initialize the Chat Agent
chat_agent = ChatAgent(model_name="gemini-1.5-flash")

# Start an interactive loop for user input
while True:
    # Get user input
    user_query = input("You: ")

    # Check for exit conditions
    if user_query.lower() in ["exit", "quit", "stop"]:
        print("Exiting the chat. Goodbye!")
        break

    # Find the most similar book and movie
    most_similar_book, book_score = find_most_similar(user_query, book_faiss_index, embedding_model)
    most_similar_movie, movie_score = find_most_similar(user_query, movie_faiss_index, embedding_model)

    # Combine the most similar book and movie info with the query
    context = f"Book Info: {most_similar_book.page_content}\nMovie Info: {most_similar_movie.page_content}\n\nQuery: {user_query}"

    # Generate and print the bot's response
    bot_response = chat_agent.get_response(context)
    print(f"User Query: {user_query}")
    print(f"Bot: {bot_response}")


User Query: suggest some scifi movie
Bot: It seems like you're interested in sci-fi movies that deal with themes of post-apocalyptic survival, nuclear war, and perhaps some dark humor. Based on your description of "Long Voyage Back" and "The Age of Insects," here are some movie suggestions:

**Post-Apocalyptic Survival and Nuclear War:**

* **Mad Max: Fury Road (2015):** A classic example of a post-apocalyptic action film with a strong focus on survival and resource scarcity.
* **The Road (2009):** A bleak but powerful drama about a father and son's journey across a desolate America after a nuclear catastrophe. 
* **Children of Men (2006):** This film explores a world where humanity is facing extinction, and it delves into themes of survival, hope, and the fragility of civilization. 
* **The Book of Eli (2010):** A lone warrior battles across a desolate wasteland in search of a sacred text that holds the key to humanity's future.
* **Threads (1984):** This British television film offer

Objective:
To develop an interactive chatbot capable of providing contextually relevant responses to user queries about books and movies by combining retrieval-augmented generation (RAG) techniques with structured data and generative AI.

Method:
1. utilized structured JSON files containing detailed summaries of books and movies, which were processed and converted into embeddings using the HuggingFace sentence-transformers model from langchain.

2. The FAISS vector store from langchain is used to store and manage these embeddings.

3. The most similar book and movie, along with the user’s query, are combined to create a context.This context is passed to the Gemini model, which generates a coherent response that incorporates the retrieved information and addresses the user’s query.

Result:
Successfully created a chatbot that delivers accurate and relevant responses by integrating advanced natural language processing, embedding techniques, and generative AI.