In [None]:
# Install required libraries
!pip install openai > /dev/null 2>&1 || echo "Error: Installation failed"
!pip install langchain langchain_community > /dev/null 2>&1 || echo "Error: Installation failed"
!pip install --upgrade --quiet langchain langchainhub
!pip install faiss-cpu > /dev/null 2>&1 || echo "Error: Installation failed"
!pip install tiktoken > /dev/null 2>&1 || echo "Error: Installation failed"
!pip install pandas > /dev/null 2>&1 || echo "Error: Installation failed"

In [None]:
import warnings

# Filter out any warnings
warnings.filterwarnings("ignore")

In [None]:
import textwrap
from IPython.display import display, Markdown
import pandas as pd

def _markdown_fence(text, minimum=3):
    """Return a backtick fence that is longer than any backtick run inside ``text``."""
    longest_run = 0
    current_run = 0
    for char in text:
        current_run = current_run + 1 if char == "`" else 0
        longest_run = max(longest_run, current_run)
    return "`" * max(minimum, longest_run + 1)


def display_response(response, width=80):
    """
    Display the AI response in a readable format, preserving original line breaks.

    Each line is wrapped on its own and the result is rendered as a fenced
    Markdown code block. The fence grows when the response itself contains
    backtick runs (for example an embedded code block), so the response text
    cannot terminate the fence early.

    Args:
    response (str): The text response from the AI.
    width (int): The maximum width of each line before wrapping.
    """
    wrapped_lines = []
    for line in response.split('\n'):
        # textwrap.wrap() yields [] for blank/whitespace-only lines; substitute
        # an empty line so paragraph breaks in the response are kept.
        wrapped_lines.extend(textwrap.wrap(line, width=width) or [''])

    wrapped_text = '\n'.join(wrapped_lines)

    # Size the fence from the wrapped text, which is exactly what gets rendered
    fence = _markdown_fence(wrapped_text)
    formatted_text = f"{fence}\n{wrapped_text}\n{fence}"

    # Display as markdown
    display(Markdown(formatted_text))

def display_input_output(input, output):
    """
    Show a prompt and the matching response, each under its own heading.

    Args:
    input (str): The prompt text that was sent.
    output (str): The response text that came back.
    """
    sections = (
        ("Prompt:\n", input),
        ("\nResponse:\n", output),
    )
    for heading, body in sections:
        print(heading)
        display_response(body)

In [None]:
import openai

# Colab's secret store keeps the key out of the notebook source
from google.colab import userdata

# Read the key from Colab Secrets. userdata.get() signals a missing secret or
# missing notebook access by raising, so map both cases to "no key" and let the
# single check below produce one actionable error.
try:
    OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    OPENAI_API_KEY = None

# Ensure the API key is set (an empty secret is as unusable as a missing one)
if not OPENAI_API_KEY:
    raise ValueError("Please set the OPENAI_API_KEY in Colab's Secrets (under Tools > Settings > Secrets)")

# Set the API key for OpenAI
openai.api_key = OPENAI_API_KEY

In [None]:
# import necessary libraries

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

In [None]:
# Read the movie dataset from the runtime's file system into a DataFrame
csv_path = "/content/movies.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)

In [None]:
# Quick sanity check of the loaded data: print the first five rows as text
print("First 5 records:", df.head(n=5))

First 5 records:                                              name rating      genre  year  \
0                                     The Shining      R      Drama  1980   
1                                 The Blue Lagoon      R  Adventure  1980   
2  Star Wars: Episode V - The Empire Strikes Back     PG     Action  1980   
3                                       Airplane!     PG     Comedy  1980   
4                                      Caddyshack      R     Comedy  1980   

                        released  score    votes         director  \
0  June 13, 1980 (United States)    8.4   927000  Stanley Kubrick   
1   July 2, 1980 (United States)    5.8    65000   Randal Kleiser   
2  June 20, 1980 (United States)    8.7  1200000   Irvin Kershner   
3   July 2, 1980 (United States)    7.7   221000     Jim Abrahams   
4  July 25, 1980 (United States)    7.3   108000     Harold Ramis   

                    writer            star         country    budget  \
0             Stephen King  Jack 

In [None]:
from langchain.docstore.document import Document


def _row_to_document(row_id, movie):
    """Turn one DataFrame row into a Document holding the movie's fields as text."""
    text = f"""
    Name: {movie['name']}
    Rating: {movie['rating']}
    Genre: {movie['genre']}
    Year: {movie['year']}
    Released: {movie['released']}
    Score: {movie['score']}
    Votes: {movie['votes']}
    Director: {movie['director']}
    Writer: {movie['writer']}
    Star: {movie['star']}
    """
    # Metadata records where the text came from, plus the movie name
    return Document(
        page_content=text,
        metadata={"source": csv_path, "row": row_id, "name": movie['name']},
    )


# One Document per CSV row, in the DataFrame's row order
documents = []
for index, row in df.iterrows():
    documents.append(_row_to_document(index, row))

In [None]:
# Embed the documents with OpenAI (authenticated with the key loaded earlier)
# and index the resulting vectors in a FAISS store
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vector_store = FAISS.from_documents(documents=documents, embedding=embeddings)

In [None]:
# Create a retriever
# Expose the FAISS vector store through its retriever interface. No arguments
# are passed, so the library's default search settings apply.
retriever = vector_store.as_retriever()

In [None]:
# Chat model that writes the answers: gpt-4o-mini with temperature 0.1
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    openai_api_key=OPENAI_API_KEY,
)

In [None]:
# Assemble the RAG pipeline: the retriever supplies documents and the chat
# model answers from them, using the "stuff" chain type
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [None]:
# Example query
query = "Who directed The Shawshank Redemption?"

# Use invoke() instead of the deprecated Chain.run(). RetrievalQA takes the
# question under the "query" key and returns the answer text under "result".
answer = rag_chain.invoke({"query": query})["result"]
display_response(answer)

```
The Shawshank Redemption was directed by Frank Darabont.
```