## Dataset Selection
I chose the Character Descriptions dataset because it contains fictional profiles. It is ideal for demonstrating how a custom chatbot can answer questions specifically about fictional characters, ensuring more accurate, dataset-specific responses compared to a general model.

In [1]:
# Install needed packages if not already installed
# pip install pandas openai numpy scikit-learn
import pandas as pd
import numpy as np
import openai
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# API Key Setup
openai.api_base = "https://openai.vocareum.com/v1"
openai.api_key = ""

In [3]:
# TODO 1: Choose a Dataset and Explain the Scenario

"""
I chose the "Character Descriptions" dataset, which contains character profiles from TV shows, films, and theater productions. This dataset is ideal for building a chatbot that can answer questions about different fictional characters because it contains rich, varied descriptions that can be used to ground responses.
"""

'\nI chose the "Character Descriptions" dataset, which contains character profiles from TV shows, films, and theater productions. This dataset is ideal for building a chatbot that can answer questions about different fictional characters because it contains rich, varied descriptions that can be used to ground responses.\n'

In [4]:
# TODO 2: Prepare the Dataset for the Custom Query Process

# Load the dataset
df = pd.read_csv("./data/character_descriptions.csv")

# Display the first few rows to verify
df.head()

# Check the number of rows
print(f"Total number of rows: {len(df)}")

# Ensure the dataframe has a 'text' column
if 'text' not in df.columns:
    # Assuming the descriptions are in a column called 'description'
    df = df.rename(columns={df.columns[0]: 'text'})

# Final check
df.head()

Total number of rows: 55


Unnamed: 0,text,Description,Medium,Setting
0,Emily,"A young woman in her early 20s, Emily is an as...",Play,England
1,Jack,"A middle-aged man in his 40s, Jack is a succes...",Play,England
2,Alice,"A woman in her late 30s, Alice is a warm and n...",Play,England
3,Tom,"A man in his 50s, Tom is a retired soldier and...",Play,England
4,Sarah,"A woman in her mid-20s, Sarah is a free-spirit...",Play,England


In [5]:
# TODO 3: Perform the Custom Query Process

# Step 1: Create the TF-IDF Matrix
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(df["text"])

# Step 2: Define a function for custom querying
def get_custom_response(user_question):
    # Add user question to the corpus
    temp_corpus = df["text"].tolist() + [user_question]

    # Recreate the TF-IDF Matrix
    temp_vectorizer = TfidfVectorizer(stop_words="english")
    temp_tfidf_matrix = temp_vectorizer.fit_transform(temp_corpus)

    # Calculate Similarity Scores
    temp_cosine_similarities = cosine_similarity(temp_tfidf_matrix[-1], temp_tfidf_matrix[:-1])

    # Find the most relevant text
    most_similar_idx = temp_cosine_similarities.argsort()[0][-1]
    relevant_text = df.iloc[most_similar_idx]["text"]

    # Create a custom prompt
    custom_prompt = f"Use the following character description to answer the user's question:\n\n{relevant_text}\n\nUser question: {user_question}\n\nAnswer:"

    # Generate a response
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=custom_prompt,
        max_tokens=200,
        temperature=0.5
    )

    return response.choices[0].text.strip()


In [None]:
def get_custom_response(question):
    custom_prompt = f"Answer the following question about fictional characters: {question}"
    
    try:
        response = openai.ChatCompletion.create(  # Use v1/chat/completions endpoint
            model="gpt-3.5-turbo",  # Or any available chat model
            messages=[
                {"role": "system", "content": "You are a helpful assistant that answers questions about fictional characters."},
                {"role": "user", "content": custom_prompt}
            ],
            max_tokens=200,
            temperature=0.7
        )
        # Debug output in case of structure issues
        print("Raw API response:", response)

        # Safely extract the answer
        if 'choices' in response and len(response['choices']) > 0:
            return response['choices'][0]['message']['content'].strip()
        else:
            return "No response received from OpenAI."

    except Exception as e:
        print("OpenAI API Error:", e)
        return "Error: Unable to fetch a response."


# Basic (no context) prompt
basic_question_1 = "Tell me about a character who is a wizard."
basic_question_2 = "Who is a famous detective in literature?"

# Custom prompt using function
custom_answer_1 = get_custom_response(basic_question_1)
custom_answer_2 = get_custom_response(basic_question_2)

# Display results
print("Basic Prompt Answer 1:")
print("----------------------")
print(basic_question_1)
print("\nCustom Prompt Answer 1:")
print(custom_answer_1)
print("\n============================\n")

print("Basic Prompt Answer 2:")
print("----------------------")
print(basic_question_2)
print("\nCustom Prompt Answer 2:")
print(custom_answer_2)


Raw API response: {
  "error": {
    "message": "This is a chat model and not supported in the v1/completions endpoint. Did you mean to use v1/chat/completions?",
    "type": "invalid_request_error",
    "param": "model",
    "code": null
  }
}
Raw API response: {
  "error": {
    "message": "This is a chat model and not supported in the v1/completions endpoint. Did you mean to use v1/chat/completions?",
    "type": "invalid_request_error",
    "param": "model",
    "code": null
  }
}
Basic Prompt Answer 1:
----------------------
Tell me about a character who is a wizard.

Custom Prompt Answer 1:
No response received from OpenAI.


Basic Prompt Answer 2:
----------------------
Who is a famous detective in literature?

Custom Prompt Answer 2:
No response received from OpenAI.
