## Option Prediction for Prompts


#### STEP 1: FETCH DATA FROM MONGODB AND parameter.txt

In [1]:
import ast

from pymongo import MongoClient

# Connect to the local MongoDB instance and select the collection that holds
# the prompt -> options training examples.
client = MongoClient('mongodb://localhost:27017/')
db = client['ai']
collection = db['promptOptions']

# Project only the two fields that are used below.
documents = collection.find({}, {'prompt': 1, 'promptOptions': 1})

# Collect one (prompt, options) pair per document
data = []
for doc in documents:
    prompt = doc['prompt']
    options = doc['promptOptions']
    data.append((prompt, options))

# Extend the dataset with additional examples stored one per line in
# parameter.txt, each written as a Python literal:
#     ('prompt', ['option1', 'option2', ...])
with open('parameter.txt', 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline at end of file) hold no example.
            continue
        # ast.literal_eval only parses literals (strings, lists, tuples, ...),
        # so a malformed or malicious line cannot execute code the way eval() would.
        prompt_options = ast.literal_eval(line)
        data.append(prompt_options)

print(data)


[('tell me about AI/ML', ['text', 'link']), ('tell me about cybersecurity', ['text', 'video', 'image', 'link']), ('What is the significance of renewable energy sources?', ['text', 'video', 'image', 'link']), ('Discuss the history and evolution of programming languages', ['text', 'video', 'link']), ('Explain the principles of quantum mechanics', ['text', 'video', 'image']), ('How does the human brain process information?', ['text', 'image', 'link']), ('Compare and contrast different operating systems', ['text', 'video', 'link']), ('What are the challenges of space exploration?', ['text', 'video', 'image', 'link']), ('Discuss the impact of globalization on cultures', ['text', 'image']), ('How do animals adapt to their environments?', ['text', 'video', 'link']), ('Examine the role of ethics in artificial intelligence', ['text', 'image', 'link']), ('What are the key concepts in game theory?', ['text', 'video']), ('Explore the history of the Internet and its development', ['text', 'video', 

#### Pre-Processing Data

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer

# Split the (prompt, options) pairs in `data` into two parallel lists
prompts = []
options = []
for item in data:
    prompts.append(item[0])
    options.append(item[1])

# Encode each option list as a multi-hot row, one column per distinct label.
# With the label order printed below (image, link, text, video),
# e.g. ['text', 'image'] => [1, 0, 1, 0]
mlb = MultiLabelBinarizer()
options_bin = mlb.fit_transform(options)
print("Unique labels in order:", mlb.classes_)

# Turn the prompt strings into a TF-IDF feature matrix
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(prompts)
# print(X)

Unique labels in order: ['image' 'link' 'text' 'video']


#### MODEL Training

In [4]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# One independent Multinomial Naive Bayes classifier is fitted per option label
per_label_classifier = OneVsRestClassifier(MultinomialNB())

# The pipeline vectorizes raw prompt strings with TF-IDF before classifying,
# so it can be fitted on (and later predict from) plain text directly
model = make_pipeline(TfidfVectorizer(), per_label_classifier)

# Fit on the raw prompts against the multi-hot option matrix
model.fit(prompts, options_bin)



#### Testing

In [5]:
# Read a prompt interactively from the user
new_prompt = input("Enter Prompt: ")

# The pipeline expects a sequence of documents, so wrap the prompt in a one-item batch
prompt_batch = [new_prompt]
predicted_options_bin = model.predict(prompt_batch)

# Map the multi-hot prediction row back to label names (one tuple per input prompt)
predicted_options = mlb.inverse_transform(predicted_options_bin)

# Only one prompt was submitted, so report the first (and only) result
print(f"Predicted options for '{new_prompt}': {predicted_options[0]}")

Predicted options for 'what is 1 + 1 ': ('text',)
