In [1]:
!pip install openai==0.28.1
!pip install tiktoken==0.6.0
!pip install langchain==0.1.20
!pip install chromadb==0.5.0
!pip install faiss-cpu



In [2]:
import openai
import numpy as np
import pandas as pd
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from openai.embeddings_utils import get_embedding
import faiss
import warnings
import os
warnings.filterwarnings("ignore")

In [6]:
os.environ['OPENAI_API_KEY'] = ''

In [7]:
openai.api_key = ''

In [8]:
System_Prompt = """
Role:
You are a knowledgeable and helpful Training Program Specialist. You assist users by providing insights and guidance about training programs using the dataset of training sessions, participants, trainers, and associated metrics.

Intent:
Your primary objective is to answer user queries, provide insights about training sessions, assist with data interpretation, and guide decision-making related to training programs based on the dataset. Always provide concise, accurate, and context-aware responses.

Context:
The dataset contains detailed information about training sessions, including topics, trainers, participants, schedules, costs, feedback scores, and certifications. Use this data to respond to queries about training schedules, participant performance, training costs, and trainer effectiveness. If the dataset lacks the information required to answer, politely explain its limitations and suggest possible alternatives for acquiring the information.

Capabilities:

    Retrieve specific information from the dataset (e.g., participant details, training costs, completion scores).
    Summarize trends or provide comparative insights (e.g., identifying high-performing participants or the most cost-effective training sessions).
    Assist in planning or optimizing future training sessions based on available data.
    Handle follow-up questions seamlessly and maintain context throughout the conversation.

End Goals:

    Ensure users receive actionable and reliable information to improve their training programs.
    Help identify areas for improvement in training sessions (e.g., low feedback scores or high costs).
    Foster better understanding of the dataset and empower users to make data-driven decisions.
    Maintain a user-friendly and professional tone to build trust and enhance user satisfaction.

Example Responses:

    User: "What is the average completion score for training sessions?"
    AI: "Based on the dataset, the average completion score for training sessions is 87.5%. This indicates a high level of participant achievement overall."

    User: "Which training methods received the highest feedback scores?"
    AI: "In the dataset, the 'In-Person' training method received the highest average feedback score of 4.8 out of 5."

    User: "Can you provide the cost of the Data Analytics training session?"
    AI: "The cost of the 'Data Analytics' training session is $450, as listed in the dataset.""
"""

In [9]:
struct = [{"role": "system", "content": System_Prompt}]

In [10]:
dataframed = pd.read_csv('https://raw.githubusercontent.com/ddpasiliao/AI_First_Day_4/refs/heads/main/training_sample_data_fixed.csv')

In [11]:
dataframed.head()

Unnamed: 0,Training_ID,Participant_Name,Trainer_Name,Training_Topic,Start_Date,End_Date,Status,Duration_Hours,Completion_Score,Feedback_Score,Training_Method,Trainer_Organization,Training_Location,Cost (USD),Certificate_Issued
0,2001,Alice Brown,John Doe,Leadership Skills,01/11/2024,17/11/2024,Completed,30,80.0,3.5,In-Person,Leadership Academy,Virtual,246.81,Yes
1,2002,Bob White,Jane Smith,Data Analytics,27/10/2024,19/11/2024,Scheduled,10,90.0,,In-Person,Data Insights Ltd,"New York, NY",735.25,No
2,2003,Charlie Black,Sarah Lee,Project Management,26/10/2024,10/11/2024,Scheduled,20,80.0,4.8,Online,PM Institute,"Chicago, IL",642.31,No
3,2004,Diane Green,Tom Harris,Software Development,07/11/2024,19/11/2024,In Progress,30,80.0,3.5,In-Person,Code Experts Inc,"San Francisco, CA",698.36,Yes
4,2005,Evan Davis,Ella Clark,Machine Learning,06/11/2024,19/11/2024,Scheduled,10,80.0,4.0,Online,Marketing Masters,Virtual,499.38,No


In [12]:
dataframed['combined'] = dataframed.apply(lambda row : ' '.join(row.values.astype(str)), axis = 1)

In [13]:
documents = dataframed['combined'].tolist()

In [14]:
embeddings = [get_embedding(doc, engine = "text-embedding-3-small") for doc in documents]

In [15]:
embedding_dim = len(embeddings[0])

In [16]:
embeddings_np = np.array(embeddings).astype('float32')

In [17]:
index = faiss.IndexFlatL2(embedding_dim)

In [18]:
index.add(embeddings_np)

In [19]:
user_message = "Hello my name is Michael Brown, Who is the trainer for Data Analytics?"

In [20]:
query_embedding = get_embedding(user_message, engine='text-embedding-3-small')
query_embedding_np = np.array([query_embedding]).astype('float32')

In [21]:
_, indices = index.search(query_embedding_np, 2)

In [22]:
retrieved_docs = [documents[i] for i in indices[0]]

In [23]:
context = ' '.join(retrieved_docs)

In [24]:
structured_prompt = f"Context:\n{context}\n\nQuery:\n{user_message}\n\nResponse:"

In [25]:
print(structured_prompt)

Context:
2002 Bob White Jane Smith Data Analytics 27/10/2024 19/11/2024 Scheduled 10 90.0 nan In-Person Data Insights Ltd New York, NY 735.25 No 2019 Samantha Wood Aaron Black Agile Frameworks 30/10/2024 17/11/2024 Completed 20 80.0 4.5 Hybrid Marketing Masters Houston, TX 375.91 No

Query:
Hello my name is Michael Brown, Who is the trainer for Data Analytics?

Response:


In [26]:
print(user_message)

Hello my name is Michael Brown, Who is the trainer for Data Analytics?


In [27]:
chat =  openai.ChatCompletion.create(model = "gpt-4o-mini", messages = struct + [{"role": "user", "content" : structured_prompt}], temperature=0.5, max_tokens=1500, top_p=1, frequency_penalty=0, presence_penalty=0)
struct.append({"role": "user", "content": user_message})
response = chat.choices[0].message.content
struct.append({"role": "assistant", "content": response})

In [28]:
print(response)

Hello Michael! The trainer for the Data Analytics session scheduled from October 27, 2024, to November 19, 2024, is Bob White. If you have any more questions or need further information, feel free to ask!
