## 1. Installing Dependencies

In [None]:
!pip install pinecone-client

In [None]:
!pip install langchain

In [None]:
!pip install sentence-transformers

In [None]:
!pip install langchain_groq

In [None]:
!pip install groq

In [None]:
!pip install einops

In [None]:
!pip install nomic

In [None]:
!pip install numpy==1.24.1

In [None]:
!pip install gradio

## 2. Import Statements

In [None]:
import json
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer
import gradio as gr
import os
from groq import Groq
from langchain.chains import LLMChain
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_groq import ChatGroq

## 3. Preprocessing Data

In [None]:
def remove_key_from_json_file(file_path, key_to_remove):
    """Strip a top-level key from a JSON file and rewrite the file in place.

    The file may hold either a single JSON object or a list. For a list,
    the key is removed from every element that is an object; other elements
    are left untouched. Any other top-level JSON value is written back
    unchanged. A key that is absent is silently ignored.

    Args:
        file_path: Path of the JSON file to read and then overwrite.
        key_to_remove: Name of the key to drop from each object.
    """
    # Open explicitly as UTF-8 instead of relying on the platform's default
    # encoding, so the same file is read identically on every system.
    with open(file_path, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    # Treat a lone object like a one-element list so both shapes share a
    # single code path; non-dict values simply fall through untouched.
    records = json_data if isinstance(json_data, list) else [json_data]
    for obj in records:
        if isinstance(obj, dict):
            obj.pop(key_to_remove, None)

    # Write the updated JSON data back to the same file
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(json_data, file, indent=4)

# Drop the 'instruction' key from the Psych8k dataset file, in place
file_path = "Alexander_Street_shareGPT_2.0.json"  # Psych8k dataset
key_to_remove = "instruction"

remove_key_from_json_file(file_path=file_path, key_to_remove=key_to_remove)

## 4. Uploading Data to Pinecone

In [None]:
# Initialize the Pinecone client
pc = Pinecone(api_key="INSERT-PINECONE-API-KEY") #INSERT YOUR PINECONE API KEY
index_name = 'mindguardian'

# list_indexes() yields index description objects, not plain name strings,
# so the membership test has to run against .names(). Testing the name
# against the raw listing never matches, which made this cell try to
# re-create the index on every re-run.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # must match the size of the vectors upserted below
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Handle used for all subsequent upserts and queries
index = pc.Index(index_name)

In [None]:
# Read the preprocessed dataset file and parse it into Python objects
with open('Alexander_Street_shareGPT_2.0.json', 'r') as file:
    dataset = json.loads(file.read())

In [None]:
# Sentence-embedding model used to vectorise both the dataset and user queries
embedding_model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
)

In [None]:
def store_embedded_data_in_pinecone(dataset, batch_size=100):
    """Embed every record of *dataset* and upsert the vectors into Pinecone.

    For each record the 'input' and 'output' texts are joined with a single
    space and embedded with the module-level ``embedding_model``. The vector
    is stored in the module-level ``index`` under the record's position in
    *dataset* (as a string id), with the whole record attached as metadata.

    Records are encoded and upserted in batches rather than one at a time,
    which avoids one model call and one network round trip per record.

    Args:
        dataset: Iterable of dicts that each provide string 'input' and
            'output' entries.
        batch_size: Number of records embedded and sent per upsert request.
            Lower it if requests are rejected for being too large.

    Raises:
        ValueError: If *batch_size* is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    def _flush(batch):
        # batch is a list of (vector_id, record) pairs
        texts = [record['input'] + " " + record['output'] for _, record in batch]
        # Encoding a list returns one embedding row per text
        embeddings = embedding_model.encode(texts).tolist()
        index.upsert([
            (vector_id, embedding, record)
            for (vector_id, record), embedding in zip(batch, embeddings)
        ])

    pending = []
    for count, data in enumerate(dataset):
        pending.append((str(count), data))
        if len(pending) == batch_size:
            _flush(pending)
            pending = []

    # Send whatever is left over after the last full batch
    if pending:
        _flush(pending)

# Embed the loaded dataset and upload it to the Pinecone index
store_embedded_data_in_pinecone(dataset=dataset)

## 5. Using the Groq API for Llama 3.1

In [None]:
# Publish the Groq API key through the environment, then read it back
os.environ["GROQ_API_KEY"] = "INSERT-GROQ-API-KEY" # INSERT YOUR GROQ API KEY
groq_api_key = os.environ['GROQ_API_KEY']

# Identifier of the model requested from Groq
# NOTE(review): confirm this model id is still served by Groq before running
llm_model = 'llama-3.1-70b-versatile'

# LangChain chat client used by the conversation chain
groq_chat = ChatGroq(model_name=llm_model, groq_api_key=groq_api_key)

## 6. LLM Code

In [None]:
# Same embedding model as used for the upload step; loaded again here,
# presumably so this section can be run without re-running the upload.
embedding_model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
)

In [None]:
# Open a handle to the 'mindguardian' index for querying
index_name = 'mindguardian'
pc = Pinecone(api_key="INSERT-PINECONE-API-KEY") #INSERT YOUR PINECONE API KEY
index = pc.Index(index_name)

In [None]:
# Persona instruction sent as the first system message of every prompt
system_prompt = (
    'You are a expert mental health counseling chatbot named Mindguardian, '
    'You provide professional mental health counseling to users'
)

# Window size (k) handed to the conversation memory
conversational_memory_length = 5

# Shared memory object; its contents are exposed to the prompt under the
# "chat_history" key as message objects rather than a single string.
memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    k=conversational_memory_length,
    return_messages=True,
)

In [None]:
def query_pinecone(user_query):
    """Look up the stored vectors most similar to *user_query*.

    The query text is embedded with the module-level ``embedding_model`` and
    sent to the module-level Pinecone ``index``.

    Args:
        user_query: Text to search for.

    Returns:
        The 'matches' entry of the Pinecone response: at most five results,
        requested with their metadata included.
    """
    encoded_query = embedding_model.encode(user_query)
    search_result = index.query(
        vector=encoded_query.tolist(),
        top_k=5,
        include_metadata=True,
    )
    return search_result['matches']

In [None]:
def query_llm(user_question,_):
    """Answer *user_question* with the Groq chat model, using retrieved context.

    Similar records are fetched from Pinecone via ``query_pinecone`` and
    placed in the prompt next to the system prompt and the conversation
    history held in the module-level ``memory``.

    Args:
        user_question: The user's current message.
        _: Ignored. Presumably the chat history supplied by the Gradio chat
            interface; history is tracked by ``memory`` instead.

    Returns:
        The model's reply text.
    """
    retrieved = query_pinecone(user_question)

    # Persistent persona instruction, always first in the prompt
    persona_message = SystemMessage(content=system_prompt)

    # Filled in with the remembered conversation under "chat_history"
    history_slot = MessagesPlaceholder(variable_name="chat_history")

    # Retrieved matches, offered to the model as optional context
    context_message = SystemMessage(
        content=f"Use this context only if relevant to user query: {retrieved}"
    )

    # Slot where the current user message is injected
    user_turn = HumanMessagePromptTemplate.from_template(
        "User query: {human_input}"
    )

    chat_prompt = ChatPromptTemplate.from_messages(
        [persona_message, history_slot, context_message, user_turn]
    )

    # Chain tying the Groq chat model (a large language model), the prompt
    # and the shared memory together; set verbose=True when debugging.
    chain = LLMChain(
        llm=groq_chat,
        prompt=chat_prompt,
        memory=memory,
        verbose=False,
    )

    # Running the chain sends the full prompt to the Groq API
    return chain.predict(human_input=user_question)

## 7. Gradio UI

In [None]:
# Greeting shown in the chat window before the user has typed anything
default_message = "I'm MindGuardian, a mental health counseling chatbot. How can I help you?"

# Chat window pre-filled with the greeting as a bot turn with no user turn
chat_window = gr.Chatbot(value=[[None, default_message]])

# Input box for the user's message
query_box = gr.Textbox(placeholder="Type your query", container=False, scale=7)

# Gradio chat interface wired to query_llm
# NOTE(review): retry_btn / undo_btn / clear_btn depend on the installed
# gradio version accepting them - confirm, since the install is unpinned.
gradio_interface = gr.ChatInterface(
    query_llm,
    chatbot=chat_window,
    textbox=query_box,
    title="Mindguardian, a mental health counseling chatbot",
    theme='gradio/base',  # themes at https://huggingface.co/spaces/gradio/theme-gallery
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

# Start serving the interface
gradio_interface.launch()

In [None]:
# Shut down the Gradio interface launched in the previous cell
gradio_interface.close()