In [4]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import gradio as gr

# Step 1: Create mock DataFrame
# Three short text chunks stand in for a real document store.
data = {
    'document_id': [1, 2, 3],
    'text_chunk': [
        "This is the first document about AI.",
        "This document discusses machine learning and its applications.",
        "The third document covers natural language processing techniques."
    ]
}
df = pd.DataFrame(data)

# Step 2: Initialize your model and embed the document chunks
model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode every chunk in one batched call instead of one model invocation per
# row. `encode` on a list returns one embedding per input; `list(...)` splits
# that result so each DataFrame row holds its own vector.
df['embeddings'] = list(model.encode(df['text_chunk'].tolist()))

# Step 3: Define the search function
def search(query, df, top_n=2):
    """Return the text chunks most similar to ``query``.

    Args:
        query: Text to embed with the module-level ``model``.
        df: DataFrame with an ``embeddings`` column (one vector per row) and
            a ``text_chunk`` column. It is NOT modified by this function.
        top_n: Maximum number of chunks to return.

    Returns:
        A list of ``text_chunk`` values ordered from most to least similar.
        An empty list when ``df`` has no rows.
    """
    # Nothing to rank; also avoids calling the model for no reason.
    if df.empty:
        return []

    query_embedding = model.encode(query)
    scores = df['embeddings'].apply(
        lambda emb: util.cos_sim(emb, query_embedding).item()
    )
    # `assign` works on a copy, so the caller's (shared, module-level) frame
    # does not silently gain or overwrite a 'similarity' column on each call.
    ranked = df.assign(similarity=scores).nlargest(top_n, 'similarity')
    return ranked['text_chunk'].tolist()

# Step 4: Combine query with top search results
def combine_query_with_results(query, top_chunks):
    """Build the generator input: the query, a newline, then the chunks.

    Chunks are separated from each other by newlines. When ``top_chunks`` is
    empty the result is the query followed by a single trailing newline.
    """
    context_block = "\n".join(top_chunks)
    return "\n".join((query, context_block))

# Step 5: Generate answers (placeholder function)
def generate_answer(combined_input):
    """Placeholder generator: echo the combined input inside a fixed sentence.

    No model is called here; swap the body for a real generative model call.
    """
    placeholder_answer = f"Generated answer based on the input: {combined_input}"
    return placeholder_answer

# Gradio Interface Function
def qa_system(user_query):
    """Run the full pipeline for one question and return the answer text.

    Retrieves the best-matching chunks from the module-level ``df``, joins
    them with the question, and passes the result to ``generate_answer``.
    """
    return generate_answer(
        combine_query_with_results(user_query, search(user_query, df))
    )

# Step 6: Create Gradio interface
# Two-line free-text box for the user's question.
question_box = gr.Textbox(lines=2, placeholder="Enter your question here...")

iface = gr.Interface(
    title="Document QA System",
    description="Ask a question and get answers based on the provided documents.",
    fn=qa_system,
    inputs=question_box,
    outputs="text",
)

# Step 7: Launch the Gradio app
iface.launch()


  from tqdm.autonotebook import tqdm, trange


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [7]:
import os

from dotenv import load_dotenv

# Populate the process environment via python-dotenv before reading the key.
load_dotenv()

# Indexing (rather than .get) raises KeyError right here if the variable is
# missing, instead of failing later with a less obvious error.
openapi_key = os.environ['OPENAI_API_KEY']

In [None]:
import openai
from langsmith.wrappers import wrap_openai
from langsmith import traceable

# Build the plain OpenAI client first, then hand it to LangSmith's
# `wrap_openai` (presumably so calls made through `client` are traced --
# confirm against the LangSmith docs).
_openai_client = openai.Client()
client = wrap_openai(_openai_client)


In [None]:
from langsmith import Client
from dotenv import load_dotenv
import os
# NOTE: this is the OpenAI key, not a LangSmith credential. It is kept only so
# that any later cell reading `api_key` keeps working; it must not be passed
# to the LangSmith `Client`.
api_key = os.environ['OPENAI_API_KEY']

# LangSmith client built with no explicit arguments, so it relies on its own
# configuration (presumably LangSmith environment variables -- confirm).
# NOTE(review): this rebinds `client`, which an earlier cell bound to the
# wrapped OpenAI client.
client = Client()

# Define dataset: these are your test cases
dataset_name = "QA Example Dataset"

# Make the cell safe to re-run: only create the dataset and its examples the
# first time, otherwise reuse the existing dataset instead of failing on a
# name conflict or inserting duplicate examples.
if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
else:
    dataset = client.create_dataset(dataset_name)
    client.create_examples(
        inputs=[
            {"question": "What is LangChain?"},
            {"question": "What is LangSmith?"},
            {"question": "What is OpenAI?"},
            {"question": "What is Google?"},
            {"question": "What is Mistral?"},
        ],
        outputs=[
            {"answer": "A framework for building LLM applications"},
            {"answer": "A platform for observing and evaluating LLM applications"},
            {"answer": "A company that creates Large Language Models"},
            {"answer": "A technology company known for search"},
            {"answer": "A company that creates Large Language Models"},
        ],
        dataset_id=dataset.id,
    )

In [None]:
from langchain.output_parsers import DatetimeOutputParser,CommaSeparatedListOutputParser , PydanticOutputParser

# Output parser instance built with default settings; presumably used by later
# cells to turn model text into datetime values -- TODO confirm where it is used.
date_time_parser = DatetimeOutputParser()