## Installations

In [1]:
!pip install langchain
!pip install sentence_transformers
!pip install faiss-gpu
!pip install google-generativeai
!pip install --upgrade --quiet langchain-google-genai
!pip install -q -U google-generativeai
!pip install openai==0.28
!pip install faiss-cpu
!pip install sentence-transformers

[31mERROR: Could not find a version that satisfies the requirement faiss-gpu (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for faiss-gpu[0m[31m


In [2]:
pip install --upgrade pip

Collecting pip
  Using cached pip-24.0-py3-none-any.whl.metadata (3.6 kB)
Using cached pip-24.0-py3-none-any.whl (2.1 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.3.1
    Uninstalling pip-23.3.1:
      Successfully uninstalled pip-23.3.1
Successfully installed pip-24.0
Note: you may need to restart the kernel to use updated packages.


## Importing the packages

In [1]:
from langchain.chains import RetrievalQA
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.llms import OpenAI

## Data Loading

In [2]:


loader = CSVLoader(file_path='Dataset/dataset.csv', source_column="Question")
data = loader.load()

## Instructor embeddings


In [3]:


instructor_embeddings = HuggingFaceBgeEmbeddings(model_name="hkunlp/instructor-large")

e = instructor_embeddings.embed_query("Kia German zuban systematic hai?")

In [4]:
len(e)

768

In [5]:
type(e)

list

In [6]:
print(e[:10])

[-0.008809738792479038, -0.008304326795041561, -0.00048444615094922483, 0.018991028890013695, -0.005874128080904484, 0.04169213026762009, -0.0023592321667820215, 0.03036239556968212, -0.05335968732833862, 0.02770894020795822]


## FAISS Vector Database

In [7]:

vectordb = FAISS.from_documents(documents=data, embedding=instructor_embeddings)

retriever = vectordb.as_retriever()

In [8]:
question = "German or english ma kya farq ha?"
rdocs = retriever.get_relevant_documents(question)

In [9]:
rdocs

[Document(page_content="Question: Kya English or German me saal ka hawala dete hue koi farq hai?\nAnswer: Haan, English or German me saal ka hawala dete hue kafi farq hai. English me 'in' ka istemal hota hai jabki German me nahi.", metadata={'source': 'Kya English or German me saal ka hawala dete hue koi farq hai?', 'row': 1737}),
 Document(page_content='Question: Kia German zuban mein English zuban se koi farq hai?\nAnswer: Jee han, German zuban English se mukhtalif hai. Kuch basic principles ko samajhna aapke liye helpful ho sakta hai.', metadata={'source': 'Kia German zuban mein English zuban se koi farq hai?', 'row': 2}),
 Document(page_content='Question: German language ki kaunsi khasiyat ke bare mein bataya gaya hai?\nAnswer: German language ki khasiyat ye hai ke hum noun ka kirdar uske akhir aur article ke form se jaan sakte hain.', metadata={'source': 'German language ki kaunsi khasiyat ke bare mein bataya gaya hai?', 'row': 36}),
 Document(page_content='Question: Paragraph ke 

## OPEN AI GPT MODEL 

In [10]:
import openai

# openai.api_key = ''


def answer_question_based_on_context(context, question):
  response = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      messages=[
          {"role": "system", "content": """Given the following context and a question, generate an answer based on this context only.
In the answer try to provide as much text as possible from "Answer" section in the source document context without making much changes.
If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer. First understand the question and then generate the answer kindly."""},
          {"role": "user", "content": f"Context: {context}\nQuestion: {question}"}
      ],
      temperature=1,
      max_tokens=150,
      top_p=1.0,
      frequency_penalty=0.0,
      presence_penalty=0.0
  )
  # Extracting the text from the last response in the chat
  if response.choices:
      last_choice = response.choices[-1]
      answer = last_choice.message['content'].strip()
      return answer

## Testing

In [11]:
question = "German mein naye ism (noun) ko kaise seekhna chahiye?"
rdocs = retriever.get_relevant_documents(question)
context = str(rdocs[0])
answer = answer_question_based_on_context(context, question)
print(answer)

Answer: Paragraph ke mutabiq, jab aap German mein ek naya ism (noun) seekhte hain, to aapko hamesha uske jins (gender) ke sath seekhna chahiye.


## Flask Application

In [13]:
pip install Flask

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [14]:
from flask import Flask, request, jsonify
import openai

app = Flask(__name__)

# openai.api_key = 'YOUR_API_KEY_HERE'

@app.route('/answer', methods=['POST'])
def answer_question_based_on_context():
    data = request.json  # Assumes the question is sent as JSON
    context = data.get('context')
    question = data.get('question')
    
    if not context or not question:
        return jsonify({"error": "Missing context or question"}), 400

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": """Given the following context and a question, generate an answer based on this context only. In the answer try to provide as much text as possible from "Answer" section in the source document context without making much changes. If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer. First understand the question and then generate the answer kindly."""},
                {"role": "user", "content": f"Context: {context}\nQuestion: {question}"}
            ],
            temperature=1,
            max_tokens=150,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0
        )

        if response.choices:
            last_choice = response.choices[-1]
            answer = last_choice.message['content'].strip()
            return jsonify({"answer": answer})

    except Exception as e:
        return jsonify({"error": str(e)}), 500

    return jsonify({"error": "Failed to generate an answer"}), 500

if __name__ == '__main__':
    app.run(debug=True, port=5005)  # Turn off debug mode in production


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5005
[33mPress CTRL+C to quit[0m
 * Restarting with stat
0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/home/jadi/anaconda3/envs/fyp-work/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/jadi/anaconda3/envs/fyp-work/lib/python3.11/site-packages/traitlets/config/application.py", line 1074, in launch_instance
    app.initialize(argv)
  File "/home/jadi/anaconda3/envs/fyp-work/lib/python3.11/site-packages/traitlets/config/application.py", line 118, in inner
    return method(app, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/jadi/anaconda3/env

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [1]:
## Testing

In [15]:
tests = [
    {
    "question": "kia german zuban systematic hai?",
    "answer": "Jee han, German zuban systematic hai aur rules ko follow karti hai."
    },
    {
        "question": "German zuban Samajhna mushkil ha?",
        "answer": "Agar aap German zuban ko step by step samajhte hain to yeh aap soch rahe hain se ziada asan ho sakti hai."
,   },
    {
        "question": "German mein umlauts ka istemal lafz ki ma'ani ko kaise badal deta hai?",
        "answer": "German mein umlauts ka istemal lafz ki talaffuz aur ma'ani dono ko badal deta hai."
    },
    {
        "question": "Verb ki koi misalein btaoo",
        "answer": "Verb ki misalein hain: 'Wo gussa hai', 'Wo wahan rehti hai', 'Wo mujhse mohabbat karte hain.'"
    },
    {
        "question": "Kaise verbs ko English mein rakha jata hai?",
        "answer": "English mein, verbs ko sirf mojooda zamane mein teesray shakhs ki wahid form ke siwa kisi bhi form mein nahin rakha jata."
    },
]

# Function to calculate accuracy based on correct answers
def calculate_accuracy(tests):
    total_correct = 0
    total_tests = len(tests)

    for test in tests:
        # question = "German mein naye ism (noun) ko kaise seekhna chahiye?"
        question = test["question"]
        rdocs = retriever.get_relevant_documents(question)
        context = str(rdocs[0])
        
        expected_answer = test["answer"]
        generated_answer = answer_question_based_on_context(context, question)
        print(expected_answer, generated_answer)
        if generated_answer == expected_answer:
            total_correct += 1

    accuracy = total_correct / total_tests
    return accuracy

# Calculate and print accuracy
accuracy = calculate_accuracy(tests)
print("Accuracy:", accuracy)



Jee han, German zuban systematic hai aur rules ko follow karti hai. Answer: Jee han, German zuban systematic hai aur rules ko follow karti hai.
Agar aap German zuban ko step by step samajhte hain to yeh aap soch rahe hain se ziada asan ho sakti hai. I don't know.
German mein umlauts ka istemal lafz ki talaffuz aur ma'ani dono ko badal deta hai. Answer: German mein umlauts ka istemal lafz ki talaffuz aur ma'ani dono ko badal deta hai.
Verb ki misalein hain: 'Wo gussa hai', 'Wo wahan rehti hai', 'Wo mujhse mohabbat karte hain.' Answer: Verb ki misalein hain: 'Wo gussa hai', 'Wo wahan rehti hai', 'Wo mujhse mohabbat karte hain.'
English mein, verbs ko sirf mojooda zamane mein teesray shakhs ki wahid form ke siwa kisi bhi form mein nahin rakha jata. Answer: English mein, verbs ko sirf mojooda zamane mein teesray shakhs ki wahid form ke siwa kisi bhi form mein nahin rakha jata.
Accuracy: 0.0
