# Installs

In [None]:
%%time

# Install the pinned LangChain / Chroma / Groq / unstructured stack used by this
# notebook. clear_output() wipes pip's log once the install has finished, so only
# the %%time summary remains in the cell output.
from IPython.display import clear_output
!pip install langchain-community==0.2.15 langchain-chroma==0.1.3 langchain-text-splitters==0.2.2 langchain-huggingface==0.0.3 langchain-groq==0.1.9 unstructured==0.15.0 unstructured[pdf]==0.15.0 nltk==3.8.1
clear_output()

CPU times: user 607 ms, sys: 149 ms, total: 757 ms
Wall time: 56.2 s


In [None]:
# Extra OCR / PDF packages.
# NOTE(review): unlike the first install cell, these installs are unpinned, and
# pymupdf / pdfplumber are not imported anywhere in the visible notebook —
# confirm they are needed and pin them for reproducibility.
# clear_output is the name imported in the first install cell.
!pip install pytesseract
!pip install pymupdf pdfplumber
clear_output()

# Imports

In [None]:

import warnings
warnings.filterwarnings("ignore")
import os

from langchain_community.document_loaders import UnstructuredPDFLoader, DirectoryLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

# Groq API

In [None]:
# Never commit an API key to the notebook: anyone who can read the file (or its
# history) can use it. Take the key from the environment when it is already
# set, otherwise prompt for it without echoing the input.
from getpass import getpass

GROQ_API_KEY = os.environ.get("GROQ_API_KEY") or getpass("Enter your Groq API key: ")

In [None]:
# Export the key through the process environment. The ChatGroq instances below
# are created without an explicit key, so they presumably read this variable.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Load documents

In [None]:
# Folder that is scanned for the source PDFs.
directory_path = "/content/"

# Each file matching the glob is parsed with UnstructuredPDFLoader.
loader = DirectoryLoader(
    directory_path,
    glob="./*.pdf",
    loader_cls=UnstructuredPDFLoader,
)
documents = loader.load()

# Report how many documents were loaded.
print(f"Nombre de documents chargés : {len(documents)}")

Nombre de documents chargés : 4


# Split documents

In [None]:
%%time
# Cut the loaded documents into overlapping chunks for embedding.
# chunk_overlap repeats part of the previous chunk at the start of the next one
# so content that straddles a boundary is not lost to the retriever.
# NOTE(review): sizes are presumably measured in characters (the splitter's
# default length function) — confirm against the library documentation.
text_splitter = CharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=500
)

# One list of chunk Documents covering every loaded PDF.
text_chunks = text_splitter.split_documents(documents)



CPU times: user 17.5 ms, sys: 1.04 ms, total: 18.5 ms
Wall time: 19.4 ms


In [None]:
# Folder (relative to the working directory) handed to Chroma as its
# persist_directory when the vector store is built.
persist_directory = "doc_db"

In [None]:
from IPython.display import clear_output

# Name the embedding model explicitly instead of relying on the library default.
# The vectors stored in the persisted Chroma index are only comparable with
# query vectors produced by the same model, so a silent change of default in a
# later library version would break retrieval against an existing index.
# This is the model the default resolved to when the notebook was recorded.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# Create Embeddings

In [None]:
# Chroma.from_documents() adds to whatever collection already exists in
# persist_directory, so re-running this cell would index every chunk a second
# time and the retriever would start returning duplicates. Re-open the
# persisted index when one is present and build it only on the first run.
# To force a rebuild (e.g. after changing the PDFs), delete persist_directory.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectorstore = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding,
    )
else:
    vectorstore = Chroma.from_documents(
        documents=text_chunks,
        embedding=embedding,
        persist_directory=persist_directory
    )

In [None]:
# Wrap the vector store as a retriever (library-default search settings); this
# is the object handed to RetrievalQA below.
retriever = vectorstore.as_retriever()

# Model

In [None]:
# Groq-hosted chat model used to write the answers; temperature 0 keeps
# sampling as deterministic as the service allows.
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0)

In [None]:
# Retrieval-augmented QA chain built from the retriever and the chat model.
# return_source_documents=True makes every response carry the retrieved chunks
# alongside the generated answer.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    return_source_documents=True,
)

# Test

In [None]:
# First smoke test of the chain. The response is a dict holding the original
# 'query', the generated 'result' and the retrieved 'source_documents'.
query = " What are the main constraints faced by farmers in adopting drip irrigation?"
response = qa_chain.invoke({"query":query})

In [None]:
# Dump the whole response dict, including the text of every retrieved chunk.
print(response)

{'query': ' What are the main constraints faced by farmers in adopting drip irrigation?', 'result': 'According to the provided information, the main constraints faced by farmers in adopting drip irrigation are:\n\n1. Non-availability of quality material\n2. No follow-up services by drip agencies\n3. High initial investment cost\n4. Lack of capital to cover maximum holding under drip irrigation\n5. Delay in sanction of loan\n6. Leakage in the present drip system', 'source_documents': [Document(metadata={'source': '/content/df1.pdf'}, page_content='Collected information were analysed and results are presented in the following table.\n\nTable 1. Benefits of drip irrigation\n\nS.\n\nBenefits of Drip irrigation farmers\n\nNo. expressing the advantages\n\nNo.\n\nNumber\n\nPer cent\n\n(n= 30)\n\n(%)\n\n1.\n\nSaving of water\n\n28\n\n93.33\n\n2.\n\nSaving of labour cost for irrigation\n\n22\n\n73.33\n\n3.\n\nUniform application\n\n27\n\n90.00\n\n4.\n\nImproved quality of produce\n\n20\n\n66.67

In [None]:
# Show only the generated answer text.
print(response["result"])

According to the provided information, the main constraints faced by farmers in adopting drip irrigation are:

1. Non-availability of quality material
2. No follow-up services by drip agencies
3. High initial investment cost
4. Lack of capital to cover maximum holding under drip irrigation
5. Delay in sanction of loan
6. Leakage in the present drip system


In [None]:
# Follow-up question; `query` and `response` from the previous test are
# overwritten, and only the generated answer is printed.
query = "How does the non-availability of quality materials affect drip irrigation adoption?"
response = qa_chain.invoke({"query":query})
print(response["result"])

According to the provided text, the non-availability of quality materials is one of the constraints encountered by the farmers who adopt drip irrigation. This constraint can affect drip irrigation adoption in several ways:

1. **Inadequate performance**: Low-quality materials can lead to poor performance of the drip irrigation system, resulting in reduced water application efficiency, uneven water distribution, and decreased crop yields.
2. **Increased maintenance**: Substandard materials may require more frequent maintenance, repairs, or replacement, which can be time-consuming and costly for farmers.
3. **Reduced system lifespan**: Poor-quality materials can lead to a shorter lifespan of the drip irrigation system, requiring farmers to invest in new equipment more frequently.
4. **Increased risk of clogging**: Low-quality drippers or emitters can clog more easily, reducing the system's efficiency and effectiveness.
5. **Decreased farmer confidence**: The non-availability of quality m

# Evaluation

In [None]:
# Hand-written (question, answer, supporting chunk) triple used to try out the
# evaluation prompt. It is independent of the PDFs and of the RAG chain.
# Note that `query` is reused here and replaces the earlier RAG question.
query = "What is the capital of France?"
generated_answer = "The capital of France is Paris."
chunk = "Paris, the capital of France, is known for its art, fashion, and culture."


In [None]:
# LLM-as-judge prompt: asks the model to score the sample answer on four
# criteria (1-10 each) using the question, the answer and the supporting chunk
# defined in the previous cell.
evaluation_prompt = f"""
Please evaluate the following response based on these criteria:
1. **Relevance**: Does the answer directly address the question?
2. **Coherence**: Is the answer logically structured and internally consistent?
3. **Accuracy**: Is the answer factually correct based on the context?
4. **Clarity**: Is the answer well-formulated and easy to understand?

**Question**: {query}
**Generated Answer**: {generated_answer}
**Supporting Chunk**: {chunk}

Provide a score from 1 (very poor) to 10 (excellent) for each criterion and explain your score.
"""



In [None]:
from langchain_groq import ChatGroq

# Judge model: same Groq model and temperature as the answering model `llm`.
model = ChatGroq(model="llama-3.1-8b-instant", temperature=0)

# A chat model takes the prompt directly as a string (not wrapped in a dict
# the way the RetrievalQA chain expects). `response` is overwritten here.
response = model.invoke(evaluation_prompt) # plain string input

# The reply is a message object; its text is in the `content` attribute.
print(response.content)

Here's the evaluation of the response based on the given criteria:

1. **Relevance**: 10/10
The answer directly addresses the question, providing the correct information about the capital of France.

2. **Coherence**: 9/10
The answer is logically structured and internally consistent. However, the supporting chunk seems unrelated to the main answer and could be removed without affecting the coherence of the response.

3. **Accuracy**: 10/10
The answer is factually correct based on the context. Paris is indeed the capital of France.

4. **Clarity**: 9/10
The answer is well-formulated and easy to understand. However, the supporting chunk could be considered unnecessary and might make the response slightly less clear by adding an unrelated piece of information.

Overall, the response is strong, with high scores in relevance, accuracy, and clarity. The coherence score could be improved by removing the supporting chunk or making it more relevant to the main answer.


In [None]:
import json

# Load the JSON file containing queries and expected answers.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding.
with open("/content/test.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Initialize the judge model (ChatGroq in this case); evaluate_responses()
# reads it from the notebook namespace.
model = ChatGroq(model="llama-3.1-8b-instant", temperature=0)

# Function to evaluate answers based on the queries and chunks
def evaluate_responses(data, llm=None):
    """Ask an LLM judge to score every entry of an evaluation set and print it.

    Args:
        data: Mapping with a ``'data'`` list; each item must provide the keys
            ``'query'``, ``'expected_answer'`` and ``'chunk'``.
        llm: Object exposing ``invoke(prompt)`` whose result has a ``content``
            attribute. Defaults to the notebook-level ``model`` so existing
            ``evaluate_responses(data)`` calls keep working.

    Returns:
        None. For each item the query, the judge's reply and a separator line
        are printed.

    Raises:
        KeyError: If ``data`` or one of its items lacks a required key.

    NOTE(review): the text placed in the prompt's "Generated Answer" slot is
    the file's ``expected_answer``; the RAG chain is never called here, so this
    scores the reference answers rather than the pipeline's output — confirm
    that this is intended.
    """
    import textwrap

    judge = model if llm is None else llm

    # Dedent the template *before* filling it in, so that multi-line answers or
    # chunks cannot defeat the dedent and the judge receives the same flush-left
    # layout as the standalone evaluation prompt earlier in the notebook.
    prompt_template = textwrap.dedent("""
        Please evaluate the following response based on these criteria:
        1. **Relevance**: Does the answer directly address the question?
        2. **Coherence**: Is the answer logically structured and internally consistent?
        3. **Accuracy**: Is the answer factually correct based on the context?
        4. **Clarity**: Is the answer well-formulated and easy to understand?

        **Question**: {query}
        **Generated Answer**: {expected_answer}
        **Supporting Chunk**: {chunk}

        Provide a score from 1 (very poor) to 10 (excellent) for each criterion and explain your score.
        """)

    for item in data['data']:
        query = item['query']
        expected_answer = item['expected_answer']
        chunk = item['chunk']

        # Prepare the prompt for the judge model.
        evaluation_prompt = prompt_template.format(
            query=query,
            expected_answer=expected_answer,
            chunk=chunk,
        )

        # Send the evaluation request to the judge model.
        response = judge.invoke(evaluation_prompt)

        # Print out the evaluation result.
        print(f"Evaluation for Query: {query}")
        print(response.content)
        print("-" * 80)

# Run the LLM-judged evaluation over every entry loaded from test.json;
# the results are printed, nothing is returned.
evaluate_responses(data)


Evaluation for Query: Who wrote *Don Quixote*?
**Relevance: 10**
The answer directly addresses the question, providing the name of the author of *Don Quixote*. The supporting chunk further reinforces this answer by providing additional context about the author.

**Coherence: 9**
The answer is logically structured and internally consistent. However, the supporting chunk could be more integrated with the answer, rather than being a separate statement. A more coherent approach would be to include the additional context within the answer itself.

**Accuracy: 10**
The answer is factually correct. Miguel de Cervantes is widely recognized as the author of *Don Quixote*. The supporting chunk also provides accurate information about the author.

**Clarity: 9**
The answer is well-formulated and easy to understand. However, the supporting chunk could be more concise and directly related to the answer. A more clear approach would be to integrate the supporting chunk into the answer, making it a si

# Save and Export the Model

In [None]:
"""import json

# First, get the necessary attributes of the model you want to save
model_config = {
    "model": model.model_name,  # Changed from model.model to model.model_name
    # Add other relevant attributes like temperature, etc. if needed
    "temperature": model.temperature
}

# Then, save the configuration as a JSON file:
with open("./saved_model_config.json", "w") as file:
    json.dump(model_config, file)

print("Model configuration saved successfully.")

# To reload the model:
with open("./saved_model_config.json", "r") as file:
    model_config = json.load(file)

# Create a new instance of ChatGroq using the saved configuration:
reloaded_model = ChatGroq(**model_config)  # ** unpacks the dictionary into keyword arguments

Model configuration saved successfully.


In [None]:
import shutil

# Zip the persisted Chroma folder; make_archive appends ".zip" to base_name.
shutil.make_archive(
    base_name="/content/doc_db",
    format="zip",
    root_dir="/content/doc_db",
)
print("Dossier compressé avec succès en /content/doc_db.zip")


Dossier compressé avec succès en /content/doc_db.zip


In [None]:
!pip install flask ngrok


Collecting ngrok
  Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m56.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ngrok
Successfully installed ngrok-1.4.0


In [None]:
from flask import Flask, request, jsonify
from threading import Thread
import time

app = Flask(__name__)


# Example stand-in for a RAG chain (not used by the route below).
def mock_qa_chain(query):
    """Return a canned answer for ``query``, simulating a model response."""
    return {"result": f"Réponse à la question : {query}"}


@app.route("/ask", methods=["POST"])
def ask():
    """Answer a question posted as JSON ``{"query": "<question>"}``.

    Returns:
        ``{"response": <answer text>}`` on success, or ``{"error": <message>}``
        with HTTP status 400 when the body is not a JSON object or carries no
        non-empty string under ``"query"``.
    """
    # silent=True yields None instead of raising when the body is not JSON, so
    # every malformed request is reported through the same 400 response.
    payload = request.get_json(silent=True)
    query = payload.get("query") if isinstance(payload, dict) else None
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "Request body must be JSON with a non-empty 'query' string."}), 400

    # Same calling convention as the test cells earlier in the notebook.
    result = qa_chain.invoke({"query": query})
    return jsonify({"response": result["result"]})  # return only the answer text


def run_app():
    """Serve the Flask app on port 5001 (blocks the calling thread)."""
    # NOTE(review): 0.0.0.0 listens on every interface; the ngrok tunnel only
    # needs localhost — consider 127.0.0.1.
    app.run(host="0.0.0.0", port=5001)


# Run the server in a background thread so the notebook stays usable.
# daemon=True lets the process exit without waiting for the server thread.
thread = Thread(target=run_app, daemon=True)
thread.start()

# Give the Flask server a moment to start.
time.sleep(5)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5001
 * Running on http://172.28.0.12:5001
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [None]:
from pyngrok import ngrok

# Open an ngrok tunnel to local port 5001 (the port the Flask app listens on).
public_url = ngrok.connect(5001)

# Print the tunnel object; its text form shows the public URL and the local
# address it forwards to.
print(f"Votre API est maintenant accessible à l'adresse : {public_url}")


Votre API est maintenant accessible à l'adresse : NgrokTunnel: "https://34a5-34-147-79-96.ngrok-free.app" -> "http://localhost:5001"


In [None]:
import requests

# Build the endpoint from the live tunnel rather than a pasted hostname: ngrok
# hands out a new address each time a tunnel is opened, so a hardcoded URL
# stops working on the next run.
url = f"{public_url.public_url}/ask"

# The question to ask.
query = {"query": "What drip irrigation?"}

# Send the POST request to the Flask API. The timeout keeps the cell from
# hanging forever if the tunnel or the model is unreachable.
response = requests.post(url, json=query, timeout=120)

# Surface HTTP errors explicitly instead of failing later while decoding a
# non-JSON error page.
response.raise_for_status()

# Show the API's answer.
print(response.json())


INFO:werkzeug:127.0.0.1 - - [01/Dec/2024 04:09:27] "POST /ask HTTP/1.1" 200 -


{'response': 'According to the provided information, the main constraints faced by farmers in adopting drip irrigation are:\n\n1. Non-availability of quality material\n2. No follow-up services by drip agencies\n3. High initial investment cost\n4. Lack of capital to cover maximum holding under drip irrigation\n5. Delay in sanction of loan\n6. Leakage in the present drip system'}
