In [4]:
from git import Repo
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory

In [5]:
# Create the checkout directory with the standard library instead of a shell
# escape: this behaves the same on every platform and is a no-op (rather than
# an error message) when the directory already exists on a re-run.
os.makedirs("test_repo", exist_ok=True)

In [6]:
repo_path = "test_repo/"

# git refuses to clone into a directory that already contains files, so a
# second run of this cell would fail. Re-open the existing checkout when one
# is already present and only clone on the first run.
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from(
        "https://github.com/chetanp2002/Food-Delivery-Prediction",
        to_path=repo_path,
    )

In [7]:
# Build a loader over the cloned repository: every path under repo_path is
# globbed, only files with a ".py" suffix are kept, and each one is handed to
# a LanguageParser configured for Python.
# NOTE(review): parser_threshold=500 is presumably a minimum size before the
# parser segments a file — confirm against the LanguageParser docs.
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py"],
    parser=LanguageParser(
        language=Language.PYTHON,
        parser_threshold=500,
    ),
)

In [8]:
# Materialise everything the loader finds into a list of Document objects.
documents = loader.load()

In [9]:
# Display the loaded documents (a bare last expression becomes the cell output).
documents

[Document(metadata={'source': 'test_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content='import pickle\nimport pandas as pd\nimport streamlit as st\nimport numpy as np\nfrom geopy.distance import geodesic\nfrom sklearn.preprocessing import LabelEncoder\nimport requests\n\n\nwith open(\'models/scaler.pkl\', \'rb\') as f:\n    scaler = pickle.load(f)\n\nwith open(\'models/model.pkl\', \'rb\') as f:\n    model = pickle.load(f)\n\n\n\ndef extract_column_value(df):\n    # #Extract time and convert to int\n    # df[\'Time_taken(min)\'] = df[\'Time_taken(min)\'].apply(lambda x: int(x.split(\' \')[1].strip()))\n    #Extract Weather conditions\n    # df[\'Weather_conditions\'] = df[\'Weather_conditions\'].apply(lambda x: x.split(\' \')[1].strip())\n    #Extract city code from Delivery person ID\n    df[\'City_code\']=df[\'Delivery_person_ID\'].str.split("RES", expand=True)[0]\n    \n\n\n\n\ndef extract_date_features(data):\n    data["day"] = data.Order_Date.dt.day\n    data["m

In [10]:
# Splitter that uses Python-specific separators, configured with
# chunk_size=500 and chunk_overlap=20.
document_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_size=500,
    chunk_overlap=20,
)

In [11]:
# Break the loaded documents into chunks with the splitter configured above.
texts = document_splitter.split_documents(documents)

In [12]:
# Display the chunked documents produced by the splitter.
texts

[Document(metadata={'source': 'test_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content="import pickle\nimport pandas as pd\nimport streamlit as st\nimport numpy as np\nfrom geopy.distance import geodesic\nfrom sklearn.preprocessing import LabelEncoder\nimport requests\n\n\nwith open('models/scaler.pkl', 'rb') as f:\n    scaler = pickle.load(f)\n\nwith open('models/model.pkl', 'rb') as f:\n    model = pickle.load(f)"),
 Document(metadata={'source': 'test_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content='def extract_column_value(df):\n    # #Extract time and convert to int\n    # df[\'Time_taken(min)\'] = df[\'Time_taken(min)\'].apply(lambda x: int(x.split(\' \')[1].strip()))\n    #Extract Weather conditions\n    # df[\'Weather_conditions\'] = df[\'Weather_conditions\'].apply(lambda x: x.split(\' \')[1].strip())\n    #Extract city code from Delivery person ID\n    df[\'City_code\']=df[\'Delivery_person_ID\'].str.split("RES", expand=True)[0]'),
 Doc

In [13]:
# Number of chunks produced by the splitter.
len(texts)

26

In [14]:
# Load variables via python-dotenv, then read the Hugging Face token from the
# process environment. HF_TOKEN is None when the variable is not defined.
from dotenv import load_dotenv

load_dotenv()
HF_TOKEN = os.environ.get('HF_TOKEN')

In [15]:
# os.environ only accepts string values: assigning None (token missing from
# the environment) raises TypeError. Export the token only when one was found.
if HF_TOKEN:
    os.environ['HF_TOKEN'] = HF_TOKEN

In [16]:
# Groq API key read from the environment; None when GROQ_API_KEY is not set.
groq_api_key = os.environ.get('GROQ_API_KEY')

In [17]:
# Embedding model used to vectorise the code chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from .autonotebook import tqdm as notebook_tqdm


In [18]:
# Embed the chunks and store them in a Chroma vector store whose
# persist_directory is ./db.
# NOTE(review): re-running this cell against an existing ./db presumably adds
# the same chunks a second time — confirm, and clear ./db first if so.
vectordb = Chroma.from_documents(
    texts,
    embedding=embeddings,
    persist_directory='./db',
)

In [19]:
# Ask the vector store to write itself to its persist_directory.
# NOTE(review): the saved cell output echoes this line as a warning source;
# presumably the installed Chroma integration persists automatically and flags
# this call as deprecated — confirm before removing it.
vectordb.persist()

  vectordb.persist()


In [20]:
# Chat model served by Groq; the same instance is handed to both the
# conversation memory and the retrieval chain further down.
llm = ChatGroq(
    model='gemma2-9b-it',
    groq_api_key=groq_api_key,
)

In [21]:
# Conversation memory built on the LLM above and exposed to the chain under
# the "chat_history" key, with return_messages enabled.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

  memory = ConversationSummaryMemory(llm=llm, memory_key = "chat_history", return_messages=True)


In [22]:
# Conversational QA chain: the vector store is wrapped as a retriever using
# the "mmr" search type with k=8, and chat history is supplied by `memory`.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectordb.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 8},
    ),
    memory=memory,
)

In [23]:
# Natural-language question posed to the retrieval chain in the next cell.
question = "what is extract_column_value funtion?"

In [24]:
# Calling the chain object directly (`qa(question)`) goes through the
# deprecated Chain.__call__ path, which is what triggers the warning shown in
# the saved output. `invoke` is the supported entry point; the chain's memory
# supplies "chat_history", so "question" is the only input required.
result = qa.invoke({"question": question})

# The answer is plain text/markdown; print keeps its line breaks readable.
print(result['answer'])

  result= qa(question)


The `extract_column_value` function is designed to extract specific pieces of information from columns in a Pandas DataFrame. 

Here's a breakdown based on the provided code:

1. **Purpose:**  The function's primary goal is to manipulate data within columns of a DataFrame to make it more suitable for analysis or modeling.

2. **Functionality:**

   - **Time Extraction:**  It attempts to extract time information from a column likely named 'Time_taken(min)' by:
     - Splitting the string at the space character (' ').
     - Taking the second part of the split (index 1), removing any leading/trailing whitespace, and converting it to an integer.

   - **Weather Condition Extraction:** It aims to extract weather conditions from a column named 'Weather_conditions' by:
     - Splitting the string at the space character.
     - Taking the second part of the split, removing leading/trailing whitespace.

   - **City Code Extraction:** It extracts a city code from a column called 'Delivery_perso