In [30]:
import langchain
import huggingface_hub
import os
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain_groq import ChatGroq
import pypdf
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain_community.document_loaders import DirectoryLoader
from transformers import RobertaTokenizer, RobertaModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the Groq API key from the environment instead of storing it in the notebook.
# NOTE(review): the key that was hardcoded on this line was committed in plain text
# and should be treated as compromised (revoke and reissue it).
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    print("GROQ_API_KEY is not set; export it before creating the Groq client.")

In [24]:
# Candidate model identifiers; `model2` is the one handed to ChatGroq below.
model1, model2 = "qwen-2.5-32b", "qwen-2.5-coder-32b"

## testing LLM

In [25]:
# Groq chat client for the selected model, with sampling temperature fixed at 0.
llm = ChatGroq(model=model2, temperature=0, api_key=api_key)

In [28]:
# Smoke-test prompt: an instruction followed by a Java statement whose syntax the
# model is asked to correct. The explicit "\n" matters: adjacent string literals are
# concatenated as-is, and without a separator the prompt read "...syntaxsytem.out...".
op = llm.invoke(
    "Hi i will send you the some part of the code you need to correct the syntax\n"
    "sytem.out.println('hi')"
)

In [29]:
# Print only the `content` attribute of the response returned by `llm.invoke`.
print(op.content)

Certainly! It looks like you're trying to print "hi" to the console in Java, but there are a couple of syntax issues in your code. Here's the corrected version:

```java
System.out.println("hi");
```

Changes made:
1. Changed the single quotes around "hi" to double quotes. In Java, strings are enclosed in double quotes, while single quotes are used for characters.
2. Ensured that the method `println` is called on `System.out` with the correct syntax.

If you have any more parts of the code that need correction, feel free to share!


## Testing embedding models

In [None]:
# Load the tokenizer and model from the "microsoft/graphcodebert-base" checkpoint.
# NOTE(review): `model` is rebound to a SentenceTransformer in a later cell, so this
# RobertaModel instance is no longer reachable by that name once that cell has run.
tokenizer = RobertaTokenizer.from_pretrained("microsoft/graphcodebert-base")
model = RobertaModel.from_pretrained("microsoft/graphcodebert-base")

In [46]:
from typing import List
from langchain.embeddings.base import Embeddings
from transformers import AutoModel, AutoTokenizer
import torch

class CodeBERTEmbeddings(Embeddings):
    """``Embeddings`` adapter around the ``microsoft/codebert-base`` checkpoint.

    Each text is tokenized on its own (truncated to at most 512 tokens), passed
    through the model with gradient tracking disabled, and reduced to one vector
    by averaging ``last_hidden_state`` over dim 1 (mean pooling).
    """

    def __init__(self):
        self.model_name = "microsoft/codebert-base"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModel.from_pretrained(self.model_name)
        # Use CUDA when torch reports it as available, otherwise fall back to CPU.
        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"
        self.model.to(self.device)

    def _embed_one(self, text: str) -> List[float]:
        """Return the mean-pooled embedding of a single text as a list of floats."""
        encoded = self.tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        ).to(self.device)

        with torch.no_grad():
            hidden_states = self.model(**encoded).last_hidden_state

        # Average over dim 1, then drop the size-1 dimensions left by the single-text batch.
        pooled = hidden_states.mean(dim=1).squeeze()
        return pooled.cpu().numpy().tolist()

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed every entry of ``texts`` in order; an empty list yields ``[]``."""
        return [self._embed_one(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed one query string by routing it through ``embed_documents``."""
        return self.embed_documents([text])[0]

In [51]:
# The Hugging Face token must come from the environment, not from the notebook source.
# NOTE(review): the token that was hardcoded on this line was committed in plain text
# and should be treated as compromised (revoke and reissue it).
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    print("HUGGINGFACEHUB_API_TOKEN is not set; export it before running the Hugging Face cells.")

In [None]:
# Instantiate the custom embedder; its __init__ loads the tokenizer and model.
codebert = CodeBERTEmbeddings()

# Called with an empty list, so the per-text loop never runs and the result is [].
code_embeddings = codebert.embed_documents([])

In [32]:
from langchain_huggingface import HuggingFaceEmbeddings

In [None]:
# LangChain Hugging Face embeddings wrapper for the Jina code-embedding checkpoint.
embeddings = HuggingFaceEmbeddings(model_name="jinaai/jina-embeddings-v2-base-code")

In [69]:
from sentence_transformers import SentenceTransformer


In [71]:
# Load the plain CodeBERT checkpoint through SentenceTransformer. The log line printed
# below reports that no sentence-transformers model exists under this name, so the
# library creates one with mean pooling.
# NOTE(review): this rebinds `model`, replacing the RobertaModel assigned earlier.
# NOTE(review): `use_auth_token` may be deprecated in newer sentence-transformers
# releases in favour of `token` — confirm against the installed version.
model = SentenceTransformer('microsoft/codebert-base', use_auth_token=False)

No sentence-transformers model found with name microsoft/codebert-base. Creating a new one with mean pooling.


In [72]:
# Adapter that exposes an object with an `encode` method through the Embeddings interface
class CodeBertEmbeddings(Embeddings):
    """Thin ``Embeddings`` wrapper that delegates all work to ``model.encode``."""

    def __init__(self, model):
        # `model` must provide encode(list_of_str) returning something with .tolist().
        self.model = model

    def embed_documents(self, code_chunks: list):
        """Encode all chunks in a single call and return them as nested Python lists."""
        vectors = self.model.encode(code_chunks)
        return vectors.tolist()

    def embed_query(self, query: str):
        """Encode ``query`` as a one-element batch and return its only row as a list."""
        batch = self.model.encode([query])
        return batch[0].tolist()

# Wrap the module-level `model` so it can be handed to a LangChain vector store
code_bert_embeddings = CodeBertEmbeddings(model)

In [49]:
# Confirm the token is configured without echoing the secret into the notebook output,
# where it would be saved and shared along with the file.
print("HUGGINGFACEHUB_API_TOKEN set:", os.getenv("HUGGINGFACEHUB_API_TOKEN") is not None)

hf_<redacted>


## Testing the vector DB

In [54]:
from qdrant_client import QdrantClient
from langchain_qdrant import QdrantVectorStore
from qdrant_client.http.models import VectorParams, Distance

In [56]:
# Qdrant credentials come from the environment so the API key never lands in the notebook.
# NOTE(review): the key that was hardcoded on this line was committed in plain text
# and should be treated as compromised (rotate it).
qdrant_key = os.environ.get("QDRANT_API_KEY")
# QDRANT_URL overrides the cluster endpoint; the default is the URL this notebook used.
end_point = os.environ.get(
    "QDRANT_URL",
    "https://c510483d-1bc3-42c3-8645-3bf7698b142d.us-east4-0.gcp.cloud.qdrant.io",
)
if not qdrant_key:
    print("QDRANT_API_KEY is not set; export it before creating the Qdrant client.")

In [57]:
# Qdrant client built from the endpoint and API key defined above.
qdrant_client = QdrantClient(api_key=qdrant_key, url=end_point)

In [84]:
# qdrant_client.create_collection(
#     collection_name="test1",
#     vectors_config=VectorParams(size=768, distance=Distance.COSINE)
# )

In [90]:
# Reset the scratch collection: drop any previous copy, then recreate it empty so the
# vector-store cell that follows has a collection to attach to. In file order the only
# create_collection call is commented out, so a top-to-bottom run previously reached the
# vector store with no "test1" collection in place.
# size=768 / cosine distance mirror the parameters of that commented-out call.
qdrant_client.delete_collection(collection_name="test1")
qdrant_client.create_collection(
    collection_name="test1",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
)

True

In [85]:
# LangChain vector store bound to the "test1" collection, embedding through
# the `code_bert_embeddings` wrapper.
vector_store = QdrantVectorStore(
    embedding=code_bert_embeddings,
    collection_name="test1",
    client=qdrant_client,
)

In [86]:
# Sample documents that the next cell inserts into the vector store. Each Document has
# 'name' and 'index' metadata entries plus a page_content string.
# NOTE(review): the text contains personal contact details (email address, phone number);
# consider swapping in synthetic data before this notebook is shared.
ch=[Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': 'Output: "other"'}, page_content='G.Devi Sri Ranga Prasad \nLinkedin: g-devi-sri-ranga-prasad                                                                                        Email: devisrprasad948@gmail.com'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills"'}, page_content='GitHub:-  https://github.com/dspStudent                                                            Mobile:  +91-7032857480 \nSKILLS SUMMARY \n• Languages: C++, C, Core Java, Python, R , kotlin \n• Technologies: HTML , CSS, Java Script, Java Spring Boot, Spring Security 6'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': 'Based on the provided text, the content category is "skills". This is because the text mainly discusses the tools, platforms, and programming languages that the person is familiar with, which are indicative of their skills.'}, page_content='• Tools/Platforms: MySQL, MongoDB \n• Data Analysis:          Excel, Tabuleo  \nINTERNSHIP  \n• R Programing language:                                                                                                                             June 2023'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"projects"'}, page_content='PROJECTS \n• Anime List Backend Api:'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"other"'}, page_content='• Api Link :- https://naa-anime-list-backend-api.onrender.com/login                                         \n• Users can keep track of the anime they have watched'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills"'}, page_content='• The security is robust because it contains OAuth 2.0 and JWT token verification.  And also The Animes Collection has 14k+ \nrecords   \n• Developed proficiently in Spring Boot Security 6, with APIs and MongoDB as well. \n• Rock paper scissor game:'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills, experiences"'}, page_content='• AI-based project where the player competes against the computer.                                                             Jul 2023 \n• Utilizes OpenCV and Media Pipe for hand gesture recognition.'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills, experiences"'}, page_content='• Developed proficiency in Python and learned to implement computer vision and machine learning techniques. \n• Airbnb data analysis: \n• Conducted a comprehensive analysis of Airbnb pricing patterns to understand market dynamics.      Jun 2023'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills, experiences"'}, page_content='• Developed a predictive model capable of estimating Airbnb rental prices.  \n• Enhanced data analytics skills, particularly in R programming, and gained insights into the factors influencing \naccommodation pricing.  \nACHIEVEMENTS'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills"'}, page_content='ACHIEVEMENTS  \n• Solved 370+ Questions in LeetCode , 100+ Streak in LeetCode and GFG, CodeCheff Max Rating 1503,  GFG Max Rating 1780, \nLeetCode Max Rating 1489 \nCERTIFICATES \n• R programing || Board infinity Jul 2023 \n \n• Java ||Hacker Rank  Feb 2022 \n \n• Python || Coursera   Nov 2022'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"experiences"'}, page_content='• C, C++ || Coursera Dec 2021 \nEDUCATION \nLovely Professional  University                                                               Phagwara, IN'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"experiences"'}, page_content='B.Tech. in Computer Science 76%                                                                                                                             2021-2025'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"other"'}, page_content='Narayana Junior College                                                                                                                                Andhra Pradesh, IN'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"other"'}, page_content='Intermediate 96.5%                                                                                                                                                    2019-2021'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills"'}, page_content='Narayana High School                                                                                                                                      Andhra Pradesh, IN   \n     10th class 95% \n      \n• Embarked on a steep learning curve to master R programming and data analysis techniques.'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills, experiences"'}, page_content='• Overcame initial challenges through dedicated study and hands -on practice with real datasets.  \n• Transitioned from finding data analysis challenging to enjoying the intricacies of data interpretation and model creation.'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills, behaviors"'}, page_content='• Applied newly acquired R programming skills to contribute meaningfully to a project, demonstrating the ability to learn  \n• and apply new technologies effectively.     \n \n2018-2019')]

In [87]:
# Insert the sample documents; the returned list of ids is the cell's displayed value.
vector_store.add_documents(documents=ch)

['5398af9434614ddfb971784898d4fdef',
 'dadc10f21fb74c9c8f2ef638199669a5',
 'f42c356315484629a9e6d02be684247e',
 '618cfc73a6764f3fb7ad52cdb00cf012',
 '5193e98504854e0d928e94f587934fef',
 'dc49e50e81a544de91782fe602f2e417',
 '1d61b90a11d14e91888d4f96eb68931c',
 '57bdcc20dbda48c294b534a838cdba6a',
 '6ba2d0f2f85548a0bc5e6ba723072546',
 'cddcac4605f945d8ab58814300b1dbb3',
 '09fe850471e84f47b8e2f16c6c7d7324',
 '485dcd53c06c4a1cad4c0148648be017',
 '3749841237e94fa3bdf0f242caf31213',
 '69307a4e60cd44dcbb5bc343b625075c',
 '6a96164105f348ed97bb0b063d5077d6',
 '9a11de37fc054be4b5935ff670abdd56',
 'c286e131e32741b4bedf7869c838b111']

In [89]:
# Retrieve the five stored documents closest to the query text.
vector_store.similarity_search(query="who is devi", k=5)

[Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"projects"', '_id': '618cfc73-a676-4f3f-b7ad-52cdb00cf012', '_collection_name': 'test1'}, page_content='PROJECTS \n• Anime List Backend Api:'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills"', '_id': 'dc49e50e-81a5-44de-9178-2fe602f2e417', '_collection_name': 'test1'}, page_content='• The security is robust because it contains OAuth 2.0 and JWT token verification.  And also The Animes Collection has 14k+ \nrecords   \n• Developed proficiently in Spring Boot Security 6, with APIs and MongoDB as well. \n• Rock paper scissor game:'),
 Document(metadata={'name': 'Devi Sri Ranga Prasad Gudimetla', 'index': '"skills, experiences"', '_id': '9a11de37-fc05-4be4-b593-5ff670abdd56', '_collection_name': 'test1'}, page_content='• Overcame initial challenges through dedicated study and hands -on practice with real datasets.  \n• Transitioned from finding data analysis challenging to enjoying t

In [None]:
# Toy bag-of-words example: mark which vocabulary words occur in each sentence.
# The original notes sketched the expected result by hand ("li[0]=1 1 0 0 0"), which
# is not valid Python; the vectors are computed here instead.
li = ["Hi hello", "Hi how are you"]

vocab = ["hi", "hello", "how", "are", "you"]

# One 0/1 presence vector per sentence, in vocabulary order. Matching is
# case-insensitive on whitespace-separated tokens, so li[0] -> [1, 1, 0, 0, 0].
li_vectors = [
    [int(word in sentence.lower().split()) for word in vocab]
    for sentence in li
]
li_vectors