In [1]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders  import TextLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from chromadb.config import Settings

In [2]:
class ResumeAnalyser:
    """Answer free-text queries about a PDF resume using an embedding search in Chroma."""

    def __init__(
        self,
        resume,
        chroma_host="192.168.43.163",
        chroma_port="8000",
        collection_name="chroma_test",
    ) -> None:
        """
        Info:
          Stores the resume path and prepares the PDF loader, the embedding
          model and the Chroma client settings used by Analyse.
        Args:
          resume --> Path of the resume PDF to analyse
          chroma_host --> Host of the Chroma server (default keeps the previous hard-coded value)
          chroma_port --> HTTP port of the Chroma server (default keeps the previous hard-coded value)
          collection_name --> Chroma collection the resume chunks are stored in
        Returns:
          None
        """
        self.resume = resume
        # The attribute name (spelling included) is kept as-is so code that
        # already reads `pdf_loder` keeps working.
        self.pdf_loder = PyMuPDFLoader
        self.embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
        self.client_settings = Settings(
            chroma_server_host=chroma_host,
            chroma_server_http_port=chroma_port,
        )
        self.collection_name = collection_name

    def Analyse(self, query):
        """
        Info:
          Splits the whole resume into chunks, stores them in the Chroma
          collection and returns the chunk that best matches the query.
        Args:
          query --> Text to search the resume for
        Returns:
          str --> Content of the best matching chunk, or "" when the resume
                  yields no text or the search returns nothing
        """
        pages = self.pdf_loder(self.resume).load()
        # Use every page the loader returns; indexing only pages[0] silently
        # dropped the rest of a multi-page resume.
        full_text = "\n".join(page.page_content for page in pages)

        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=500,
            chunk_overlap=10,
            length_function=len,
            is_separator_regex=False)
        chunks = text_splitter.split_text(full_text)
        if not chunks:
            # Nothing to index (no pages / no extractable text).
            return ""

        source = str(self.resume)
        db = Chroma.from_texts(
            chunks,
            self.embedding_model,
            # Tag each chunk with its resume so the search below can be
            # restricted to this document inside the shared collection.
            metadatas=[{"source": source} for _ in chunks],
            # Stable ids make repeated calls overwrite the same entries rather
            # than appending duplicates under fresh random ids.
            # NOTE(review): if a resume is re-indexed with fewer chunks, the
            # higher-numbered entries from the earlier run are not removed.
            ids=[f"{source}-{index}" for index in range(len(chunks))],
            client_settings=self.client_settings,
            collection_name=self.collection_name,
        )
        # Only the top hit is returned, so only one result is requested.
        docs = db.similarity_search(query, k=1, filter={"source": source})
        return docs[0].page_content if docs else ""

In [3]:
# Build an analyser for the resume PDF at this path (relative to the notebook's working directory).
analyser = ResumeAnalyser("../data/Saiprasad Toshatwad.pdf")

In [4]:
# Look up the resume chunk that best matches "TextFlow"; the returned text is shown as the cell output.
analyser.Analyse("TextFlow")

'06-2022 To 12-2022 \nThe development of an AutoML platform aimed at simplifying the creation and deployment of machine learning models. I played a \nkey role in building automated pipelines for tasks like hyperparameter tuning, model selection, and training. Dockerized the \napplications for efficient deployment. \n \nProjects \n \nTextFlow – Personal Project \n● \nTextFLow is advanced video-to-text summarization with chat with context data project using RabbitMQ, Docker,'

In [5]:
# Show which Chroma server host the analyser is configured to talk to.
analyser.client_settings.chroma_server_host

'192.168.43.163'