In [1]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Milvus
from langchain_community.llms import CTransformers
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

In [5]:
class ResumeAnalyser:
    def __init__(self, resume) -> None:
        """
        Info:
          A Resume vector store function
        Args:
          resume[Document] --> A pdf document
        Returns:
          None
        """
        self.resume = resume
        self.pdf_loder = PyMuPDFLoader
        self.callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
        self.embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
        self.human_template = """{question}"""
        self.system_template = "You are AI assiatant that can analyse the given text and answer the questions according based on given text be more specific about answer: {text}"
        self.collection_name = "resume_collections"
        self.client_settings = {
            "host": "192.168.43.163",
            "port": "19530"}
        self.config = {
            'max_new_tokens': 512,
            'repetition_penalty': 1.1,
            'context_length': 2000,
        }
        self.model = CTransformers(
            model='C:\\Users\\Saiprasad\\.cache\\huggingface\\hub\\models--TheBloke--Mistral-7B-Instruct-v0.2-GGUF\\snapshots\\3a6fbf4a41a1d52e415a4958cde6856d34b2db93\\mistral-7b-instruct-v0.2.Q4_K_M.gguf',
            model_type='llama', lib='avx', config=self.config, callback_manager=self.callback_manager)
    def Analyse(self, param1, param2):
        """
        Info:
          This function used cosine simalarity to analyse the resume
        Args:
          resume --> Input Resume
        Returns:
          str --> Returns data based on Cosine-similarity
        """
        resume_data = self.pdf_loder(self.resume)
        resumes_data = resume_data.load()
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=2500,
            chunk_overlap=10,
            length_function=len,
            is_separator_regex=False)
        data = text_splitter.split_documents(resumes_data)
        db = Milvus.from_documents(
            data, 
            self.embedding_model, 
            connection_args=self.client_settings, 
            collection_name = self.collection_name)
        docs = db.similarity_search(param1, k = 1)
        human_prompt = HumanMessagePromptTemplate.from_template(self.human_template)
        system_prompt = SystemMessagePromptTemplate.from_template(self.system_template)
        chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_prompt])
        prompt = chat_prompt.format_prompt(question=param2, text=docs).to_messages()
        return self.model.invoke(prompt)

In [6]:
analyser = ResumeAnalyser("../data/Saiprasad Toshatwad.pdf")

In [7]:
analyser.Analyse("skills", "list down his skill sections on resume")

.
AI: Based on the provided text from Saiprasad Toshatwad's resume, his skill sections include:

1. SQL (MySQL, PostgreSQL), VectorDB’s
2. Python, LangChain (Pandas, Tensorflow, Yellowbrick, Shap, NumPy, Scikit-learn, API, ORM.. etc.)
3. Golang, ORM
4. MLOps: Linux, Kedro, Docker, Git, DVC, MLflow, AWS, EC2, S3, botot3, AWS lambda, Github Actions
5. PowerBI

".\nAI: Based on the provided text from Saiprasad Toshatwad's resume, his skill sections include:\n\n1. SQL (MySQL, PostgreSQL), VectorDB’s\n2. Python, LangChain (Pandas, Tensorflow, Yellowbrick, Shap, NumPy, Scikit-learn, API, ORM.. etc.)\n3. Golang, ORM\n4. MLOps: Linux, Kedro, Docker, Git, DVC, MLflow, AWS, EC2, S3, botot3, AWS lambda, Github Actions\n5. PowerBI"

In [8]:
analyser.Analyse("Companies", "In which companies saiprasad toshatwad worked")

 as a Data Scientist?
AI: Saiprasad Toshatwad worked as a Data Scientist at Eaton between 2023 and 2023.

' as a Data Scientist?\nAI: Saiprasad Toshatwad worked as a Data Scientist at Eaton between 2023 and 2023.'

In [9]:
analyser.Analyse("Experiance", "How many years saiprasad have experiance as golang developer")

?
AI: Saiprasad mentions his projects that include the use of Golang, but there's no explicit statement about the number of years he has been a Golang developer. Therefore, I cannot provide an exact answer based on the given text.

"?\nAI: Saiprasad mentions his projects that include the use of Golang, but there's no explicit statement about the number of years he has been a Golang developer. Therefore, I cannot provide an exact answer based on the given text."

In [10]:
analyser.Analyse("Experiance", "How many years saiprasad have experiance as data scientist")

?
Assistant: Based on the provided text, Saiprasad has worked on several projects that involve data analysis and machine learning. However, the text doesn't explicitly mention the number of years he has spent as a data scientist. Therefore, I cannot provide an exact answer to this question based on the given text.

"?\nAssistant: Based on the provided text, Saiprasad has worked on several projects that involve data analysis and machine learning. However, the text doesn't explicitly mention the number of years he has spent as a data scientist. Therefore, I cannot provide an exact answer to this question based on the given text."

In [11]:
analyser.Analyse("Experiance", "How many years saiprasad have experiance in total?")


Assistant: Based on the provided text, Saiprasad has worked on several projects. However, there is no clear indication of the number of years he's had experience in total. The text mentions his engineering of a robust e-commerce backend application, development of a haze remover application, application of LSTM and deep learning, utilization of Kedro, Docker, PowerBI, and DAX techniques, but it does not provide the length of time he spent on these projects or his overall experience.

"\nAssistant: Based on the provided text, Saiprasad has worked on several projects. However, there is no clear indication of the number of years he's had experience in total. The text mentions his engineering of a robust e-commerce backend application, development of a haze remover application, application of LSTM and deep learning, utilization of Kedro, Docker, PowerBI, and DAX techniques, but it does not provide the length of time he spent on these projects or his overall experience."

In [13]:
analyser.Analyse("Projects", "list down his projects")

 and work experience.
Assistant: I'd be happy to help you with that! The text provides information about two personal projects and three work experiences for Saiprasad Toshatwad. Here they are:

**Projects:**
1. TextFlow: An advanced video-to-text summarization project using RabbitMQ, Docker, MongoDB, SQL, and a 3-tier architecture. He overcame challenges in text summarization and SaaS models to deliver the project.

**Work Experience:**
1. Awesomesuite: Golang Developer from April 2023 to present. At AwesomeSuite, he is responsible for spearheading the development of cutting-edge APIs for SaaS applications, utilizing AWS for deployment, and leading the charge in API creation, backend optimization, and deploying services on AWS Lambda.
2. Eaton: Data Scientist from August 2023 to November 2023. During his internship at Eaton, he was a core team member in the Generative Design project, where he led various image-focused projects including Mask RC

Number of tokens (1201) exceeded maximum context length (1200).


NN

Number of tokens (1202) exceeded maximum context length (1200).


,

Number of tokens (1203) exceeded maximum context length (1200).


 RC

Number of tokens (1204) exceeded maximum context length (1200).


NN

Number of tokens (1205) exceeded maximum context length (1200).


,

Number of tokens (1206) exceeded maximum context length (1200).


 RC

Number of tokens (1207) exceeded maximum context length (1200).


NN

Number of tokens (1208) exceeded maximum context length (1200).


,

Number of tokens (1209) exceeded maximum context length (1200).


 RC

Number of tokens (1210) exceeded maximum context length (1200).


NN

Number of tokens (1211) exceeded maximum context length (1200).


,

Number of tokens (1212) exceeded maximum context length (1200).


 RC

Number of tokens (1213) exceeded maximum context length (1200).


NN

Number of tokens (1214) exceeded maximum context length (1200).


,

Number of tokens (1215) exceeded maximum context length (1200).


 RC

Number of tokens (1216) exceeded maximum context length (1200).


NN

Number of tokens (1217) exceeded maximum context length (1200).


,

Number of tokens (1218) exceeded maximum context length (1200).


 RC

Number of tokens (1219) exceeded maximum context length (1200).


NN

Number of tokens (1220) exceeded maximum context length (1200).


,

Number of tokens (1221) exceeded maximum context length (1200).


 RC

Number of tokens (1222) exceeded maximum context length (1200).


NN

Number of tokens (1223) exceeded maximum context length (1200).


,

Number of tokens (1224) exceeded maximum context length (1200).


 RC

Number of tokens (1225) exceeded maximum context length (1200).


NN

Number of tokens (1226) exceeded maximum context length (1200).


,

Number of tokens (1227) exceeded maximum context length (1200).


 RC

Number of tokens (1228) exceeded maximum context length (1200).


NN

Number of tokens (1229) exceeded maximum context length (1200).


,

Number of tokens (1230) exceeded maximum context length (1200).


 RC

Number of tokens (1231) exceeded maximum context length (1200).


NN

Number of tokens (1232) exceeded maximum context length (1200).


,

Number of tokens (1233) exceeded maximum context length (1200).


 RC

Number of tokens (1234) exceeded maximum context length (1200).


NN

Number of tokens (1235) exceeded maximum context length (1200).


,

Number of tokens (1236) exceeded maximum context length (1200).


 RC

Number of tokens (1237) exceeded maximum context length (1200).


NN

Number of tokens (1238) exceeded maximum context length (1200).


,

Number of tokens (1239) exceeded maximum context length (1200).


 RC

Number of tokens (1240) exceeded maximum context length (1200).


NN

Number of tokens (1241) exceeded maximum context length (1200).


,

Number of tokens (1242) exceeded maximum context length (1200).


 RC

Number of tokens (1243) exceeded maximum context length (1200).


NN

Number of tokens (1244) exceeded maximum context length (1200).


,

Number of tokens (1245) exceeded maximum context length (1200).


 RC

Number of tokens (1246) exceeded maximum context length (1200).


NN

Number of tokens (1247) exceeded maximum context length (1200).


,

Number of tokens (1248) exceeded maximum context length (1200).


 RC

Number of tokens (1249) exceeded maximum context length (1200).


NN

Number of tokens (1250) exceeded maximum context length (1200).


,

Number of tokens (1251) exceeded maximum context length (1200).


 RC

Number of tokens (1252) exceeded maximum context length (1200).


NN

Number of tokens (1253) exceeded maximum context length (1200).


,

Number of tokens (1254) exceeded maximum context length (1200).


 RC

Number of tokens (1255) exceeded maximum context length (1200).


NN

Number of tokens (1256) exceeded maximum context length (1200).


,

Number of tokens (1257) exceeded maximum context length (1200).


 RC

Number of tokens (1258) exceeded maximum context length (1200).


NN

Number of tokens (1259) exceeded maximum context length (1200).


,

Number of tokens (1260) exceeded maximum context length (1200).


 RC

Number of tokens (1261) exceeded maximum context length (1200).


NN

Number of tokens (1262) exceeded maximum context length (1200).


,

Number of tokens (1263) exceeded maximum context length (1200).


 RC

Number of tokens (1264) exceeded maximum context length (1200).


NN

Number of tokens (1265) exceeded maximum context length (1200).


,

Number of tokens (1266) exceeded maximum context length (1200).


 RC

Number of tokens (1267) exceeded maximum context length (1200).


NN

Number of tokens (1268) exceeded maximum context length (1200).


,

Number of tokens (1269) exceeded maximum context length (1200).


 RC

Number of tokens (1270) exceeded maximum context length (1200).


NN

Number of tokens (1271) exceeded maximum context length (1200).


,

Number of tokens (1272) exceeded maximum context length (1200).


KeyboardInterrupt: 