In [72]:
from langchain_community.document_loaders import WebBaseLoader

# Job posting whose page text is scraped by the loader below.
job_url = "https://www.linkedin.com/jobs/view/4231414211"
loader = WebBaseLoader(job_url)

In [73]:
# Fetch the page and keep the text of the last document the loader returns.
scraped_docs = loader.load()
pg_data = scraped_docs[-1].page_content
pg_data

'\n\n\n\n \n\n\n\n\n\n\n\n\n\n\nTata Consultancy Services hiring Python Developer in Ames, IA | LinkedIn\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\nLinkedIn and 3rd parties use essential and non-essential cookies to provide, secure, analyze and improve our Services, and to show you relevant ads (including professional and job ads) on and off LinkedIn. Learn more in our Cookie Policy.Select Accept to consent or Reject to decline non-essential cookies for this use. You can update your choices at any time in your settings.\n\n\n\n\n                Accept\n                \n\n                Reject\n                \n\n\n \n\n\n\n\n\n      Skip to main content\n    \n\n\n\nLinkedIn\n\n\n\n\n\n\n\n              Python Developer in Stuttgart\n \nExpand search\n\n\n\n\n\n\nThis button displays the currently selected search type. When expanded it provides a list of search options that will switch the search inputs to match the current selection. \

In [74]:
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# The key is read from `GROQ`; `GROQ_API_KEY` is accepted as a fallback name.
# Fail here with an actionable message rather than handing a missing key
# to the client and failing later.
API_KEY = os.getenv('GROQ') or os.getenv('GROQ_API_KEY')
if not API_KEY:
    raise RuntimeError(
        "Groq API key not found: set GROQ (or GROQ_API_KEY) in the environment or in a .env file."
    )

# temperature=0 keeps the model's output as repeatable as possible.
llm = ChatGroq(
    temperature=0,
    groq_api_key=API_KEY,
    model_name="llama-3.3-70b-versatile"
)

In [75]:
# Prompt text asking the model to return the job details as JSON.
# `{pg_data}` is the only placeholder and receives the scraped page text.
extract_template = """
    ### SCRAPED TEXT FROM WEBSITE:
    {pg_data}

    ### INSTRUCTUION
    The scrapped text is from the job page of LinkedIn. Your task is to extract the following details from the job posting and return them in a JSON format with the keys:
    `job_title`, `role`, `description`, `skills`, `experience`, `tasks`.
    Only return the valid JSON format.

    ### VALID JSON (NO PREAMBLE)
    """
prompt_extract = PromptTemplate.from_template(extract_template)

# Pipe the rendered prompt straight into the chat model.
chain_extract = prompt_extract | llm

# Run the chain on the scraped page text; `res` holds the model's reply.
res = chain_extract.invoke({'pg_data': pg_data})

In [76]:
# Parse the model's JSON text reply into native Python objects.
from langchain_core.output_parsers import JsonOutputParser

json_parser = JsonOutputParser()
raw_reply = res.content
json_obj = json_parser.parse(raw_reply)

json_obj

{'job_title': 'Python Developer',
 'role': 'Python Developer',
 'description': 'Tata Consultancy Services is hiring a Python Developer in Ames, IA',
 'skills': ['Proven experience as a Python Developer',
  'Strong proficiency in SQL and experience with database management systems',
  'Experience in developing APIs using REST and SOAP',
  'Hands-on experience with Linux, including shell scripting and system administration',
  'Familiarity with version control systems like Git'],
 'experience': 'Entry level',
 'tasks': ['Ability to write clean, efficient, and well-documented code',
  'Excellent problem-solving skills and attention to detail',
  'Strong communication skills and the ability to work well in a team',
  'Understanding of Agile development methodologies']}

In [None]:
# Load the skills sheet that is later stored in ChromaDB.
import pandas as pd

skills_csv = 'skills.csv'
df = pd.read_csv(skills_csv)
df

In [79]:
# Store the portfolio in ChromaDB using a persistent client, so the data is
# written to disk ('vectorstore') and can be reused later without inserting
# it again. A plain in-memory client would lose it between sessions.

import chromadb
import uuid

client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name='Portfolio')
print(df.columns.tolist())


def _clean_cell(value):
    """Return a CSV cell as a whitespace-stripped string, or '' if it is missing."""
    return str(value).strip() if pd.notna(value) else ''


# Seed the collection only when it is empty, so re-running this cell does not
# insert duplicates. NOTE: a store that was already populated is not re-seeded,
# so previously stored values stay as they are until the collection is rebuilt.
if not collection.count():
    for _, row in df.iterrows():
        # Strip the values: links carrying leading spaces would otherwise be
        # stored verbatim and end up in the email prompt.
        primary_link = _clean_cell(row['Link1'])
        secondary_link = _clean_cell(row['Link2'])
        tech_stack = _clean_cell(row['TechStack'])
        # One record per row; every argument is a one-element list.
        collection.add(
            documents=[tech_stack],
            metadatas=[{
                'primary_links': primary_link,
                'secondary_links': secondary_link
            }],
            ids=[str(uuid.uuid4())]
        )

['TechStack', 'Link1', 'Link2']


In [None]:
# Print a sample of what is currently stored in the collection.
stored_sample = collection.peek()
print(stored_sample)
# Manual clean-up, left disabled: deletes the records with the ids listed below.
# collection.delete(
#     ids=['710bb5f3-b95f-4497-a816-611f3c569343', '32224923-7d2e-4075-a147-7d1fcaff0718', '920fff09-2502-409a-8a16-b80d841075e4', 'f19a6f6c-4090-4e58-9140-6ac2fa41edc7', 'a8f9b1b9-c0b6-45cc-a23e-985abb7eb2b8', '3950196c-92d0-4017-8262-d5a196a77638', '96d687ee-47c3-4892-800c-e6805ed01d75', 'd7664246-0a58-4db2-87bd-1ce50549e984', 'e20d5376-087e-493c-a645-f0fd73ecfc32', '6a4e31e7-a1a6-4c8d-8fbe-fe61b0589b62']
# )
# Cell result: the collection's record count.
collection.count()

In [85]:
# Skills extracted from the job posting; each one is used as a separate query.
job_skills = json_obj['skills']
print(job_skills)
print()

# Ask for the single closest stored entry per skill and keep only the metadata
# (the portfolio links) from the result.
query_result = collection.query(query_texts=job_skills, n_results=1)
links = query_result.get('metadatas')

links

['Proven experience as a Python Developer', 'Strong proficiency in SQL and experience with database management systems', 'Experience in developing APIs using REST and SOAP', 'Hands-on experience with Linux, including shell scripting and system administration', 'Familiarity with version control systems like Git']



[[{'primary_links': ' https://github.com/anniechakraborty/simpleKanbanBackend',
   'secondary_links': ' https://github.com/anniechakraborty/BookQuest'}],
 [{'primary_links': ' https://github.com/anniechakraborty/simpleKanbanBackend',
   'secondary_links': ' https://github.com/anniechakraborty/BookQuest'}],
 [{'primary_links': ' https://github.com/anniechakraborty/BookQuest',
   'secondary_links': ' https://github.com/anniechakraborty/BookQuest'}],
 [{'primary_links': ' https://github.com/anniechakraborty/simpleKanbanBackend',
   'secondary_links': ' https://github.com/anniechakraborty/BookQuest'}],
 [{'secondary_links': ' https://github.com/anniechakraborty/BookQuest',
   'primary_links': ' https://github.com/anniechakraborty/simpleKanbanBackend'}]]

In [84]:
# Fields of the parsed job posting that feed the email prompt.
job_desc = json_obj['description']
job_tasks = json_obj['tasks']

# Cold-email prompt; placeholders: {job_desc}, {job_tasks}, {links}.
email_template = """
        ### JOB DESCRIPTION:
        {job_desc}
        and TASKS:
        {job_tasks}
        ### INSTRUCTION:
        You are Mohan, a business development executive at AtliQ. AtliQ is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of AtliQ 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase Atliq's portfolio: {links}
        Remember you are Mohan, BDE at AtliQ. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
prompt_email = PromptTemplate.from_template(email_template)

# Pipe the rendered prompt into the chat model.
chain_email = prompt_email | llm

# Fill the placeholders, run the chain and print the generated email text.
email_inputs = {"job_desc": str(job_desc), 'job_tasks': job_tasks, "links": links}
res = chain_email.invoke(email_inputs)
print(res.content)

Subject: Expert Python Development Services for Tata Consultancy Services

Dear Hiring Manager,

I came across the job posting for a Python Developer at Tata Consultancy Services in Ames, IA, and I'm excited to introduce AtliQ, an AI & Software Consulting company that can fulfill your development needs. Our team of experts has a proven track record of delivering high-quality solutions that meet the exact requirements of our clients.

At AtliQ, we pride ourselves on our ability to write clean, efficient, and well-documented code, which aligns perfectly with your job requirements. Our developers possess excellent problem-solving skills and attention to detail, ensuring that our solutions are robust and reliable. We also emphasize strong communication skills and teamwork, allowing us to collaborate seamlessly with your team.

Our understanding of Agile development methodologies enables us to adapt to your project's unique needs and deliver results quickly. We've empowered numerous enterpr