In [1]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://www.linkedin.com/jobs/view/4231414211")

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
pg_data = loader.load().pop().page_content
pg_data

'\n\n\n\n \n\n\n\n\n\n\n\n\n\n\nTata Consultancy Services hiring Python Developer in Ames, IA | LinkedIn\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\nLinkedIn and 3rd parties use essential and non-essential cookies to provide, secure, analyze and improve our Services, and to show you relevant ads (including professional and job ads) on and off LinkedIn. Learn more in our Cookie Policy.Select Accept to consent or Reject to decline non-essential cookies for this use. You can update your choices at any time in your settings.\n\n\n\n\n                Accept\n                \n\n                Reject\n                \n\n\n \n\n\n\n\n\n      Skip to main content\n    \n\n\n\nLinkedIn\n\n\n\n\n\n\n\n              Python Developer in Stuttgart\n \nExpand search\n\n\n\n\n\n\nThis button displays the currently selected search type. When expanded it provides a list of search options that will switch the search inputs to match the current selection. \

In [3]:
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv

load_dotenv()

API_KEY=os.getenv('GROQ')
llm = ChatGroq(
    temperature = 0,
    groq_api_key=API_KEY,
    model_name="llama-3.3-70b-versatile"
)

In [4]:
prompt_extract = PromptTemplate.from_template(
    """
    ### SCRAPED TEXT FROM WEBSITE:
    {pg_data}

    ### INSTRUCTUION
    The scrapped text is from the job page of LinkedIn. Your task is to extract the following details from the job posting and return them in a JSON format with the keys:
    `job_title`, `role`, `description`, `skills`, `experience`, `tasks`.
    Only return the valid JSON format.

    ### VALID JSON (NO PREAMBLE)
    """
)

# creating a langchain chain
chain_extract = prompt_extract | llm # getting a prompt and passing it to the llm

res = chain_extract.invoke(input={'pg_data': pg_data})

In [5]:
# using the output parser to convert the str json content into a json type
from langchain_core.output_parsers import JsonOutputParser

json_parser = JsonOutputParser()
json_obj = json_parser.parse(res.content)

json_obj

{'job_title': 'Python Developer',
 'role': 'Python Developer at Tata Consultancy Services',
 'description': 'Tata Consultancy Services is hiring a Python Developer in Ames, IA',
 'skills': ['Python',
  'SQL',
  'Database management systems',
  'API development using REST and SOAP',
  'Linux',
  'Shell scripting',
  'System administration',
  'Version control systems like Git'],
 'experience': 'Proven experience as a Python Developer',
 'tasks': ['Write clean, efficient, and well-documented code',
  'Excellent problem-solving skills and attention to detail',
  'Strong communication skills and the ability to work well in a team',
  'Understanding of Agile development methodologies']}

In [10]:
# Getting the skills data to store in chromadb
import pandas as pd

df = pd.read_csv('./../app/resources/skills.csv')
df

Unnamed: 0,TechStack,Link1,Link2
0,React,https://github.com/anniechakraborty/anniechak...,https://github.com/anniechakraborty/textutils
1,Angular,https://github.com/anniechakraborty/simpleKan...,https://github.com/anniechakraborty/frontend-...
2,Flask,https://github.com/anniechakraborty/simpleKan...,https://github.com/anniechakraborty/PersonalW...
3,MongoDB,https://github.com/anniechakraborty/simpleKan...,https://github.com/anniechakraborty/BookQuest
4,Python,https://github.com/anniechakraborty/simpleKan...,https://github.com/anniechakraborty/BookQuest
5,GenAI,https://github.com/anniechakraborty/GenAIProj...,https://github.com/anniechakraborty/cold-emai...
6,ReactNative,https://github.com/anniechakraborty/TextHelp,https://github.com/anniechakraborty/my-app
7,"HTML, CSS, TailwindCSS",https://github.com/anniechakraborty/manage-la...,https://github.com/anniechakraborty/social-li...
8,Machine Learning,https://github.com/anniechakraborty/Classific...,https://github.com/anniechakraborty/Regressio...
9,"XML, Xquery",https://github.com/anniechakraborty/BookQuest,https://github.com/anniechakraborty/BookQuest


In [11]:
# Preparing the chromaDB as a persistent client to store the data on a disk so that we can access the same data later on without having to insert it again
# using just client stores the data in-memory

import chromadb
import uuid

client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name='Portfolio')
print(df.columns.tolist())
if not collection.count():
    for _, row in df.iterrows():
        primary_link = str(row['Link1']) if pd.notna(row['Link1']) else ''
        secondary_link = str(row['Link2']) if pd.notna(row['Link2']) else ''
        tech_stack = str(row['TechStack']) if pd.notna(row['TechStack']) else ''
        collection.add(
            documents=[tech_stack],
            metadatas={
                'primary_links': primary_link,
                'secondary_links': secondary_link
            },
            ids=[str(uuid.uuid4())]
        )

['TechStack', 'Link1', 'Link2']


In [12]:
print(collection.peek())
# collection.delete(
#     ids=['710bb5f3-b95f-4497-a816-611f3c569343', '32224923-7d2e-4075-a147-7d1fcaff0718', '920fff09-2502-409a-8a16-b80d841075e4', 'f19a6f6c-4090-4e58-9140-6ac2fa41edc7', 'a8f9b1b9-c0b6-45cc-a23e-985abb7eb2b8', '3950196c-92d0-4017-8262-d5a196a77638', '96d687ee-47c3-4892-800c-e6805ed01d75', 'd7664246-0a58-4db2-87bd-1ce50549e984', 'e20d5376-087e-493c-a645-f0fd73ecfc32', '6a4e31e7-a1a6-4c8d-8fbe-fe61b0589b62']
# )
collection.count()

{'ids': ['74cba15c-374e-4933-a192-99194b81a991', '1b6a56f7-c637-40cc-8561-157c0fbb5b61', 'c56438c9-2711-4ef8-9a36-4840f8c219c8', 'e02c6203-ec95-4282-a4ab-e279769df1c5', '2a099f01-d118-429e-878e-4ccdcbd59bed', '5f88cc3e-9fae-4a84-bf1d-d2dc59daa882', '167a3ce4-db50-438f-b179-7558f8b74434', '96ce3828-381c-4c3c-a7d3-83c711794d8e', 'cc235622-433f-45a6-8c8d-bbe3656da877', '98478e1d-e3fc-498a-83b7-a38542761b34'], 'embeddings': array([[-0.08726904,  0.00863323,  0.03240667, ..., -0.01087549,
         0.03119643, -0.01890796],
       [-0.09454282,  0.03366246, -0.10063555, ...,  0.01545668,
         0.00646383, -0.05681656],
       [-0.08751205,  0.00945656, -0.06109304, ...,  0.07134798,
         0.04969556, -0.02960439],
       ...,
       [ 0.00566728,  0.05578964,  0.01165499, ...,  0.04982461,
         0.00703615,  0.11334594],
       [-0.02439189,  0.00324442,  0.05426768, ...,  0.03536279,
        -0.02179913, -0.05384536],
       [ 0.03419754,  0.06116725,  0.01045411, ...,  0.03658027,

10

In [13]:
# skills relevant to the job description
print(json_obj['skills'])
print()

links = collection.query(
    query_texts=json_obj['skills'],
    n_results=1
).get('metadatas')

links

['Python', 'SQL', 'Database management systems', 'API development using REST and SOAP', 'Linux', 'Shell scripting', 'System administration', 'Version control systems like Git']



[[{'secondary_links': ' https://github.com/anniechakraborty/BookQuest',
   'primary_links': ' https://github.com/anniechakraborty/simpleKanbanBackend'}],
 [{'secondary_links': ' https://github.com/anniechakraborty/RegressionModels',
   'primary_links': ' https://github.com/anniechakraborty/ClassificationModels'}],
 [{'secondary_links': ' https://github.com/anniechakraborty/BookQuest',
   'primary_links': ' https://github.com/anniechakraborty/simpleKanbanBackend'}],
 [{'primary_links': ' https://github.com/anniechakraborty/BookQuest',
   'secondary_links': ' https://github.com/anniechakraborty/BookQuest'}],
 [{'primary_links': ' https://github.com/anniechakraborty/simpleKanbanBackend',
   'secondary_links': ' https://github.com/anniechakraborty/BookQuest'}],
 [{'primary_links': ' https://github.com/anniechakraborty/simpleKanbanBackend',
   'secondary_links': ' https://github.com/anniechakraborty/BookQuest'}],
 [{'primary_links': ' https://github.com/anniechakraborty/simpleKanbanBackend'

In [14]:
job_desc = json_obj['description']
job_tasks = json_obj['tasks']

prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_desc}
        and TASKS:
        {job_tasks}
        ### INSTRUCTION:
        You are Annie Chakraborty, an MS Computer Science student at the University of Stuttgart. As a CS student you have gained several skills
        in the fields of full stack web development, python programming, API development, mobile apps development, and AI/ML model development.
        
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of AtliQ 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase your portfolio: {links}
        Remember you are Annie, MS student in University of Stuttgart. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
        )

chain_email = prompt_email | llm
res = chain_email.invoke({"job_desc": str(job_desc), 'job_tasks': job_tasks, "links": links})
print(res.content)

Subject: Application for Python Developer Position at Tata Consultancy Services

Dear Hiring Manager,

I am excited to apply for the Python Developer position at Tata Consultancy Services in Ames, IA. As an MS Computer Science student at the University of Stuttgart, I am confident that my skills and experience make me an ideal candidate for this role.

With a strong foundation in Python programming, I have developed a range of skills in full stack web development, API development, mobile apps development, and AI/ML model development. I am well-versed in writing clean, efficient, and well-documented code, and I possess excellent problem-solving skills with attention to detail. My strong communication skills and ability to work well in a team will enable me to collaborate effectively with your team.

I am familiar with Agile development methodologies and have experience working in a fast-paced environment. My portfolio includes several projects that demonstrate my capabilities, including