In [83]:
# !pip install langchain-groq
# !pip install  langchain_community
# !pip install chromadb

In [84]:
from langchain_groq import ChatGroq

import os

# Chat model shared by every chain in this notebook.
# The API key is read from the GROQ_API_KEY environment variable so that no
# credential is stored in the notebook itself. A missing key stops execution
# here with a clear message instead of surfacing later as an authentication
# error on the first llm.invoke() call.
groq_api_key = os.environ.get("GROQ_API_KEY", "")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before running this notebook."
    )

llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.0,
    groq_api_key=groq_api_key,
)

In [None]:
# sample_res = llm.invoke("tell me where is mumbai?")
# print(sample_res.content)

In [85]:
from langchain_community.document_loaders import WebBaseLoader

In [86]:
# Job posting to analyse; the query string (campaign tracking parameters) is kept as-is.
JOB_POSTING_URL = (
    "https://www.amazon.jobs/en/jobs/2841524/business-intel-engineer-i-global-operations-artificial-intelligence"
    "?cmpid=SPLICX0248M&ss=paid&utm_campaign=cxro&utm_content=job_posting&utm_medium=social_media&utm_source=linkedin.com"
)
loader = WebBaseLoader(JOB_POSTING_URL)

In [87]:
# Load the page and keep the text content of the last document the loader returned.
loaded_documents = loader.load()
page_data = loaded_documents[-1].page_content

In [88]:
# print(page_data)

In [89]:
from langchain_core.prompts import PromptTemplate

In [90]:
# Template for the extraction step; the {page_data} placeholder receives the scraped page text.
extract_template = """
    **SCRAPPED DATA FROM WEBSITE: **
    {page_data}
    **SCRAPPED DATA ENDS HERE**
    ** INSTRUCTIONS : ** The scrapped data is from a company's career website, your task is to extract the job postings data and return that into a JSON object containing the following keys: 'role', 'skills_required', 'experience', 'qualification'.
    Only return the valid JSON object.
    **NO PREAMBLE, ONLY VALID JSON OBJECT**
    """
prompt_extract = PromptTemplate.from_template(extract_template)

In [91]:
# Pipe the extraction prompt into the model, then run the chain on the scraped page text.
chain_extract = prompt_extract | llm
extract_inputs = {"page_data": page_data}
response = chain_extract.invoke(extract_inputs)

In [92]:
# Show the raw text of the model's reply before any parsing.
print(response.content)

```json
{
  "role": "Business Intel Engineer I, Global Operations - Artificial Intelligence",
  "skills_required": [
    "Analyzing and interpreting data with Redshift, Oracle, NoSQL etc.",
    "Building and maintaining basic data artifacts (e.g., ETL, data models, queries)",
    "Experience with one or more industry analytics visualization tools (e.g. Excel, Tableau, QuickSight, MicroStrategy, PowerBI) and statistical methods (e.g. t-test, Chi-squared)",
    "Experience with scripting language (e.g., Python, Java, or R)",
    "Applying basic statistical methods (e.g. regression) to difficult business problems"
  ],
  "experience": "2+ years",
  "qualification": {
    "basic": "2+ years of analyzing and interpreting data",
    "preferred": [
      "Master's degree, or Advanced technical degree",
      "Experience with statistical analysis, co-relation analysis",
      "Knowledge of how to improve code quality and optimizes BI processes (e.g. speed, cost, reliability)",
      "Excellenc

In [93]:
# Check the Python type of the reply text (it still has to be parsed into a structure).
type(response.content)

str

In [94]:
from langchain_core.output_parsers import JsonOutputParser

In [95]:
# Parse the model's text reply into a Python object.
raw_reply = response.content
json_parser = JsonOutputParser()
json_response = json_parser.parse(raw_reply)

In [96]:
# Check the type of the object the parser produced.
type(json_response)

dict

In [97]:
# Inspect the parsed job posting.
json_response

{'role': 'Business Intel Engineer I, Global Operations - Artificial Intelligence',
 'skills_required': ['Analyzing and interpreting data with Redshift, Oracle, NoSQL etc.',
  'Building and maintaining basic data artifacts (e.g., ETL, data models, queries)',
  'Experience with one or more industry analytics visualization tools (e.g. Excel, Tableau, QuickSight, MicroStrategy, PowerBI) and statistical methods (e.g. t-test, Chi-squared)',
  'Experience with scripting language (e.g., Python, Java, or R)',
  'Applying basic statistical methods (e.g. regression) to difficult business problems'],
 'experience': '2+ years',
 'qualification': {'basic': '2+ years of analyzing and interpreting data',
  'preferred': ["Master's degree, or Advanced technical degree",
   'Experience with statistical analysis, co-relation analysis',
   'Knowledge of how to improve code quality and optimizes BI processes (e.g. speed, cost, reliability)',
   'Excellence in technical communication with peers, partners, an

In [98]:
import pandas as pd

In [99]:
# Portfolio table: one row per document, with its display name and public link.
portfolio = pd.DataFrame(
    {
        'Portfolio': ['Resume', 'GitHub', 'Linkedin'],
        'Link': [
            'https://drive.google.com/file/d/1JWMkdZr0rDVLhUbdufHwLoRYRzIsmRfc/view?usp=sharing',
            'https://github.com/darpan-2001',
            'https://www.linkedin.com/in/darpan-chanana-058914201/',
        ],
    }
)

In [100]:
# Display the portfolio table.
portfolio

Unnamed: 0,Portfolio,Link
0,Resume,https://drive.google.com/file/d/1JWMkdZr0rDVLh...
1,GitHub,https://github.com/darpan-2001
2,Linkedin,https://www.linkedin.com/in/darpan-chanana-058...


In [101]:
# portfolio.to_csv('/content/drive/MyDrive/portfolio.csv', index=False)

In [102]:
import uuid
import chromadb

In [103]:
# Open the on-disk Chroma store in ./vector_store and fetch (or create) the 'portfolio' collection.
client = chromadb.PersistentClient(path='vector_store')
collection = client.get_or_create_collection(name='portfolio')

In [104]:
# Seed the collection once. The count check keeps a re-run of this cell from
# inserting the same portfolio rows again under fresh random ids.
# All rows go in through a single add() call instead of one call per row;
# an empty portfolio is skipped so add() never receives empty lists.
if collection.count() == 0 and not portfolio.empty:
    portfolio_names = portfolio['Portfolio'].tolist()
    collection.add(
        documents=portfolio_names,
        metadatas=[{'link': link} for link in portfolio['Link']],
        ids=[str(uuid.uuid4()) for _ in portfolio_names],
    )

In [105]:
# Fetch the stored records to confirm what the collection currently holds.
collection.get()

{'ids': ['198eb137-9406-43e0-813b-09009a068159',
  '37cd2594-fdea-4c32-8011-ca513efb2d80',
  '67414228-08cd-442b-9387-10da69040850'],
 'embeddings': None,
 'documents': ['Resume', 'GitHub', 'Linkedin'],
 'uris': None,
 'data': None,
 'metadatas': [{'link': 'https://drive.google.com/file/d/1JWMkdZr0rDVLhUbdufHwLoRYRzIsmRfc/view?usp=sharing'},
  {'link': 'https://github.com/darpan-2001'},
  {'link': 'https://www.linkedin.com/in/darpan-chanana-058914201/'}],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [106]:
# sample_output = collection.query(
#     query_texts=["experience in python", "need someone with experience in natural language processing"],
#     n_results=2
# ).get('metadatas', [])

# Sanity check: ask for the single closest portfolio entry for each of two example queries.
sample_queries = [
    "experience in python",
    "need someone with experience in natural language processing",
]
sample_output = collection.query(query_texts=sample_queries, n_results=1)

print(sample_output)

{'ids': [['198eb137-9406-43e0-813b-09009a068159'], ['198eb137-9406-43e0-813b-09009a068159']], 'embeddings': None, 'documents': [['Resume'], ['Resume']], 'uris': None, 'data': None, 'metadatas': [[{'link': 'https://drive.google.com/file/d/1JWMkdZr0rDVLhUbdufHwLoRYRzIsmRfc/view?usp=sharing'}], [{'link': 'https://drive.google.com/file/d/1JWMkdZr0rDVLhUbdufHwLoRYRzIsmRfc/view?usp=sharing'}]], 'distances': [[1.509785682668869], [1.5986000984118787]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas: 'metadatas'>]}


In [107]:
# Show only the extracted skills list from the parsed posting.
json_response['skills_required']

['Analyzing and interpreting data with Redshift, Oracle, NoSQL etc.',
 'Building and maintaining basic data artifacts (e.g., ETL, data models, queries)',
 'Experience with one or more industry analytics visualization tools (e.g. Excel, Tableau, QuickSight, MicroStrategy, PowerBI) and statistical methods (e.g. t-test, Chi-squared)',
 'Experience with scripting language (e.g., Python, Java, or R)',
 'Applying basic statistical methods (e.g. regression) to difficult business problems']

In [108]:
# Closest single portfolio entry for each extracted skill.
required_skills = json_response['skills_required']
email_content_required = collection.query(query_texts=required_skills, n_results=1)

# print(email_content_required)

In [123]:
# Metadata of the two closest portfolio entries for each extracted skill.
link_query_result = collection.query(
    query_texts=json_response['skills_required'],
    n_results=2,
)
portfolio_link = link_query_result.get('metadatas', [])

In [124]:
# Second name for the parsed posting; `job` and `json_response` refer to the same object.
job = json_response

In [125]:
# Document names of the two closest portfolio entries for each extracted skill.
name_query_result = collection.query(
    query_texts=json_response['skills_required'],
    n_results=2,
)
doc_name = name_query_result.get('documents', [])

print(doc_name)

[['GitHub', 'Resume'], ['Resume', 'GitHub'], ['GitHub', 'Linkedin'], ['GitHub', 'Resume'], ['Resume', 'GitHub']]


In [126]:
# Prompt for the cold-email step. The template has three placeholders that must
# be supplied when the prompt is invoked: {job_description}, {doc_name} and
# {link_list}. The instructions tell the model to list each link only once.
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}

        ### INSTRUCTION:
        You are a Software Engineer, working in the AI and Data Science domain. You are now looking for a new job.
        With your skills and experience, you have empowered your current company's business by building and testing deep learning models, which has heightened overall efficiency of AI models being pushed to production.
        Your job is to write a cold email to the hiring manager, explaining how your skillset and experience can be helpful for their organization.
        Also, include the most relevant portfolio link of yours with the document name in the following format:

        Take a look at my work here:
        {doc_name}: {link_list}

        Ensure that each link is listed only once, the name of the portfolio document is provided, and it is presented in a clean, readable format.
        Do not repeat any link, even if you are referencing them multiple times within the email and also include the name of documnet only before prinitng it's link.
        Remember you are a software engineer working in AI domain.
        Do not provide a preamble.

        ### EMAIL (NO PREAMBLE):
        """
        )

In [127]:
# Pipe the email prompt into the model, fill its three placeholders, and print the drafted email.
chain_email = prompt_email | llm
email_inputs = {
    "job_description": str(job),
    "doc_name": doc_name,
    "link_list": portfolio_link,
}
response = chain_email.invoke(email_inputs)
print(response.content)

Subject: Application for Business Intel Engineer I, Global Operations - Artificial Intelligence

Dear Hiring Manager,

I am excited to apply for the Business Intel Engineer I, Global Operations - Artificial Intelligence role at your organization. With over 2 years of experience in analyzing and interpreting data, I am confident that my skills and expertise can help drive business growth and improvement.

As a software engineer in the AI and Data Science domain, I have a strong background in building and testing deep learning models, which has heightened the overall efficiency of AI models being pushed to production. My experience with scripting languages such as Python, and industry analytics visualization tools like Tableau and PowerBI, has enabled me to effectively communicate insights and results to both technical and non-technical stakeholders.

I am well-versed in applying basic statistical methods, including regression analysis, to complex business problems. My expertise in data 