In [1]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
import chromadb

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
load_dotenv()

True

In [3]:
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [86]:
llm = ChatGroq(
    temperature=0,
    model="llama3-8b-8192",
    api_key=GROQ_API_KEY
)

In [5]:
def get_pdf_text(pdf_docs):
    loader = PyPDFLoader(file_path=pdf_docs)
    pages = loader.load()
    return pages

In [6]:
pages = get_pdf_text(pdf_docs="Praveen_S_CV.pdf")

In [7]:
pages

[Document(metadata={'source': 'Praveen_S_CV.pdf', 'page': 0}, page_content='Praveen S \n LinkedIn  |  875-410-1723  |  Portfolio  |  praveensivaprakasham@gmail.com  |  GitHub  \n \n \nEDUCATION  \n \nIIT Madras GUVI, Tamil Nadu, India  \nMaster Data Science program       Feb 2023 – Aug 2023  \n \nK.S.R College  of Engineering, Tamil Nadu, India  \nBachelor of Mechanical Engineering       June 2015 – April 2019  \n \nSUMMARY  \n \nMotivated recent graduate with a zeal for Data Science . Strong foundation in ML /AI concepts and \nhands -on project experience. Eager to contribute and learn  from experts . \n \nSKILLS  \n \n● Languages : Python  \n● Packages and Frameworks ( Python) : Sckit -Learn, TensorFlow (ANN, CNN), keras, \nlangchain, hugging faces, NumPy, Pandas, Matplotlib, Seaborn , NLTK , Spacy, Streamlit , \nZenML, MLFlow . \n● Technologies : PostgreSQL, MySQL, MongoDB , Docker, DagsHub, Ubuntu,  AWS (ECR), \nAWS (EC2), GitHub. \n \nEXPERIENCE  \n \nArtificial Intelligence  Virt

In [8]:
def get_text_chunks(pages):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 1500, chunk_overlap = 200, 
        length_function=len, separators=['\n\n', '\n', ' ', '']
    )
    chunks = text_splitter.split_documents(pages)
    
    return chunks

In [9]:
chunks_data = get_text_chunks(pages=pages)

In [33]:
chunks_data

[Document(metadata={'source': 'Praveen_S_CV.pdf', 'page': 0}, page_content='Praveen S \n LinkedIn  |  875-410-1723  |  Portfolio  |  praveensivaprakasham@gmail.com  |  GitHub  \n \n \nEDUCATION  \n \nIIT Madras GUVI, Tamil Nadu, India  \nMaster Data Science program       Feb 2023 – Aug 2023  \n \nK.S.R College  of Engineering, Tamil Nadu, India  \nBachelor of Mechanical Engineering       June 2015 – April 2019  \n \nSUMMARY  \n \nMotivated recent graduate with a zeal for Data Science . Strong foundation in ML /AI concepts and \nhands -on project experience. Eager to contribute and learn  from experts . \n \nSKILLS  \n \n● Languages : Python  \n● Packages and Frameworks ( Python) : Sckit -Learn, TensorFlow (ANN, CNN), keras, \nlangchain, hugging faces, NumPy, Pandas, Matplotlib, Seaborn , NLTK , Spacy, Streamlit , \nZenML, MLFlow . \n● Technologies : PostgreSQL, MySQL, MongoDB , Docker, DagsHub, Ubuntu,  AWS (ECR), \nAWS (EC2), GitHub. \n \nEXPERIENCE  \n \nArtificial Intelligence  Virt

In [10]:
chunks_data[0].page_content

'Praveen S \n LinkedIn  |  875-410-1723  |  Portfolio  |  praveensivaprakasham@gmail.com  |  GitHub  \n \n \nEDUCATION  \n \nIIT Madras GUVI, Tamil Nadu, India  \nMaster Data Science program       Feb 2023 – Aug 2023  \n \nK.S.R College  of Engineering, Tamil Nadu, India  \nBachelor of Mechanical Engineering       June 2015 – April 2019  \n \nSUMMARY  \n \nMotivated recent graduate with a zeal for Data Science . Strong foundation in ML /AI concepts and \nhands -on project experience. Eager to contribute and learn  from experts . \n \nSKILLS  \n \n● Languages : Python  \n● Packages and Frameworks ( Python) : Sckit -Learn, TensorFlow (ANN, CNN), keras, \nlangchain, hugging faces, NumPy, Pandas, Matplotlib, Seaborn , NLTK , Spacy, Streamlit , \nZenML, MLFlow . \n● Technologies : PostgreSQL, MySQL, MongoDB , Docker, DagsHub, Ubuntu,  AWS (ECR), \nAWS (EC2), GitHub. \n \nEXPERIENCE  \n \nArtificial Intelligence  Virtual  Intern | Cognizant :    Aug – Sep 2023 \n● Created  a machine learning

In [46]:
def get_extracted_text(chunks_data):
    page_contents = []
    for chunk in chunks_data:
        page_contents.append(chunk.page_content)
    return ' '.join(page_contents)
        

In [47]:
extracted_text = get_extracted_text(chunks_data=chunks_data)

In [48]:
type(extracted_text)

str

In [49]:
print(extracted_text)

Praveen S 
 LinkedIn  |  875-410-1723  |  Portfolio  |  praveensivaprakasham@gmail.com  |  GitHub  
 
 
EDUCATION  
 
IIT Madras GUVI, Tamil Nadu, India  
Master Data Science program       Feb 2023 – Aug 2023  
 
K.S.R College  of Engineering, Tamil Nadu, India  
Bachelor of Mechanical Engineering       June 2015 – April 2019  
 
SUMMARY  
 
Motivated recent graduate with a zeal for Data Science . Strong foundation in ML /AI concepts and 
hands -on project experience. Eager to contribute and learn  from experts . 
 
SKILLS  
 
● Languages : Python  
● Packages and Frameworks ( Python) : Sckit -Learn, TensorFlow (ANN, CNN), keras, 
langchain, hugging faces, NumPy, Pandas, Matplotlib, Seaborn , NLTK , Spacy, Streamlit , 
ZenML, MLFlow . 
● Technologies : PostgreSQL, MySQL, MongoDB , Docker, DagsHub, Ubuntu,  AWS (ECR), 
AWS (EC2), GitHub. 
 
EXPERIENCE  
 
Artificial Intelligence  Virtual  Intern | Cognizant :    Aug – Sep 2023 
● Created  a machine learning model  to optimize supply cha

In [65]:
prompt_extract = PromptTemplate.from_template(
    """
    ### EXTRACTED TEXT FROM PDF FILE:
    {extracted_text}
    ### INSTRUCTION:
    The extracted text is from pdf file of a Resume.
    Your job is to extract the text and return them in JSON format containing the
    following keys: 'name', 'email', 'skills'.
    Only return the vaild JSON.
    ### VAILD JSON (NO PREAMBLE):
    """
)


In [66]:
chain_extract = prompt_extract | llm

In [67]:
res = chain_extract.invoke(input={'extracted_text': extracted_text})

In [68]:
print(res.content)

{
"name": "Praveen S",
"email": "praveensivaprakasham@gmail.com",
"skills": [
"Python",
"Sckit-Learn",
"TensorFlow (ANN, CNN)",
"keras",
"langchain",
"hugging faces",
"NumPy",
"Pandas",
"Matplotlib",
"Seaborn",
"NLTK",
"Spacy",
"Streamlit",
"ZenML",
"MLFlow",
"PostgreSQL",
"MySQL",
"MongoDB",
"Docker",
"DagsHub",
"Ubuntu",
"AWS (ECR)",
"AWS (EC2)",
"GitHub"
]
}


In [69]:
type(res.content)

str

In [70]:
loader = WebBaseLoader("https://jobs.nike.com/job/R-39879")
page_data = loader.load().pop().page_content
print(page_data)

Apply for Machine Learning Engineer - Supply Chain

Search JobsSkip navigationSearch JobsNIKE, INC. JOBSContract JobsJoin The Talent CommunityLife @ NikeOverviewBenefitsBrandsOverviewJordanConverseTeamsOverviewAdministrative SupportAdvanced InnovationAir Manufacturing InnovationAviationCommunicationsCustomer ServiceDesignDigitalFacilitiesFinance & AccountingGovernment & Public AffairsHuman ResourcesInsights & AnalyticsLegalManufacturing & EngineeringMarketingMerchandisingPlanningPrivacyProcurementProduct Creation, Development & ManagementRetail CorporateRetail StoresSalesSocial & Community ImpactSports MarketingStrategic PlanningSupply Chain, Distribution & LogisticsSustainabilityTechnologyLocationsOverviewNike WHQNike New York HQEHQ: Hilversum, The NetherlandsELC: Laakdal, BelgiumGreater China HQDiversity, Equity & InclusionOverviewMilitary InclusionDisability InclusionIndigenous InclusionInternshipsData & AnalyticsMachine Learning Engineer - Supply ChainBeaverton, OregonBecome a Part

In [71]:
web_prompt_extract = PromptTemplate.from_template(
    """
    ### SCRAPED TEXT FROM WEBSITE:
    {page_data}
    ### INSTRUCTION:
    The scraped text is from career's page of a website.
    Your job is to extract the job postings and return them in JSON format containing the
    following keys: 'role', 'experience', 'skills' and 'description'.
    Only return the vaild JSON.
    ### VAILD JSON (NO PREAMBLE):
    """
)

In [75]:
web_chain_extract = web_prompt_extract | llm

In [76]:
web_res = web_chain_extract.invoke(input={'page_data': page_data})

In [77]:
print(web_res.content)

[
  {
    "role": "Machine Learning Engineer - Supply Chain",
    "experience": "1-3 years of professional experience in software engineering, data engineering, machine learning, or related field",
    "skills": [
      "Python",
      "Docker",
      "CI/CD",
      "agile development",
      "test driven development",
      "data structures",
      "data modeling",
      "software architecture",
      "data sets",
      "ETL pipelines",
      "SQL",
      "MLOps",
      "API development",
      "mathematical optimization",
      "Amazon Web Services",
      "Spark",
      "Kubernetes",
      "Jenkins",
      "Databricks",
      "Terraform",
      "effective communication skills"
    ],
    "description": "As a Machine Learning Engineer, you will develop robust advanced analytics and machine learning solutions that have a direct impact on the Supply Chain infrastructure. You should have experience in Python, a strong background in algorithms and data structures, hands-on AWS experience

In [78]:
type(web_res.content)

str

In [79]:
json_parser = JsonOutputParser()
json_res = json_parser.parse(web_res.content)[0]
json_res

{'role': 'Machine Learning Engineer - Supply Chain',
 'experience': '1-3 years of professional experience in software engineering, data engineering, machine learning, or related field',
 'skills': ['Python',
  'Docker',
  'CI/CD',
  'agile development',
  'test driven development',
  'data structures',
  'data modeling',
  'software architecture',
  'data sets',
  'ETL pipelines',
  'SQL',
  'MLOps',
  'API development',
  'mathematical optimization',
  'Amazon Web Services',
  'Spark',
  'Kubernetes',
  'Jenkins',
  'Databricks',
  'Terraform',
  'effective communication skills'],
 'description': 'As a Machine Learning Engineer, you will develop robust advanced analytics and machine learning solutions that have a direct impact on the Supply Chain infrastructure. You should have experience in Python, a strong background in algorithms and data structures, hands-on AWS experience, as well as experience in database technology (e.g. Postgres, Redis) and data processing technology (e.g. EMR

In [82]:
type(json_res)

dict

In [91]:
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are name from {pdf_file} actively seeking opportunities in the fields, skills in {pdf_file} 
        
        Your job is to write a cold email to the manager regarding the job mentioned above describing the 
        capability of skills from {pdf_file} in fulfilling their needs. Also caution on giving context, mention only skills that
        are present in skills from {pdf_file} which relavent to the skills in {job_description}
        
        Remember you are name from {pdf_file}.
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        """
    )

In [93]:
chain_email = prompt_email | llm
res = chain_email.invoke({"job_description": str(json_res), "pdf_file": res})
print(res.content)

Subject: Application for Machine Learning Engineer - Supply Chain Role

Dear Hiring Manager,

I came across the Machine Learning Engineer - Supply Chain role at Nike and was excited to see the opportunity to apply my skills and experience in machine learning and data science. With a strong foundation in Python and hands-on experience in developing machine learning models to optimize supply chain operations, I believe I would be a great fit for this role.

As a recent graduate with a Master's degree in Data Science, I have developed a strong understanding of machine learning algorithms and data structures. My experience with Python, including packages and frameworks such as Scikit-Learn, TensorFlow, Keras, and langchain, has prepared me to work with large datasets and develop robust models. I have also worked with technologies such as Docker, CI/CD, agile development, and test driven development.

I am excited about the opportunity to work with the Artificial Intelligence and Machine Le