In [278]:
import os
from dotenv import load_dotenv

In [279]:
load_dotenv()

True

In [280]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model="mixtral-8x7b-32768",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=os.getenv("GROQ_API_KEY")
    # other params...
)

In [281]:
#%pip install -qU langchain_community beautifulsoup4

In [282]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://www.mckinsey.com/careers/search-jobs/jobs/datascienceconsultant-quantumblackaibymckinsey-92304?appsource=LinkedIn")

In [283]:
page_data = loader.load().pop().page_content
print(page_data)





	Jobs | Careers | McKinsey & Company




















https://www.mckinsey.com915b5091-0d7e-44d2-a8c4-cf08267e52fe











                Skip to main content
            







Analytics
Data Science Consultant - QuantumBlack, AI by McKinsey
Job ID: 92304





Berlin
Cologne
Dusseldorf
Frankfurt
Hamburg
Munich
Stuttgart




Apply Now
Apply Later





Do you want to do work that matters, alongside supportive leaders who will help you grow faster than you ever thought possible? Are you a creative problem-solver who is energized by challenges? You’ve come to the right place. 

Who You'll Work With
You will work with other data scientists, data/ML engineers, designers, project managers and business subject matter experts on interdisciplinary projects across various industry sectors to enable business ambitions with data & analytics.Who you areYou are a highly collaborative individual who is capable of laying aside your own agenda, listening to and learning from colleagues, cha

In [284]:
from langchain_core.prompts import PromptTemplate

prompt_extract = PromptTemplate.from_template(
    """ 

    ### SCRAPED TEXT FROM WEBSITE:
    {page_data}
    ### INSTRUCTION:
    The scraped is from the career's page of a website.
    Your job is to extract the job postings and return them in JSON format containing the 
    following keys if available: 'role','experience','skills','Who you are','Our Tech Stack','Role responsibilities' and 'description'.
    Only return the valid JSON.
    ### VALID JSON(NO PREAMBLE):
    """
)

chain_extractor = prompt_extract | llm

res = chain_extractor.invoke(input={'page_data':page_data})
print(res.content)

[
  {
    "role": "Data Science Consultant - QuantumBlack, AI by McKinsey",
    "experience": "Up to 2 years of professional hands-on experience in applying advanced data science methods and implementing technical data solutions",
    "skills": [
      "Python",
      "SQL",
      "PySpark",
      "TensorFlow",
      "PyTorch",
      "Databricks",
      "Docker",
      "Kubernetes",
      "AWS",
      "GCP",
      "Azure",
      "German",
      "English"
    ],
    "Who you are": "A highly collaborative individual who is capable of laying aside your own agenda, listening to and learning from colleagues, challenging thoughtfully and prioritizing impact. You search for ways to improve things and work collaboratively with colleagues. You believe in iterative change, experimenting with new approaches, learning and improving to move forward quickly.",
    "Our Tech Stack": "Python, PySpark, TensorFlow, PyTorch, Databricks, SQL, Docker and Kubernetes. We also leverage our own proprietary too

In [285]:
type(res.content)

str

In [286]:
from langchain_core.output_parsers import JsonOutputParser

json_parser = JsonOutputParser()

json_res = json_parser.parse(res.content)

json_res

[{'role': 'Data Science Consultant - QuantumBlack, AI by McKinsey',
  'experience': 'Up to 2 years of professional hands-on experience in applying advanced data science methods and implementing technical data solutions',
  'skills': ['Python',
   'SQL',
   'PySpark',
   'TensorFlow',
   'PyTorch',
   'Databricks',
   'Docker',
   'Kubernetes',
   'AWS',
   'GCP',
   'Azure',
   'German',
   'English'],
  'Who you are': 'A highly collaborative individual who is capable of laying aside your own agenda, listening to and learning from colleagues, challenging thoughtfully and prioritizing impact. You search for ways to improve things and work collaboratively with colleagues. You believe in iterative change, experimenting with new approaches, learning and improving to move forward quickly.',
  'Our Tech Stack': 'Python, PySpark, TensorFlow, PyTorch, Databricks, SQL, Docker and Kubernetes. We also leverage our own proprietary tools such as Kedro, CuasalNex, MLRun (check out more OSS here: <ht

In [287]:
type(json_res)

list

In [288]:
json_res

[{'role': 'Data Science Consultant - QuantumBlack, AI by McKinsey',
  'experience': 'Up to 2 years of professional hands-on experience in applying advanced data science methods and implementing technical data solutions',
  'skills': ['Python',
   'SQL',
   'PySpark',
   'TensorFlow',
   'PyTorch',
   'Databricks',
   'Docker',
   'Kubernetes',
   'AWS',
   'GCP',
   'Azure',
   'German',
   'English'],
  'Who you are': 'A highly collaborative individual who is capable of laying aside your own agenda, listening to and learning from colleagues, challenging thoughtfully and prioritizing impact. You search for ways to improve things and work collaboratively with colleagues. You believe in iterative change, experimenting with new approaches, learning and improving to move forward quickly.',
  'Our Tech Stack': 'Python, PySpark, TensorFlow, PyTorch, Databricks, SQL, Docker and Kubernetes. We also leverage our own proprietary tools such as Kedro, CuasalNex, MLRun (check out more OSS here: <ht

In [292]:
import chromadb
from chromadb.utils import embedding_functions

# Initialize the ChromaDB client
client = chromadb.PersistentClient(path="path/to/chroma_db_storage")

# Define your embedding function (using OpenAI's embeddings as an example)
#openai_embedding = embedding_functions.OpenAIEmbeddingFunction(api_key="your_openai_api_key")

# Create a collection
collection = client.create_collection(name="cv_dataa")

# Your JSON Data (as previously provided)
cv_dataa = {
  "name": "Jay Mehta",
  "title": "Data Scientist",
  "contact": {
    "phone": "+49 17676 980224",
    "email": "jay22mehta@gmail.com",
    "github": "https://github.com/jay22mehta",
    "huggingface": "https://huggingface.co/jay22mehta",
    "location": "Paderborn, Germany"
  },
  "summary": "Data Scientist deeply committed to crafting innovative, user-friendly solutions that drive business advancement. Currently striving to gain further expertise in programming, including machine learning, deep learning, and generative AI.",
  "experience": [
    {
      "title": "Data Scientist",
      "company": "BASF Agricultural Center",
      "location": "Limburgerhof, Germany",
      "duration": "12/2022 to Present",
      "responsibilities": [
        "Improved decision-making by 20% through advanced statistical modeling.",
        "Specialized in machine learning algorithms and time-series analysis for sales forecasting and trend analysis.",
        "Developed user-friendly digital products integrating ML models (LLMs) into applications.",
        "Implemented Python-based AI frameworks for chatbots to streamline user interactions and enhance satisfaction.",
        "Collaborated on cloud development projects with a focus on CI/CD pipelines."
      ]
    },
    {
      "title": "Energy System Technique Project",
      "organization": "Universität Paderborn",
      "location": "Paderborn, Germany",
      "duration": "10/2021 to 03/2022",
      "responsibilities": [
        "Classified heat consumption across Dortmunds districts using machine learning models.",
        "Improved data analysis efficiency by 30% through QGIS integration.",
        "Boosted data analysis speed by 50% with advanced statistical models.",
        "Generated GIS maps to visualize heat consumption patterns."
      ]
    },
    {
      "title": "Energy Supply Structure of the Future Project",
      "organization": "Universität Paderborn",
      "location": "Paderborn, Germany",
      "duration": "10/2021 to 03/2022",
      "responsibilities": [
        "Developed predictive models for future temperature and weather conditions using time-series analysis.",
        "Applied data preprocessing techniques, including normalization and handling missing values.",
        "Built and compared machine learning models (ARIMA, LSTM) for accurate weather forecasting.",
        "Utilized Python libraries (Pandas, NumPy, Scikit-learn) and Matplotlib for data processing and visualization."
      ]
    }
  ],
  "education": [
    {
      "degree": "Bachelor of Engineering",
      "field": "Electrical Engineering",
      "institution": "Gujarat Technological University",
      "location": "Ahmedabad",
      "duration": "06/2012 to 06/2016"
    },
    {
      "degree": "Master of Science",
      "field": "Electrical System Engineering",
      "institution": "Universität Paderborn",
      "location": "Paderborn, Germany",
      "graduation_date": "04/2019"
    }
  ],
  "skills": [
    "AWS", "Azure", "Data Visualization", "Business Intelligence", "Scrum",
    "CI/CD", "Computer Vision", "Data Science", "Docker", "Deep Learning",
    "Git", "GitHub", "Machine Learning", "NLP", "Power BI", "Python",
    "Scikit-learn", "SQL", "Statistical Analysis", "TensorFlow", "MongoDB",
    "GCP", "Streamlit", "PyTorch", "Azure Databricks", "Azure ML"
  ],
  "languages": [
    {
      "language": "English",
      "proficiency": "Proficient"
    },
    {
      "language": "German",
      "proficiency": "Intermediate"
    }
  ],
  "passions": ["Badminton", "Chess", "Cycling"]
}


# Add the JSON data to ChromaDB
collection.add(
    documents=[str(cv_dataa)],  # Convert to string or JSON format
    ids=["jay_mehta_cv"],      # Unique ID for the document
    metadatas=[{"userid":"jaym"}]  # Optional metadata
)

print("CV stored in ChromaDB successfully!")


CV stored in ChromaDB successfully!


In [293]:
role = json_res[0]['role']
print(role)

Data Science Consultant - QuantumBlack, AI by McKinsey


In [294]:
skills = json_res[0]['skills']
print(skills)

['Python', 'SQL', 'PySpark', 'TensorFlow', 'PyTorch', 'Databricks', 'Docker', 'Kubernetes', 'AWS', 'GCP', 'Azure', 'German', 'English']


In [295]:
experience =  json_res[0]['experience']
print(experience)

Up to 2 years of professional hands-on experience in applying advanced data science methods and implementing technical data solutions


In [297]:
queries = skills
ans_skills = [collection.query(query_texts=[query],n_results=1).get('metadatas',[]) for query in queries]
ans_skills

[[[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]],
 [[{'userid': 'jaym'}]]]

In [273]:
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are J, representing jay and you will write cover letter on behalf of him.
        Your are master in art of writing cover letters, write a cover letter to the client regarding the job mentioned above describing the capability of jay's 
        in fulfilling their needs.

        ### 1. STARTS WITH A PROFFETIONAL GREETINGS (5 to 10 WORDS)
        {From Careers Page: Role}
        Address the hiring manager by name whenever possible (e.g., Dear [First Name Last Name] or Dear [Position Title]).
        If the name is unknown, use a respectful general greeting like Dear Hiring Manager.

        ### 2. OPENING PARAGRAPHS: GRAB ATTENTION (50 to 70 WORDS)
        {From Careers Page: Description}
        Introduce yourself and state the position you are applying for.
        Mention a specific detail about the company or role that excites you.
        If applicable, briefly reference a referral or how you learned about the position.
        Example:
        I am excited to apply for the [Position Title] role at [Company Name].
        As a [Your Profession/Skill Set], I admire [specific detail about the company, such as a project, mission, or value]. 
        My skills in [specific skills] align closely with the requirements of this role.

        ### 3. MIDDLE PARAGRAPHS: SHOWCASE YOUR FIT  (80 TO 100 WORDS)
        {From Careers Page: Role Responsibilities}
        {From Careers Page: Who You Are}
        {From Careers Page: Tech Stack}
        Highlight Your Skills and Experience: Discuss your most relevant qualifications, using specific examples to demonstrate your accomplishments. Tie these to the job’s requirements.
        Quantify Your Impact: Use numbers, percentages, or tangible results to show your contributions.
        Connect to the Companys Needs: Explain how your background enables you to solve the companys challenges or add value.
        Example:
        In my previous role as [Position], I spearheaded a project that increased [result, e.g., revenue, efficiency] by [percentage/amount].
        By [specific actions you took], I was able to [positive outcome]. 
        These experiences have equipped me with the skills to [how you will apply them to the new role].

        ### 4. SHOWCASE PASSION AND CULTURE FIT
        {From Careers Page: Experience}
        Express enthusiasm for the companys mission, values, or recent accomplishments.
        Illustrate why you are particularly drawn to this role and how you align with the organizations culture.
        Example:
        What excites me most about [Company Name] is its dedication to [specific aspect, e.g., innovation, community impact].
        As someone who values [shared value or trait], I am eager to contribute to a team committed to [specific company goal].

        ### 5. CLOSING PARAGRAPH: END STRONG
        {From Careers Page: Skills}
        Reiterate your interest in the role and confidence in your ability to contribute.
        Politely encourage next steps, such as an interview, without being overly pushy.
        Thank the reader for their time and consideration.
        Example:
        I would welcome the opportunity to discuss how my experience and skills align with the [Position Title] role. 
        Thank you for considering my application, and I look forward to the possibility of contributing to [Company Name].
        ### 6. Professional Closing
        Use a formal sign-off like Sincerely, Best regards, or Thank you.
        Include your full name and contact information (if not already provided elsewhere).

        ### General Tips
        Be Concise: Limit your letter to 3 or 4 paragraphs, or about one page.
        Tailor It: Customize your letter for each role by researching the company and incorporating relevant details.
        Avoid Jargon: Use clear, professional language that is easy to understand.
        Proofread: Double-check for grammar, spelling, and formatting errors.




        Also add the most relevant experience and project that you have worked on based on client requerement and skill set add 
        relevant skills every thing about jay is astored here in  : {ans}
        Remember you are J.
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
        )

chain_email = prompt_email | llm
res = chain_email.invoke({"job_description": str(Job),"ans":ans})
print(res.content)

Dear Hiring Manager,

I am writing to express my interest in the Data Scientist position at your esteemed organization, as advertised. My proficiency in Python, Machine Learning, and Data Science, coupled with my experience in AWS, Azure, and GCP, make me a strong candidate for this role.

In my previous role, I spearheaded a project that improved decision-making by 20% through the development of predictive weather models. I am confident that my skills and experience will enable me to contribute significantly to your team.

I am particularly drawn to your organization due to its commitment to innovation and community impact. As someone who values continuous learning and collaboration, I am eager to contribute to a team that shares these values.

I look forward to the opportunity to discuss how my experience and skills align with the Data Scientist role. Thank you for considering my application, and I eagerly anticipate the possibility of contributing to your esteemed organization.

Sin

In [271]:
print("Job Description:", Job)
print("Ans:", ans)


Job Description: ['Python', 'SQL', 'PySpark', 'TensorFlow', 'PyTorch', 'Databricks', 'Docker', 'Kubernetes', 'AWS', 'GCP', 'Azure', 'German', 'English']
Ans: [[[{'certifications': 'AWS Certified Data Analytics,Azure Data Scientist Associate', 'github': 'https://github.com/jay22mehta', 'highest_degree': 'Master of Science', 'id': 'jay_mehta_cv', 'institutions': 'Universität Paderborn,Gujarat Technological University', 'key_achievements': 'Improved decision-making by 20%,Built predictive weather models', 'languages': 'English: Proficient,German: Intermediate', 'location': 'Paderborn, Germany', 'name': 'Jay Mehta', 'primary_industry': 'Data Science and AI', 'profession': 'Data Scientist', 'project_focus_areas': 'Time-Series Forecasting,GIS Integration,Weather Modeling', 'search_tags': 'Data Scientist,Machine Learning,AI Development', 'source': 'user_cv', 'top_skills': 'Python,Machine Learning,Data Science', 'years_in_current_role': '1 year', 'years_of_experience': 5}]], [[{'certifications

In [272]:
print(str(Job))

['Python', 'SQL', 'PySpark', 'TensorFlow', 'PyTorch', 'Databricks', 'Docker', 'Kubernetes', 'AWS', 'GCP', 'Azure', 'German', 'English']


In [None]:
metadatas=[{
  "source": "user_cv",
  "id": "jay_mehta_cv",
  "name": "Jay Mehta",
  "location": "Paderborn, Germany",
  "profession": "Data Scientist",
  "experience": [
    {
      "title": "Data Scientist",
      "company": "BASF Agricultural Center",
      "location": "Limburgerhof, Germany",
      "duration": "12/2022 to Present",
      "responsibilities": [
        "Improved decision-making by 20% through advanced statistical modeling.",
        "Specialized in machine learning algorithms and time-series analysis for sales forecasting and trend analysis.",
        "Developed user-friendly digital products integrating ML models (LLMs) into applications.",
        "Implemented Python-based AI frameworks for chatbots to streamline user interactions and enhance satisfaction.",
        "Collaborated on cloud development projects with a focus on CI/CD pipelines."
      ]
    },
    {
      "title": "Energy System Technique Project",
      "organization": "Universität Paderborn",
      "location": "Paderborn, Germany",
      "duration": "10/2021 to 03/2022",
      "responsibilities": [
        "Classified heat consumption across Dortmunds districts using machine learning models.",
        "Improved data analysis efficiency by 30% through QGIS integration.",
        "Boosted data analysis speed by 50% with advanced statistical models.",
        "Generated GIS maps to visualize heat consumption patterns."
      ]
    },
    {
      "title": "Energy Supply Structure of the Future Project",
      "organization": "Universität Paderborn",
      "location": "Paderborn, Germany",
      "duration": "10/2021 to 03/2022",
      "responsibilities": [
        "Developed predictive models for future temperature and weather conditions using time-series analysis.",
        "Applied data preprocessing techniques, including normalization and handling missing values.",
        "Built and compared machine learning models (ARIMA, LSTM) for accurate weather forecasting.",
        "Utilized Python libraries (Pandas, NumPy, Scikit-learn) and Matplotlib for data processing and visualization."
      ]
    }
  ],
  "years_of_experience": 2,
  "skills": ",".join([["AWS", "Azure", "Data Visualization", "Business Intelligence", "Scrum",
    "CI/CD", "Computer Vision", "Data Science", "Docker", "Deep Learning",
    "Git", "GitHub", "Machine Learning", "NLP", "Power BI", "Python",
    "Scikit-learn", "SQL", "Statistical Analysis", "TensorFlow", "MongoDB",
    "GCP", "Streamlit", "PyTorch", "Azure Databricks", "Azure ML"]]),
  "primary_industry": "Data Science and AI",
  "project_focus_areas": ",".join(["Machine learning","Generative AI","Time-Series Forecasting"]),
  "highest_degree": "Master of Science",
  "institutions": ",".join(["Universität Paderborn", "Gujarat Technological University"]),
  "github": "https://github.com/jay22mehta",
  "years_in_current_role": "2 year",
  "search_tags": ",".join(["Data Scientist", "Machine Learning", "AI Development"]),
  "languages": ",".join(["English: Proficient", "German: Intermediate"])
}
] 