### Get the Text from the PDF

In [2]:
#!pip install pdfplumber pytesseract pdf2image

In [3]:
import pdfplumber
import pytesseract
from pdf2image import convert_from_path

In [4]:
def extract_text_from_pdf(pdf_path):
    """Return the text of a PDF, using OCR when no text layer can be read.

    The PDF is first read with pdfplumber. If that yields any non-blank
    text it is returned immediately. Otherwise (no text found, or the
    direct read raised) every page is rendered to an image with
    ``convert_from_path`` and passed through ``pytesseract``.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text with pages separated by a newline and
        surrounding whitespace stripped. An empty string is returned when
        neither strategy produced any text.

    Errors from either strategy are caught and reported with ``print``
    rather than raised, so the caller always receives a string.
    """
    try:
        # Try direct text extraction
        with pdfplumber.open(pdf_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]

        # Join with a newline so the last word of one page is not fused
        # with the first word of the next; pages without text are skipped.
        direct_text = "\n".join(chunk for chunk in page_texts if chunk).strip()
        if direct_text:
            return direct_text
    except Exception as e:
        # Nothing from a half-finished direct pass is kept: the OCR pass
        # below starts from scratch, so pages are never duplicated.
        print(f"Direct text extraction failed: {e}")

    # Fallback to OCR for image-based PDFs
    print("Falling back to OCR for image-based PDF.")
    ocr_texts = []
    try:
        for image in convert_from_path(pdf_path):
            ocr_texts.append(pytesseract.image_to_string(image))
    except Exception as e:
        # Best effort: pages recognised before the failure are still returned.
        print(f"OCR failed: {e}")

    return "\n".join(ocr_texts).strip()

In [17]:
# Extract the sample resume once; `resume_text` is reused by later cells.
pdf_path = "Tejas Gupta Analyst.pdf"
resume_text = extract_text_from_pdf(pdf_path)

# Heading (preceded by a blank line) followed by the extracted text.
print("\nExtracted Text from PDF:", resume_text, sep="\n")


Extracted Text from PDF:
TEJAS GUPTA
Faridabad,Haryana|9971297789|Gmail| LinkedIn |Portfolio
Proficient in Data Visualization, Reporting, Dashboarding, Insight analysis with Computer Science Engineering background with
hands on experience in SQL, Excel, and Power BI. Honing strong analytical skills, decision-making, problem solving abilities with
learningmindsetandproactiveapproach.
TECHNICALSKILLS
• PowerBIDesktop • AdvancedExcel • Python • DataModelling
• PowerBIService • SQL • Streamlit • DataVisualization
• PowerQuery • MySQL • MachineLearning • DataCleaning
• DAX • SSMS • Tableau • DataStorytelling
WORKEXPERIENCE
DataAnalystIntern|EYGDS-AICTE 02/2024–04/2024
• UtilizedPowerBItoanalyzehealthcaredata,contributingtoenhancedpatientcareandstreamlinedoperationsthroughactionable
insights.
• Analyzedpatientandadmissiondatatoidentifytrends,visualizekeymetrics,andprovideinsightsondiseaseanalysisbasedon
smokingandalcoholhabits,patientdemographics,andhospitalstaydurations,leadingtoevidence-b

### Set the Google Generative AI API Key

In [21]:
#!pip install google.generativeai python-dotenv

Collecting google.generativeai
  Downloading google_generativeai-0.8.4-py3-none-any.whl.metadata (4.2 kB)
Collecting google-ai-generativelanguage==0.6.15 (from google.generativeai)
  Downloading google_ai_generativelanguage-0.6.15-py3-none-any.whl.metadata (5.7 kB)
Collecting google-api-core (from google.generativeai)
  Downloading google_api_core-2.24.1-py3-none-any.whl.metadata (3.0 kB)
Collecting google-api-python-client (from google.generativeai)
  Downloading google_api_python_client-2.160.0-py2.py3-none-any.whl.metadata (6.7 kB)
Collecting google-auth>=2.15.0 (from google.generativeai)
  Downloading google_auth-2.38.0-py2.py3-none-any.whl.metadata (4.8 kB)
Collecting proto-plus<2.0.0dev,>=1.22.3 (from google-ai-generativelanguage==0.6.15->google.generativeai)
  Downloading proto_plus-1.26.0-py3-none-any.whl.metadata (2.2 kB)
Collecting googleapis-common-protos<2.0.dev0,>=1.56.2 (from google-api-core->google.generativeai)
  Downloading googleapis_common_protos-1.66.0-py2.py3-none-

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.32.0 requires protobuf<5,>=3.20, but you have protobuf 5.29.3 which is incompatible.


In [23]:
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment so
# the API key never has to be written into the notebook itself.
load_dotenv()

# Read the key from the environment instead of hard-coding it.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    # Fail here with a clear message rather than on the first model call.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to your environment or to a .env file."
    )

genai.configure(api_key=api_key)
model = genai.GenerativeModel("gemini-1.5-flash")

In [25]:
# Send a trivial prompt through the model configured above.
response = model.generate_content("What is the capital of India?")
# Bare last expression: the notebook displays the full response object.
response

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "The capital of India is **New Delhi**.\n"
              }
            ],
            "role": "model"
          },
          "finish_reason": "STOP",
          "avg_logprobs": -0.0026347806677222254
        }
      ],
      "usage_metadata": {
        "prompt_token_count": 8,
        "candidates_token_count": 10,
        "total_token_count": 18
      },
      "model_version": "gemini-1.5-flash"
    }),
)

In [27]:
# Show only the generated text carried by the response object.
response.text

'The capital of India is **New Delhi**.\n'

### Resume Analysis

In [34]:
def analyze_resume(resume_text, job_description=None, model_name="gemini-1.5-flash"):
    """Ask a Gemini model for an HR-style evaluation of a resume.

    Args:
        resume_text: Plain text of the resume. Must contain at least one
            non-whitespace character.
        job_description: Optional job description. When given, the prompt
            additionally asks for a comparison against it (match
            percentage, missing keywords, final thoughts).
        model_name: Name passed to ``genai.GenerativeModel``. Defaults to
            the model this function originally hard-coded.

    Returns:
        The model's reply as a stripped string on success. A dict of the
        form ``{"error": "..."}`` when the resume is empty/blank or when
        the response carries no usable text.

    Exceptions raised by ``generate_content`` itself are not caught here
    and propagate to the caller.
    """
    # Reject None, "" and whitespace-only input before spending an API call.
    if not resume_text or not resume_text.strip():
        return {"error": "Resume text is required for analysis."}

    model = genai.GenerativeModel(model_name)

    base_prompt = f"""
    You are an experienced HR with Technical Experience in the field of any one job role from Data Science, Data Analyst, DevOPS, Machine Learning Engineer, Prompt Engineer, AI Engineer, Full Stack Web Development, Big Data Engineering, Marketing Analyst, Human Resource Manager, Software Developer your task is to review the provided resume.
    Please share your professional evaluation on whether the candidate's profile aligns with the role. Also mention Skills he already have and suggest some skills to imporve his resume , also suggest some course he might take to improve the skills. Highlight the strengths and weaknesses.

    Resume:
    {resume_text}
    """

    if job_description:
        base_prompt += f"""
        Additionally, compare this resume to the following job description:
        
        Job Description:
        {job_description}
        
        Highlight the strengths and weaknesses of the applicant in relation to the specified job requirements. Give the percentage of match if the resume matches
        the job description. First the output should come as percentage and then keywords missing and last final thoughts.
        """

    response = model.generate_content(base_prompt)

    try:
        # The `.text` accessor raises ValueError when the response holds no
        # valid text part (for example when the reply was blocked).
        analysis = response.text.strip()
    except ValueError as exc:
        # Report it in the same shape as the input-validation error above.
        return {"error": f"The model returned no usable text: {exc}"}

    return analysis

In [32]:
analyze_resume(resume_text)

'## Resume Review: Tejas Gupta\n\n**Role Focus:**  Assuming the target role is a Data Analyst (given the resume content), this review focuses on that.  My experience is in Data Science and overlaps significantly with Data Analyst roles.\n\n**Overall Assessment:** Tejas Gupta\'s resume demonstrates a promising start to a data analyst career.  His projects showcase a good understanding of data analysis techniques and tool utilization. However, there are areas for improvement in terms of quantifying achievements and showcasing more advanced skills.\n\n**Strengths:**\n\n* **Diverse Skillset:** He possesses a solid foundation in essential data analysis tools (SQL, Power BI, Excel, Python), data visualization, and data storytelling.\n* **Practical Experience:**  Internships at EY GDS-AICTE and Xebia IT Architects provide valuable real-world experience. The projects further strengthen this practical application.\n* **Project Portfolio:**  Multiple projects demonstrate initiative and a willing