In [2]:
import os
import json
import PyPDF2
from dotenv import load_dotenv
import google.generativeai as genai

# Load API key from .env file
env_path = "/home/labuser/VSCODE_training/.env"
load_dotenv(env_path)

api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    print("Error: GEMINI_API_KEY not found in .env file")
    exit(1)

genai.configure(api_key=api_key)

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF resume"""
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

def extract_resume_details(resume_text: str) -> dict:
    """Extract resume details using Gemini and return as JSON"""
    model = genai.GenerativeModel("gemini-2.5-flash-lite")
    
    prompt = f"""Analyze the following resume text and extract all details in a structured JSON format.

Resume Text:
{resume_text}

Return ONLY a valid JSON object with the following structure (if any field is missing, use null):
{{
    "personal_info": {{
        "name": "string",
        "email": "string",
        "phone": "string",
        "location": "string",
        "linkedin": "string",
        "github": "string"
    }},
    "professional_summary": "string",
    "experience": [
        {{
            "job_title": "string",
            "company": "string",
            "duration": "string",
            "description": "string"
        }}
    ],
    "education": [
        {{
            "degree": "string",
            "field": "string",
            "institution": "string",
            "graduation_year": "string",
            "gpa": "string"
        }}
    ],
    "skills": [
        "skill1",
        "skill2"
    ],
    "certifications": [
        {{
            "name": "string",
            "issuer": "string",
            "date": "string"
        }}
    ],
    "projects": [
        {{
            "name": "string",
            "description": "string",
            "technologies": "string"
        }}
    ]
}}

Return ONLY the JSON, no other text."""
    
    response = model.generate_content(prompt)
    
    try:
        # Parse the response as JSON
        resume_json = json.loads(response.text)
        return resume_json
    except json.JSONDecodeError:
        print("Error parsing JSON response")
        return {"error": "Could not parse resume details"}

# Main execution
resume_path = input("/home/labuser/Downloads/Stockholm-Resume-Template-Simple.pdf ").strip()

if os.path.exists(resume_path):
    print("\nExtracting resume text...\n")
    resume_text = extract_text_from_pdf(resume_path)
    
    if resume_text:
        print("Analyzing resume details...\n")
        resume_details = extract_resume_details(resume_text)
        
        print("=== Resume Details (JSON Format) ===\n")
        print(json.dumps(resume_details, indent=2, ensure_ascii=False))
        
        # Optionally save to file
        save_option = input("\nSave to JSON file? (y/n): ").strip().lower()
        if save_option == 'y':
            output_file = "resume_details.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(resume_details, f, indent=2, ensure_ascii=False)
            print(f"Saved to {output_file}")
    else:
        print("Could not extract text from PDF")
else:
    print("File not found. Please check the path.")


All support for the `google.generativeai` package has ended. It will no longer be receiving 
updates or bug fixes. Please switch to the `google.genai` package as soon as possible.
See README for more details:

https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/README.md

  import google.generativeai as genai



Extracting resume text...

Analyzing resume details...

Error parsing JSON response
=== Resume Details (JSON Format) ===

{
  "error": "Could not parse resume details"
}


In [3]:
import os
import PyPDF2
from dotenv import load_dotenv
import google.generativeai as genai

# Load API key from .env file
env_path = "/home/labuser/VSCODE_training/.env"
load_dotenv(env_path)

api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    print("Error: GEMINI_API_KEY not found in .env file")
    exit(1)

genai.configure(api_key=api_key)

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF resume"""
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

def extract_resume_details(resume_text: str) -> str:
    """Extract resume details using Gemini"""
    model = genai.GenerativeModel("gemini-2.5-flash-lite")
    
    prompt = f"""Analyze the following resume and extract all details in a clear, organized format.

Resume Text:
{resume_text}

Extract and organize the following information:
- Personal Information (name, email, phone, location, linkedin, github)
- Professional Summary
- Work Experience (job title, company, duration, description)
- Education (degree, field, institution, graduation year, GPA)
- Skills
- Certifications
- Projects

Format the output in a clear, readable way."""
    
    response = model.generate_content(prompt)
    return response.text

# Main execution
resume_path = input("/home/labuser/Downloads/Stockholm-Resume-Template-Simple.pdf  ").strip()

if os.path.exists(resume_path):
    print("\nExtracting resume text...\n")
    resume_text = extract_text_from_pdf(resume_path)
    
    if resume_text:
        print("Extracting resume details...\n")
        resume_details = extract_resume_details(resume_text)
        
        print("=== Resume Details ===\n")
        print(resume_details)
    else:
        print("Could not extract text from PDF")
else:
    print("File not found. Please check the path.")


Extracting resume text...

Extracting resume details...

=== Resume Details ===

Here's a breakdown of Jason Miller's resume in a clear, organized format:

---

### **Personal Information**

*   **Name:** Jason Miller
*   **Email:** email@email.com
*   **Phone:** 3868683442
*   **Location:** 1515 Pacific Ave, Los Angeles, CA 90291, United States
*   **Place of Birth:** San Antonio
*   **Driving License:** Full Commercial Driver's License
*   **LinkedIn:** LinkedIn (link not provided)
*   **Pinterest:** Pinterest (link not provided)
*   **GitHub:** Not provided

---

### **Professional Summary**

Experienced Amazon Associate with five years' tenure in a shipping yard setting. Achieved an average picking/packing speed of 98%. Holds a zero error% score in adhering to packing specs and a 97% error-free ratio on packing records. Completed a certificate in Warehouse Sanitation and possesses a valid commercial driver's license.

---

### **Work Experience**

**1. Amazon Warehouse Associate**

In [None]:
import os
import json
import PyPDF2
from dotenv import load_dotenv
import google.generativeai as genai

# Load API key from .env file
env_path = "/home/labuser/VSCODE_training/.env"
load_dotenv(env_path)

api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    print("Error: GEMINI_API_KEY not found in .env file")
    exit(1)

genai.configure(api_key=api_key)

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF resume"""
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

def extract_resume_details(resume_text: str) -> dict:
    """Extract resume details using Gemini and return as JSON"""
    model = genai.GenerativeModel("gemini-2.5-flash-lite")
    
    prompt = f"""Analyze the following resume text and extract all details in a structured JSON format.

Resume Text:
{resume_text}

Return ONLY a valid JSON object with the following structure (if any field is missing, use null):
{{
    "personal_info": {{
        "name": "string",
        "email": "string",
        "phone": "string",
        "location": "string",
        "linkedin": "string",
        "github": "string"
    }},
    "professional_summary": "string",
    "experience": [
        {{
            "job_title": "string",
            "company": "string",
            "duration": "string",
            "description": "string"
        }}
    ],
    "education": [
        {{
            "degree": "string",
            "field": "string",
            "institution": "string",
            "graduation_year": "string",
            "gpa": "string"
        }}
    ],
    "skills": [
        "skill1",
        "skill2"
    ],
    "certifications": [
        {{
            "name": "string",
            "issuer": "string",
            "date": "string"
        }}
    ],
    "projects": [
        {{
            "name": "string",
            "description": "string",
            "technologies": "string"
        }}
    ]
}}

Return ONLY the JSON, no other text."""
    
    response = model.generate_content(prompt)
    
    try:
        # Parse the response as JSON
        resume_json = json.loads(response.text)
        return resume_json
    except json.JSONDecodeError:
        print("Error parsing JSON response")
        return {"error": "Could not parse resume details"}

# Main execution
resume_path = input("Enter the path to your resume (PDF): ").strip()

if os.path.exists(resume_path):
    print("\nExtracting resume text...\n")
    resume_text = extract_text_from_pdf(resume_path)
    
    if resume_text:
        print("Extracting and analyzing resume details...\n")
        resume_details = extract_resume_details(resume_text)
        
        print("=== Resume Details (JSON Format) ===\n")
        print(json.dumps(resume_details, indent=2, ensure_ascii=False))
        
        # Optionally save to file
        save_option = input("\nSave to JSON file? (y/n): ").strip().lower()
        if save_option == 'y':
            output_file = "resume_details.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(resume_details, f, indent=2, ensure_ascii=False)
            print(f"Saved to {output_file}")
    else:
        print("Could not extract text from PDF")
else:
    print("File not found. Please check the path.")


Extracting resume text...

Extracting and analyzing resume details...

Error parsing JSON response
=== Resume Details (JSON Format) ===

{
  "error": "Could not parse resume details"
}


In [2]:
import os
import json
import PyPDF2
from dotenv import load_dotenv
import google.generativeai as genai

# Load API key from .env file
env_path = "/home/labuser/VSCODE_training/.env"
load_dotenv(env_path)

api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    print("Error: GEMINI_API_KEY not found in .env file")
    exit(1)

genai.configure(api_key=api_key)

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF resume"""
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

def extract_resume_details(resume_text: str) -> dict:
    """Extract resume details using Gemini and convert to JSON"""
    model = genai.GenerativeModel("gemini-2.5-flash-lite")
    
    prompt = f"""Analyze the following resume and extract all details in JSON format.
Extract whatever information is available - don't force any fields.

Resume Text:
{resume_text}

Return a JSON object with the extracted details. Use the actual section names from the resume.
Return ONLY valid JSON, no other text."""
    
    response = model.generate_content(prompt)
    
    try:
        # Parse the response as JSON
        resume_json = json.loads(response.text)
        return resume_json
    except json.JSONDecodeError:
        print("Error parsing JSON response. Returning raw text as JSON.")
        return {"extracted_details": response.text}

# Main execution
resume_path = input("Enter the path to your resume (PDF): ").strip()

if os.path.exists(resume_path):
    print("\nExtracting resume text...\n")
    resume_text = extract_text_from_pdf(resume_path)
    
    if resume_text:
        print("Extracting and converting resume details to JSON...\n")
        resume_details = extract_resume_details(resume_text)
        
        print("=== Resume Details (JSON Format) ===\n")
        print(json.dumps(resume_details, indent=2, ensure_ascii=False))
        
        # Optionally save to file
        save_option = input("\nSave to JSON file? (y/n): ").strip().lower()
        if save_option == 'y':
            output_file = "resume_details.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(resume_details, f, indent=2, ensure_ascii=False)
            print(f"Saved to {output_file}")
    else:
        print("Could not extract text from PDF")
else:
    print("File not found. Please check the path.")


Extracting resume text...

Extracting and converting resume details to JSON...

Error parsing JSON response. Returning raw text as JSON.
=== Resume Details (JSON Format) ===

{
  "extracted_details": "```json\n{\n  \"basics\": {\n    \"name\": \"Jason Miller\",\n    \"label\": \"Amazon Associate\",\n    \"email\": \"email@email.com\",\n    \"phone\": \"3868683442\",\n    \"location\": {\n      \"address\": \"1515 Pacific Ave\",\n      \"city\": \"Los Angeles\",\n      \"state\": \"CA\",\n      \"postalCode\": \"90291\",\n      \"country\": \"United States\"\n    },\n    \"profiles\": [\n      {\n        \"network\": \"LinkedIn\",\n        \"url\": null\n      },\n      {\n        \"network\": \"Pinterest\",\n        \"url\": null\n      }\n    ]\n  },\n  \"work\": [\n    {\n      \"institution\": \"Amazon\",\n      \"area\": \"Amazon Warehouse Associate\",\n      \"location\": \"Miami Gardens\",\n      \"startDate\": \"2021-01\",\n      \"endDate\": \"2022-07\",\n      \"highlights\":

In [3]:
import os
import json
import sqlite3
import PyPDF2
from dotenv import load_dotenv
import google.generativeai as genai

# Load API key from .env file
env_path = "/home/labuser/VSCODE_training/.env"
load_dotenv(env_path)

api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    print("Error: GEMINI_API_KEY not found in .env file")
    exit(1)

genai.configure(api_key=api_key)

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF resume"""
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

def extract_resume_details(resume_text: str) -> dict:
    """Extract resume details using Gemini and convert to JSON"""
    model = genai.GenerativeModel("gemini-2.5-flash-lite")
    
    prompt = f"""Analyze the following resume and extract all details in JSON format.
Extract whatever information is available - don't force any fields.

Resume Text:
{resume_text}

Return a JSON object with the extracted details. Use the actual section names from the resume.
Return ONLY valid JSON, no other text."""
    
    response = model.generate_content(prompt)
    
    try:
        resume_json = json.loads(response.text)
        return resume_json
    except json.JSONDecodeError:
        print("Error parsing JSON response. Returning raw text as JSON.")
        return {"extracted_details": response.text}

def create_database():
    """Create SQLite database and tables"""
    conn = sqlite3.connect("resume_database.db")
    cursor = conn.cursor()
    
    # Create main resume table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS resumes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT,
            email TEXT,
            phone TEXT,
            location TEXT,
            linkedin TEXT,
            github TEXT,
            professional_summary TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    ''')
    
    # Create experience table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS experience (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            job_title TEXT,
            company TEXT,
            duration TEXT,
            description TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    # Create education table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS education (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            degree TEXT,
            field TEXT,
            institution TEXT,
            graduation_year TEXT,
            gpa TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    # Create skills table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS skills (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            skill TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    # Create certifications table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS certifications (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            name TEXT,
            issuer TEXT,
            date TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    # Create projects table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS projects (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            name TEXT,
            description TEXT,
            technologies TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    conn.commit()
    conn.close()

def store_resume_in_database(resume_details: dict) -> int:
    """Store resume details in SQLite database"""
    conn = sqlite3.connect("resume_database.db")
    cursor = conn.cursor()
    
    # Extract personal info
    personal_info = resume_details.get("personal_info", {}) if isinstance(resume_details.get("personal_info"), dict) else {}
    name = personal_info.get("name")
    email = personal_info.get("email")
    phone = personal_info.get("phone")
    location = personal_info.get("location")
    linkedin = personal_info.get("linkedin")
    github = personal_info.get("github")
    professional_summary = resume_details.get("professional_summary")
    
    # Insert main resume data
    cursor.execute('''
        INSERT INTO resumes (name, email, phone, location, linkedin, github, professional_summary)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    ''', (name, email, phone, location, linkedin, github, professional_summary))
    
    resume_id = cursor.lastrowid
    
    # Insert experience
    experience = resume_details.get("experience", [])
    if isinstance(experience, list):
        for exp in experience:
            if isinstance(exp, dict):
                cursor.execute('''
                    INSERT INTO experience (resume_id, job_title, company, duration, description)
                    VALUES (?, ?, ?, ?, ?)
                ''', (resume_id, exp.get("job_title"), exp.get("company"), 
                      exp.get("duration"), exp.get("description")))
    
    # Insert education
    education = resume_details.get("education", [])
    if isinstance(education, list):
        for edu in education:
            if isinstance(edu, dict):
                cursor.execute('''
                    INSERT INTO education (resume_id, degree, field, institution, graduation_year, gpa)
                    VALUES (?, ?, ?, ?, ?, ?)
                ''', (resume_id, edu.get("degree"), edu.get("field"), 
                      edu.get("institution"), edu.get("graduation_year"), edu.get("gpa")))
    
    # Insert skills
    skills = resume_details.get("skills", [])
    if isinstance(skills, list):
        for skill in skills:
            cursor.execute('''
                INSERT INTO skills (resume_id, skill)
                VALUES (?, ?)
            ''', (resume_id, skill))
    
    # Insert certifications
    certifications = resume_details.get("certifications", [])
    if isinstance(certifications, list):
        for cert in certifications:
            if isinstance(cert, dict):
                cursor.execute('''
                    INSERT INTO certifications (resume_id, name, issuer, date)
                    VALUES (?, ?, ?, ?)
                ''', (resume_id, cert.get("name"), cert.get("issuer"), cert.get("date")))
    
    # Insert projects
    projects = resume_details.get("projects", [])
    if isinstance(projects, list):
        for proj in projects:
            if isinstance(proj, dict):
                cursor.execute('''
                    INSERT INTO projects (resume_id, name, description, technologies)
                    VALUES (?, ?, ?, ?)
                ''', (resume_id, proj.get("name"), proj.get("description"), proj.get("technologies")))
    
    conn.commit()
    conn.close()
    
    return resume_id

# Main execution
resume_path = input("Enter the path to your resume (PDF): ").strip()

if os.path.exists(resume_path):
    print("\nExtracting resume text...\n")
    resume_text = extract_text_from_pdf(resume_path)
    
    if resume_text:
        print("Extracting and converting resume details to JSON...\n")
        resume_details = extract_resume_details(resume_text)
        
        print("=== Resume Details (JSON Format) ===\n")
        print(json.dumps(resume_details, indent=2, ensure_ascii=False))
        
        # Create database and store data
        print("\n\nCreating database and storing resume details...\n")
        create_database()
        resume_id = store_resume_in_database(resume_details)
        print(f"Resume stored in database with ID: {resume_id}")
        
        # Optional: Save JSON to file
        save_option = input("\nSave JSON to file? (y/n): ").strip().lower()
        if save_option == 'y':
            output_file = "resume_details.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(resume_details, f, indent=2, ensure_ascii=False)
            print(f"Saved to {output_file}")
    else:
        print("Could not extract text from PDF")
else:
    print("File not found. Please check the path.")


Extracting resume text...

Extracting and converting resume details to JSON...

Error parsing JSON response. Returning raw text as JSON.
=== Resume Details (JSON Format) ===

{
  "extracted_details": "```json\n{\n  \"Personal Information\": {\n    \"Name\": \"Jason Miller\",\n    \"Address\": \"1515 Pacific Ave\\nLos Angeles, CA 90291\\nUnited States\",\n    \"Phone\": \"3868683442\",\n    \"Email\": \"email@email.com\",\n    \"Place of birth\": \"San Antonio\"\n  },\n  \"Profile\": \"Experienced Amazon Associate with five years’ tenure in a shipping yard setting, maintaining an average picking/packing speed of 98%. Holds a zero error% score in adhering to packing specs and 97% error-free ratio on packing records. Completed a certificate in Warehouse Sanitation and has a valid commercial driver’s license.\",\n  \"Employment History\": [\n    {\n      \"Title\": \"Amazon Warehouse Associate\",\n      \"Company\": \"Amazon\",\n      \"Location\": \"Miami Gardens\",\n      \"Dates\": \"J

In [None]:
/home/labuser/Downloads/Stockholm-Resume-Template-Simple.pdf

In [1]:
import os
import json
import sqlite3
import PyPDF2
from dotenv import load_dotenv
import google.generativeai as genai

# Load API key from .env file
env_path = "/home/labuser/VSCODE_training/.env"
load_dotenv(env_path)

api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    print("Error: GEMINI_API_KEY not found in .env file")
    exit(1)

genai.configure(api_key=api_key)

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF resume"""
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

def extract_resume_details(resume_text: str) -> dict:
    """Extract resume details using Gemini and convert to JSON"""
    model = genai.GenerativeModel("gemini-2.5-flash-lite")
    
    prompt = f"""Analyze the following resume and extract all details in JSON format.
Extract whatever information is available - don't force any fields.

Resume Text:
{resume_text}

Return a JSON object with the extracted details. Use the actual section names from the resume.
Return ONLY valid JSON, no other text."""
    
    response = model.generate_content(prompt)
    
    try:
        resume_json = json.loads(response.text)
        return resume_json
    except json.JSONDecodeError:
        print("Error parsing JSON response. Returning raw text as JSON.")
        return {"extracted_details": response.text}

def create_database():
    """Create SQLite database and tables"""
    conn = sqlite3.connect("resume_database.db")
    cursor = conn.cursor()
    
    # Create main resume table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS resumes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT,
            email TEXT,
            phone TEXT,
            location TEXT,
            linkedin TEXT,
            github TEXT,
            professional_summary TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    ''')
    
    # Create experience table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS experience (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            job_title TEXT,
            company TEXT,
            duration TEXT,
            description TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    # Create education table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS education (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            degree TEXT,
            field TEXT,
            institution TEXT,
            graduation_year TEXT,
            gpa TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    # Create skills table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS skills (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            skill TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    # Create certifications table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS certifications (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            name TEXT,
            issuer TEXT,
            date TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    # Create projects table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS projects (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            resume_id INTEGER,
            name TEXT,
            description TEXT,
            technologies TEXT,
            FOREIGN KEY (resume_id) REFERENCES resumes(id)
        )
    ''')
    
    conn.commit()
    conn.close()

def store_resume_in_database(resume_details: dict) -> int:
    """Store resume details in SQLite database"""
    conn = sqlite3.connect("resume_database.db")
    cursor = conn.cursor()
    
    # Extract personal info
    personal_info = resume_details.get("personal_info", {}) if isinstance(resume_details.get("personal_info"), dict) else {}
    name = personal_info.get("name")
    email = personal_info.get("email")
    phone = personal_info.get("phone")
    location = personal_info.get("location")
    linkedin = personal_info.get("linkedin")
    github = personal_info.get("github")
    professional_summary = resume_details.get("professional_summary")
    
    # Insert main resume data
    cursor.execute('''
        INSERT INTO resumes (name, email, phone, location, linkedin, github, professional_summary)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    ''', (name, email, phone, location, linkedin, github, professional_summary))
    
    resume_id = cursor.lastrowid
    
    # Insert experience
    experience = resume_details.get("experience", [])
    if isinstance(experience, list):
        for exp in experience:
            if isinstance(exp, dict):
                cursor.execute('''
                    INSERT INTO experience (resume_id, job_title, company, duration, description)
                    VALUES (?, ?, ?, ?, ?)
                ''', (resume_id, exp.get("job_title"), exp.get("company"), 
                      exp.get("duration"), exp.get("description")))
    
    # Insert education
    education = resume_details.get("education", [])
    if isinstance(education, list):
        for edu in education:
            if isinstance(edu, dict):
                cursor.execute('''
                    INSERT INTO education (resume_id, degree, field, institution, graduation_year, gpa)
                    VALUES (?, ?, ?, ?, ?, ?)
                ''', (resume_id, edu.get("degree"), edu.get("field"), 
                      edu.get("institution"), edu.get("graduation_year"), edu.get("gpa")))
    
    # Insert skills
    skills = resume_details.get("skills", [])
    if isinstance(skills, list):
        for skill in skills:
            cursor.execute('''
                INSERT INTO skills (resume_id, skill)
                VALUES (?, ?)
            ''', (resume_id, skill))
    
    # Insert certifications
    certifications = resume_details.get("certifications", [])
    if isinstance(certifications, list):
        for cert in certifications:
            if isinstance(cert, dict):
                cursor.execute('''
                    INSERT INTO certifications (resume_id, name, issuer, date)
                    VALUES (?, ?, ?, ?)
                ''', (resume_id, cert.get("name"), cert.get("issuer"), cert.get("date")))
    
    # Insert projects
    projects = resume_details.get("projects", [])
    if isinstance(projects, list):
        for proj in projects:
            if isinstance(proj, dict):
                cursor.execute('''
                    INSERT INTO projects (resume_id, name, description, technologies)
                    VALUES (?, ?, ?, ?)
                ''', (resume_id, proj.get("name"), proj.get("description"), proj.get("technologies")))
    
    conn.commit()
    conn.close()
    
    return resume_id

# Main execution
resume_path = input("Enter the path to your resume (PDF): ").strip()

if os.path.exists(resume_path):
    print("\nExtracting resume text...\n")
    resume_text = extract_text_from_pdf(resume_path)
    
    if resume_text:
        print("Extracting and converting resume details to JSON...\n")
        resume_details = extract_resume_details(resume_text)
        
        print("=== Resume Details (JSON Format) ===\n")
        print(json.dumps(resume_details, indent=2, ensure_ascii=False))
        
        # Create database and store data
        print("\n\nCreating database and storing resume details...\n")
        create_database()
        resume_id = store_resume_in_database(resume_details)
        print(f"Resume stored in database with ID: {resume_id}")
        
        # Optional: Save JSON to file
        save_option = input("\nSave JSON to file? (y/n): ").strip().lower()
        if save_option == 'y':
            output_file = "resume_details.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(resume_details, f, indent=2, ensure_ascii=False)
            print(f"Saved to {output_file}")
    else:
        print("Could not extract text from PDF")
else:
    print("File not found. Please check the path.")


All support for the `google.generativeai` package has ended. It will no longer be receiving 
updates or bug fixes. Please switch to the `google.genai` package as soon as possible.
See README for more details:

https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/README.md

  import google.generativeai as genai



Extracting resume text...

Extracting and converting resume details to JSON...

Error parsing JSON response. Returning raw text as JSON.
=== Resume Details (JSON Format) ===

{
  "extracted_details": "```json\n{\n  \"contact_information\": {\n    \"name\": \"Jason Miller\",\n    \"address\": \"1515 Pacific Ave\\nLos Angeles, CA 90291\\nUnited States\",\n    \"phone\": \"3868683442\",\n    \"email\": \"email@email.com\"\n  },\n  \"profile\": \"Experienced Amazon Associate with five years’ tenure in a shipping yard setting, maintaining an average picking/packing speed of 98%. Holds a zero error% score in adhering to packing specs and 97% error-free ratio on packing records. Completed a certificate in Warehouse Sanitation and has a valid commercial driver’s license.\",\n  \"employment_history\": [\n    {\n      \"title\": \"Amazon Warehouse Associate\",\n      \"company\": \"Amazon\",\n      \"location\": \"Miami Gardens\",\n      \"start_date\": \"January 2021\",\n      \"end_date\": \"

In [None]:
import os
import json
import sqlite3
import PyPDF2
from dotenv import load_dotenv
import google.generativeai as genai

# Load API key from .env file
env_path = "/home/labuser/VSCODE_training/.env"
load_dotenv(env_path)

api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    print("Error: GEMINI_API_KEY not found in .env file")
    exit(1)

genai.configure(api_key=api_key)

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF resume"""
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

def extract_resume_details(resume_text: str) -> dict:
    """Extract resume details using Gemini with proper JSON formatting"""
    model = genai.GenerativeModel("gemini-2.5-flash-lite")
    
    prompt = f"""Analyze the following resume and extract all details in a well-structured JSON format.
Be thorough and ensure all text is properly formatted and cleaned.

Resume Text:
{resume_text}

Return ONLY valid JSON with this exact structure:
{{
    "contact_information": {{
        "name": "string or null",
        "email": "string or null",
        "phone": "string or null",
        "address": "string or null",
        "linkedin": "string or null",
        "github": "string or null"
    }},
    "profile": "string or null",
    "employment_history": [
        {{
            "title": "string",
            "company": "string",
            "location": "string or null",
            "start_date": "string or null",
            "end_date": "string or null",
            "description": "string or null",
            "achievements": ["string"]
        }}
    ],
    "education": [
        {{
            "degree": "string",
            "institution": "string",
            "location": "string or null",
            "start_date": "string or null",
            "end_date": "string or null",
            "majors": ["string"],
            "minors": ["string"]
        }}
    ],
    "courses": [
        {{
            "title": "string",
            "institution": "string",
            "location": "string or null",
            "start_date": "string or null",
            "end_date": "string or null"
        }}
    ],
    "skills": ["string"],
    "certifications": ["string"],
    "licenses": ["string"],
    "languages": ["string"],
    "achievements": ["string"],
    "hobbies": ["string"],
    "additional_information": {{
        "place_of_birth": "string or null",
        "driving_license": "string or null",
        "links": ["string"]
    }}
}}

Ensure:
- All text is properly cleaned and formatted
- Empty arrays for missing list items
- null for missing individual fields
- Return ONLY the JSON, no markdown code blocks"""
    
    response = model.generate_content(prompt)
    
    try:
        # Try to parse the response directly
        resume_json = json.loads(response.text)
        return resume_json
    except json.JSONDecodeError:
        # If parsing fails, try to extract JSON from the response
        try:
            # Remove markdown code blocks if present
            cleaned_text = response.text.strip()
            if cleaned_text.startswith("```"):
                cleaned_text = cleaned_text.split("```")[1]
                if cleaned_text.startswith("json"):
                    cleaned_text = cleaned_text[4:]
            cleaned_text = cleaned_text.strip()
            resume_json = json.loads(cleaned_text)
            return resume_json
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON response: {e}")
            return {"error": "Could not parse resume details", "raw_response": response.text}

def validate_and_clean_json(data: dict) -> dict:
    """Validate and clean the JSON structure"""
    # Ensure all required top-level keys exist
    required_keys = [
        "contact_information", "profile", "employment_history",
        "education", "courses", "skills", "certifications",
        "licenses", "languages", "achievements", "hobbies",
        "additional_information"
    ]
    
    for key in required_keys:
        if key not in data:
            if key in ["employment_history", "education", "courses", "skills", 
                      "certifications", "licenses", "languages", "achievements", "hobbies"]:
                data[key] = []
            else:
                data[key] = {} if key != "profile" else None
    
    return data

# Main execution
resume_path = input("Enter the path to your resume (PDF): ").strip()

if os.path.exists(resume_path):
    print("\nExtracting resume text...\n")
    resume_text = extract_text_from_pdf(resume_path)
    
    if resume_text:
        print("Extracting and formatting resume details to JSON...\n")
        resume_details = extract_resume_details(resume_text)
        
        # Validate and clean the structure
        resume_details = validate_and_clean_json(resume_details)
        
        print("=== Resume Details (Formatted JSON) ===\n")
        print(json.dumps(resume_details, indent=2, ensure_ascii=False))
        
        # Save JSON to file
        output_file = "resume_details.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(resume_details, f, indent=2, ensure_ascii=False)
        print(f"\n✓ Saved to {output_file}")
    else:
        print("Could not extract text from PDF")
else:
    print("File not found. Please check the path.")


Extracting resume text...

Extracting and formatting resume details to JSON...

=== Resume Details (Formatted JSON) ===

{
  "contact_information": {
    "name": "Jason Miller",
    "email": "email@email.com",
    "phone": "3868683442",
    "address": "1515 Pacific Ave Los Angeles, CA 90291 United States",
    "linkedin": null,
    "github": null
  },
  "profile": "Experienced Amazon Associate with five years’ tenure in a shipping yard setting, maintaining an average picking/packing speed of 98%. Holds a zero error% score in adhering to packing specs and 97% error-free ratio on packing records. Completed a certificate in Warehouse Sanitation and has a valid commercial driver’s license.",
  "employment_history": [
    {
      "title": "Amazon Warehouse Associate",
      "company": "Amazon",
      "location": "Miami Gardens",
      "start_date": "January 2021",
      "end_date": "July 2022",
      "description": "Performed all warehouse laborer duties such as packing, picking, counting,