# Installing Required Libraries & Importing Modules

In [None]:
!pip install pytesseract pdf2image pillow json5 pandas tqdm psycopg2-binary python-dotenv transformers huggingface_hub torch accelerate datasets pymongo pymupdf pdfplumber PyPDF2 pdfminer.six

Collecting layoutparser
  Downloading layoutparser-0.3.4-py3-none-any.whl.metadata (7.7 kB)
Collecting iopath (from layoutparser)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath->layoutparser)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Downloading layoutparser-0.3.4-py3-none-any.whl (19.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.2/19.2 MB[0m [31m106.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading portalocker-3.1.1-py3-none-any.whl (19 kB)
Building wheels for collected packages: iopath
  Building wheel for iopath (setup.py) ... [?25l[?25hdone
  Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31527 sha256=c0bf8fe8a36aac5802eecaf8cf7c53ceeaf783ddb3940979f7323e1f83f5f920
  Stored in dir

In [None]:
# Windows-compatible poppler installation
import platform
import os
import urllib.request
import zipfile
from pathlib import Path

def ensure_on_path(directory, environ=None):
    """Prepend ``directory`` to the PATH variable unless it is already an entry.

    PATH is compared entry by entry (split on ``os.pathsep``), so a directory
    that merely shares a prefix with an existing entry is still added.

    Args:
        directory: Directory to make discoverable through PATH.
        environ: Mapping that holds the PATH variable. Defaults to ``os.environ``.

    Returns:
        True if PATH was modified, False if the directory was already listed.
    """
    environ = os.environ if environ is None else environ
    current_path = environ.get('PATH', '')
    wanted = os.path.normcase(os.path.normpath(directory))
    for entry in current_path.split(os.pathsep):
        if entry and os.path.normcase(os.path.normpath(entry)) == wanted:
            return False
    environ['PATH'] = directory + os.pathsep + current_path if current_path else directory
    return True


# pdf2image needs the poppler binaries; on Windows they are not on PATH by default.
if platform.system() == 'Windows':
    print("Setting up poppler for Windows...")

    # Check if poppler is already available in its conventional install location
    poppler_path = r"C:\Program Files\poppler\bin"
    if not os.path.exists(poppler_path):
        print("Poppler not found in standard location. Checking environment...")
        # Nothing is installed here: the user is only told where to get poppler.
        print("Note: If PDF processing fails, please install poppler-utils for Windows")
        print("Download from: https://github.com/oschwartz10612/poppler-windows/releases/")
    else:
        print(f"Poppler found at: {poppler_path}")
        # Add to PATH if not already there
        ensure_on_path(poppler_path)

# Printed on every platform, including those where no setup was needed.
print("Poppler setup complete")

In [3]:
!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.5


In [54]:
import os
import pytesseract
from pdf2image import convert_from_path
from PIL import Image
import re
import json5
import pandas as pd
from tqdm import tqdm
import psycopg2
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from huggingface_hub import login
from pymongo import MongoClient
import json
import torch
import pdfplumber
import pytesseract
from pdf2image import convert_from_path
import logging

# Loading Environment Variables

In [None]:
# Read key/value pairs from a .env file (if any) into the process environment.
load_dotenv()

# Hugging Face Token (optional)
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    print("Hugging Face token loaded")
else:
    print("No Hugging Face token found - some features may be limited")

# MongoDB Configuration (optional) - each value is None when its variable is unset
mongo_host = os.getenv('MONGO_HOST')
mongo_port = os.getenv('MONGO_PORT')
mongo_db = os.getenv('MONGO_DB')
mongo_collection = os.getenv('MONGO_COLLECTION')

# PostgreSQL Configuration (with defaults)
postgres_host = os.getenv('POSTGRES_HOST', 'localhost')
postgres_port = os.getenv('POSTGRES_PORT', '5432')
postgres_db = os.getenv('POSTGRES_DB', 'pfe_db')
postgres_user = os.getenv('POSTGRES_USER', 'postgres')
postgres_password = os.getenv('POSTGRES_PASSWORD')
if postgres_password is None:
    # SECURITY: a password literal in a notebook ends up in version control.
    # The fallback is kept so existing local setups keep working, but it is no
    # longer applied silently. TODO: remove it once every environment sets
    # POSTGRES_PASSWORD.
    print("⚠️ POSTGRES_PASSWORD is not set - using the built-in development default")
    postgres_password = '1986'

# The password is deliberately left out of the printed summary.
print(f"PostgreSQL config: {postgres_host}:{postgres_port}/{postgres_db}")
print(f"MongoDB config: {mongo_host}:{mongo_port}/{mongo_db} (collection: {mongo_collection})")

# Hugging Face Connection

In [None]:
# Authenticate with the Hugging Face Hub, but only when a token was configured.
# A failed login is reported and the notebook carries on unauthenticated.
if not hf_token:
    print("Skipping Hugging Face login - no token provided")
else:
    try:
        login(hf_token)
    except Exception as login_error:
        print(f"Hugging Face login failed: {login_error}")
        print("Continuing without HF authentication - some features may be limited")
    else:
        print("Hugging Face login successful")

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


# MongoDB Connection

In [None]:
# MongoDB connection with error handling.
# `client`, `db` and `resumes` stay None whenever MongoDB is not usable, so
# later cells can detect that with an explicit `is None` check.
client = None
db = None
resumes = None

try:
    # Check if MongoDB variables are properly set
    if mongo_host and mongo_port and mongo_db and mongo_collection:
        print(f"Connecting to MongoDB at {mongo_host}:{mongo_port}")
        # MongoClient connects lazily: constructing it never fails for an
        # unreachable server. The timeout bounds how long the check below waits.
        client = MongoClient(f"{mongo_host}:{mongo_port}", serverSelectionTimeoutMS=5000)
        # Force one round trip so that "successful" really means reachable.
        client.admin.command('ping')
        db = client[mongo_db]
        resumes = db[mongo_collection]
        print("MongoDB connection successful")
    else:
        print("MongoDB configuration incomplete. Skipping MongoDB connection.")
        print(f"MONGO_HOST: {mongo_host}, MONGO_PORT: {mongo_port}")
        print(f"MONGO_DB: {mongo_db}, MONGO_COLLECTION: {mongo_collection}")
except Exception as e:
    print(f"MongoDB connection failed: {e}")
    print("Continuing without MongoDB. Results will only be saved to PostgreSQL.")
    # Release the client's background resources before dropping the reference.
    if client is not None:
        client.close()
    client = None
    db = None
    resumes = None

# Postgres Connection

In [18]:
def get_postgres_connection():
    """Open a new PostgreSQL connection from the module-level ``postgres_*`` settings.

    Returns:
        The psycopg2 connection on success, or None when connecting fails
        (the error is printed instead of being raised).
    """
    try:
        connection_settings = {
            "dbname": postgres_db,
            "user": postgres_user,
            "password": postgres_password,
            "host": postgres_host,
            "port": postgres_port,
        }
        connection = psycopg2.connect(**connection_settings)
        print("✅ Connected to PostgreSQL database.")
    except Exception as error:
        print(f"❌ Error connecting to PostgreSQL: {error}")
        connection = None
    return connection

# Loading Model and Tokenizer

In [1]:
# Directory holding the persisted copy of the model (Google Drive path).
drive_model_path = "/content/drive/MyDrive/PFE/ai_models/mixtral-8x7b"

if not os.path.exists(drive_model_path):
    # No saved copy yet: fetch the model from the Hub, quantized to 4 bits.
    print("Downloading model from HuggingFace...")
    MODEL_NAME = "mistralai/Mixtral-8x7B-Instruct-v0.1"

    # NF4 4-bit weights with double quantization; computation runs in float16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto"
    )

    # Persist both artifacts so the next run takes the cached branch below.
    print("Saving model to Google Drive for reuse...")
    tokenizer.save_pretrained(drive_model_path)
    model.save_pretrained(drive_model_path)
else:
    # A saved copy exists: load tokenizer and weights straight from that directory.
    print("Loading model from Drive cache...")
    tokenizer = AutoTokenizer.from_pretrained(drive_model_path)
    model = AutoModelForCausalLM.from_pretrained(
        drive_model_path,
        device_map="auto"
    )

# Fall back to the end-of-sequence token when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Text-generation pipeline shared by the rest of the notebook.
chat_pipeline = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    return_full_text=False,
    pad_token_id=tokenizer.pad_token_id,
    device_map="auto"
)


💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

💾 Available RAM: 2.4 GB
🖥️  GPU Available: False
⚡ System resources limited - using lightweight extraction
🪶 Loading lightweight extraction model (DistilBERT-based)...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


✅ Lightweight model (distilgpt2) loaded successfully!
📝 Note: Using rule-based extraction with AI assistance for better results

🎯 Using: Lightweight AI model (distilgpt2) + rule-based extraction


# Text extraction (pdfplumber, with OCR fallback)

In [52]:
def extract_text(pdf_path, min_text_length=50):
    """Extract the text of a PDF, falling back to OCR for image-based files.

    The embedded text layer is read with pdfplumber first. When that yields
    fewer than ``min_text_length`` characters, the pages are rendered to
    images and recognised with Tesseract instead.

    Args:
        pdf_path: Path of the PDF file to read.
        min_text_length: Minimum number of embedded-text characters required
            to skip the OCR fallback. Defaults to 50.

    Returns:
        The extracted text with surrounding whitespace stripped (possibly an
        empty string), or None if the PDF could not be read at all.
    """
    try:
        # Try PDFPlumber first. pdfplumber parses through pdfminer, whose
        # warnings would otherwise flood the cell output.
        logging.getLogger("pdfminer").setLevel(logging.ERROR)
        with pdfplumber.open(pdf_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
        # Pages without a text layer yield None/"" and are skipped.
        full_text = "\n".join(text for text in page_texts if text).strip()

        # If text is empty or too short, fall back to OCR
        if len(full_text) < min_text_length:
            print("🔁 Switching to OCR (image-based or low-quality PDF)")
            try:
                images = convert_from_path(pdf_path)
                ocr_text = "\n".join(
                    pytesseract.image_to_string(img) for img in images
                ).strip()
            except Exception as ocr_error:
                # OCR needs poppler and tesseract; a failure here must not
                # throw away whatever the text layer already provided.
                print(f"⚠️ OCR fallback failed: {ocr_error}")
                ocr_text = ""
            # Keep the short embedded text when OCR produced nothing.
            full_text = ocr_text or full_text

        return full_text

    except Exception as e:
        print(f"❌ Error during text extraction: {e}")
        return None

# Clean up the text

In [44]:
def preprocess_text(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace is removed as well, so the result is one
    line of space-separated tokens.
    """
    return " ".join(text.split())

# Prompt creation


In [12]:
def create_prompt(resume_text):
    """Build the extraction prompt sent to the language model for one resume.

    The prompt lists the fields to extract, shows one example JSON object and
    ends with the resume text itself.

    Args:
        resume_text: Text of the resume to append after the instructions.

    Returns:
        The complete prompt string.
    """
    # Static part of the prompt. It is a plain (non-f) string, so the braces of
    # the JSON example are written literally and need no escaping.
    instructions = """
Extract structured data from the following resume text and return a valid JSON object with the fields:

- name, age, email, phone, personal_links (github, linkedin, portfolio, indeed, others), skills, languages, experiences (job_title, company, duration, description), projects (project_title, project_summary), education (degree, institution, year), certifications, years_of_experience.

Return only a **raw JSON object**. Do NOT include any explanation, markdown, or code block (like ```json or ```python). Just the JSON like this example (no comments or extra text):
Example:
{
  "name": "Jane Doe",
  "age": 28,
  "email": "jane.doe@example.com",
  "phone": "+212612345678",
  "personal_links": {
    "github": "https://github.com/janedoe",
    "linkedin": "https://linkedin.com/in/janedoe",
    "portfolio": "https://janedoe.dev",
    "indeed": "https://indeed.com/profile/janedoe",
    "others": ["https://behance.net/janedoe", "https://medium.com/@janedoe"]
  },
  "skills": ["Python", "React", "MongoDB", "Docker", "problem solving", "communication"],
  "languages": ["English", "Arabic", "French"],
  "experiences": [
    {
      "job_title": "AI Engineer",
      "company": "TechCorp",
      "duration": "Jan 2020 - Present",
      "description": "Led a team of engineers to develop NLP models for document parsing."
    }
  ],
  "projects": [
    {
      "project_title": "Resume Parser",
      "project_summary": "Developed a parser using Transformers and MongoDB."
    }
  ],
  "education": [
    {
      "degree": "BSc in Computer Engineering",
      "institution": "ENSA",
      "year": "2022"
    }
  ],
  "certifications": ["AWS Certified ML Engineer"],
  "years_of_experience": 5
}

Resume Text:
"""
    # Only the resume text varies; it follows the instructions, then a newline.
    return f"{instructions}{resume_text}\n"

# Call Hugging Face model

In [2]:
# Call Hugging Face model
def call_AI_model(prompt):
    """Run ``prompt`` through the shared ``chat_pipeline`` and return its text.

    Args:
        prompt: Prompt string handed to the text-generation pipeline.

    Returns:
        The ``generated_text`` of the first result returned by the pipeline.
    """
    # Sampling is enabled with a low temperature; up to 1200 new tokens.
    generation_options = {
        "max_new_tokens": 1200,
        "do_sample": True,
        "temperature": 0.2,
        "return_full_text": False,
    }
    completions = chat_pipeline(prompt, **generation_options)
    first_completion = completions[0]
    return first_completion['generated_text']

# Parse model response

In [14]:
def parse_AI_model_output(ai_output):
    """Extract the first JSON object contained in the model's raw output.

    The model is asked for bare JSON but may wrap it in Markdown code fences
    or add prose before or after it. Decoding therefore starts at the first
    ``{`` and stops at the end of that object; everything around it is ignored.

    Args:
        ai_output: Raw text returned by the model.

    Returns:
        The decoded object as a dict, or None when ``ai_output`` is not a
        string, contains no ``{``, or the object is not valid JSON
        (for example because the generation was cut off).
    """
    if not isinstance(ai_output, str):
        print(f"JSON parsing error: expected text from the model, got {type(ai_output).__name__}")
        return None

    start = ai_output.find('{')
    if start == -1:
        print("JSON parsing error: no JSON object found in model output")
        return None

    try:
        # raw_decode parses one JSON value beginning at `start` and tolerates
        # trailing text, unlike json.loads which rejects any extra data.
        parsed, _ = json.JSONDecoder().raw_decode(ai_output, start)
    except json.JSONDecodeError as e:
        print(f"JSON parsing error: {e}")
        return None
    return parsed

# Save to MongoDB

In [15]:
def save_to_mongodb(resume):
    """Insert one parsed resume into the module-level ``resumes`` collection.

    Args:
        resume: Document (dict) to insert.

    Returns:
        True when the document was inserted; False when no MongoDB collection
        is available or the insert failed (the error is printed, not raised).
    """
    try:
        # Compare against None explicitly: pymongo collections do not support
        # truth-value testing.
        if resumes is None:
            print("❌ Error saving to MongoDB: no MongoDB collection is available")
            return False
        resumes.insert_one(resume)
        print("✅ Saved to MongoDB!")
        return True
    except Exception as e:
        print(f"❌ Error saving to MongoDB: {e}")
        return False

# Full resume processing

In [25]:
def process_resume(pdf_path, uuid=None):
    """Run the full pipeline for one resume: extract, prompt, parse, store.

    Args:
        pdf_path: Path of the resume PDF.
        uuid: Identifier stored on the parsed document under the ``uuid`` key.

    Returns:
        True when the resume was parsed and handed to storage successfully;
        False when no text could be extracted, the model output could not be
        parsed into a JSON object, or the save step reported a failure.
    """
    resume_text = extract_text(pdf_path)
    if not resume_text:
        print("❌ No text found in resume. Skipping.")
        return False

    resume_text = preprocess_text(resume_text)
    # NOTE(review): this prints the whole resume, personal data included, into
    # the notebook output - consider trimming it before sharing the notebook.
    print("📄 Extracted and preprocessed text:", resume_text)
    prompt = create_prompt(resume_text)
    AI_model_output = call_AI_model(prompt)
    print("ai model output:", AI_model_output)
    parsed_data = parse_AI_model_output(AI_model_output)
    print("parsed data output:", parsed_data)

    # Only a non-empty JSON object can carry the uuid and be stored as a document.
    if not parsed_data or not isinstance(parsed_data, dict):
        print("❌ Failed to parse resume.")
        return False

    parsed_data['uuid'] = uuid
    # Only an explicit False counts as a failed save; a save function that
    # returns nothing is treated as having succeeded.
    if save_to_mongodb(parsed_data) is False:
        print(f"❌ Resume UUID {uuid} was parsed but could not be stored.")
        return False

    print(f"✅ Successfully processed and stored resume UUID {uuid}")
    return True

# Process all resumes from Postgres database

In [None]:
def update_parsed_flag(resume_id):
    """Set ``parsed = TRUE`` for one row of the ``resumes`` table.

    A dedicated connection is opened for the update and always closed again,
    including when the statement or the commit fails.

    Args:
        resume_id: Value matched against the ``id`` column.

    Returns:
        True when the UPDATE was committed, False when no connection could be
        opened or the statement failed (the error is printed, not raised).
    """
    conn = None
    try:
        conn = get_postgres_connection()
        if conn is None:
            print(f"❌ Failed to update resume ID {resume_id}: no database connection")
            return False

        # NOTE(review): this filters on `id`, while the SELECT in
        # process_all_resumes_from_db reads a `resume_id` column - confirm
        # which column name the table actually has.
        with conn.cursor() as cursor:
            cursor.execute("UPDATE resumes SET parsed = TRUE WHERE id = %s;", (resume_id,))
            if cursor.rowcount == 0:
                print(f"⚠️ No row matched resume ID {resume_id}; nothing was updated.")
        conn.commit()
        return True
    except Exception as e:
        print(f"❌ Failed to update resume ID {resume_id}: {e}")
        return False
    finally:
        if conn is not None:
            conn.close()

In [19]:
def process_all_resumes_from_db():
    """Process every resume that PostgreSQL still lists as unparsed.

    Rows with ``parsed = FALSE`` are fetched in one query. Each resume is then
    run through ``process_resume`` and, on success, marked as parsed with
    ``update_parsed_flag``. Resumes that raised, could not be processed or
    could not be marked are listed at the end.

    Returns:
        None. Progress and failures are reported through printed messages.
    """
    conn = get_postgres_connection()
    if conn is None:
        print("❌ Cannot process resumes: no PostgreSQL connection.")
        return

    # Fetch the whole work list up front and release the connection right
    # away, so no connection sits idle while the resumes are being parsed.
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT resume_id, uuid, storage_path FROM resumes WHERE parsed = FALSE;")
            rows = cursor.fetchall()
    finally:
        conn.close()

    if not rows:
        print("✅ No new resumes to process.")
        return

    failed_resumes = []
    for resume_id, uuid, file_path in tqdm(rows, desc="Processing Resumes"):
        print(f"\n🔍 Processing resume ID {resume_id}: {file_path}")
        try:
            # A resume only counts as done once it is processed AND flagged.
            if process_resume(file_path, uuid) and update_parsed_flag(resume_id):
                continue
        except Exception as e:
            print(f"❌ Unexpected error while processing {file_path}: {e}")
        failed_resumes.append(file_path)

    if failed_resumes:
        print("\n⚠️ Failed to process the following resumes:")
        for path in failed_resumes:
            print(f" - {path}")

# Run the processing function

In [53]:
# Entry point: process every resume that PostgreSQL still lists with parsed = FALSE.
process_all_resumes_from_db()

✅ Connected to PostgreSQL database.





🔍 Processing resume ID 12: /content/drive/MyDrive/PFE/resumes_dataset/20.pdf




📄 Extracted and preprocessed text: CURRICULUM VITAE CONTACT INFORMATION: Surili Chawla +91-2382257665 chawlasurili@gmail.com EDUCATION: Qualification/Degree School/College University or Board Year Percent age/ CGPA Jaypee Institute of Jaypee Institute of 2019-2021 9.0 MTech (CSE with Information Information Technology specialization in Technology, Noida Data Analytics) Chitkara University 2014-2018 8.1 Bachelor of Chitkara School Of Engineering Engg. & Tech., (CSE) Himachal Pradesh WORK EXPERIENCE: Infosys Limited, Chandigarh (July,2021-Present) I worked on Data Visualization, Data Analytics, Observability, Monitoring and Alerting as a Systems Engineer. My role revolved around understanding the client requirements and creating dashboards using the streaming data from data sources like elastic search, Prometheus and Thanos. Identifying and selecting best representations/visualization in Grafana to depict the required information helped the client in making critical business decisions wi




🔍 Processing resume ID 23: /content/drive/MyDrive/PFE/resumes_dataset/18.pdf




📄 Extracted and preprocessed text: Farukh Sharma WORK EXPERIENCE Sabudh Foundation – Mohali, Punjab, India Data Science Internship July 2022 – Dec 2022 ● Worked as Data Scientist Intern with Sabudh Foundation, it was a 6-month program covering technologies like, Python Programming, MachineLearning – Regression, Classification, Clustering, Deep Learning – MLP, ANN,CNN, RNN, LSTM, NLP, RASA for Chatbot, TensorFlow Flow, Keras, NumPy, Pandas, Matplotlib, Scipy, Scikit Learn, DSA, EDA, Statistics and Probability. ● Worked on Hateful Meme Detection Project. Used 3 models for Hateful and non-hateful meme classification. Built CNN model from scratch or image classification, fine-tuned already trained models, and tested multimodel algorithms based on CNN+MLP, Auto Encoders, Vision Transformers, ResNet-50(based on residual networks), and BERT. ● Built News Recommender System for JhakaasNewsVala with the help of NLP and using libraries like NLTK, Gensim, etc. ● Worked on a Chabot using the Rasa 




🔍 Processing resume ID 24: /content/drive/MyDrive/PFE/resumes_dataset/3.pdf




📄 Extracted and preprocessed text: a A data scientist with 2+ years of experience in multiple projects and freelancing. I am fascinated by technology and like to keep myself updated about the latest events Technical Skills Interests Python Machine Learning algorithms Fundamentals of NLP Statistics and Probability uncover it. The world is written in the language of mathematics and as Python libraries a data scientist, I am fluent in it. With my experience of working with SQL machine learning models, I seek to add value to an organization which Tableau gives me an opportunity to work at the bleeding edge of technology. Power BI Google Colab, Professional Experience Anaconda Education Digz Placements I Data Scientist May 2022 - Present B. Tech in Mechanical Engineering Key responsibilities: Bahra University, Shimla hills • Developed Python scripts for data pre-processing,Exploratory Data analysis, feature extraction, Machine learning models to perform Certifications regression and time se




🔍 Processing resume ID 25: /content/drive/MyDrive/PFE/resumes_dataset/16.pdf
📄 Extracted and preprocessed text: SAGAR KURUVA : +91 1160601004 ~ kuruvasagar@gmail.com Data Scientist EXECUTIVE SUMMARY Having 6+ years of IT experience. Highly skilled in Machine learning, Deep Learning, Data Visualization and creative thinking to solve challenging business problems. Ability to work in different work environment and provide data driven solutions. Instrumental in building end-to-end data Science models with Data Acquisition, Transformation & modelling capabilities; Extensive exposure in building applications following MLOps maturity model as continuous integration & deployment. SIGNATURE SKILLS Machine Learning Algorithms MLOPS Knowledge on Big Data and Deep Neural Networks Python, R HADOOP ecosystem Data Visualization (Knowledge on Oracle,MY SQL,DB2, (HDFS, MAP REDUCE,YARN, Tableau) Snowflake,BrewDat. SPARK) Core Java,SQL, Data Structures and Spring ,Spring boot, Micro Natural Language Pr




🔍 Processing resume ID 26: /content/drive/MyDrive/PFE/resumes_dataset/14.pdf




📄 Extracted and preprocessed text: KANOHARAPV Machine Learning |Deep Learning | Image Processing | Computer Vision | NLP Ph.No- +11-7192698493 pvkanohara@gmail.com | linkedin.com/in/kanohar-p-v-477a52a3 Passionate about Machine learning cutting-edge technology and solving real world problems with positive work attitude, highly motivated, fast learner and a team player with goodtechnical and analytical skills to work independently as well as in team. Hands on experience in image classification, object detection, image processing, text classification& text extraction; possesses strong coding ability, adept at delivering enterprise grade, salable, secure & reliable software systems. EXPERIENCE Data Science Intern, Rubixe (09/ 2022 - Present) Key Responsibilities: • Exploratory data analysis • Feature Engineering • Feature scaling • Image pre-processing • Neural Network Building HVAC ENGINEER, Duracool aircondtioning (09-2020 -09/ 2022) Key Responsibilities: • Project estimation • client c




🔍 Processing resume ID 27: /content/drive/MyDrive/PFE/resumes_dataset/13.pdf




📄 Extracted and preprocessed text: https://github.com/gkapil801 WORK EXPERIENCE SKILL AI & ML ENGINEER Daily Code Solutions - Designed and implemented an efficient chatbot for DailyCodeSolution using OpenAI library for automating customer service. Utilized the library's fine-tuning model for performing various tasks including summarizing reviews, suggesting improvements, answering user questions, and writing chargeback emails. DATA SCIENCE 06\2022 - 3\2023 Alma Better - Trainee Proficient in Python, SQL, Regression, Classification, NLP, Clustering, Recommendation Systems, Time Series Analysis, ACHIEVEMENTS Data Cleaning, Feature Engineering and Data Visualization. Experienced in providing Subject Matter Expertise and solving data science queries for effective decisionmaking and problem- Python for Data Science, AI & Development solving. By - IBM ML Badge (2022) by LinkedIn Python for Data Science, AI & Development By - IBM JUNIOR ENGINEER 07\2020 - 05\2021 Chat-Bot NLP Model on Hugging




🔍 Processing resume ID 28: /content/drive/MyDrive/PFE/resumes_dataset/4.pdf




📄 Extracted and preprocessed text: development. of System Engineer. Implemented various Machine Learning Algorithms including Logistic Regression, Support Vector Machine, K-Fold • Cross Validation, Random Forest, K-Nearest Neighbor, and Artificial Neural Network to achieve optimal results. Achieved the highest accuracy in predictions by utilizing the Artificial Neural Network Algorithm. • Skills Certificates Certified - Azure AI Fundamentals (AI 900) (01/2023 - Present), Certified from Azure AWS Certified Cloud Practitioner (AWS CCP) (12/2022 - 12/2025), Certified from AWS Microso Certified - Azure Fundamentals (AZ 900) (11/2022 - Present), Certified from Azure Google Cloud Certified - Associate Cloud Engineer (GCP ACE) (10/2022 - 10/2025), Certified from GCP Machine Learning and Statistical Analysis Unit II (03/2020 - Present), Certified from World Quant University (WQU) Scientific Computing and Python for Data Science Unit I (12/2019 - Present), Certified from World Quant University 



ai model output: 
Name: John Doe
Email: johndoe@gmail.com
Phone: +212612345678

Personal Links:
Github: https://github.com/johndoe
Linkedin: https://linkedin.com/in/johndoe
Portfolio: https://johndoe.dev
Indeed: https://indeed.com/profile/johndoe

Skills:
Python, React, MongoDB, Docker, problem solving, communication

Languages:
English, Arabic

Experiences:
AI Engineer
TechCorp
Jan 2020 - Present
- Led a team of engineers to develop NLP models for document parsing.

Education:
BSc in Computer Engineering
ENSA
2022

Certifications:
AWS Certified ML Engineer

Years of Experience:
5

Projects:
Resume Parser
Developed a parser using Transformers and MongoDB.




















































































































































































































































































































































ai model output: 

```json
{
  "name": "Dr. Azam Rafique",
  "email": null,
  "phone": null,
  "personal_links": {
    "github": null,
    "linkedin": null,
    "portfolio": null,
    "indeed": null,
    "others": []
  },
  "skills": [
    "Python",
    "Machine Learning",
    "Deep Learning",
    "Artificial Intelligence",
    "Tensorflow",
    "Keras",
    "Anaconda",
    "Docke",
    "OpenCV",
    "ROS",
    "PyTorch",
    "MATLAB",
    "Android",
    "Flutter",
    "HTML/ CSS / Bootstrap/ Javascript/ Jquery",
    "MySQL",
    "PHP",
    "Basic 4 Android",
    "Embedded System Design",
    "uP Board",
    "Raspberry Pi",
    "Arduino",
    "LabView",
    "Data Acquisition",
    "Image Processing & Signal Processing Toolboxes",
    "Final Year Projects",
    "Traffic Congestion and Web Development",
    "Full Stack Developer",
    "Communication",
    "File Sharing and Android App Development",
    "Information Dissemination in VANETs",
    "EXPERIENCE: (10+ YEARS)"
  ],
  "languages



✅ Saved to MongoDB!
✅ Successfully processed and stored resume ID 29
❌ Unexpected error while processing /content/drive/MyDrive/PFE/resumes_dataset/azam rafique_cv_master (1).pdf: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.


🔍 Processing resume ID 30: /content/drive/MyDrive/PFE/resumes_dataset/Rizwan chughtai CV.pdf
📄 Extracted and preprocessed text: Muhammad Rizwan Chughtai Father Name : Muhammad Asif Masood Email ID : Date of birth : Contact No : Domicile : CNIC No : Passport No : PEC No : Nationality : Address : Objective To work with an aim to gain valuable hands on experience and professional skills in an organization which provides a challenging, learning and developing atmosphere. Education BSc. Electrical Engineering CGPA : 3.79 UET Taxila, Pakistan 2010 - 2014 (Electronics) MSc. Electrical Engineering UET Taxila, Pakistan 2014 - 2018 CGPA : 3.00 (Communication System) CGPA : 3.50 PhD.



ai model output: 
Skills
- Machine Learning
- Deep Learning
- Neural Networks
- Computer Vision
- Data Science
- Data Analysis
- Data Mining
- Data Warehousing
- Data Modeling
- Data Visualization
- Data Preprocessing
- Data Wrangling
- Data Cleaning
- Data Transformation
- Data Migration
- Data Integration
- Database Design
- Database Administration
- Database Management
- Database Optimization
- Database Security
- Database Backup
- Database Recovery
- Database Migration
- Database Integration
- Database Design
- Database Administration
- Database Management
- Database Optimization
- Database Security
- Database Backup
- Database Recovery
- Database Migration
- Database Integration
- Database Design
- Database Administration
- Database Management
- Database Optimization
- Database Security
- Database Backup
- Database Recovery
- Database Migration
- Database Integration
- Database Design
- Database Administration
- Database Management
- Database Optimization
- Database Security
- Dat



📄 Extracted and preprocessed text: D A HAFER LMAKHLES ASSOCIATE PROFESSOR | SENIOR MEMBER, IEEE FACULTY MEMBER AUG 2016 - PRESENT COLLEGE OF ENGINEERING PRINCE SULTAN UNIVERSITY CHAIRMAN OF AUG 2019 - PRESENT ELECTRICAL ENGINEERING DEPARTMENT PRINCE SULTAN UNIVERSITY SS DIRECTOR OF MAY 2017 - PRESENT SCIENCE AND TECHNOLOGY UNIT AND INTELLECTUAL PROPERTY OFFICE PRINCE SULTAN UNIVERSITY FOUNDER AND LEADER JAN 2019 - PRESENT RENEWABLE ENERGY LABORATORY PRINCE SULTAN UNIVERSITY Education University of Auckland - Ranked 1st and Leading University in NZ Jan. 2011 – Sep. 2015 PhD in Electrical & Electronics Engineering Auckland, New Zealand Thesis: Two-level Dynamic Quantizers for Feedback Control Systems University of Auckland Jun. 2009 – July 2010 Master in Electrical & Electronics Engineering Auckland, New Zealand King Fahd University of Petroleum and Minerals Feb 2001 – Feb. 2006 Bachelor in Electrical Engineering Dhahran, Saudi Arabia Experience Chairman-Prince Sultan University Sep 2019



✅ Saved to MongoDB!
✅ Successfully processed and stored resume ID 31
❌ Unexpected error while processing /content/drive/MyDrive/PFE/resumes_dataset/Dhafer_CV_Nov2022.pdf: cursor already closed

🔍 Processing resume ID 32: /content/drive/MyDrive/PFE/resumes_dataset/CV-Dr-Amjad-Ali.pdf




📄 Extracted and preprocessed text: Dr. Amjad Ali Address Phone: E-mail: Citations (Google Scholar): Cumulative Impact Factor: Professional Member ACM No.: HEC Approved Supervisor From: COURSES TAUGHT: • Advanced Topics in Wireless Networks • Data Security and Encryption • Special Topics in Computer Networks • Information Security and Assurance • Advanced Topics in Computer Architecture • Network Security • Mobile Computing • Information Security • Networks Design and Analysis • Research Methodology in IT • Data Structure and Algorithms • Microprocessor and Computer Architecture • Operating Systems Concepts • Digital Logic Design • Communication and Embedded System ————————————————————————————– EDUCATION: Hamad Bin Khalifa University, Doha, Qatar Post-Doctoral Fellow at the College of Science & Engineering, Doha, Qatar May. 2022 - Present Korea University, Anam-dong, Seongbuk-gu, Seoul, South Korea Research Professor at Mobile Network and Communications Lab in the School of Electrical E



ai model output: 
{
  "name": "Dr. Amjad Ali",
  "email": null,
  "phone": null,
  "personal_links": {
    "github": null,
    "linkedin": null,
    "portfolio": null,
    "indeed": null,
    "others": []
  },
  "skills": [
    "Cognitive Radio Networks",
    "Metaverse",
    "6G and Beyond Cellular Communication Networks",
    "Internet of Things",
    "Blockchain",
    "Haptic Communications & Tactile Internet",
    "Wireless Ad-Hoc & Sensor Networks",
    "Device-to-device",
    "Machine learning",
    "Deep Learning",
    "Smart Cities & Cloud Computing",
    "Image/video Processing",
    "Cyber Security/Network & Information Security"
  ],
  "languages": [],
  "experiences": [],
  "projects": [],
  "education": [
    {
      "degree": "Post-Doctoral Fellow",
      "institution": "Hamad Bin Khalifa University, Doha, Qatar",
      "year": "May 2022 - Present"
    },
    {
      "degree": "Research Professor",
      "institution": "Korea University, Anam-dong, Seongbuk-gu, Seoul, Sou



📄 Extracted and preprocessed text: NOUMAN ALI ELECTRICAL ENGINEER Education PhD ELECTRICAL ENGINEERING | ITU, Lahore Sep, 2021 – contd. (Course Work Complete) MS ELECTRICAL ENGINEERING | SEECS, NUST 2015 – 2017 | CGPA: 3.55/4.00 BS ELECTRICAL ENGINEERING | CUI, Lahore Campus 2011 – 2015 | CGPA: 3.56/4.00 (Silver Medalist) Experience RESEARCH OFFICER | National Institute of Electronics, ISB 12/2017 – 09/2021 (3 Years, 9 Months)  Supervision of project life-cycle from planning, design and procurement to implementation.  Designing and analyzing the schematics and PCBs of the projects.  Programming the embedded micro-controllers related to the project. RESEARCH ASSISTANT | SEECS, NUST 11/2016 – 10/2017 (11 Months)  R&D of Wi-Fi-based Smart Load Enabler device for making conventional sockets smart  Successful implementation of Client-Server Communication between multiple SLE devices  Human Presence-Based Automation of WISNET Lab at SEECS, NUST Research Publications  Muhammad Bilal Sa

Processing Resumes:  43%|████▎     | 12/28 [40:08<47:19, 177.48s/it]

ai model output: 

{
  "name": "NOUMAN ALI",
  "email": "",
  "phone": "",
  "personal_links": {
    "others": ["https://github.com/NOUMANALI", "https://linkedin.com/in/NOUMANALI"]
  },
  "skills": ["ELECTRICAL ENGINEERING", "RESEARCH", "PROGRAMMING", "MATLAB", "C/C++", "Python", "Raspberry Pi", "Arduino IDE", "STMCube IDE", "Eagle CAD", "PROTEUS", "Node Red", "Android Studio"],
  "languages": [],
  "experiences": [
    {
      "job_title": "RESEARCH OFFICER",
      "company": "National Institute of Electronics, ISB",
      "duration": "12/2017 – 09/2021 (3 Years, 9 Months)",
      "description": "Supervision of project life-cycle from planning, design and procurement to implementation. Designing and analyzing the schematics and PCBs of the projects. Programming the embedded micro-controllers related to the project."
    },
    {
      "job_title": "RESEARCH ASSISTANT",
      "company": "SEECS, NUST",
      "duration": "11/2016 – 10/2017 (11 Months)",
      "description": "R&D of Wi-Fi



📄 Extracted and preprocessed text: Shahid Mumtaz, Ph.D., FIET, SMIEEE, CEng. Email: Tel: Biography I am an IET Fellow, IEEE ComSoc, VTS, IAS and ACM Distinguished speaker, recipient of IEEE ComSoC Young Researcher Award, founder and EiC of IET "Journal of Quantum communication," EiC of Alexandria Engineering Journal – Elsevier, Vice-Chair: Europe/Africa Region- IEEE ComSoc: Green Communications & Computing society and Vice-chair for IEEE standard on P1932.1: Standard for Licensed/Unlicensed Spectrum Interoperability in Wireless Mobile Networks. His work resulted in technology transfer to companies and patented technology. His expertise lies in 5G/6G wireless technologies using AI/ML and Digital Twin(VR/XR) tools and innovation path towards industrial and academic. Moreover, he worked as Senior 5G Consultant at Huawei and InterDigital, contributing to RAN1/RAN2. He has more than 15 years of wireless industry/academic experience. He has received his Master's and Ph.D. degrees in Electric

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (32768). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.
Processing Resumes:  46%|████▋     | 13/28 [42:14<40:29, 161.99s/it]

ai model output: 
```json
{
  "name": "Shahid Mumtaz",
  "email": "shahid.mumtaz@ntu.ac.uk",
  "phone": "+212612345678",
  "personal_links": {
    "github": "https://github.com/shahidmumtaz",
    "linkedin": "https://www.linkedin.com/in/shahid-mumtaz-48987a12/",
    "portfolio": "https://shahidmumtaz.com",
    "indeed": "https://www.indeed.com/q-Shahid-Mumtaz-jobs.html",
    "others": [
      "https://scholar.google.com/citations?user=1PkQw8wAAAAJ&hl=en",
      "https://dblp.org/pid/177/6826.html"
    ]
  },
  "skills": [
    "Python",
    "React",
    "MongoDB",
    "Docker",
    "problem solving",
    "communication"
  ],
  "languages": [
    "English",
    "Arabic",
    "French"
  ],
  "experiences": [
    {
      "job_title": "AI Engineer",
      "company": "TechCorp",
      "duration": "Jan 2020 - Present",
      "description": "Led a team of engineers to develop NLP models for document parsing."
    }
  ],
  "projects": [
    {
      "project_title": "Resume Parser",
      "proje



📄 Extracted and preprocessed text: CHRISTOPER Summary Senior Web Developer specializing in front end development. MORGAN Experienced with all stages of the development cycle for dynamic web projects. Well-versed in numerous programming languages including HTML5, PHP OOP, JavaScript, CSS, MySQL. Strong background in project management and customer relations. Skill Highlights • Project management • Creative design • Strong decision maker • Innovative • Complex problem solver • Service-focused Experience Web Developer - 09/2015 to 05/2019 Luna Web Design, New York • Cooperate with designers to create clean interfaces and simple, intuitive interactions and experiences. • Develop project concepts and maintain optimal workflow. Contact • Work with senior developer to manage large, complex Address: design projects for corporate clients. 177 Great Portland Street, • Complete detailed programming and development tasks London W5W 6PQ for front end public and internal websites as well as challeng

Processing Resumes:  50%|█████     | 14/28 [43:23<31:17, 134.08s/it]

ai model output: 

{
  "name": "Christoper M",
  "email": "christoper.m@gmail.com",
  "phone": "+44 (0)20 7666 8555",
  "personal_links": {
    "github": null,
    "linkedin": null,
    "portfolio": null,
    "indeed": null,
    "others": []
  },
  "skills": [
    "project management",
    "creative design",
    "strong decision maker",
    "innovative",
    "complex problem solver",
    "service-focused"
  ],
  "languages": ["Spanish - C2", "Chinese - A1", "German - A2"],
  "experiences": [
    {
      "job_title": "Web Developer",
      "company": "Luna Web Design",
      "duration": "09/2015 to 05/2019",
      "description": "Cooperate with designers to create clean interfaces and simple, intuitive interactions and experiences. Develop project concepts and maintain optimal workflow. Complete detailed programming and development tasks for front end public and internal websites as well as challenging back-end server code. Carry out quality assurance tests to discover errors and optimi



📄 Extracted and preprocessed text: Christopher Morgan Address: 177 Great Portland Street, London W5W 6PQ Phone: +44 (0)20 7666 8555 Email: christoper.morgan@gmail.com OBJECTIVE Provide analysis data support in a company as Data Analyst. WORK 04/2014 - 04/018 Data Analyst, GHT Company, Madrid Spain EXPERIENCE Responsibilities: • Establish operation strategy in a team for improving sales • Prepare data and information for making regular report data analysis • Perform data analysis for complex data and files 03/2012 – 05/2014 Data Analyst, Startup Corporation, Madrid Spain Responsibilities: • Composed Java program for interfacing with Oracle database • Performed data analysis especially financial data • Performed statistical data analysis using STATA • Showed data analysis in regular meetings for creating new program EDUCATION 2004 - 2008 Bachelor Degree of Computer Science, Technical University of Madrid 2002 - 2004 Certified as Data Analyst, Data Analyst Certification, Technical Univers

Processing Resumes:  54%|█████▎    | 15/28 [44:40<25:16, 116.63s/it]

ai model output: 

{
  "name": "Christopher Morgan",
  "email": "christoper.morgan@gmail.com",
  "phone": "+44 (0)20 7666 8555",
  "personal_links": {},
  "skills": ["data analysis", "Java", "Oracle", "STATA", "SPSS", "C++", "SQL", "HTML"],
  "languages": [],
  "experiences": [
    {
      "job_title": "Data Analyst",
      "company": "GHT Company",
      "duration": "04/2014 - 04/2018",
      "description": "Establish operation strategy in a team for improving sales, Prepare data and information for making regular report data analysis, Perform data analysis for complex data and files"
    },
    {
      "job_title": "Data Analyst",
      "company": "Startup Corporation",
      "duration": "03/2012 - 05/2014",
      "description": "Composed Java program for interfacing with Oracle database, Performed data analysis especially financial data, Performed statistical data analysis using STATA, Showed data analysis in regular meetings for creating new program"
    }
  ],
  "projects": [],
  

Processing Resumes:  57%|█████▋    | 16/28 [44:40<16:19, 81.65s/it] 

❌ Error during text extraction: Non-Ascii85 digit found: ~
❌ No text found in resume. Skipping.

🔍 Processing resume ID 38: /content/drive/MyDrive/PFE/resumes_dataset/29.pdf




📄 Extracted and preprocessed text: Christopher Summary Senior Web Developer specializing in front end development. Morgan Experienced with all stages of the development cycle for dynamic web projects. Well-versed in numerous programming languages including HTML5, PHP OOP, JavaScript, CSS, MySQL. Strong background in project management and customer relations. Skill Highlights • Project management • Creative design • Strong decision maker • Innovative • Complex problem • Service-focused solver Experience Contact Web Developer - 09/2015 to 05/2019 Address: Luna Web Design, New York 177 Great Portland Street, London • Cooperate with designers to create clean interfaces and W5W 6PQ simple, intuitive interactions and experiences. • Develop project concepts and maintain optimal Phone: workflow. +44 (0)20 7666 8555 • Work with senior developer to manage large, complex design projects for corporate clients. Email: • Complete detailed programming and development tasks christoper.m@gmail.com for 

Processing Resumes:  61%|██████    | 17/28 [46:01<14:56, 81.48s/it]

ai model output: 

{
  "name": "Christopher Summary",
  "email": "christoper.m@gmail.com",
  "phone": "+44 (0)20 7666 8555",
  "personal_links": {
    "github": null,
    "linkedin": "linkedin.com/christopher.morgan",
    "portfolio": null,
    "indeed": null,
    "others": []
  },
  "skills": [
    "project management",
    "creative design",
    "strong decision maker",
    "innovative",
    "complex problem",
    "service-focused solver"
  ],
  "languages": [
    {
      "language": "Spanish",
      "level": "C2"
    },
    {
      "language": "Chinese",
      "level": "A1"
    },
    {
      "language": "German",
      "level": "A2"
    }
  ],
  "experiences": [
    {
      "job_title": "Web Developer",
      "company": "Luna Web Design",
      "duration": "09/2015 - 05/2019",
      "description": "Complete detailed programming and development tasks for front end public and internal websites as well as challenging back-end server code. Cooperate with designers to create clean inter



📄 Extracted and preprocessed text: Senior Web Developer specializing in front end development. Experienced with all stages of the development cycle for dynamic web projects. Well-versed in numerous programming languages including HTML5, PHP OOP, JavaScript, CSS, MySQL. Strong background in project management and customer relations. CHRISTOPHER MORGAN Experience Phone: +49 800 600 600 09/2015 to 05/2019 E-Mail: Web Developer - Luna Web Design, New York christoper.morgan@gmail.com • Cooperate with designers to create clean interfaces and simple, intuitive interactions and experiences. Linkedin: • Develop project concepts and maintain optimal workflow. linkedin.com/christopher.morgan • Work with senior developer to manage large, complex design projects for corporate clients. • Complete detailed programming and development tasks Skill Highlights for front end public and internal websites as well as challenging back-end server code. • Skill Highlights • Carry out quality assurance tests to 

Processing Resumes:  64%|██████▍   | 18/28 [47:28<13:50, 83.10s/it]

ai model output: 

{
  "name": "Christopher Morgan",
  "email": "christoper.morgan@gmail.com",
  "phone": "+49 800 600 600 09",
  "personal_links": {
    "github": null,
    "linkedin": "linkedin.com/christopher.morgan",
    "portfolio": null,
    "indeed": null,
    "others": []
  },
  "skills": [
    "HTML5",
    "PHP OOP",
    "JavaScript",
    "CSS",
    "MySQL",
    "project management",
    "customer relations",
    "quality assurance tests",
    "creative design",
    "innovative",
    "service-focused",
    "Spanish",
    "Chinese",
    "PHP Framework (certificate): Zend, Codeigniter, Symfony",
    "JavaScript",
    "HTML5",
    "PHP OOP",
    "CSS",
    "SQL",
    "MySQL"
  ],
  "languages": ["Spanish", "Chinese"],
  "experiences": [
    {
      "job_title": "Web Developer",
      "company": "Luna Web Design",
      "duration": "09/2015 to 05/2019",
      "description": "Cooperate with designers to create clean interfaces and simple, intuitive interactions and experiences. Dev



📄 Extracted and preprocessed text: Elizabeth Holmes +1 (970) 333-3833 elizabeth.holmes@coolfreecv.com Store Manager equipped with extensive experience in automotive sales management. Employs excellent leadership skills and multi-tasking strengths. Demonstrated ability to improve store operations, increase top line sales, and reduce costs. Experience Highlights 09/2015 to 05/2019 • Results-oriented Store Manager • Revenue generation LUXURY CAR CENTER, New York • Business development • Motivate and coach employees to meet service, sales, • Effective marketing and repair goals. • Organisational capacity • Create and modify employee schedules with service • Operability and levels in mind. commitment • Recruit and hire top mechanics, service advisors, and • Ability to motivate staff sales people. and maintain good • Maintain detailed logs and reports of services relations performed, profit, and budget information. • Resistance to stress • Help out in sales and repair areas as needed and • G

Processing Resumes:  68%|██████▊   | 19/28 [48:30<11:31, 76.84s/it]

ai model output: 

{
  "name": "Elizabeth Holmes",
  "email": "elizabeth.holmes@coolfreecv.com",
  "phone": "+1 (970) 333-3833",
  "personal_links": {},
  "skills": ["automotive sales management", "leadership", "multi-tasking", "business development", "motivation", "marketing", "organizational capacity", "recruitment", "hiring", "inventory management"],
  "languages": [],
  "experiences": [
    {
      "job_title": "Store Manager",
      "company": "LUXURY CAR CENTER",
      "duration": "09/2015 to 05/2019",
      "description": "Revenue generation, business development, motivated and coached employees to meet service, sales, and repair goals. Created and modified employee schedules with service operability and levels in mind. Recruited and hired top mechanics, service advisors, and sales people. Maintained detailed logs and reports of services performed, profit, and budget information. Helped out in sales and repair areas as needed."
    }
  ],
  "projects": [],
  "education": [
    {



📄 Extracted and preprocessed text: Christopher Morgan Address: 177 Great Portland Street, London W5W 6PQ Phone: +44 (0)20 7666 8555 Email: christoper.morgan@gmail.com OBJECTIVE Provide analysis data support in a company as Data Analyst. WORK 04/2014 - 04/018 Data Analyst, GHT Company, Madrid Spain EXPERIENCE Responsibilities: • Establish operation strategy in a team for improving sales • Prepare data and information for making regular report data analysis • Perform data analysis for complex data and files 03/2012 – 05/2014 Data Analyst, Startup Corporation, Madrid Spain Responsibilities: • Composed Java program for interfacing with Oracle database • Performed data analysis especially financial data • Performed statistical data analysis using STATA • Showed data analysis in regular meetings for creating new program EDUCATION 2004 - 2008 Bachelor Degree of Computer Science, Technical University of Madrid 2002 - 2004 Certified as Data Analyst, Data Analyst Certification, Technical Univers

Processing Resumes:  71%|███████▏  | 20/28 [49:47<10:13, 76.71s/it]

ai model output: 

{
  "name": "Christopher Morgan",
  "email": "christoper.morgan@gmail.com",
  "phone": "+44 (0)20 7666 8555",
  "personal_links": {},
  "skills": ["data analysis", "Java", "Oracle", "STATA", "SPSS", "C++", "SQL", "HTML"],
  "languages": [],
  "experiences": [
    {
      "job_title": "Data Analyst",
      "company": "GHT Company",
      "duration": "04/2014 - 04/2018",
      "description": "Establish operation strategy in a team for improving sales, Prepare data and information for making regular report data analysis, Perform data analysis for complex data and files"
    },
    {
      "job_title": "Data Analyst",
      "company": "Startup Corporation",
      "duration": "03/2012 – 05/2014",
      "description": "Composed Java program for interfacing with Oracle database, Performed data analysis especially financial data, Performed statistical data analysis using STATA, Showed data analysis in regular meetings for creating new program"
    }
  ],
  "projects": [],
  



📄 Extracted and preprocessed text: Christopher Summary Senior Web Developer specializing in front end development. Morgan Experienced with all stages of the development cycle for dynamic web projects. Well-versed in numerous programming languages including HTML5, PHP OOP, JavaScript, CSS, MySQL. Strong background in project management and customer relations. Skill Highlights • Project management • Creative design • Strong decision maker • Innovative • Complex problem • Service-focused solver Experience Contact Web Developer - 09/2015 to 05/2019 Address: Luna Web Design, New York 177 Great Portland Street, London • Cooperate with designers to create clean interfaces and W5W 6PQ simple, intuitive interactions and experiences. • Develop project concepts and maintain optimal Phone: workflow. +44 (0)20 7666 8555 • Work with senior developer to manage large, complex design projects for corporate clients. Email: • Complete detailed programming and development tasks christoper.m@gmail.com for 

Processing Resumes:  75%|███████▌  | 21/28 [53:24<13:53, 119.02s/it]

ai model output: 
Resume Text:
Christopher Summary Senior Web Developer specializing in front end development. Morgan Experienced with all stages of the development cycle for dynamic web projects. Well-versed in numerous programming languages including HTML5, PHP OOP, JavaScript, CSS, MySQL. Strong background in project management and customer relations. Skill Highlights • Project management • Creative design • Strong decision maker • Innovative • Complex problem • Service-focused solver Experience Contact Web Developer - 09/2015 to 05/2019 Address: Luna Web Design, New York 177 Great Portland Street, London • Cooperate with designers to create clean interfaces and simple, intuitive interactions and experiences. • Develop project concepts and maintain optimal workflow. Phone: +44 (0)20 7666 8555 • Work with senior developer to manage large, complex design projects for corporate clients. Email: [christoper.m@gmail.com](mailto:christoper.m@gmail.com) LinkedIn: linkedin.com/christopher.mo



📄 Extracted and preprocessed text: Elizabeth Holmes +1 (970) 333-3833 elizabeth.holmes@coolfreecv.com Store Manager equipped with extensive experience in automotive sales management. Employs excellent leadership skills and multi-tasking strengths. Demonstrated ability to improve store operations, increase top line sales, and reduce costs. Experience Highlights 09/2015 to 05/2019 • Results-oriented Store Manager • Revenue generation LUXURY CAR CENTER, New York • Business development • Motivate and coach employees to meet service, sales, • Effective marketing and repair goals. • Organisational capacity • Create and modify employee schedules with service • Operability and levels in mind. commitment • Recruit and hire top mechanics, service advisors, and • Ability to motivate staff sales people. and maintain good • Maintain detailed logs and reports of services relations performed, profit, and budget information. • Resistance to stress • Help out in sales and repair areas as needed and • G

Processing Resumes:  79%|███████▊  | 22/28 [54:22<10:04, 100.73s/it]

ai model output: 

{
  "name": "Elizabeth Holmes",
  "email": "elizabeth.holmes@coolfreecv.com",
  "phone": "+1 (970) 333-3833",
  "personal_links": {},
  "skills": ["automotive sales management", "leadership", "multi-tasking"],
  "languages": [],
  "experiences": [
    {
      "job_title": "Store Manager",
      "company": "LUXURY CAR CENTER",
      "duration": "09/2015 to 05/2019",
      "description": "Revenue generation, business development, motivate and coach employees to meet service, sales, and repair goals. Create and modify employee schedules with service operability and levels in mind. Recruit and hire top mechanics, service advisors, and sales people. Maintain detailed logs and reports of services performed, profit, and budget information. Help out in sales and repair areas as needed and maintain comprehensive current knowledge of operations."
    }
  ],
  "projects": [],
  "education": [
    {
      "degree": "Bachelor of Science: Automotive Store Manager Technology",
    

Processing Resumes:  82%|████████▏ | 23/28 [55:27<07:29, 89.85s/it] 

ai model output: 

{
  "name": "John W. Smith",
  "email": "jwsmith@colostate.edu",
  "skills": [],
  "languages": [],
  "experiences": [
    {
      "job_title": "Counseling Supervisor",
      "company": "The Wesley Center",
      "duration": "1999-2002",
      "description": "Determined work placement for 150 special needs adult clients. Maintained client databases and records. Coordinated client contact with local health care professionals on a monthly basis. Managed 25 volunteer workers."
    },
    {
      "job_title": "Client Specialist",
      "company": "Rainbow Special Care Center",
      "duration": "1997-1999",
      "description": "Coordinated service assignments for 20 part-time counselors and 100 client families. Oversaw daily activity and outing planning for 100 clients. Assisted families of special needs clients with researching financial assistance and healthcare. Assisted teachers with managing daily classroom activities. Oversaw daily and special student activities."



📄 Extracted and preprocessed text: James Carter Data Engineer +1 (555) 123-4567 | james.carter@datamail.com Profile Data Engineer with 5+ years of experience in designing, developing, and maintaining robust data pipelines, ETL processes, and data warehouses. Proficient in SQL, Python, and big data technologies like Hadoop and Spark. Strong analytical skills with a focus on optimizing data processes for performance and scalability. Experience 07/2019 to Present Data Engineer - Tech Solutions, San Francisco, CA - Designed and implemented scalable ETL processes for data integration. - Developed data pipelines using Apache Spark and Hadoop ecosystems. - Optimized SQL queries and data models for performance improvements. - Collaborated with data scientists to ensure data accuracy and accessibility. 06/2015 to 06/2019 Junior Data Engineer - DataCorp, New York, NY - Assisted in building data pipelines and data warehouses. - Maintained and monitored ETL jobs, ensuring data quality and reliabil

Processing Resumes:  86%|████████▌ | 24/28 [56:53<05:54, 88.68s/it]

ai model output: 

{
  "name": "James Carter",
  "email": "james.carter@datamail.com",
  "phone": "+1 (555) 123-4567",
  "personal_links": {
    "github": null,
    "linkedin": null,
    "portfolio": null,
    "indeed": null,
    "others": []
  },
  "skills": [
    "Python",
    "SQL",
    "Hadoop",
    "Apache Spark",
    "Redshift",
    "Snowflake",
    "Apache NiFi",
    "Talend",
    "AWS",
    "GCP",
    "MySQL",
    "PostgreSQL"
  ],
  "languages": [],
  "experiences": [
    {
      "job_title": "Data Engineer",
      "company": "Tech Solutions",
      "duration": "07/2019 to Present",
      "description": "Designed and implemented scalable ETL processes for data integration. Developed data pipelines using Apache Spark and Hadoop ecosystems. Optimized SQL queries and data models for performance improvements. Collaborated with data scientists to ensure data accuracy and accessibility."
    },
    {
      "job_title": "Junior Data Engineer",
      "company": "DataCorp",
      "dura



📄 Extracted and preprocessed text: Senior Web Developer specializing in front end development. Experienced with all stages of the development cycle for dynamic web projects. Well-versed in numerous programming languages including HTML5, PHP OOP, JavaScript, CSS, MySQL. Strong background in project management and customer relations. CHRISTOPHER MORGAN Experience Phone: +49 800 600 600 09/2015 to 05/2019 E-Mail: Web Developer - Luna Web Design, New York christoper.morgan@gmail.com • Cooperate with designers to create clean interfaces and simple, intuitive interactions and experiences. Linkedin: • Develop project concepts and maintain optimal workflow. linkedin.com/christopher.morgan • Work with senior developer to manage large, complex design projects for corporate clients. • Complete detailed programming and development tasks Skill Highlights for front end public and internal websites as well as challenging back-end server code. • Skill Highlights • Carry out quality assurance tests to 

Processing Resumes:  89%|████████▉ | 25/28 [57:57<04:04, 81.36s/it]

ai model output: 

{
  "name": "Christopher Morgan",
  "email": "christoper.morgan@gmail.com",
  "phone": "+49 800 600 600 09",
  "personal_links": {
    "linkedin": "linkedin.com/christopher.morgan"
  },
  "skills": ["HTML5", "PHP OOP", "JavaScript", "CSS", "MySQL", "project management", "customer relations", "quality assurance", "creative design", "service-focused"],
  "languages": ["Spanish", "Chinese"],
  "experiences": [
    {
      "job_title": "Web Developer",
      "company": "Luna Web Design",
      "duration": "09/2015 - 05/2019",
      "description": "Cooperate with designers to create clean interfaces and simple, intuitive interactions and experiences. Develop project concepts and maintain optimal workflow. Work with senior developer to manage large, complex design projects for corporate clients. Complete detailed programming and development tasks. Carry out quality assurance tests to discover errors and optimize usability."
    }
  ],
  "projects": [],
  "education": [
   



📄 Extracted and preprocessed text: Sophia Williams Machine Learning Engineer +1 (555) 987-6543 | sophia.williams@mlmail.com Profile Machine Learning Engineer with 4+ years of experience in developing and deploying machine learning models for predictive analytics, natural language processing, and computer vision applications. Expert in Python, TensorFlow, and scikit-learn with a solid understanding of data preprocessing, model training, and model optimization techniques. Experience 08/2020 to Present Machine Learning Engineer - AI Innovations, Austin, TX - Developed and deployed machine learning models for customer behavior prediction. - Implemented NLP models for text classification and sentiment analysis. - Collaborated with data scientists to optimize models for performance and accuracy. - Managed and preprocessed large datasets using Pandas and NumPy. 07/2017 to 07/2020 Data Scientist - Data Insights, Boston, MA - Built and validated machine learning models for predictive analytics.

Processing Resumes:  93%|█████████▎| 26/28 [59:33<02:51, 85.72s/it]

ai model output: 

{
  "name": "Sophia Williams",
  "email": "sophia.williams@mlmail.com",
  "phone": "+1 (555) 987-6543",
  "personal_links": {
    "github": null,
    "linkedin": null,
    "portfolio": null,
    "indeed": null,
    "others": []
  },
  "skills": [
    "Python",
    "R",
    "TensorFlow",
    "PyTorch",
    "scikit-learn",
    "Pandas",
    "NumPy",
    "Docker",
    "Kubernetes",
    "AWS",
    "Azure",
    "MySQL",
    "MongoDB"
  ],
  "languages": [],
  "experiences": [
    {
      "job_title": "Machine Learning Engineer",
      "company": "AI Innovations",
      "duration": "08/2020 to Present",
      "description": "Developed and deployed machine learning models for customer behavior prediction. Implemented NLP models for text classification and sentiment analysis. Collaborated with data scientists to optimize models for performance and accuracy. Managed and preprocessed large datasets using Pandas and NumPy."
    },
    {
      "job_title": "Data Scientist",
    

Processing Resumes:  96%|█████████▋| 27/28 [1:00:27<01:16, 76.06s/it]

ai model output: 

{
  "name": "John W. Smith",
  "email": "jwsmith@colostate.edu",
  "skills": [],
  "experiences": [
    {
      "job_title": "Counseling Supervisor",
      "company": "The Wesley Center",
      "duration": "1999-2002",
      "description": ""
    },
    {
      "job_title": "Client Specialist",
      "company": "Rainbow Special Care Center",
      "duration": "1997-1999",
      "description": ""
    },
    {
      "job_title": "Teacher’s Assistant",
      "company": "Cowell Elementary",
      "duration": "1996-1997",
      "description": ""
    }
  ],
  "education": [
    {
      "degree": "BS in Early Childhood Development",
      "institution": "University of Arkansas at Little Rock",
      "year": "1999"
    },
    {
      "degree": "BA in Elementary Education",
      "institution": "University of Arkansas at Little Rock",
      "year": "1998"
    }
  ],
  "years_of_experience": 4
}
parsed data output: {'name': 'John W. Smith', 'email': 'jwsmith@colostate.edu', 's



📄 Extracted and preprocessed text: CHRISTOPER Summary Senior Web Developer specializing in front end development. MORGAN Experienced with all stages of the development cycle for dynamic web projects. Well-versed in numerous programming languages including HTML5, PHP OOP, JavaScript, CSS, MySQL. Strong background in project management and customer relations. Skill Highlights • Project management • Creative design • Strong decision maker • Innovative • Complex problem solver • Service-focused Experience Web Developer - 09/2015 to 05/2019 Luna Web Design, New York • Cooperate with designers to create clean interfaces and simple, intuitive interactions and experiences. • Develop project concepts and maintain optimal workflow. Contact • Work with senior developer to manage large, complex Address: design projects for corporate clients. 177 Great Portland Street, • Complete detailed programming and development tasks London W5W 6PQ for front end public and internal websites as well as challeng

Processing Resumes: 100%|██████████| 28/28 [1:01:33<00:00, 131.90s/it]

ai model output: 

{
  "name": "CHRISTOPER",
  "email": "christoper.m@gmail.com",
  "phone": "+44 (0)20 7666 8555",
  "personal_links": {
    "github": null,
    "linkedin": null,
    "portfolio": null,
    "indeed": null,
    "others": []
  },
  "skills": ["project management", "creative design", "strong decision maker", "innovative", "complex problem solver", "service-focused"],
  "languages": ["Spanish - C2", "Chinese - A1", "German - A2"],
  "experiences": [
    {
      "job_title": "Web Developer",
      "company": "Luna Web Design",
      "duration": "09/2015 to 05/2019",
      "description": "Cooperate with designers to create clean interfaces and simple, intuitive interactions and experiences. Develop project concepts and maintain optimal workflow. Complete detailed programming and development tasks for front end public and internal websites as well as challenging back-end server code. Carry out quality assurance tests to discover errors and optimize usability."
    }
  ],
  "p


