In [2]:
# Install required packages for local environment
# Note: Run this only once, then comment out or skip
import subprocess
import sys

def install_package(package):
    """Install ``package`` with pip, using the interpreter that runs this kernel.

    Args:
        package: A pip requirement specifier, e.g. ``"spacy==3.7.5"``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_install = [sys.executable, "-m", "pip", "install"]
    subprocess.check_call(pip_install + [package])

# Requirements for this notebook, installed one after another. Only spaCy is
# pinned to an exact version. The installs run on every execution of this cell,
# so comment the loop out once the environment is set up.
for requirement in ("spacy==3.7.5", "scikit-learn", "tqdm", "psutil"):
    install_package(requirement)

print("Packages installation complete. Comment out the installation lines after first run.")

Packages installation complete. Comment out the installation lines after first run.



This notebook has been adapted to run in your local environment instead of Google Colab.

## Setup Instructions
1. Make sure you have activated your virtual environment
2. Install the required packages by running the installation cell (needed only once per environment)

In [3]:
# Download spaCy English model (run once)
import subprocess
import sys

# spacy.load raises OSError when the model package is not installed; in that
# case fetch it with the same interpreter that runs this kernel.
try:
    import spacy
    nlp = spacy.load('en_core_web_sm')
except OSError:
    print("üì• Downloading spaCy English model...")
    download_command = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
    subprocess.check_call(download_command)
    print("‚úÖ spaCy English model installed successfully")
else:
    print("‚úÖ spaCy English model already installed")



‚úÖ spaCy English model already installed


In [5]:
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm
import json

In [6]:
# Show which spaCy release is active in this kernel.
spacy.__version__

'3.7.5'

In [9]:
# Check system resources (adapted for local environment)
import os
import subprocess
import sys

# psutil supplies the CPU and memory figures below; install it on demand.
try:
    import psutil
except ImportError:
    print("üì• Installing psutil...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "psutil"])
    import psutil

# Divisor that converts a byte count into GiB for display.
bytes_per_gib = 1024**3

print("System Information:")
print(f"CPU Cores: {psutil.cpu_count()}")
print(f"RAM: {psutil.virtual_memory().total / bytes_per_gib:.1f} GB")
print(f"Available RAM: {psutil.virtual_memory().available / bytes_per_gib:.1f} GB")

# GPU probe. PyTorch is optional, so a missing install is reported, not fatal.
try:
    import torch
    if not torch.cuda.is_available():
        print("üíª GPU: Not available - using CPU (this is fine for resume parsing)")
    else:
        print(f"‚úÖ GPU: {torch.cuda.get_device_name(0)}")
        print(f"   GPU Memory: {torch.cuda.get_device_properties(0).total_memory / bytes_per_gib:.1f} GB")
except ImportError:
    print("üíª GPU: PyTorch not installed - using CPU (this is fine for resume parsing)")
    print("üí° PyTorch is optional. Uncomment the line below to install it if needed:")
    print("   # subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'torch'])")

    # Opt-in PyTorch install; enable the next line only if a GPU check is wanted.
    # subprocess.check_call([sys.executable, "-m", "pip", "install", "torch"])

print("\nüéØ System is ready for resume parsing tasks!")

System Information:
CPU Cores: 8
RAM: 15.7 GB
Available RAM: 1.4 GB
üíª GPU: PyTorch not installed - using CPU (this is fine for resume parsing)
üí° PyTorch is optional. Uncomment the line below to install it if needed:
   # subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'torch'])

üéØ System is ready for resume parsing tasks!


In [10]:
# Load training data from local file system
import json
import os

# Location of the annotated training examples. Point this at your own JSON file;
# when the file is missing, a small sample set is generated and saved there.
training_data_path = 'data/training/train_data.json'


def build_sample_cv_data():
    """Return two hand-annotated sentences in the layout the rest of the notebook expects.

    Each example is ``[text, {"entities": [(start, end, label), ...]}]`` where
    ``start``/``end`` are character offsets into ``text`` and ``end`` is
    exclusive, so ``text[start:end]`` is exactly the annotated phrase.
    """
    return [
        [
            "John Doe is a Software Engineer at Microsoft with 5 years of experience in Python development.",
            {
                "entities": [
                    (0, 8, "PERSON"),          # "John Doe"
                    (14, 31, "DESIGNATION"),   # "Software Engineer"
                    (35, 44, "COMPANY"),       # "Microsoft"
                    (50, 57, "EXPERIENCE"),    # "5 years"
                    (75, 81, "SKILLS")         # "Python"
                ]
            }
        ],
        [
            "Jane Smith worked as Data Scientist at Google for 3 years specializing in Machine Learning.",
            {
                "entities": [
                    (0, 10, "PERSON"),         # "Jane Smith"
                    (21, 35, "DESIGNATION"),   # "Data Scientist"
                    (39, 45, "COMPANY"),       # "Google"
                    (50, 57, "EXPERIENCE"),    # "3 years"
                    (74, 90, "SKILLS")         # "Machine Learning"
                ]
            }
        ]
    ]


if os.path.exists(training_data_path):
    # Use a context manager so the file handle is closed deterministically.
    with open(training_data_path, 'r', encoding='utf-8') as f:
        cv_data = json.load(f)
    print(f"‚úÖ Loaded training data from {training_data_path}")
else:
    print("‚ö†Ô∏è Training data file not found. Creating sample data structure...")
    cv_data = build_sample_cv_data()

    # Create the parent directory if it doesn't exist
    os.makedirs(os.path.dirname(training_data_path) or '.', exist_ok=True)

    # Persist the sample so later runs take the "loaded" branch above.
    with open(training_data_path, 'w', encoding='utf-8') as f:
        json.dump(cv_data, f, indent=2)

    print(f"‚úÖ Created sample training data at {training_data_path}")

print(f"üìä Total training examples: {len(cv_data)}")

‚úÖ Loaded training data from data/training/train_data.json
üìä Total training examples: 2


In [11]:
# Number of annotated examples currently held in cv_data.
len(cv_data)

2

In [12]:
# Create spaCy configuration file for local training
import os

# Make sure both the training folder (config + corpora) and the model output
# folder exist before anything is written.
for required_dir in ('data/training', 'data/output'):
    os.makedirs(required_dir, exist_ok=True)

# spaCy training configuration: a tok2vec + ner pipeline whose train/dev corpora
# point at the .spacy files under data/training.
base_config_content = """
[system]
gpu_allocator = null

[nlp]
lang = "en"
pipeline = ["tok2vec","ner"]
batch_size = 1000
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"@tokenizers": "spacy.Tokenizer.v1"}

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v2"

[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = 96
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
rows = [5000,1000,2500,2500]
include_static_vectors = false

[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = 96
depth = 4
window_size = 1
maxout_pieces = 3

[components.ner]
factory = "ner"
incorrect_spans_key = null
moves = null
scorer = {"@scorers":"spacy.ner_scorer.v1"}
update_with_oracle_cut_size = 100

[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = true
nO = null

[components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = 96
upstream = "*"

[corpora]

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = "data/training/test_data.spacy"
max_length = 0
gold_preproc = false
limit = 0
augmenter = null

[corpora.train]
@readers = "spacy.Corpus.v1"
path = "data/training/train_data.spacy"
max_length = 0
gold_preproc = false
limit = 0
augmenter = null

[training]
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
seed = 0
gpu_allocator = null
dropout = 0.1
accumulate_gradient = 1
patience = 1600
max_epochs = 0
max_steps = 20000
eval_frequency = 200
frozen_components = []
annotating_components = []
before_to_disk = null

[training.batcher]
@batchers = "spacy.batch_by_words.v1"
discard_oversize = false
tolerance = 0.2
get_length = null

[training.batcher.size]
@schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001
t = 0.0

[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = false

[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001
learn_rate = 0.001

[training.score_weights]
ents_f = 1.0
ents_p = 0.0
ents_r = 0.0
ents_per_type = null

[pretraining]

[initialize]
vectors = null
init_tok2vec = null
vocab_data = null
lookups = null
before_init = null
after_init = null

[initialize.components]

[initialize.tokenizer]
"""

# Write the config without the blank lines that surround the literal above.
config_path = 'data/training/config.cfg'
with open(config_path, 'w') as config_file:
    config_file.write(base_config_content.strip())

print(f"‚úÖ Created spaCy config file at {config_path}")

‚úÖ Created spaCy config file at data/training/config.cfg


In [13]:
# Peek at the first example: [text, {"entities": [[start, end, label], ...]}].
cv_data[0]

['John Doe is a Software Engineer at Microsoft with 5 years of experience in Python development.',
 {'entities': [[0, 8, 'PERSON'],
   [14, 31, 'DESIGNATION'],
   [35, 44, 'COMPANY'],
   [50, 56, 'EXPERIENCE'],
   [71, 77, 'SKILLS']]}]

In [14]:
def get_spacy_docs(file, data):
    """Convert annotated examples into a spaCy ``DocBin``.

    Args:
        file: Writable text handle used as an error log. Spans that cannot be
            aligned to token boundaries, and any errors raised while building
            a document, are written to it one line at a time.
        data: Iterable of ``(text, annotations)`` pairs, where ``annotations``
            is a dict whose ``"entities"`` value is a sequence of
            ``(start, end, label)`` character-offset triples.

    Returns:
        A ``DocBin`` holding one document per example whose entities could be
        assigned. Within an example, an entity that shares any character with
        an earlier entity is dropped, so the first annotation wins.
    """
    nlp = spacy.blank('en')
    db = DocBin()

    for text, annot in tqdm(data):
        doc = nlp.make_doc(text)

        ents = []
        # Character positions already claimed by an earlier entity; a set keeps
        # the overlap check linear in the span length.
        claimed_chars = set()

        for start, end, label in annot['entities']:
            span_chars = range(start, end)
            if not claimed_chars.isdisjoint(span_chars):
                continue
            # Positions are claimed even if the span is rejected below, so a
            # rejected annotation still blocks later overlapping ones.
            claimed_chars.update(span_chars)

            try:
                span = doc.char_span(start, end, label=label, alignment_mode='strict')
            except Exception as exc:
                # Best effort: record the problem and move on to the next entity.
                file.write(f"{[start, end]}    {text}    ({type(exc).__name__}: {exc})\n")
                continue

            if span is None:
                # Offsets do not line up with token boundaries; log for review.
                err_data = str([start, end]) + "    " + str(text) + "\n"
                file.write(err_data)
            else:
                ents.append(span)

        try:
            doc.ents = ents
            db.add(doc)
        except Exception as exc:
            # Skip the document but leave a trace instead of failing silently.
            file.write(f"skipped document ({type(exc).__name__}: {exc})    {text}\n")

    return db

In [15]:
# Install and import scikit-learn for data splitting
import subprocess
import sys

# Import the splitter, installing scikit-learn first when it is missing.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    print("üì• Installing scikit-learn...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "scikit-learn"])
    from sklearn.model_selection import train_test_split
    print("‚úÖ scikit-learn installed and imported successfully")
else:
    print("‚úÖ scikit-learn already available")

# Hold out 30% of the examples for evaluation; the fixed seed keeps the split stable.
train, test = train_test_split(cv_data, random_state=42, test_size=0.3)
print(f"üìä Data split complete: {len(train)} training, {len(test)} testing examples")

‚úÖ scikit-learn already available
üìä Data split complete: 1 training, 1 testing examples


In [16]:
# Sizes of the training and test partitions.
len(train), len(test)

(1, 1)

In [17]:
# Create spaCy training files from the processed data
import os

# Make sure the destination folder is present before anything is written.
os.makedirs('data/training', exist_ok=True)

# Output locations: the log of rejected spans plus the two serialized corpora.
error_file_path = 'data/training/error.txt'
train_data_path = 'data/training/train_data.spacy'
test_data_path = 'data/training/test_data.spacy'

# A single log file collects rejected spans from both partitions.
with open(error_file_path, 'w') as error_log:

    print("üîÑ Processing training data...")
    db_train = get_spacy_docs(error_log, train)
    db_train.to_disk(train_data_path)
    print(f"‚úÖ Training data saved to: {train_data_path}")

    print("üîÑ Processing test data...")
    db_test = get_spacy_docs(error_log, test)
    db_test.to_disk(test_data_path)
    print(f"‚úÖ Test data saved to: {test_data_path}")

print(f"üìù Error log saved to: {error_file_path}")
print(f"üéØ Ready for model training with {len(train)} training and {len(test)} test examples")

üîÑ Processing training data...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 74.82it/s]



‚úÖ Training data saved to: data/training/train_data.spacy
üîÑ Processing test data...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 334.18it/s]

‚úÖ Test data saved to: data/training/test_data.spacy
üìù Error log saved to: data/training/error.txt
üéØ Ready for model training with 1 training and 1 test examples





In [19]:
# Report how many documents and tokens the serialized test set contains.
# The DocBin is read back from disk, so this cell does not depend on the
# in-memory db_test left behind by the cell that wrote the file.

try:
    from spacy.tokens import DocBin

    test_data_path = 'data/training/test_data.spacy'
    if os.path.exists(test_data_path):
        db_test = DocBin().from_disk(test_data_path)

        # Deserialize the documents once and reuse them for both statistics.
        test_docs = list(db_test.get_docs(spacy.blank('en').vocab))
        print(f"üìä Test dataset contains {len(test_docs)} documents")

        # len(doc) is the number of tokens in a document.
        total_tokens = sum(len(test_doc) for test_doc in test_docs)

        print(f"üî§ Total tokens in test dataset: {total_tokens}")
    else:
        print("‚ö†Ô∏è Test data file not found. Please run cell 15 first to create the training files.")

except Exception as e:
    # Best-effort diagnostic cell: report the problem instead of stopping the run.
    print(f"‚ùå Error reading test data: {e}")
    print("üí° Make sure you have run cell 15 to create the training files.")

üìä Test dataset contains 1 documents
üî§ Total tokens in test dataset: 16


In [None]:
# Train the spaCy model using local configuration
import subprocess
import sys
import os

# Files the training run depends on; all of them are produced by earlier cells.
train_data_path = 'data/training/train_data.spacy'
test_data_path = 'data/training/test_data.spacy'
config_path = 'data/training/config.cfg'

# Describe every missing prerequisite so the user gets one complete list.
missing_files = []
for file_path, name in [(train_data_path, "training data"), (test_data_path, "test data"), (config_path, "config file")]:
    if not os.path.exists(file_path):
        missing_files.append(f"{name} ({file_path})")

if missing_files:
    print("‚ùå Missing required files:")
    for file in missing_files:
        print(f"   ‚Ä¢ {file}")
    print("\nüí° Please run the previous cells to create these files:")
    print("   ‚Ä¢ Cell 9: Create config file")
    print("   ‚Ä¢ Cell 15: Create training/test data files")
else:
    print("‚úÖ All required files found. Starting training...")

    # Create output directory
    os.makedirs('data/output', exist_ok=True)

    # Plain `spacy train <config> --output <dir>` invocation. No `--code`
    # argument is passed (spaCy expects a path to a Python file there, not
    # inline source) and no SPACY_CONFIG_OVERRIDES are set (spaCy parses that
    # variable as `--section.key value` pairs). The config written earlier
    # already names both corpora, so no path overrides are needed.
    training_command = [
        sys.executable, "-m", "spacy", "train",
        config_path,
        "--output", "data/output",
    ]

    print("üöÄ Starting model training...")
    print("‚è±Ô∏è  This may take several minutes depending on your data size and hardware.")
    print("üìä Training with sample data (2 examples) - expect quick completion...")

    try:
        # Output is captured so it can be shown only when the run fails.
        result = subprocess.run(training_command, capture_output=True, text=True,
                                cwd=os.getcwd())

        if result.returncode == 0:
            print("‚úÖ Model training completed successfully!")
            print("üìÅ Model saved to: data/output/model-best")

            # Check if model files were actually created
            model_path = 'data/output/model-best'
            if os.path.exists(model_path):
                print("‚úÖ Model files verified in output directory")
            else:
                print("‚ö†Ô∏è Training reported success but model files not found")

        else:
            print(f"‚ùå Training failed with return code: {result.returncode}")
            print("\nüìù Error details:")
            if result.stdout:
                print("STDOUT:")
                print(result.stdout)
            if result.stderr:
                print("STDERR:")
                print(result.stderr)

            # Fallback: retry with a minimal NER-only config.
            print("\nüí° Trying alternative approach with simpler config...")

            # The corpus paths are embedded directly; backslashes (chr(92)) are
            # turned into forward slashes before they are written to the config.
            simple_config = f"""
[system]
gpu_allocator = null

[nlp]
lang = "en"
pipeline = ["ner"]
batch_size = 1000

[components]

[components.ner]
factory = "ner"

[corpora]

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = "{test_data_path.replace(chr(92), '/')}"
max_length = 0

[corpora.train]
@readers = "spacy.Corpus.v1"
path = "{train_data_path.replace(chr(92), '/')}"
max_length = 0

[training]
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
max_steps = 1000
eval_frequency = 100

[training.optimizer]
@optimizers = "Adam.v1"

[training.batcher]
@batchers = "spacy.batch_by_words.v1"

[initialize]
"""

            # Save the simpler config
            simple_config_path = 'data/training/simple_config.cfg'
            with open(simple_config_path, 'w') as f:
                f.write(simple_config.strip())

            # Try with simpler config
            simple_command = [
                sys.executable, "-m", "spacy", "train",
                simple_config_path,
                "--output", "data/output"
            ]

            result2 = subprocess.run(simple_command, capture_output=True, text=True, cwd=os.getcwd())

            if result2.returncode == 0:
                print("‚úÖ Training completed with simplified config!")
                print("? Model saved to: data/output/model-best")
            else:
                print(f"‚ùå Simple training also failed: {result2.returncode}")
                print("STDOUT:", result2.stdout)
                print("STDERR:", result2.stderr)

    except Exception as e:
        # Reached when the subprocess cannot be launched or a file cannot be written.
        print(f"‚ùå Error during training: {e}")
        print("üí° Make sure you have:")
        print("   1. Activated your virtual environment")
        print("   2. Installed spaCy: pip install spacy")
        print("   3. Run previous cells to create training data")

# Alternative: Skip Custom Training

If you're having issues with custom model training, you can skip it and use the **Modern Resume Parser** we created, which works excellently without requiring custom training.

In [1]:
# Quick Alternative: Use spaCy's pre-trained model directly
# This bypasses training issues and works immediately

import spacy
import os

try:
    # The stock English pipeline needs no custom training.
    nlp = spacy.load('en_core_web_sm')
    print("‚úÖ Standard spaCy model loaded successfully!")
    print("üí° This model can identify PERSON, ORG, GPE, and other entities")

    # Run the pipeline on one resume-like sentence.
    test_text = "John Doe is a Software Engineer at Microsoft with 5 years of experience in Python development."
    doc = nlp(test_text)

    print(f"\nüß™ Testing with: '{test_text}'")
    print("\nüìã Detected entities:")
    for entity in doc.ents:
        # spacy.explain turns the label code into a human-readable description.
        description = spacy.explain(entity.label_)
        print(f"   ‚Ä¢ {entity.text} ‚Üí {entity.label_} ({description})")

    if not doc.ents:
        print("\n‚ö†Ô∏è No entities detected, but the model is loaded and ready.")
    else:
        print("\n‚úÖ Entity recognition working! You can proceed to the PDF processing cells.")

except Exception as exc:
    print(f"‚ùå Error loading spaCy model: {exc}")
    print("üí° Make sure you've run cell 3 to download the English model")
    print("   Or run: python -m spacy download en_core_web_sm")



‚úÖ Standard spaCy model loaded successfully!
üí° This model can identify PERSON, ORG, GPE, and other entities

üß™ Testing with: 'John Doe is a Software Engineer at Microsoft with 5 years of experience in Python development.'

üìã Detected entities:
   ‚Ä¢ John Doe ‚Üí PERSON (People, including fictional)
   ‚Ä¢ Microsoft ‚Üí ORG (Companies, agencies, institutions, etc.)
   ‚Ä¢ 5 years ‚Üí DATE (Absolute or relative dates or periods)

‚úÖ Entity recognition working! You can proceed to the PDF processing cells.


In [None]:
# Model test: load the trained model (or the stock fallback) and try it on sample text

In [2]:
# Load the trained model or use the existing modern parser
import os
import spacy

# Prefer the custom pipeline written by training; fall back to the stock model.
model_path = 'data/output/model-best'
custom_model_available = os.path.exists(model_path)

if not custom_model_available:
    print("‚ö†Ô∏è Custom model not found. Using standard spaCy model...")
    print("üí° You can train a custom model by running the training cells above.")
    nlp = spacy.load('en_core_web_sm')
    print("‚úÖ Standard spaCy model loaded!")
else:
    print("üì¶ Loading custom trained model...")
    nlp = spacy.load(model_path)
    print("‚úÖ Custom model loaded successfully!")

‚ö†Ô∏è Custom model not found. Using standard spaCy model...
üí° You can train a custom model by running the training cells above.
‚úÖ Standard spaCy model loaded!
‚úÖ Standard spaCy model loaded!


In [3]:
# Smoke test: run the loaded pipeline on a short sentence and list each entity.
doc = nlp('my name is santhosh. I worked at Microsoft. I have 10 years of experience')
for entity in doc.ents:
    print(entity.text, " ->>>>>>", entity.label_)

Microsoft  ->>>>>> ORG
10 years  ->>>>>> DATE


In [4]:
# Install PDF processing library for local environment
import subprocess
import sys

# Probe for pdfplumber and install it with the kernel's interpreter if absent.
try:
    import pdfplumber
except ImportError:
    print("üì• Installing pdfplumber...")
    pip_command = [sys.executable, "-m", "pip", "install", "pdfplumber"]
    subprocess.check_call(pip_command)
    print("‚úÖ pdfplumber installed successfully")
else:
    print("‚úÖ pdfplumber already installed")

‚úÖ pdfplumber already installed


In [5]:
# Import PDF processing library (using pdfplumber instead of PyMuPDF)
import pdfplumber
import os

In [6]:
# Resume to parse. Set the RESUME_PATH environment variable to use another file
# without editing the notebook; otherwise the default below is used.
resume_path = os.environ.get(
    "RESUME_PATH",
    "C:/Users/10723269/Downloads/AniruddhaLaha_resume.pdf",  # Update this path
)

if os.path.exists(resume_path):
    print(f"üìÑ Loading PDF: {resume_path}")
    # The text itself is extracted in a later cell.
else:
    print(f"‚ùå File not found: {resume_path}")
    print("üí° Please update the resume_path variable with a valid PDF file path")
    print("üìÅ Example: resume_path = 'C:/path/to/your/resume.pdf'")

    # Look in the current user's usual document folders; these are derived
    # from the home directory so the hint works on any machine.
    home_dir = os.path.expanduser("~")
    common_paths = [
        os.path.join(home_dir, folder)
        for folder in ("Downloads", "Documents", "Desktop")
    ]

    print("\nüìÇ Checking common locations for PDF files:")
    for path in common_paths:
        if not os.path.isdir(path):
            continue
        try:
            entries = os.listdir(path)
        except OSError:
            # Unreadable folder (e.g. permissions); this is only a hint, so skip it.
            continue
        pdf_files = [f for f in entries if f.lower().endswith('.pdf')]
        if pdf_files:
            print(f"   {path}: {pdf_files[:3]}...")  # Show first 3 PDF files

üìÑ Loading PDF: C:/Users/10723269/Downloads/AniruddhaLaha_resume.pdf


In [None]:
# Leftover from the earlier PyMuPDF-based extraction; the pdfplumber cell below replaces it.
#doc = [page.getText() for page in doc]

In [7]:
# Extract the text of every page with pdfplumber. Pages that yield no text are
# skipped; each remaining page is followed by a newline.
if os.path.exists(resume_path):
    try:
        with pdfplumber.open(resume_path) as pdf:
            # Read everything needed while the document is still open, so the
            # handle is never touched after the context manager closes it.
            page_count = len(pdf.pages)
            page_texts = [page.extract_text() for page in pdf.pages]

        # One join instead of repeated string concatenation.
        text = "".join(page_text + "\n" for page_text in page_texts if page_text)

        print("‚úÖ Text extracted successfully!")
        print(f"üìä Text length: {len(text)} characters")
        print(f"üìÑ Number of pages: {page_count}")

    except Exception as e:
        # Best effort: report the failure and continue with empty text.
        print(f"‚ùå Error extracting text: {e}")
        text = ""
else:
    print("‚ö†Ô∏è Please update the resume_path in the previous cell")
    text = "Sample resume text for testing: John Doe is a Software Engineer at Microsoft with 5 years of experience in Python development."

‚úÖ Text extracted successfully!
üìä Text length: 4274 characters
üìÑ Number of pages: 2


In [8]:
# Trim leading and trailing whitespace from the extracted text.
text = text.strip()

In [9]:
# Collapse every run of whitespace (including newlines) into a single space.
text = ' '.join(text.split())

In [10]:
# Display the normalized resume text.
# NOTE(review): the rendered output is the full resume content; consider
# clearing it before sharing the notebook.
text

'EXPERIENCE AZURE MIGRATION CONSULTANT, IBM INDIA MARCH 2021 - PRESENT ‚Ä¢ Demonstrated expertise in architecture and maintaining scripted CI/CD pipelines. Implemented fully automated CI/CD for multiple infrastructure that accelerated ANIRUDDHA LAHA deployment time by 75%. AZURE CLOUD & DEVOPS ARCHITECT ‚Ä¢ Developed automation scripts and leveraged IaC tools to streamline infrastructure provisioning, deployment workflows, automated database configurations, and ensure consistent, compliant configurations. PROFILE ‚Ä¢ Architected and deployed secure Azure Landing Zones aligned Azure Cloud Architect and DevOps with Microsoft CAF, ensuring compliance, governance, and consultant with 9.8 years of operational excellence for enterprise clients that ensured 35% experience driving automation, improvement in end-user experiences. CI/CD, cloud infrastructure, and ‚Ä¢ Designed cloud strategy roadmaps, conducted cloud readiness reliability engineering across assessments, and advised clients on hyb

In [11]:
# Run the loaded pipeline over the whole resume and list every entity it finds.
doc = nlp(text)
for entity in doc.ents:
    print(entity.text, " ->>>>>> ", entity.label_)

IBM  ->>>>>>  ORG
2021  ->>>>>>  DATE
CI  ->>>>>>  ORG
CI  ->>>>>>  ORG
75%  ->>>>>>  PERCENT
IaC  ->>>>>>  ORG
PROFILE ‚Ä¢  ->>>>>>  ORG
Azure Landing Zones  ->>>>>>  ORG
Azure Cloud Architect  ->>>>>>  WORK_OF_ART
DevOps  ->>>>>>  ORG
Microsoft CAF  ->>>>>>  ORG
9.8 years  ->>>>>>  DATE
35%  ->>>>>>  PERCENT
CI  ->>>>>>  ORG
‚Ä¢ Designed  ->>>>>>  ORG
20%  ->>>>>>  PERCENT
‚Ä¢ Worked  ->>>>>>  PERSON
IaC  ->>>>>>  ORG
Terraform  ->>>>>>  ORG
CICD  ->>>>>>  ORG
CICD  ->>>>>>  ORG
AGIC  ->>>>>>  ORG
DevOps  ->>>>>>  ORG
Linkedin  ->>>>>>  NORP
Terraform  ->>>>>>  ORG
CI  ->>>>>>  ORG
Azure Migrate  ->>>>>>  ORG
8274867428  ->>>>>>  CARDINAL
Kolkata  ->>>>>>  GPE
West Bengal  ->>>>>>  GPE
India ‚Ä¢ Hands  ->>>>>>  ORG
AI Foundry  ->>>>>>  ORG
CERTIFICATIONS  ->>>>>>  ORG
AI  ->>>>>>  GPE
Architect(AZ-305  ->>>>>>  PERSON
‚Ä¢ Developed  ->>>>>>  ORG
Azure Administrator  ->>>>>>  ORG
104  ->>>>>>  CARDINAL
Oracle Cloud Infra  ->>>>>>  ORG
RedHat  ->>>>>>  ORG
EX200  ->>>>>>  CARDINAL
RACK

In [12]:
# Alternative: Use the Modern Resume Parser we created
# This provides structured extraction similar to the trained model

# The whole cell sits inside the try so that an ImportError raised anywhere in
# it ends up in the "parser not found" hint below.
try:
    from modern_resume_parser import ModernResumeParser

    if not os.path.exists(resume_path):
        print("‚ö†Ô∏è Please set a valid resume_path to use the Modern Resume Parser")
    else:
        print("üöÄ Using Modern Resume Parser for structured extraction...")
        parser = ModernResumeParser(resume_path)
        extracted_data = parser.get_extracted_data()

        print("\nüìã Structured Extraction Results:")
        print("=" * 50)
        for key, value in extracted_data.items():
            # Long lists are abbreviated to their first five items.
            is_long_list = isinstance(value, list) and len(value) > 5
            if not is_long_list:
                print(f"{key}: {value}")
            else:
                print(f"{key}: {value[:5]}... (showing first 5)")

except ImportError:
    print("üí° Modern Resume Parser not found. Make sure modern_resume_parser.py is in the same directory.")
    print("üìù You can copy it from the main project directory.")

üöÄ Using Modern Resume Parser for structured extraction...

üìã Structured Extraction Results:
name: ANIRUDDHA LAHA
email: anilaha2502@gmail.com
mobile_number: 9038503946
skills: ['Python', 'Go', 'Azure', 'Docker', 'Kubernetes']... (showing first 5)
college_name: Brainware Group of Institution
degree: Bachelor of Technology
designation: None
company_names: ['DevOps', 'Shell', 'RedHat', 'Azure Landing Zones', 'Oracle\n‚Ä¢ Container Orchestration:']... (showing first 5)
no_of_pages: 2
total_experience: None

üìã Structured Extraction Results:
name: ANIRUDDHA LAHA
email: anilaha2502@gmail.com
mobile_number: 9038503946
skills: ['Python', 'Go', 'Azure', 'Docker', 'Kubernetes']... (showing first 5)
college_name: Brainware Group of Institution
degree: Bachelor of Technology
designation: None
company_names: ['DevOps', 'Shell', 'RedHat', 'Azure Landing Zones', 'Oracle\n‚Ä¢ Container Orchestration:']... (showing first 5)
no_of_pages: 2
total_experience: None
