In [12]:
import os
import requests
import pandas as pd
import numpy as np
from typing import List, Dict, Any
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import spacy
import json

class AILinkedInRecruitmentTool:
    def __init__(self, job_requirements: Dict[str, Any]):
        """
        Initialize the recruitment AI tool with job requirements

        :param job_requirements: Dictionary containing job specification details
        """
        self.job_requirements = job_requirements

        # Load spaCy for advanced NLP processing
        try:
            self.nlp = spacy.load('en_core_web_sm')
        except OSError:
            print("Downloading spaCy language model...")
            os.system('python -m spacy download en_core_web_sm')
            self.nlp = spacy.load('en_core_web_sm')

        # Candidates database (simulated - would be replaced with actual LinkedIn data)
        self.candidates_db = []

    def preprocess_linkedin_profile(self, profile: Dict[str, Any]) -> Dict[str, Any]:
        """
        Preprocess and clean LinkedIn profile data

        :param profile: Raw LinkedIn profile dictionary
        :return: Cleaned and structured profile
        """
        processed_profile = {
            'name': profile.get('name', ''),
            'headline': profile.get('headline', ''),
            'experience': self._extract_experience(profile.get('experience', [])),
            'skills': self._extract_skills(profile.get('skills', [])),
            'education': self._extract_education(profile.get('education', [])),
            'summary': profile.get('summary', '')
        }
        return processed_profile

    def _extract_experience(self, experiences: List[Dict]) -> List[str]:
        """
        Extract and process work experience

        :param experiences: List of experience dictionaries
        :return: List of processed experience descriptions
        """
        processed_experiences = []
        for exp in experiences:
            desc = f"{exp.get('title', '')} at {exp.get('company', '')} - {exp.get('description', '')}"
            processed_experiences.append(desc)
        return processed_experiences

    def _extract_skills(self, skills: List[Dict]) -> List[str]:
        """
        Extract skills from profile

        :param skills: List of skill dictionaries
        :return: List of skill names
        """
        return [skill.get('name', '').lower() for skill in skills if skill.get('name')]

    def _extract_education(self, education: List[Dict]) -> List[str]:
        """
        Extract education details

        :param education: List of education dictionaries
        :return: List of education descriptions
        """
        return [f"{edu.get('degree', '')} from {edu.get('school', '')}" for edu in education]

    def calculate_skill_match(self, candidate_skills: List[Dict], required_skills: List[str]) -> float:
        """
        Calculate skill match percentage

        :param candidate_skills: List of candidate skills
        :param required_skills: List of required job skills
        :return: Skill match percentage
        """
        candidate_skill_names = [skill.get('name', '').lower() for skill in candidate_skills if skill.get('name')]
        candidate_skills_set = set(candidate_skill_names)
        required_skills_set = set(required_skills)

        matching_skills = candidate_skills_set.intersection(required_skills_set)
        match_percentage = (len(matching_skills) / len(required_skills_set)) * 100 if required_skills_set else 0

        return match_percentage

    def semantic_similarity(self, text1: str, text2: str) -> float:
        """
        Calculate semantic similarity between two texts using spaCy

        :param text1: First text
        :param text2: Second text
        :return: Semantic similarity score
        """
        doc1 = self.nlp(text1)
        doc2 = self.nlp(text2)
        return doc1.similarity(doc2)

    def ai_candidate_ranking(self, candidates: List[Dict], job_description: str) -> List[Dict]:
        """
        Rank candidates using AI techniques

        :param candidates: List of candidate profiles
        :param job_description: Job description text
        :return: Ranked list of candidates
        """
        ranked_candidates = []

        for candidate in candidates:
            # Skill match calculation
            skill_match_score = self.calculate_skill_match(
                candidate.get('skills', []),
                self.job_requirements.get('required_skills', [])
            )

            # Semantic similarity of profile to job description
            # The experience key contains a list of dictionaries,
            # so we need to extract the descriptions first
            experiences = candidate.get('experience', [])
            experience_descriptions = [exp for exp in experiences] # Extract the descriptions into a list

            # Now join the descriptions
            semantic_match_score = self.semantic_similarity(
                ' '.join(map(str, experience_descriptions)) + candidate.get('summary', ''), # Convert to string using map(str, )
                job_description
            )

            # Combined scoring mechanism
            total_score = (skill_match_score * 0.6) + (semantic_match_score * 40)

            candidate_result = {
                'name': candidate.get('name'),
                'skill_match': skill_match_score,
                'semantic_match': semantic_match_score,
                'total_score': total_score
            }

            ranked_candidates.append(candidate_result)

        # Sort candidates by total score in descending order
        ranked_candidates.sort(key=lambda x: x['total_score'], reverse=True)

        return ranked_candidates

    def generate_candidate_report(self, top_candidates: List[Dict]) -> str:
        """
        Generate a detailed report of top candidates

        :param top_candidates: List of top-ranked candidates
        :return: Formatted report string
        """
        report = "AI Recruitment Candidate Report\n"
        report += "=" * 50 + "\n\n"

        for i, candidate in enumerate(top_candidates[:5], 1):
        # Indented block for the loop
            report += f"Candidate {i}:\n"
            report += f"Name: {candidate['name']}\n"
            report += f"Skill Match: {candidate['skill_match']:.2f}%\n" # Corrected line
            report += f"Semantic Match: {candidate['semantic_match']:.2f}\n"
            report += f"Total Score: {candidate['total_score']:.2f}\n\n"

        return report

In [13]:
# Example job requirements
job_requirements = {
    'required_skills': ['python', 'machine learning', 'data analysis'],
    'job_description': "We are looking for a skilled Python developer with experience in Machine Learning and Data Analysis."
}

# Example candidate profile data
candidates = [
    {
        'name': 'John Doe',
        'headline': 'Data Scientist at XYZ Corp',
        'experience': [
            {'title': 'Data Scientist', 'company': 'XYZ Corp', 'description': 'Worked on machine learning algorithms'}
        ],
        'skills': [{'name': 'python'}, {'name': 'machine learning'}, {'name': 'data analysis'}],
        'education': [{'degree': 'Master\'s', 'school': 'University of ABC'}],
        'summary': "Experienced data scientist with a background in machine learning and data analysis."
    },
    {
        'name': 'Jane Smith',
        'headline': 'Software Engineer at ABC Tech',
        'experience': [
            {'title': 'Software Engineer', 'company': 'ABC Tech', 'description': 'Worked on Python-based applications'}
        ],
        'skills': [{'name': 'python'}, {'name': 'software development'}],
        'education': [{'degree': 'Bachelor\'s', 'school': 'University of XYZ'}],
        'summary': "Software engineer specializing in Python programming."
    }
]

# Initialize the tool
tool = AILinkedInRecruitmentTool(job_requirements)

# Rank candidates
ranked_candidates = tool.ai_candidate_ranking(candidates, job_requirements['job_description'])

# Generate the report
report = tool.generate_candidate_report(ranked_candidates)
print(report)




AI Recruitment Candidate Report

Candidate 1:
Name: John Doe
Skill Match: 100.00%
Semantic Match: 0.39
Total Score: 75.58

Candidate 2:
Name: Jane Smith
Skill Match: 33.33%
Semantic Match: 0.34
Total Score: 33.65




  return doc1.similarity(doc2)
