In [None]:
!pip install python-dotenv playwright requests beautifulsoup4 langchain langgraph langchain-google-genai pdfminer.six

!playwright install

In [None]:


import asyncio
from playwright.async_api import async_playwright
import nest_asyncio
# Patch asyncio so asyncio.run() can be called from inside the notebook.
nest_asyncio.apply()
import requests
from bs4 import BeautifulSoup
from langchain_core.tools import tool
from langgraph.graph import StateGraph, END
from langchain_core.runnables import RunnableLambda
from langchain_google_genai import ChatGoogleGenerativeAI
from typing import TypedDict, Optional


In [14]:
# ---------------------------
# STEP 0: Mount Drive & Load Credentials
# ---------------------------
from google.colab import drive
drive.mount('/content/drive')



from dotenv import load_dotenv
import os

# Credentials live in a .env file on the mounted Drive, never in the notebook.
ENV_PATH = "/content/drive/MyDrive/cred.env"
load_dotenv(ENV_PATH)

LINKEDIN_EMAIL = os.getenv("LINKEDIN_EMAIL")
LINKEDIN_PASSWORD = os.getenv("LINKEDIN_PASSWORD")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Fail fast with a readable message: os.getenv returns None for an unset
# variable, which would otherwise surface as a TypeError on the slice below.
_missing_credentials = [
    name
    for name, value in (
        ("LINKEDIN_EMAIL", LINKEDIN_EMAIL),
        ("LINKEDIN_PASSWORD", LINKEDIN_PASSWORD),
        ("GOOGLE_API_KEY", GOOGLE_API_KEY),
    )
    if not value
]
if _missing_credentials:
    raise RuntimeError(
        f"Missing credentials in {ENV_PATH}: {', '.join(_missing_credentials)}"
    )

# Sanity check only; clear this cell's output before sharing the notebook.
print("Email:", LINKEDIN_EMAIL)
print("API Key:", GOOGLE_API_KEY[:6] + "*****")

# STEP 1: LinkedIn Scraping using Playwright

async def _collect_span_texts(page, url, stop_marker="More profiles for you"):
    """Open ``url`` and return the stripped, non-empty text of each ``span[aria-hidden="true"]``.

    Entries from ``stop_marker`` onwards are dropped. If reading the spans
    fails, an empty list is returned (best-effort extraction).
    """
    await page.goto(url)
    await page.wait_for_timeout(5000)

    texts = []
    try:
        for span in await page.locator('span[aria-hidden="true"]').all():
            text = (await span.inner_text()).strip()
            if text:
                texts.append(text)
    except Exception:
        # Exception (not a bare except) so task cancellation still propagates.
        return []

    if stop_marker in texts:
        texts = texts[:texts.index(stop_marker)]
    return texts


async def scrape_linkedin(profile_url):
    """Log in to LinkedIn and scrape headline, skills and education for a profile.

    Args:
        profile_url: URL of the profile page; a trailing slash is tolerated.

    Returns:
        dict with keys ``"headline"`` (str; ``"Headline not found"`` when the
        element cannot be read), ``"skills"`` (list of unique str, in page
        order) and ``"education"`` (list of str).

    Raises:
        Whatever Playwright raises when login or page navigation fails; the
        browser is closed before the exception propagates.
    """
    # Avoid "//details/..." when the caller passes a URL ending in "/".
    base_url = profile_url.rstrip("/")

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()

            # Login
            await page.goto("https://www.linkedin.com/login")
            await page.fill('input#username', LINKEDIN_EMAIL)
            await page.fill('input#password', LINKEDIN_PASSWORD)
            await page.click('button[type="submit"]')
            await page.wait_for_url("https://www.linkedin.com/feed/", timeout=15000)

            # Visit profile page
            await page.goto(profile_url)
            await page.wait_for_timeout(5000)

            # Extract headline (first line of the first matching element)
            try:
                headline = await page.locator(
                    'div.inline-show-more-text--is-collapsed.full-width'
                ).first.inner_text()
                headline = headline.split('\n')[0].strip()
            except Exception:
                headline = "Headline not found"

            # Extract skills; dict.fromkeys de-duplicates while keeping page order,
            # so repeated runs give the same ordering (set() did not).
            skills = await _collect_span_texts(page, base_url + "/details/skills/")
            skills = list(dict.fromkeys(skills))

            # Extract education
            education = await _collect_span_texts(page, base_url + "/details/education/")
        finally:
            # Close the browser even when login or navigation raised.
            await browser.close()

        return {
            "headline": headline,
            "skills": skills,
            "education": education
        }

# Run the async scraper in Colab
def run_scraper(url):
    """Synchronously run scrape_linkedin for ``url`` and return its result."""
    scrape_job = scrape_linkedin(url)
    return asyncio.run(scrape_job)

# STEP 2: Extract Resume Text
from pdfminer.high_level import extract_text

def extract_resume_text(resume_path):
    """Return the text that pdfminer's extract_text produces for ``resume_path``."""
    pdf_text = extract_text(resume_path)
    return pdf_text

# STEP 3: Scrape GitHub Profile
def scrape_github(github_url, timeout=10, max_chars=3000):
    """Fetch a GitHub page and return its visible text, truncated for an LLM prompt.

    Args:
        github_url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook.
        max_chars: Maximum number of characters of page text to return.

    Returns:
        Up to ``max_chars`` characters of page text, or a string starting with
        ``"Error fetching GitHub: "`` when the request or parsing fails
        (including HTTP error statuses such as 404).
    """
    try:
        response = requests.get(github_url, timeout=timeout)
        # Treat 4xx/5xx as failures instead of scoring an error page as a profile.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        text = soup.get_text(separator="\n")
        return text[:max_chars]  # Limit for LLM prompt
    except Exception as e:
        return f"Error fetching GitHub: {e}"

# STEP 4: LangChain & LangGraph Evaluation Setup
# Setup Gemini LLM client
# Publish the key as an environment variable (presumably where the Gemini
# client looks for it -- confirm), then build the shared LLM client.
os.environ.update(GOOGLE_API_KEY=GOOGLE_API_KEY)
_GEMINI_MODEL = "gemini-1.5-pro-latest"
llm = ChatGoogleGenerativeAI(model=_GEMINI_MODEL)

class EvalState(TypedDict):
    """State dictionary passed between the evaluation graph nodes."""

    # Inputs supplied by the caller.
    resume_path: Optional[str]
    linkedin_url: Optional[str]
    github_url: Optional[str]
    job_title: Optional[str]
    job_description: Optional[str]

    # Raw material gathered for each source.
    resume_text: Optional[str]
    linkedin_data: Optional[dict]
    github_text: Optional[str]

    # LLM feedback per source, plus the combined verdict.
    resume_score: Optional[str]
    linkedin_score: Optional[str]
    github_score: Optional[str]
    final_score: Optional[str]

@tool
def resume_evaluator(resume_text: str) -> str:
    """Evaluates resume text and returns a score out of 100 with feedback."""
    # NOTE: the docstring above is kept verbatim; langchain's @tool is
    # documented to use it as the tool description.
    instruction = "Evaluate the following resume text and provide a score out of 100 with detailed feedback:"
    reply = llm.invoke(f"{instruction}\n\n{resume_text}")
    return reply.content

@tool
def linkedin_evaluator(linkedin_data: dict) -> str:
    """Evaluates LinkedIn profile data and returns a score out of 100 with detailed feedback."""
    # Pull each section out first; missing keys fall back to empty values.
    headline = linkedin_data.get('headline', '')
    skills_csv = ', '.join(linkedin_data.get('skills', []))
    education_csv = ', '.join(linkedin_data.get('education', []))

    prompt_parts = [
        "Evaluate the following LinkedIn profile details(skills,headline and education sections) and provide a score out of 100 with detailed feedback:\n\n",
        f"Headline: {headline}\n",
        f"Skills: {skills_csv}\n",
        f"Education: {education_csv}\n",
    ]
    reply = llm.invoke("".join(prompt_parts))
    return reply.content

@tool
def github_evaluator(github_text: str) -> str:
    """Evaluates GitHub profile text and returns a score out of 100 with detailed feedback."""
    # Instruction first, then the scraped page text after a blank line.
    instruction = "Evaluate the following GitHub profile summary and provide a score out of 100 with detailed feedback:"
    reply = llm.invoke(f"{instruction}\n\n{github_text}")
    return reply.content


def extract_resume(state: EvalState) -> EvalState:
    """Graph node: fill ``resume_text`` from ``resume_path`` when a path is present."""
    resume_path = state.get("resume_path")
    if not resume_path:
        # Nothing to extract; pass the state through untouched.
        return state
    state["resume_text"] = extract_resume_text(resume_path)
    return state

def evaluate_resume(state: EvalState) -> EvalState:
    """Graph node: score the resume text and store the feedback in ``resume_score``.

    extract_resume leaves ``resume_text`` unset when no ``resume_path`` was
    given, so a missing or empty text is reported as feedback instead of
    raising ``KeyError`` and aborting the whole evaluation.
    """
    resume_text = state.get("resume_text")
    if not resume_text:
        state["resume_score"] = "No resume text available to evaluate."
        return state
    state["resume_score"] = resume_evaluator.invoke({"resume_text": resume_text})
    return state

def evaluate_linkedin(state: EvalState) -> EvalState:
    """Graph node: score ``linkedin_data`` and store the feedback in ``linkedin_score``."""
    # Hand the scraped profile dictionary to the evaluator tool as-is.
    tool_input = {"linkedin_data": state["linkedin_data"]}
    state["linkedin_score"] = linkedin_evaluator.invoke(tool_input)
    return state

def evaluate_github(state: EvalState) -> EvalState:
    """Graph node: score ``github_text`` and store the feedback in ``github_score``."""
    tool_input = {"github_text": state["github_text"]}
    state["github_score"] = github_evaluator.invoke(tool_input)
    return state

def compile_feedback(state: EvalState) -> EvalState:
    """Graph node: combine the three feedback texts into ``final_score``.

    Builds one prompt from the job title, job description and the resume,
    LinkedIn and GitHub feedback, sends it to the LLM and stores the reply's
    content under ``final_score``.
    """
    role_line = f"You are evaluating a candidate for the role of **{state['job_title']}**.\n"
    description_block = f"Job Description:\n{state['job_description']}\n\n"
    resume_block = f"Resume Feedback:\n{state['resume_score']}\n\n"
    linkedin_block = f"LinkedIn Feedback:\n{state['linkedin_score']}\n\n"
    github_block = f"GitHub Feedback:\n{state['github_score']}\n\n"
    closing = "Based on the above, give an overall score out of 100 and summarize strengths and areas to improve."

    prompt = "".join(
        [role_line, description_block, resume_block, linkedin_block, github_block, closing]
    )
    verdict = llm.invoke(prompt)
    state["final_score"] = verdict.content
    return state


from langchain_core.runnables import RunnableLambda

# Pipeline steps in execution order; each node hands its state to the next one.
_PIPELINE_STEPS = [
    ("extract_resume", extract_resume),
    ("evaluate_resume", evaluate_resume),
    ("evaluate_linkedin", evaluate_linkedin),
    ("evaluate_github", evaluate_github),
    ("compile_feedback", compile_feedback),
]

builder = StateGraph(EvalState)
for _step_name, _step_fn in _PIPELINE_STEPS:
    builder.add_node(_step_name, RunnableLambda(_step_fn))

# Chain the nodes linearly: first step is the entry, last step is the finish.
_step_names = [_name for _name, _ in _PIPELINE_STEPS]
builder.set_entry_point(_step_names[0])
for _src, _dst in zip(_step_names, _step_names[1:]):
    builder.add_edge(_src, _dst)
builder.set_finish_point(_step_names[-1])

graph = builder.compile()

# STEP 5: Run full evaluation example

if __name__ == "__main__":

    # Candidate inputs for this example run.
    linkedin_profile_url = "https://www.linkedin.com/in/inti-harshini"
    resume_pdf_path = "/content/B2024150_IntiHarshini_CV (6).pdf"
    github_url = "https://github.com/intiharshini"

    # Gather LinkedIn data and echo what was scraped.
    print("Scraping LinkedIn profile...")
    linkedin_data = run_scraper(linkedin_profile_url)
    print("\n--- Extracted LinkedIn Data ---")
    headline_text = linkedin_data.get('headline', 'N/A')
    print(f"Headline: {headline_text}")
    skills_text = ', '.join(linkedin_data.get('skills', []))
    print(f"Skills: {skills_text}")
    education_text = ', '.join(linkedin_data.get('education', []))
    print(f"Education: {education_text}\n")

    # Gather GitHub page text.
    print("Scraping GitHub profile...")
    github_text = scrape_github(github_url)

    # Initial graph state: inputs plus the data scraped above.
    state = dict(
        resume_path=resume_pdf_path,
        linkedin_url=linkedin_profile_url,
        linkedin_data=linkedin_data,
        github_url=github_url,
        github_text=github_text,
        job_title="Data Analyst Intern",
        job_description=(
            "Looking for a data analyst intern skilled in Python, SQL, Excel, and data visualization tools like Power BI or Tableau. "
            "Understanding of statistics and ability to communicate findings effectively is essential. "
            "Must have experience with GitHub and collaborative tools."
        ),
    )

    # Run the evaluation graph and show the combined verdict.
    result = graph.invoke(state)

    print("\n\n====== Final Candidate Evaluation ======\n")
    print(result["final_score"])


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Email: intisatyapriya@gmail.com
API Key: AIzaSy*****
Scraping LinkedIn profile...

--- Extracted LinkedIn Data ---
Headline: Experienced Robotics Process Automation developer with a demonstrated history of working in the information technology and services industry. Skilled in UiPath, C#, Core Java, Robotic Process Automation (RPA), and . Strong engineering professional with a Bachelor of Technology - BTech focused in Computer Science from Pondicherry University.
Skills: Python (Programming Language), Machine Learning, 1, Data Visualization, .NET Framework, Automation Anywhere, Core Java, Robotic Process Automation (RPA), C (Programming Language), UiPath, C#, Teamwork, Data Analysis, Digital Marketing, 4, Marketing Tools: Digital Marketing Tools and Services, C++
Education: 1, 4, Goa Institute of Management (GIM), Postgraduate Degree, Big Data Analytics, Jun 