<a href="https://colab.research.google.com/github/devloper2022/2022.devloper/blob/main/automate_hr_inbox.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================
# STEP 0: Install Required Libraries
# ==========================
!pip install pdfplumber python-docx openpyxl pandas google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client


In [None]:
# ==========================
# STEP 1: Upload Gmail API Credentials (Optional, only for running Gmail fetch)
# ==========================
from google.colab import files
# Optional: upload your credentials.json here if you want Gmail access.
# The Step 2 cell reads it from /content/credentials.json.
# NOTE(review): assumes the uploaded file lands in /content — confirm.
uploaded = files.upload()


In [None]:
# ==========================
# STEP 2: Authenticate Gmail API (Optional)
# ==========================
import os, pickle
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Read-only Gmail scope requested during the OAuth flow.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
creds = None

# Reuse the credentials cached by a previous run, if the token file exists.
# NOTE(review): pickle.load can execute arbitrary code from the file it reads;
# only keep token.pkl where untrusted parties cannot replace it.
if os.path.exists("/content/token.pkl"):
    with open("/content/token.pkl", "rb") as token:
        creds = pickle.load(token)

# No cached credentials: run the OAuth flow with the client secrets file, then
# cache the result so the next run can skip the login.
# NOTE(review): run_local_server presumably needs a browser that can reach this
# machine — confirm it works in the hosted Colab runtime.
# NOTE(review): cached credentials are used as-is (no expiry check here) —
# confirm the client library refreshes them when needed.
if not creds:
    flow = InstalledAppFlow.from_client_secrets_file("/content/credentials.json", SCOPES)
    creds = flow.run_local_server(port=0)
    with open("/content/token.pkl", "wb") as token:
        pickle.dump(creds, token)

# Gmail v1 client; passed to fetch_emails_and_download() as `service`.
service = build("gmail", "v1", credentials=creds)
print("✅ Gmail API connected")


In [None]:
# ==========================
# STEP 3: Fetch Emails and Download Attachments
# ==========================
import base64

# Imported here as well: the only other `import os` lives in the optional
# Gmail authentication cell, which may not have been run.
import os

# Directory that downloaded resume attachments are written to.
SAVE_DIR = "/content/resumes"
os.makedirs(SAVE_DIR, exist_ok=True)

def fetch_emails_and_download(service, query="has:attachment", save_dir=None):
    """Download the attachments of every Gmail message matching ``query``.

    Args:
        service: Gmail API client. Only ``users().messages()`` is used
            (``list``, ``get`` and ``attachments().get``).
        query: Gmail search expression selecting the messages to scan.
        save_dir: Directory the attachments are written to. Defaults to the
            module-level ``SAVE_DIR``. Created if it does not exist.

    Returns:
        list[str]: Paths of the files written, in download order.
    """
    # Local imports keep the function usable even when the cell that imports
    # `os` (the optional authentication cell) has not been run.
    import base64
    import os

    if save_dir is None:
        save_dir = SAVE_DIR
    os.makedirs(save_dir, exist_ok=True)

    def _walk_parts(payload):
        # Attachments may sit inside nested multipart containers, so descend
        # into every part's own `parts` list as well.
        for part in payload.get('parts', []):
            yield part
            yield from _walk_parts(part)

    messages_api = service.users().messages()
    downloaded = []
    page_token = None

    while True:
        # The list endpoint is paginated; keep following nextPageToken so
        # messages beyond the first page are not silently ignored.
        list_kwargs = {'userId': 'me', 'q': query}
        if page_token:
            list_kwargs['pageToken'] = page_token
        results = messages_api.list(**list_kwargs).execute()

        for msg in results.get('messages', []):
            msg_id = msg['id']
            message = messages_api.get(userId='me', id=msg_id).execute()

            for part in _walk_parts(message.get('payload', {})):
                # The filename is chosen by the sender: keep only its last
                # component so "../x" or "/abs/x" cannot escape save_dir.
                raw_name = (part.get('filename') or '').replace('\\', '/')
                filename = os.path.basename(raw_name)
                if filename in ('', '.', '..'):
                    continue

                body = part.get('body', {})
                if 'attachmentId' in body:
                    att = messages_api.attachments().get(
                        userId='me', messageId=msg_id, id=body['attachmentId']
                    ).execute()
                    encoded = att['data']
                elif body.get('data'):
                    # Small attachments can be delivered inline in the part
                    # body instead of behind an attachmentId.
                    encoded = body['data']
                else:
                    continue

                file_path = os.path.join(save_dir, filename)
                with open(file_path, 'wb') as f:
                    f.write(base64.urlsafe_b64decode(encoded))
                downloaded.append(file_path)
                print(f"📩 Downloaded {file_path}")

        page_token = results.get('nextPageToken')
        if not page_token:
            break

    return downloaded

# files = fetch_emails_and_download(service)  # Uncomment when Gmail credentials are provided


In [None]:
# ==========================
# STEP 4: Parse Resumes (PDF / DOCX)
# ==========================
import pdfplumber
import docx
import re

def extract_text(file_path):
    """Return the plain text of a resume file.

    ``.pdf`` files are read with pdfplumber, ``.docx`` files with python-docx
    (paragraph text only) and ``.txt`` files as UTF-8 text. The extension
    check is case-insensitive. Any other file type yields ``""``.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        with pdfplumber.open(file_path) as pdf:
            # A page without a text layer may yield None; treat it as empty
            # instead of failing on `None + "\n"`.
            return "".join(f"{page.extract_text() or ''}\n" for page in pdf.pages)
    if lowered.endswith(".docx"):
        doc = docx.Document(file_path)
        return "\n".join(para.text for para in doc.paragraphs)
    if lowered.endswith(".txt"):
        with open(file_path, encoding="utf-8", errors="replace") as handle:
            return handle.read()
    return ""


# Patterns are compiled once; parse_resume() runs once per resume file.
_EMAIL_RE = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')
_PHONE_RE = re.compile(r'\+?\d[\d -]{8,12}\d')
_EXPERIENCE_RE = re.compile(r'(\d+)\s*\+?\s*(?:years?|yrs?)\b', re.I)
_SKILL_NAMES = ("Python", "Java", "C++", "AWS", "SQL", "ML")
# Lookarounds instead of \b: "\b" never matches between "+" and a following
# space or comma, so "C++" would otherwise go undetected.
_SKILL_RE = re.compile(
    r'(?<!\w)(?:' + '|'.join(map(re.escape, _SKILL_NAMES)) + r')(?!\w)',
    re.I,
)
_CANONICAL_SKILL = {name.lower(): name for name in _SKILL_NAMES}


def parse_resume(file_path):
    """Extract basic candidate fields from a resume file.

    Args:
        file_path: Path of the resume; see extract_text() for supported types.

    Returns:
        dict with the keys:
            "Name": first non-blank line of the text, or "Unknown".
            "Email": first e-mail address found, or "".
            "Phone": first phone-like digit run found, or "".
            "Skills": known skills in order of first appearance,
                case-normalised and de-duplicated, joined with ", ".
            "Experience": first "<n> years" style figure as int, or 0.
            "Resume File": ``file_path`` unchanged.
    """
    text = extract_text(file_path)

    email = _EMAIL_RE.search(text)
    phone = _PHONE_RE.search(text)
    experience = _EXPERIENCE_RE.search(text)

    # dict.fromkeys keeps first-seen order, so the column is deterministic and
    # "python"/"Python" collapse into a single entry.
    skills = dict.fromkeys(
        _CANONICAL_SKILL[hit.lower()] for hit in _SKILL_RE.findall(text)
    )

    # Skip leading blank lines so the name is not reported as an empty string.
    name = next(
        (line.strip() for line in text.splitlines() if line.strip()),
        "Unknown",
    )

    return {
        "Name": name,
        "Email": email.group(0) if email else "",
        "Phone": phone.group(0) if phone else "",
        "Skills": ", ".join(skills),
        "Experience": int(experience.group(1)) if experience else 0,
        "Resume File": file_path
    }


In [None]:
# ==========================
# STEP 5: Candidate Rating
# ==========================
def rate_candidate(candidate):
    """Map a candidate's years of experience to a star-rating label.

    Reads ``candidate["Experience"]`` and returns the label of the first tier
    whose upper bound is not exceeded: up to 1 year is Beginner, up to 4
    years is Intermediate, anything above is Experienced.
    """
    years = candidate["Experience"]
    tiers = (
        (1, "Beginner (1⭐)"),
        (4, "Intermediate (2⭐)"),
    )
    for upper_bound, label in tiers:
        if years <= upper_bound:
            return label
    return "Experienced (3⭐)"


In [None]:
# ==========================
# STEP 6: Save to Excel
# ==========================
import pandas as pd

EXCEL_FILE = "/content/candidates.xlsx"

def save_to_excel(candidates):
    """Merge ``candidates`` into the workbook at ``EXCEL_FILE``.

    Rows already stored in the workbook are kept and the new ones are
    appended. When the same "Resume File" appears more than once, only the
    most recent row survives, so re-running the notebook over the same
    resumes does not duplicate candidates.

    Args:
        candidates: List of candidate dicts, one per spreadsheet row.
    """
    # Local import: the only other `import os` lives in the optional Gmail
    # authentication cell, which may not have been run.
    import os

    df = pd.DataFrame(candidates)
    if os.path.exists(EXCEL_FILE):
        old = pd.read_excel(EXCEL_FILE)
        df = pd.concat([old, df], ignore_index=True)
    if "Resume File" in df.columns:
        # keep="last" lets a freshly parsed row replace the stored one.
        df = df.drop_duplicates(subset="Resume File", keep="last")
    df.to_excel(EXCEL_FILE, index=False)
    print(f"✅ Saved {len(candidates)} candidates to {EXCEL_FILE}")


In [None]:
# ==========================
# STEP 7: Main Execution
# ==========================
# For GitHub submission, you can use some sample resumes already in SAVE_DIR
import glob
import os  # otherwise only imported by the optional Gmail authentication cell

# Make sure you have some sample PDF/DOCX resumes in this folder.
# Sorted so the spreadsheet rows come out in a stable order; sub-directories
# are skipped because they are not resumes.
sample_files = sorted(
    path for path in glob.glob("/content/resumes/*") if os.path.isfile(path)
)

# Parse every resume and attach its rating before saving.
candidates = []
for file in sample_files:
    data = parse_resume(file)
    data["Rating"] = rate_candidate(data)
    candidates.append(data)

save_to_excel(candidates)

# Optional: download Excel
from google.colab import files
files.download(EXCEL_FILE)
