<a href="https://colab.research.google.com/github/akash1629/AI-job-application-bot/blob/main/AI_BOT_COLAB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required Python packages
!pip install flask flask_sqlalchemy pdfminer.six spacy selenium apscheduler flask-ngrok

# Download the spaCy English model
!python -m spacy download en_core_web_sm

# Install Chromium and its driver
!apt-get update
!apt-get install -y chromium-chromedriver

# Ensure the chromedriver is in the PATH
import sys
sys.path.insert(0, '/usr/lib/chromium-browser/chromedriver')


Collecting flask_sqlalchemy
  Downloading flask_sqlalchemy-3.1.1-py3-none-any.whl.metadata (3.4 kB)
Collecting pdfminer.six
  Downloading pdfminer.six-20240706-py3-none-any.whl.metadata (4.1 kB)
Collecting selenium
  Downloading selenium-4.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting apscheduler
  Downloading APScheduler-3.11.0-py3-none-any.whl.metadata (6.4 kB)
Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.28.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downlo

In [None]:
# ================== SETUP & INSTALLATION ==================
# Install required packages and spaCy model
!pip install flask flask_sqlalchemy pdfminer.six spacy selenium apscheduler flask-ngrok
!python -m spacy download en_core_web_sm

# Install Chromium and ChromeDriver
!apt-get update
!apt-get install -y chromium-chromedriver
import sys
sys.path.insert(0, '/usr/lib/chromium-browser/chromedriver')

# Download and unzip ngrok
!wget -q -O ngrok.zip https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip -o ngrok.zip

# (Optional) Set your ngrok authtoken if you have one.
# Replace YOUR_NGROK_AUTHTOKEN with your token.
# !./ngrok authtoken YOUR_NGROK_AUTHTOKEN

# ================== START NGROK TUNNEL ==================
# Start ngrok on port 5000 in the background
get_ipython().system_raw('./ngrok http 5000 &')

import json, urllib.request
import time

# ngrok's local inspection API; it only starts answering once the agent is up.
ngrok_api_url = "http://localhost:4040/api/tunnels"

# Poll instead of sleeping a fixed 3 seconds: a single request issued before the
# agent is listening fails with URLError, and a response can arrive before any
# tunnel has been registered (empty 'tunnels' list).
public_url = None
data = {}
for _attempt in range(30):
    try:
        with urllib.request.urlopen(ngrok_api_url) as response:
            data = json.load(response)
    except (OSError, ValueError):
        # URLError is an OSError subclass; ValueError covers malformed JSON.
        data = {}
    tunnels = data.get('tunnels') or []
    if tunnels:
        # Extract the public URL from the ngrok API response
        public_url = tunnels[0]['public_url']
        break
    time.sleep(1)

if public_url is None:
    raise RuntimeError(
        "ngrok did not report a tunnel on " + ngrok_api_url
        + "; check that ./ngrok started (an authtoken may be required)."
    )
print(" * ngrok tunnel URL:", public_url)

# ================== FLASK APPLICATION CODE ==================
import os
import time
import uuid
from datetime import datetime

from flask import Flask, request, jsonify
from flask_sqlalchemy import SQLAlchemy
from werkzeug.utils import secure_filename

# Parsing libraries
import spacy
from pdfminer.high_level import extract_text

# Selenium for automation and APScheduler for scheduling tasks
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from apscheduler.schedulers.background import BackgroundScheduler

# Note: We are not using flask-ngrok here because we already started ngrok manually.

# ----------- FLASK SETUP -----------
import secrets

app = Flask(__name__)

# SQLite is used here for demonstration.
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///job_bot.db'
# Never ship a literal secret in the notebook: take it from the environment and
# otherwise generate a random per-process key.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or secrets.token_hex(32)
app.config['UPLOAD_FOLDER'] = 'uploads'

db = SQLAlchemy(app)

# Initialize spaCy model
nlp = spacy.load('en_core_web_sm')

# Create the uploads directory if it doesn't exist.
# exist_ok avoids the check-then-create race and makes re-running the cell safe.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# ----------- DATABASE MODELS -----------
class User(db.Model):
    """Account row for a person on whose behalf applications are submitted."""

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)

    # Login identifier: required and unique across all users.
    email = db.Column(
        db.String(120),
        nullable=False,
        unique=True,
    )

    # Required credential column; the model itself applies no hashing.
    password = db.Column(
        db.String(120),
        nullable=False,
    )

    # Optional profile fields.
    location = db.Column(
        db.String(100),
        nullable=True,
    )
    experience = db.Column(
        db.Integer,
        nullable=True,
    )

    # Optional filesystem path of the user's uploaded resume.
    resume_path = db.Column(
        db.String(255),
        nullable=True,
    )

class Job(db.Model):
    """A job posting that can be applied to."""

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)

    # Title and company are mandatory for every posting.
    title = db.Column(
        db.String(200),
        nullable=False,
    )
    company = db.Column(
        db.String(200),
        nullable=False,
    )

    # Optional details: where the job is and the URL used to apply.
    location = db.Column(
        db.String(100),
        nullable=True,
    )
    apply_link = db.Column(
        db.String(500),
        nullable=True,
    )

class Application(db.Model):
    """Associates one user with one job and records the application's progress."""

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)

    # Both sides of the association are required.
    user_id = db.Column(
        db.Integer,
        db.ForeignKey('user.id'),
        nullable=False,
    )
    job_id = db.Column(
        db.Integer,
        db.ForeignKey('job.id'),
        nullable=False,
    )

    # Free-form status string; new rows start out as 'Pending'.
    status = db.Column(
        db.String(50),
        default='Pending',
    )

    # Timestamp of the application; stays empty until one is recorded.
    applied_at = db.Column(
        db.DateTime,
        nullable=True,
        default=None,
    )

# Flask-SQLAlchemy 3.x resolves the engine through the active application
# context, so table creation at module level must push one explicitly;
# a bare db.create_all() raises "Working outside of application context".
with app.app_context():
    db.create_all()

# ----------- SCHEDULER SETUP -----------
# Background scheduler used to run deferred job applications.
scheduler = BackgroundScheduler()
scheduler.start()

# ----------- HELPER FUNCTIONS -----------
def parse_resume(file_path):
    """
    Parse text from the uploaded resume using PDFMiner and spaCy
    to extract potential name, email and skills.

    Args:
        file_path: Path of the resume file handed to ``extract_text``.

    Returns:
        dict with keys ``'name'`` (first PERSON entity or ``''``), ``'email'``
        (first e-mail address found in the text or ``''``), ``'phone'``
        (always ``''``; not extracted yet) and ``'skills'`` (the entries of the
        built-in skill list that occur in the text).
    """
    import re

    text = extract_text(file_path)
    doc = nlp(text)
    parsed_data = {'name': '', 'email': '', 'phone': '', 'skills': []}

    # The first PERSON entity is taken as the candidate's name.
    for ent in doc.ents:
        if ent.label_ == 'PERSON':
            parsed_data['name'] = ent.text
            break

    # The en_core_web_sm entity recognizer has no EMAIL label, so matching on
    # ent.label_ never fires; find the address with a pattern on the raw text.
    email_match = re.search(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}', text)
    if email_match:
        parsed_data['email'] = email_match.group(0)
    # Optionally add regex for phone numbers here

    # Simple skill detection
    tokens = [token.text.lower() for token in doc if not token.is_stop]
    searchable_text = " ".join(tokens)
    common_skills = ["python", "sql", "tableau", "power bi", "machine learning", "excel"]
    # Require non-word characters around the skill so that "excel" does not
    # match inside "excellent" and "sql" does not match inside "mysql".
    matched_skills = [
        skill for skill in common_skills
        if re.search(r'(?<!\w)' + re.escape(skill) + r'(?!\w)', searchable_text)
    ]
    parsed_data['skills'] = matched_skills
    return parsed_data

def scrape_jobs(keyword, location):
    """
    Return a fixed list of placeholder job postings.

    ``keyword`` is accepted but not used; ``location`` is copied into every
    returned posting. Replace with actual API calls or Selenium-based scraping
    as needed.
    """
    # (title, company, apply link) for each placeholder posting.
    placeholders = (
        ('Data Scientist', 'ABC Corp', 'https://www.example.com/apply/123'),
        ('Supply Chain Analyst', 'XYZ Inc', 'https://www.example.com/apply/456'),
    )
    return [
        {
            'title': title,
            'company': company,
            'location': location,
            'apply_link': link,
        }
        for title, company, link in placeholders
    ]

def apply_to_job(apply_link, resume_path):
    """
    Open the application page in headless Chrome and simulate applying.

    Errors raised while the page is being handled are printed rather than
    propagated, and the browser is always shut down afterwards.
    Note: Actual implementations will vary depending on the site.
    """
    browser_flags = Options()
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        browser_flags.add_argument(flag)

    driver = webdriver.Chrome(options=browser_flags)
    try:
        driver.get(apply_link)
        # Fixed pause after navigation before the (simulated) form handling.
        time.sleep(2)
        # Simulate application process here.
        print(f"Simulating applying to {apply_link} with resume {resume_path}...")
    except Exception as e:
        print(f"Error applying to {apply_link}: {str(e)}")
    finally:
        driver.quit()

def schedule_job_application(user_id, job_id, run_time):
    """
    Register a one-off scheduler job that applies to ``job_id`` for ``user_id``
    at ``run_time``.

    The scheduler job id is derived from both ids, and an existing job with the
    same id is replaced.
    """
    job_name = f"apply_job_{user_id}_{job_id}"
    job_spec = {
        'func': execute_application,
        'trigger': 'date',
        'run_date': run_time,
        'args': [user_id, job_id],
        'id': job_name,
        'replace_existing': True,
    }
    scheduler.add_job(**job_spec)
    print(f"Scheduled application job: {job_name} at {run_time}")

def execute_application(user_id, job_id):
    """
    Perform the job application for a user and record the result.

    Called both directly from a request handler and by the background
    scheduler. The scheduler invokes it on its own thread, where no Flask
    application context exists, so one is pushed here when needed; without it
    every model query fails with "Working outside of application context".

    Args:
        user_id: Primary key of the applying ``User``.
        job_id: Primary key of the target ``Job``.

    Returns:
        None. Prints a message and returns early when either row is missing.
    """
    from contextlib import nullcontext
    from flask import has_app_context

    # Reuse the caller's context (and therefore its db session) when there is
    # one; only push a fresh context when running outside of Flask.
    context = nullcontext() if has_app_context() else app.app_context()
    with context:
        user = db.session.get(User, user_id)
        job = db.session.get(Job, job_id)
        if not user or not job:
            print(f"Cannot apply. User or Job not found (user_id={user_id}, job_id={job_id}).")
            return

        apply_to_job(job.apply_link, user.resume_path)

        # One timestamp for both the stored record and the log line.
        applied_at = datetime.now()
        application = Application.query.filter_by(user_id=user_id, job_id=job_id).first()
        if application:
            application.status = "Applied"
            application.applied_at = applied_at
        else:
            db.session.add(
                Application(user_id=user_id, job_id=job_id, status='Applied', applied_at=applied_at)
            )
        db.session.commit()
        # Printed inside the context so the loaded rows are still attached.
        print(f"Applied for {job.title} on behalf of {user.email} at {applied_at}")

# ----------- ROUTES -----------
@app.route('/register', methods=['POST'])
def register():
    """
    Register a new user.
    In production, remember to hash passwords.

    Expects a JSON object with ``email`` and ``password`` (required) and
    ``location`` / ``experience`` (optional).

    Returns:
        201 on success; 400 when the body is not a JSON object, when email or
        password is missing, or when the email is already registered.
    """
    # silent=True yields None instead of aborting on a missing/invalid JSON
    # body; a non-object body (list, string, null) would otherwise crash on
    # .get() and surface as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    email = data.get('email')
    password = data.get('password')
    location = data.get('location')
    experience = data.get('experience')
    if not email or not password:
        return jsonify({'error': 'Email and password required'}), 400
    if User.query.filter_by(email=email).first():
        return jsonify({'error': 'Email already exists'}), 400
    # NOTE(review): the password is persisted exactly as received (plaintext).
    user = User(email=email, password=password, location=location, experience=experience)
    db.session.add(user)
    db.session.commit()
    return jsonify({'message': 'User registered successfully'}), 201

@app.route('/login', methods=['POST'])
def login():
    """
    Simple login endpoint.

    Expects a JSON object with ``email`` and ``password``.

    Returns:
        200 with the user's id when the credentials match, 401 when they do
        not, and 400 when the body is not a JSON object.
    """
    import hmac

    # A missing, malformed or non-object JSON body must not reach .get().
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    email = data.get('email')
    password = data.get('password')
    user = User.query.filter_by(email=email).first()
    # NOTE(review): passwords are stored and compared in plaintext; the
    # constant-time comparison only avoids leaking match length via timing.
    credentials_ok = (
        user is not None
        and isinstance(password, str)
        and hmac.compare_digest(str(user.password).encode('utf-8'), password.encode('utf-8'))
    )
    if credentials_ok:
        return jsonify({'message': 'Login successful', 'user_id': user.id}), 200
    else:
        return jsonify({'error': 'Invalid credentials'}), 401

@app.route('/upload_resume', methods=['POST'])
def upload_resume():
    """
    Upload and parse a resume, then store its file path for the user.

    Expects multipart form data with ``user_id`` and a ``resume`` file.

    Returns:
        200 with the parsed data on success; 400 for an unknown user, a missing
        file, or a file that cannot be parsed.
    """
    user_id = request.form.get('user_id')
    user = User.query.get(user_id)
    if not user:
        return jsonify({'error': 'Invalid user ID'}), 400
    file = request.files.get('resume')
    if not file:
        return jsonify({'error': 'No resume file uploaded'}), 400
    filename = secure_filename(file.filename)
    # Prefix with a UUID so uploads with the same name never overwrite each other.
    unique_filename = str(uuid.uuid4()) + "_" + filename
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], unique_filename)
    file.save(file_path)
    try:
        parsed_data = parse_resume(file_path)
    except Exception as e:
        # The upload is untrusted: an unreadable or non-PDF file must produce a
        # client error, not a 500, and must not leave an orphaned file behind.
        print(f"Error parsing resume {file_path}: {str(e)}")
        try:
            os.remove(file_path)
        except OSError:
            # Best-effort cleanup; the error response below is what matters.
            pass
        return jsonify({'error': 'Could not parse the uploaded resume'}), 400
    user.resume_path = file_path
    db.session.commit()
    return jsonify({'message': 'Resume uploaded successfully', 'parsed_data': parsed_data}), 200

@app.route('/search_jobs', methods=['GET'])
def search_jobs_route():
    """
    Look up jobs for a user and persist any that are not stored yet.

    Query parameters: ``user_id`` (required) and ``keyword`` (optional).
    Responds with the stored representation of every job found, or 400 when
    the user id does not resolve to a user.
    """
    user_id = request.args.get('user_id')
    keyword = request.args.get('keyword', '')
    user = User.query.get(user_id)
    if not user:
        return jsonify({'error': 'Invalid user ID'}), 400

    job_fields = ('title', 'company', 'location', 'apply_link')
    saved_jobs = []
    for listing in scrape_jobs(keyword, user.location):
        attributes = {field: listing[field] for field in job_fields}
        # A job is considered already stored only if all four fields match.
        record = Job.query.filter_by(**attributes).first()
        if record is None:
            record = Job(**attributes)
            db.session.add(record)
            db.session.commit()
        saved_jobs.append(record)

    response_jobs = []
    for sj in saved_jobs:
        response_jobs.append({
            'job_id': sj.id,
            'title': sj.title,
            'company': sj.company,
            'location': sj.location,
            'apply_link': sj.apply_link,
        })
    return jsonify({'jobs': response_jobs}), 200

@app.route('/apply_job', methods=['POST'])
def apply_job():
    """
    Immediately apply to a job or schedule an application for later.
    Expected JSON Body:
    {
      "user_id": 1,
      "job_id": 2,
      "schedule_time": "2025-01-20 10:00:00" (optional)
    }

    Returns:
        200 when the application was executed or scheduled; 400 when the body
        is not a JSON object, the user or job is unknown, an application
        already exists, or ``schedule_time`` is malformed.
    """
    # A missing, malformed or non-object JSON body must not reach .get().
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    user_id = data.get('user_id')
    job_id = data.get('job_id')
    schedule_time_str = data.get('schedule_time')  # Optional scheduling
    user = User.query.get(user_id)
    job = Job.query.get(job_id)
    if not user or not job:
        return jsonify({'error': 'Invalid user or job ID'}), 400
    # Check for an existing application record
    application = Application.query.filter_by(user_id=user_id, job_id=job_id).first()
    if application:
        return jsonify({'error': 'Already applied or pending application'}), 400

    # Validate the timestamp BEFORE persisting anything. Otherwise a malformed
    # schedule_time leaves a 'Pending' row behind and every retry is rejected
    # as "Already applied or pending application".
    schedule_time = None
    if schedule_time_str:
        try:
            schedule_time = datetime.strptime(schedule_time_str, "%Y-%m-%d %H:%M:%S")
        except (TypeError, ValueError):
            # TypeError covers non-string values such as numbers.
            return jsonify({'error': 'Invalid date/time format. Use YYYY-MM-DD HH:MM:SS'}), 400

    # Create a new pending application record
    new_app = Application(user_id=user_id, job_id=job_id, status='Pending')
    db.session.add(new_app)
    db.session.commit()

    if schedule_time is not None:
        schedule_job_application(user_id, job_id, schedule_time)
        return jsonify({'message': f'Application scheduled at {schedule_time_str}'}), 200
    execute_application(user_id, job_id)
    return jsonify({'message': 'Application executed immediately'}), 200

@app.route('/application_status', methods=['GET'])
def application_status():
    """
    List every application of a user together with its job details.
    Example: /application_status?user_id=1

    Responds with 400 when the user id does not resolve to a user.
    """
    user_id = request.args.get('user_id')
    user = User.query.get(user_id)
    if not user:
        return jsonify({'error': 'Invalid user ID'}), 400

    def describe(entry):
        """Build the response dict for one application row."""
        job = Job.query.get(entry.job_id)
        if job:
            title, company = job.title, job.company
        else:
            # The referenced job row could not be loaded.
            title, company = 'Unknown', 'Unknown'
        if entry.applied_at:
            applied = entry.applied_at.strftime('%Y-%m-%d %H:%M:%S')
        else:
            applied = None
        return {
            'application_id': entry.id,
            'job_title': title,
            'company': company,
            'status': entry.status,
            'applied_at': applied,
        }

    result = [describe(a) for a in Application.query.filter_by(user_id=user.id).all()]
    return jsonify({'applications': result}), 200

# ----------- MAIN -----------
if __name__ == '__main__':
    # Run the Flask app; visit the ngrok public URL printed earlier to access your endpoints.
    # Debug mode stays off: the server is published through a public ngrok
    # tunnel, and Werkzeug's interactive debugger lets anyone who reaches it
    # execute code. The reloader is disabled as well because it restarts the
    # process, which does not work from inside a notebook kernel.
    # Port 5000 is stated explicitly because that is the port ngrok forwards.
    app.run(port=5000, debug=False, use_reloader=False)


Collecting en-core-web-sm==3.7.1
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backpor

URLError: <urlopen error [Errno 99] Cannot assign requested address>