In [None]:
!pip install pandas google-colab scikit-learn PyPDF2 docx fake_useragent requests beautifulsoup4

In [None]:
# Import needed packages

import os
from urllib.parse import urlencode

import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.colab import drive
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Mount Google Drive
drive.mount('/content/drive')

In [None]:
# Manual Profile Input - Edit These!
profile = dict(
    # Skills that each job description is scored against.
    skills=['Python', 'TensorFlow', 'ML', 'ROS'],
    # Every title is searched separately on the job portal.
    job_titles=['Data Scientist', 'ML Engineer', 'Robotics Analyst'],
    # Comma-separated string; it is split into one search per location.
    location='Remote, Bangalore, Mumbai',
    # Not read by the code visible in this notebook section.
    experience='Entry, 0-2 years',
)

# Example Stub for Job Fetching (to be elaborated for each portal)
def fetch_jobs_from_portal(title, location):
    """Fetch job postings for one title/location pair (stub).

    Prepares the Indeed search URL and request headers for the query, but
    does not send any request yet: the scraping step is still to be written,
    so the returned list is currently always empty.

    Args:
        title: Job title to search for, e.g. 'Data Scientist'.
        location: Location to search in, e.g. 'Remote'.

    Returns:
        A new list of job dictionaries (empty until scraping is implemented).
    """
    # URL-encode the query so spaces and reserved characters ('&', '+', '#')
    # in a title or location cannot corrupt the query string.
    query = urlencode({'q': title, 'l': location, 'fromage': 1})
    search_url = f"https://www.indeed.com/jobs?{query}"
    headers = {'User-Agent': 'Mozilla/5.0'}
    jobs = []
    # Requests + BS4 scrape here, parse listing cards for title, company, location, desc, etc.
    # Append dictionaries to jobs
    return jobs

# Batch search by job titles & locations
# Pool the postings returned for every (job title, location) combination.
locations = [place.strip() for place in profile['location'].split(',')]
all_jobs = []
for title in profile['job_titles']:
    query_title = title.strip()
    for loc in locations:
        all_jobs += fetch_jobs_from_portal(query_title, loc)

# Relevance Filtering & Ranking - NLP scoring
def score_jobs(jobs, profile_skills):
    """Rank job postings by text similarity to the profile's skills.

    A TF-IDF vocabulary is fitted on the job descriptions plus the individual
    skills. Each description is then compared with the space-joined skills
    using cosine similarity.

    Args:
        jobs: List of job dictionaries, each with a 'description' string.
            Every dictionary is updated in place with a 'match_score' key
            (similarity multiplied by 100, rounded to two decimals).
        profile_skills: Iterable of skill strings.

    Returns:
        A new list holding the same job dictionaries, highest 'match_score'
        first; jobs with equal scores keep their input order.

    Raises:
        KeyError: If a job dictionary has no 'description' key.
    """
    if not jobs:
        # Nothing to rank. Returning early also avoids fitting the vectorizer
        # on an empty corpus, which fails when there are no skills either.
        return []

    skills = list(profile_skills)
    descs = [job['description'] for job in jobs]
    vectorizer = TfidfVectorizer().fit(descs + skills)
    profile_vec = vectorizer.transform([' '.join(skills)])

    # Vectorize and compare all descriptions in one call rather than one
    # transform + similarity call per job; row 0 holds one score per job.
    similarities = cosine_similarity(profile_vec, vectorizer.transform(descs))[0]
    for job, similarity in zip(jobs, similarities):
        # Store a plain float rather than a NumPy scalar.
        job['match_score'] = round(float(similarity) * 100, 2)

    return sorted(jobs, key=lambda job: job['match_score'], reverse=True)

# Score every collected posting against the profile skills (best match first)
# and load the result into a DataFrame for export.
scored_jobs = score_jobs(all_jobs, profile['skills'])
jobs_df = pd.DataFrame(scored_jobs)

# Write to Google Drive. to_csv does not create missing parent folders, so
# make sure the target directory exists before writing.
output_path = '/content/drive/MyDrive/JobMatches/job_matches.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
jobs_df.to_csv(output_path, index=False)

print(f"Jobs saved to {output_path}")
