In [1]:
import requests
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import pandas as pd

In [2]:
role_data = {
    "Software Development": [
        "Experienced in developing web applications using Python and Django.",
        "Proficient in Java and object-oriented programming concepts.",
        "Familiar with version control systems like Git.",
        "Skilled in front-end technologies such as HTML, CSS, and JavaScript.",
        "Strong problem-solving skills and ability to work in agile teams.",
        "Developed mobile applications using React Native framework.",
        "Worked on optimizing database queries for performance improvement.",
        "Knowledge of software design patterns and architectural principles.",
        "Contributed to open-source projects and participated in code reviews.",
        "Experience with cloud platforms like AWS and Azure.",
        "Designed and implemented RESTful APIs for microservices architecture."
    ],
    "Data Science": [
        "Expertise in data preprocessing and cleaning using pandas and NumPy.",
        "Experience in building machine learning models for classification.",
        "Proficient in data visualization using libraries like Matplotlib and Seaborn.",
        "Familiarity with natural language processing techniques.",
        "Strong foundation in statistics and hypothesis testing.",
        "Used scikit-learn to perform feature selection and model evaluation.",
        "Participated in Kaggle competitions to apply machine learning concepts.",
        "Implemented collaborative filtering for a recommendation system.",
        "Utilized TensorFlow for building and training neural networks."
    ],
    "Machine Learning Engineer": [
        "Built and deployed machine learning models for predictive analytics.",
        "Strong understanding of neural networks and deep learning algorithms.",
        "Experience with frameworks like TensorFlow and PyTorch.",
        "Developed algorithms for natural language processing tasks.",
        "Deployed models using Docker containers and Kubernetes."
    ],
    "Data Analyst": [
        "Performed exploratory data analysis on large datasets.",
        "Generated insights from data visualizations and statistical analyses.",
        "Created interactive dashboards using tools like Tableau.",
        "Experience in data cleaning, transformation, and validation.",
        "Strong SQL skills for data querying and analysis."
    ],
    "Front-End Developer": [
        "Developed responsive and user-friendly web interfaces.",
        "Proficient in HTML, CSS, and JavaScript frameworks like React.",
        "Designed and implemented interactive features.",
        "Collaborated closely with UX designers and backend developers."
    ],
    "Backend Developer": [
        "Designed and implemented server-side logic and APIs.",
        "Experience with database management and optimization.",
        "Strong programming skills in languages like Java and Python.",
        "Worked on authentication and security aspects of applications."
    ],
    "Full Stack Developer": [
        "Proficient in both frontend and backend development.",
        "Built end-to-end web applications from design to deployment."
    ],
    "RPA Developer": [
        "Developed and maintained robotic process automation workflows.",
        "Automated repetitive tasks using RPA tools like UiPath.",
        "Experience in creating bots for data entry and validation."
    ]
}

In [3]:
from sklearn.model_selection import train_test_split

# Flatten the role_data dictionary into a list of (role, sentence) tuples
flattened_data = [(role, sentence) for role, sentences in role_data.items() for sentence in sentences]

# Separate data into sentences and topics
sentences = [row[1] for row in flattened_data]
roles = [row[0] for row in flattened_data]

# Split the data into training and testing sets
train_sentences, test_sentences, train_roles, test_roles = train_test_split(
    sentences, roles, test_size=0.2, random_state=42
)

from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(max_features=1000)  # Adjust max_features as needed
X_train = vectorizer.fit_transform(train_sentences)
X_test = vectorizer.transform(test_sentences)

from sklearn.svm import SVC  # Support Vector Classifier

classifier = SVC(kernel='linear')
classifier.fit(X_train, train_roles)

from sklearn.metrics import accuracy_score, classification_report

predictions = classifier.predict(X_test)

accuracy = accuracy_score(test_roles, predictions)
print("Accuracy:", accuracy)

report = classification_report(test_roles, predictions)
print("Classification Report:\n", report)

Accuracy: 0.1111111111111111
Classification Report:
                            precision    recall  f1-score   support

        Backend Developer       0.00      0.00      0.00         3
             Data Analyst       0.00      0.00      0.00         1
             Data Science       0.33      1.00      0.50         1
     Full Stack Developer       0.00      0.00      0.00         1
Machine Learning Engineer       0.00      0.00      0.00         1
     Software Development       0.00      0.00      0.00         2

                 accuracy                           0.11         9
                macro avg       0.06      0.17      0.08         9
             weighted avg       0.04      0.11      0.06         9



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
resume_text = {'name': 'John Doe', 'skills': 'Strong SQL skills, Built machine learning models for customer segmentation.', 'experience': '3 years'}
skill = resume_text['skills']

In [5]:
l1 = skill.split(",")
l1

['Strong SQL skills',
 ' Built machine learning models for customer segmentation.']

In [6]:
new_X = vectorizer.transform(l1)
new_predictions = classifier.predict(new_X)
print(new_predictions)

['Data Analyst' 'Machine Learning Engineer']


In [None]:
for role in new_predictions:
  Role=role
  driver=webdriver.Chrome()
  URL=f'https://in.indeed.com/jobs?q={Role}'
  driver.get(URL)
  time.sleep(3)
  soup=BeautifulSoup(driver.page_source,"html.parser")
  driver.close()
  df=pd.DataFrame(columns=['Title','Company','Ratings','Salary','Location','Post_History'])
  results=soup.find(class_='mosaic mosaic-provider-jobcards mosaic-provider-hydrated')
  job_elems = results.find_all('div',class_='slider_container')
  for job_elem in job_elems:
    print(job_elem.prettify())
  for job_elem in job_elems:
    # Post Title
    Title = job_elem.find('div',class_='heading4 color-text-primary singleLineTitle tapItem-gutter')
    Title=Title.text

    # Company Name
    Comp = job_elem.find('div',class_='heading6 company_location tapItem-gutter')
    Company = Comp.find('span',class_='companyName').text

    #Ratings
    rating = job_elem.find('div',class_='heading6 company_location tapItem-gutter')
    rating_span = rating.find('span',class_='ratingNumber')
    if rating_span is None:
        Ratings = "NA"
    else:
        Ratings = rating_span.text

    # Salary offered for the job
    Sal_span = job_elem.find('div',class_='salary-snippet')
    if Sal_span is None:
        Salary = "NA"
    else:
        Salary = Sal_span.text

    # Location for the job post
    Loc = job_elem.find('div',class_='heading6 company_location tapItem-gutter')
    Loc_exp = Loc.find('div',class_='companyLocation')
    if Loc_exp is None:
        Location = "NA"
    else:
        Location = Loc_exp.text

    # Number of days since job posted
    Hist = job_elem.find('div','heading6 tapItem-gutter result-footer')
    Post_Hist = Hist.find('span',class_='date')
    if Post_Hist is None:
        Post_History = "NA"
    else:
        Post_History = Post_Hist.text

    if Title[0:3]=='new':
        Title=Title[3:]

    df=df.append({'Title':Title,'Company':Company,'Salary':Salary,'Location':Location,'Post_History':Post_History,'Ratings':Ratings},ignore_index = True)
    print(df)