In [1]:
import pandas as pd

In [2]:
# Load the jobs CSV; the path is relative, so the file must sit in the
# notebook's working directory.
df = pd.read_csv("jobs_dataset_with_features.csv")

In [3]:
# Preview the first two rows.
df.head(2)

Unnamed: 0,Role,Features
0,Social Media Manager,5 to 15 Years Digital Marketing Specialist M.T...
1,Frontend Web Developer,"2 to 12 Years Web Developer BCA HTML, CSS, Jav..."


In [4]:
# List the column labels of the loaded frame.
df.columns

Index(['Role', 'Features'], dtype='object')

In [5]:
# Number of rows per distinct value in the 'Role' column.
df['Role'].value_counts()

Role
Interaction Designer            20580
Network Administrator           17470
User Interface Designer         14036
Social Media Manager            13945
User Experience Designer        13935
                                ...  
Inventory Control Specialist     3342
Budget Analyst                   3335
Clinical Nurse Manager           3324
Social Science Researcher        3321
Paid Advertising Specialist      3306
Name: count, Length: 376, dtype: int64

In [7]:
# # Select relevant features
# selected_features = ['Experience','Job Title', 'Qualifications', 'skills', 'Responsibilities', 'Company Size',]

# # Merge selected features into one column
# df['Features'] = df[selected_features].apply(lambda x: ' '.join(x.dropna().astype(str)), axis=1)

# # Drop the individual feature columns
# df.drop(columns=selected_features, inplace=True)
# # Drop all columns except 'Role' and 'Features'
# df.drop(columns=[col for col in df.columns if col not in ['Role', 'Features']], inplace=True)
# # Save the modified dataset
# df.to_csv('jobs_dataset_with_features.csv', index=False)

In [6]:
# Re-read the same CSV that the first load cell reads; this rebinds `df` to
# the full file contents.
df = pd.read_csv("jobs_dataset_with_features.csv")

In [7]:
# (row count, column count) of the frame.
df.shape

(1615940, 2)

In [9]:
# Preview the leading rows.
df.head()

Unnamed: 0,Role,Features
0,Social Media Manager,5 to 15 Years Digital Marketing Specialist M.T...
1,Frontend Web Developer,"2 to 12 Years Web Developer BCA HTML, CSS, Jav..."
2,Quality Control Manager,0 to 12 Years Operations Manager PhD Quality c...
3,Wireless Network Engineer,4 to 11 Years Network Engineer PhD Wireless ne...
4,Conference Manager,1 to 12 Years Event Manager MBA Event planning...


In [8]:
# Remove every role that appears fewer than `min_count` times.
min_count = 6500
role_counts = df['Role'].value_counts()

# Boolean mask over the per-role counts marking the under-represented roles.
is_rare = role_counts.lt(min_count)
dropped_classes = role_counts.loc[is_rare].index

# Keep the rows whose role is not in the dropped set, then renumber from 0.
keep_rows = ~df['Role'].isin(dropped_classes)
filtered_df = df.loc[keep_rows].reset_index(drop=True)

# Show the per-role counts that remain after filtering.
filtered_df['Role'].value_counts()

Role
Interaction Designer          20580
Network Administrator         17470
User Interface Designer       14036
Social Media Manager          13945
User Experience Designer      13935
                              ...  
Benefits Coordinator           6839
Research Analyst               6830
Administrative Coordinator     6803
IT Support Specialist          6799
UI/UX Designer                 6743
Name: count, Length: 61, dtype: int64

In [9]:
# Number of distinct roles remaining in the filtered frame.
len(filtered_df['Role'].value_counts())

61

In [10]:
# Work on a 10,000-row random subset of the filtered data.
# The seed is fixed so that a fresh "Restart & Run All" draws the same rows and
# the downstream split, model and metrics are reproducible.
df = filtered_df.sample(n=10000, random_state=42)

In [11]:
df.head()

Unnamed: 0,Role,Features
226052,Supply Chain Manager,0 to 14 Years Operations Manager M.Com Supply ...
138631,Portfolio Manager,0 to 13 Years Investment Analyst MCA Investmen...
96547,Inventory Manager,5 to 12 Years Purchasing Agent MBA Inventory c...
511110,Demand Planner,1 to 14 Years Supply Chain Manager MBA Demand ...
380962,Frontend Developer,1 to 8 Years Software Engineer B.Com Proficien...


# TF-IDF

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Splitting the data into features (X) and target (y)
X = df['Features']
y = df['Role']

# Train-test split (80/20, seeded).
# stratify=y keeps each role's share the same in both partitions; the role
# frequencies are uneven, so a plain random split can skew the smaller classes
# in the 20% test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# TF-IDF vectorization: the vocabulary and IDF weights are learned from the
# training texts only, then reused unchanged to transform the test texts.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [13]:
# RandomForestClassifier
# A fixed seed makes the fitted forest (bootstrap draws and feature subsets)
# identical across runs, so the accuracy and the saved model are reproducible.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train_tfidf, y_train)

# Predictions on the held-out split
y_pred = rf_classifier.predict(X_test_tfidf)

# Accuracy
# NOTE(review): the recorded run printed 1.0. A perfect score is worth
# double-checking, e.g. that identical 'Features' texts do not occur in both
# the training and the test split.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


# Recommendation

In [14]:
# # Clean resume
# import re
# def cleanResume(txt):
#     cleanText = re.sub('http\S+\s', ' ', txt)
#     cleanText = re.sub('RT|cc', ' ', cleanText)
#     cleanText = re.sub('#\S+\s', ' ', cleanText)
#     cleanText = re.sub('@\S+', '  ', cleanText)  
#     cleanText = re.sub('[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
#     cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText) 
#     cleanText = re.sub('\s+', ' ', cleanText)
#     return cleanText


# # Prediction and Category Name
# def job_recommendation(resume_text):
#     resume_text= cleanResume(resume_text)
#     resume_tfidf = tfidf_vectorizer.transform([resume_text])
#     predicted_category = rf_classifier.predict(resume_tfidf)[0]
#     return predicted_category



import re

def cleanResume(txt):
    """Normalise raw resume text before it is vectorised.

    Steps, in order:
      1. Fold Unicode compatibility characters (e.g. the "ffi" ligature) into
         their plain equivalents.
      2. Replace URLs, standalone ``RT`` / ``cc`` markers, hashtags and
         @-mentions with a space.
      3. Replace ASCII punctuation and any remaining non-ASCII character with
         a space.
      4. Collapse every run of whitespace to a single space.

    Args:
        txt: Resume text as a ``str``.

    Returns:
        The cleaned text. A single leading or trailing space may remain; the
        result is not stripped.
    """
    import unicodedata  # local import keeps the cell self-contained

    # NFKC turns ligatures such as U+FB03 into "ffi"; without this the
    # non-ASCII step below would blank them and split words ("o ces").
    cleanText = unicodedata.normalize('NFKC', txt)
    # No trailing-whitespace requirement, so a URL at the very end is removed too.
    cleanText = re.sub(r'http\S+', ' ', cleanText)
    # Word boundaries: only the standalone markers are removed, not the letters
    # "cc"/"RT" inside ordinary words such as "accounts" or "success".
    cleanText = re.sub(r'\b(?:RT|cc)\b', ' ', cleanText)
    # Hashtags, including one that ends the text.
    cleanText = re.sub(r'#\S+', ' ', cleanText)
    # @-mentions.
    cleanText = re.sub(r'@\S+', ' ', cleanText)
    # ASCII punctuation, escaped so it is safe inside a character class.
    cleanText = re.sub(r'[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    # Anything still outside the ASCII range.
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    # Collapse whitespace runs (spaces, tabs, newlines) to one space.
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText


# Prediction and Category Name
def job_recommendation(resume_text):
    """Return the role predicted for a single resume.

    The text is cleaned with ``cleanResume``, transformed by the notebook's
    fitted ``tfidf_vectorizer`` and classified by ``rf_classifier``.

    Args:
        resume_text: Raw resume text.

    Returns:
        The first (and only) entry of the classifier's prediction for the
        one-document batch.
    """
    cleaned = cleanResume(resume_text)
    # The vectorizer is given a one-element list, i.e. a batch of one document.
    features = tfidf_vectorizer.transform([cleaned])
    predictions = rf_classifier.predict(features)
    return predictions[0]


In [15]:
# Example Usage
resume_file = """Objective:
A creative and detail-oriented Designer with a passion for visual communication and brand identity seeking opportunities to leverage design skills in a dynamic and collaborative environment.

Education:
- Bachelor of Fine Arts in Graphic Design, XYZ College, GPA: 3.7/4.0
- Diploma in Web Design, ABC Institute, GPA: 3.9/4.0

Skills:
- Proficient in Adobe Creative Suite (Photoshop, Illustrator, InDesign)
- Strong understanding of typography, layout, and color theory
- Experience in both print and digital design
- Ability to conceptualize and execute design projects from concept to completion
- Excellent attention to detail and time management skills

Experience:
Graphic Designer | XYZ Design Studio
- Created visually appealing graphics for various marketing materials, including brochures, flyers, and social media posts
- Collaborated with clients to understand their design needs and deliver creative solutions that align with their brand identity
- Worked closely with the marketing team to ensure consistency in brand messaging across all platforms

Freelance Designer
- Designed logos, branding materials, and website layouts for small businesses and startups
- Managed multiple projects simultaneously while meeting tight deadlines and maintaining quality standards
- Established and maintained strong client relationships through clear communication and exceptional service

Projects:
- Rebranding Campaign for XYZ Company: Led a team to redesign the company's logo, website, and marketing collateral, resulting in a 30% increase in brand recognition
- Packaging Design for ABC Product Launch: Developed eye-catching packaging designs for a new product line, contributing to a successful launch and positive customer feedback

Certifications:
- Adobe Certified Expert (ACE) in Adobe Illustrator
- Responsive Web Design Certification from Udemy

Languages:
- English (Native)
- Spanish (Intermediate)
"""
# Classify the resume text held in `resume_file` and print the predicted role.
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: User Interface Designer


In [18]:
# Example Usage
resume_file = """Objective:
Dedicated and results-oriented Banking professional with a strong background in financial analysis and customer service seeking opportunities to contribute to a reputable financial institution. Eager to leverage expertise in risk management, investment strategies, and relationship building to drive business growth and client satisfaction.

Education:
- Bachelor of Business Administration in Finance, XYZ University, GPA: 3.8/4.0
- Certified Financial Analyst (CFA) Level I Candidate

Skills:
- Proficient in financial modeling and analysis using Excel, Bloomberg Terminal, and other financial software
- Extensive knowledge of banking products and services, including loans, mortgages, and investment products
- Strong understanding of regulatory compliance and risk management practices in the banking industry
- Excellent communication and interpersonal skills, with a focus on building rapport with clients and colleagues
- Ability to work efficiently under pressure and adapt to changing market conditions

Experience:
Financial Analyst | ABC Bank
- Conducted financial analysis and risk assessment for corporate clients, including credit analysis, financial statement analysis, and cash flow modeling
- Developed customized financial solutions to meet clients' needs and objectives, resulting in increased revenue and client retention
- Collaborated with cross-functional teams to identify new business opportunities and optimize existing processes

Customer Service Representative | DEF Bank
- Provided exceptional customer service to bank clients, addressing inquiries, resolving issues, and promoting banking products and services
- Processed transactions accurately and efficiently, including deposits, withdrawals, and account transfers
- Educated customers on various banking products and services, helping them make informed financial decisions

Internship | GHI Investments
- Assisted portfolio managers with investment research and analysis, including industry and company-specific research, financial modeling, and performance analysis
- Prepared investment presentations and reports for clients, highlighting investment opportunities and performance metrics
- Conducted market research and analysis to identify trends and opportunities in the financial markets

Certifications:
- Certified Financial Planner (CFP)
- Series 7 and Series 63 Securities Licenses

Languages:
- English (Native)
- Spanish (Proficient)

"""
# Classify the resume text held in `resume_file` and print the predicted role.
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: Financial Analyst


In [18]:
resume_file = """Objective:
Skilled and passionate Web Developer with a strong background in front-end and back-end development, seeking an opportunity to apply my expertise in building responsive and user-friendly web applications. Eager to contribute to a dynamic team by leveraging skills in modern web technologies, problem-solving, and continuous learning.

Education:
- Bachelor of Science in Computer Science, XYZ University, GPA: 3.7/4.0
- Full Stack Web Development Certification from ABC Coding Bootcamp

Skills:
- Proficient in HTML, CSS, JavaScript, and modern frameworks like React, Angular, and Vue.js
- Strong experience with back-end technologies such as Node.js, Express, and databases like MongoDB and MySQL
- Knowledgeable in RESTful API design and integration, as well as third-party APIs
- Familiarity with DevOps practices, including Docker, CI/CD, and cloud services (AWS, Azure)
- Excellent problem-solving skills and a collaborative mindset, able to work well in both independent and team environments

Experience:
Front-End Developer | Tech Solutions Inc.
- Developed responsive, user-friendly web interfaces using HTML, CSS, JavaScript, and React, resulting in improved user engagement and satisfaction
- Collaborated with designers and back-end developers to ensure seamless integration of UI components
- Optimized front-end code for performance and SEO, reducing page load time by 30%

Full Stack Developer | XYZ Digital Agency
- Built full-stack web applications for clients, using Node.js, Express, and MongoDB for the backend, and React for the frontend
- Designed RESTful APIs to facilitate data interaction between front-end and back-end systems
- Worked with clients to define project requirements and delivered high-quality solutions within tight deadlines

Web Development Intern | ABC Startup
- Assisted in the development of an e-commerce platform using Shopify and custom code integrations
- Implemented UI changes and added new features to enhance user experience and functionality
- Conducted testing and debugging, identifying and resolving issues to ensure smooth performance

Certifications:
- Certified Web Developer (CWD)
- JavaScript Developer Certificate (JDC)

Languages:
- English (Native)
- French (Conversational)

"""
# Classify the resume text held in `resume_file` and print the predicted role.
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)


Predicted Category: Frontend Developer


In [20]:
resume_file = """Objective:
Detail-oriented and experienced Backend Developer with a strong foundation in server-side programming and database management. Adept at designing scalable, secure, and efficient back-end systems to support high-performance web applications. Seeking a challenging role to utilize my technical skills and contribute to innovative solutions.

Education:
- Bachelor of Science in Computer Science, XYZ University, GPA: 3.7/4.0
- Full Stack Web Development Certification from ABC Coding Bootcamp

Skills:
- Proficient in back-end technologies such as Node.js, Express, Django, and Flask
- Strong experience with databases like MongoDB, MySQL, PostgreSQL, and Redis
- Skilled in RESTful API design and development, including GraphQL integrations
- Familiarity with microservices architecture and message brokers like RabbitMQ and Kafka
- Expertise in DevOps practices, including Docker, Kubernetes, CI/CD pipelines, and cloud platforms (AWS, Azure, GCP)
- Excellent debugging and problem-solving abilities with a focus on performance optimization

Experience:
Backend Developer | Tech Solutions Inc.
- Designed and implemented RESTful APIs and microservices to support scalable web applications
- Managed database schemas, optimized queries, and implemented data security protocols
- Worked with front-end teams to ensure smooth data flow and integration
- Improved server performance, reducing API response times by 25%

Backend Developer | XYZ Digital Agency
- Developed robust server-side solutions using Node.js, Express, and PostgreSQL
- Designed and maintained scalable architectures to handle increased user loads
- Collaborated with clients to define system requirements and deliver tailored solutions
- Integrated third-party APIs to extend functionality for client projects

Backend Development Intern | ABC Startup
- Assisted in developing back-end logic for an e-commerce platform using Python and Django
- Optimized database queries to enhance performance for large-scale datasets
- Conducted system testing and deployed updates to production environments

Certifications:
- Certified Backend Developer (CBD)
- Advanced Database Management Certificate (ADMC)

Languages:
- English (Native)
- French (Conversational)
"""

# Classify the resume text held in `resume_file` and print the predicted role.
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)


Predicted Category: Backend Developer


In [23]:
import re

text = r"""
First Lasn DevOps Engineer WORK EXPERIENCE ______________________________________________________________________ Resume Worded London United Kingdom Career training and membership SaaS with 150000 paying users DevOps Engineer 08 2021 Present Reviewed all AWS accounts and environments to avoid additional costs decreasing 63 of quarterly bills Maintained the IT infrastructure VPN and MPLS connections between 80 branch oﬃces and 30 security systems Collaborated with 20 IT support team members to troubleshoot and install devices hardware failures and network related problems Developed a tool that enabled 250 developers to deploy virtual machines within 45 seconds against 10 minutes recorded in previous years Polyhire London United Kingdom NYSE listed recruitment and employer branding company System Admin 10 2019 07 2021 Reduced programming time to 20 days against 15 weeks by devising a modular framework for creating backend code in Python and Django Analyzed user errors and corrected 2400 lines of problematic codes reducing computer system downtime by 74 YoY Transformed the web server of Polyhire from an obsolete Apache to a LAMP stack enhancing site performance by 89 Consolidated all CRM databases into a single VM which improved accessibility and saved the company 1500 monthly Growthsi London United Kingdom Barcelona Spain Augmented reality startup with 50 employees and 100m annual revenue SQL Programmer 11 2018 09 2019 Attracted 700 weekly downloads on Bitbucket by developing an open source SQL parser including parsing trees syntax analysis and runtime error detection Designed a transaction processing functionality to handle 1M simultaneous client requests by updating a server application Created queries and shell scripts to produce oﬃce metric ﬁles and transferred them to the cloud for accessibility by 240 ﬁeld teams Streamlined a complex business process into 10K lines of code enabling users without programming backgrounds to create sales order requests via client 
applications PREVIOUS EXPERIENCE ______________________________________________________________________ Junior Software Engineer ABC Company London UK Mobile Developer XYZ Company New York USA Software Tester Internship ABC New York USA 06 2017 10 2018 01 2016 05 2017 07 2014 12 2015 CONTACT __________________________ Worcester United Kingdom 44 1234567890 ﬁrst last gmail com SKILLS __________________________ Hard Skills CI CD Pipelines Conﬁguration Management Network Administration Cloud Computing Automation Continuous Integration Techniques Agile Methodologies Virtualization Systems Administration Tools and Software Bash Shell Scripting SQL Elasticsearch Languages English Native Romanian Native Spanish Conversational EDUCATION __________________________ University of New York Associate of Applied Science Information Technology New York City New York 10 2011 06 2014 OTHER _____________________________ CompTIA Network Elastix Certiﬁed Engineer
"""

# Replace every non-word character (anything `\W` matches) with a space.
# Note that job_recommendation() runs cleanResume() on its input again, so the
# text is cleaned twice before it is vectorised.
cleaned_text = re.sub(r'\W', ' ', text)
# Prints the whole pre-cleaned resume text.
print(cleaned_text)
pred = job_recommendation(cleaned_text)
print(pred)

 First Lasn DevOps Engineer WORK EXPERIENCE ______________________________________________________________________ Resume Worded London United Kingdom Career training and membership SaaS with 150000 paying users DevOps Engineer 08 2021 Present Reviewed all AWS accounts and environments to avoid additional costs decreasing 63 of quarterly bills Maintained the IT infrastructure VPN and MPLS connections between 80 branch oﬃces and 30 security systems Collaborated with 20 IT support team members to troubleshoot and install devices hardware failures and network related problems Developed a tool that enabled 250 developers to deploy virtual machines within 45 seconds against 10 minutes recorded in previous years Polyhire London United Kingdom NYSE listed recruitment and employer branding company System Admin 10 2019 07 2021 Reduced programming time to 20 days against 15 weeks by devising a modular framework for creating backend code in Python and Django Analyzed user errors and corrected 240

In [20]:
import pickle

# Persist the fitted classifier and vectorizer to the working directory.
# The `with` blocks flush and close each file handle even if pickling raises;
# the bare open() calls used previously left both handles unclosed.
# Security note: only unpickle these files from a trusted location, because
# pickle.load can execute arbitrary code.
with open('rf_classifier_job_recommendation.pkl', 'wb') as model_file:
    pickle.dump(rf_classifier, model_file)
with open('tfidf_vectorizer_job_recommendation.pkl', 'wb') as vectorizer_file:
    pickle.dump(tfidf_vectorizer, vectorizer_file)