In [1]:
import pandas as pd

In [2]:
# Load the jobs dataset from a CSV path relative to the working directory,
# then display the resulting frame as the cell output.
df = pd.read_csv("jobs_dataset_with_features.csv")
df


Unnamed: 0,Role,Features
0,Social Media Manager,5 to 15 Years Digital Marketing Specialist M.T...
1,Frontend Web Developer,"2 to 12 Years Web Developer BCA HTML, CSS, Jav..."
2,Quality Control Manager,0 to 12 Years Operations Manager PhD Quality c...
3,Wireless Network Engineer,4 to 11 Years Network Engineer PhD Wireless ne...
4,Conference Manager,1 to 12 Years Event Manager MBA Event planning...
...,...,...
1615935,Mechanical Design Engineer,0 to 12 Years Mechanical Engineer B.Tech Mecha...
1615936,IT Director,2 to 14 Years IT Manager M.Tech Strategic IT p...
1615937,Mechanical Design Engineer,4 to 15 Years Mechanical Engineer BCA Mechanic...
1615938,Training Coordinator,5 to 15 Years HR Coordinator BCA Training prog...


In [3]:
# List the column labels of the loaded frame.
df.columns

Index(['Role', 'Features'], dtype='object')

In [4]:
# Drop every role that occurs fewer than `min_count` times in the dataset.
min_count = 6500
role_counts = df['Role'].value_counts()
dropped_classes = role_counts.loc[role_counts < min_count].index
filtered_df = (
    df.loc[~df['Role'].isin(dropped_classes)]
    .reset_index(drop=True)
)

# Per-role row counts after the filter
filtered_df['Role'].value_counts()

Role
Interaction Designer          20580
Network Administrator         17470
User Interface Designer       14036
Social Media Manager          13945
User Experience Designer      13935
                              ...  
Benefits Coordinator           6839
Research Analyst               6830
Administrative Coordinator     6803
IT Support Specialist          6799
UI/UX Designer                 6743
Name: count, Length: 61, dtype: int64

In [5]:
# Number of distinct roles that survived the filter
filtered_df['Role'].nunique()


61

In [6]:
# Work on a 10,000-row random subsample of the filtered data.
# The fixed seed makes the subsample (and everything trained on it) identical
# on every Restart & Run All; without it each run drew different rows.
# NOTE: this rebinds `df`, so re-running the filtering cell after this one
# would filter the subsample instead of the full dataset.
df = filtered_df.sample(n=10000, random_state=42)


In [7]:
# Preview the first rows of the sampled frame.
df.head()


Unnamed: 0,Role,Features
220334,Backend Developer,0 to 12 Years Software Engineer B.Com Proficie...
255545,Network Administrator,3 to 11 Years Network Engineer B.Tech Network ...
66775,Water Resources Engineer,0 to 13 Years Civil Engineer B.Tech Water reso...
239244,Market Research Analyst,5 to 12 Years Research Analyst BA Market resea...
408379,Procurement Manager,0 to 10 Years Procurement Specialist BBA Procu...


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# The free-text 'Features' column is the model input; 'Role' is the label to predict
X, y = df['Features'], df['Role']

# Hold out 20% of the rows for evaluation (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the TF-IDF vocabulary on the training text only, then reuse it for the test text
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [9]:
# RandomForestClassifier
# Seeded with the same value as the train/test split so the fitted forest and
# the reported score are repeatable across runs.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train_tfidf, y_train)

# Predictions on the held-out split
y_pred = rf_classifier.predict(X_test_tfidf)

# Accuracy
# NOTE(review): the recorded run reports 1.0; before trusting it, check whether
# identical 'Features' texts appear in both the train and the test split.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [11]:
# Clean resume
import re
def cleanResume(txt):
    """Strip URLs, hashtags, mentions, punctuation and non-ASCII characters from text.

    Args:
        txt: Raw resume text.

    Returns:
        The text with every removed span replaced by a space and each run of
        whitespace collapsed to a single space. Leading/trailing spaces are
        not stripped.
    """
    # URLs: no trailing-whitespace requirement, so a link at the very end of
    # the text is removed as well.
    cleanText = re.sub(r'http\S+', ' ', txt)
    # Only standalone "RT" / "cc" markers; the word boundaries keep words such
    # as "account" or "success" intact.
    cleanText = re.sub(r'\b(?:RT|cc)\b', ' ', cleanText)
    # Hashtags, including one that ends the text.
    cleanText = re.sub(r'#\S+', ' ', cleanText)
    # Mentions (this also removes the domain part of e-mail addresses).
    cleanText = re.sub(r'@\S+', '  ', cleanText)
    # Raw literal: same characters as before, without the invalid "\]" escape warning.
    cleanText = re.sub(r'[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    # Anything outside the ASCII range.
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    # Collapse the gaps left by the substitutions above.
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText


# Prediction and Category Name
def job_recommendation(resume_text):
    """Predict the job role for a resume.

    The text is passed through ``cleanResume``, vectorised with the fitted
    ``tfidf_vectorizer`` and classified by ``rf_classifier``.

    Args:
        resume_text: Raw resume text.

    Returns:
        The first (and only) label returned by ``rf_classifier.predict``.
    """
    cleaned = cleanResume(resume_text)
    # The vectorizer is given a one-element list, i.e. a single document.
    features = tfidf_vectorizer.transform([cleaned])
    return rf_classifier.predict(features)[0]

  cleanText = re.sub(r'[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)


In [16]:
# Example Usage
resume_file = """John Doe
Address: 123 Finance Avenue, Suite 101, Cityville, State 12345
Phone: (123) 456-7890
Email: john.doe@email.com
LinkedIn: linkedin.com/in/johndoe

Professional Summary
Highly motivated and results-driven banking professional with over 5 years of experience in retail and corporate banking. Expertise in client relationship management, risk assessment, loan processing, and regulatory compliance. Strong problem-solving abilities with a focus on delivering exceptional customer service and driving operational efficiency. Seeking to leverage my financial expertise in a dynamic banking environment.

Professional Experience
Senior Banking Officer
ABC Bank, Cityville, State
June 2020 – Present

Manage a portfolio of 200+ high-value retail and corporate clients, ensuring optimal client satisfaction and retention.
Lead the loan processing team, approving loan applications worth over $10 million annually.
Conduct financial analysis and creditworthiness assessments to determine risk exposure for loan applications.
Provide consultative banking services to clients, offering personalized solutions for savings, investments, and credit products.
Ensure compliance with banking regulations and oversee risk management processes to minimize financial exposure.
Achieved 15% growth in client base and a 10% increase in loan disbursements within the first year.
Banking Associate
XYZ Bank, Townsville, State
April 2017 – May 2020

Assisted in day-to-day operations of the retail banking department, including customer inquiries, account management, and transaction processing.
Processed loan applications, analyzed credit reports, and worked closely with customers to facilitate loan approvals.
Developed and maintained strong relationships with clients, providing guidance on banking services and promoting financial products.
Ensured compliance with anti-money laundering (AML) and Know Your Customer (KYC) regulations.
Reduced processing times for loan applications by 20% through streamlining workflows.
Education
Master of Business Administration (MBA) in Finance
University of Finance, Cityville, State
Graduated: May 2017

Bachelor of Science in Economics
Cityville University, Cityville, State
Graduated: May 2015

Skills
Financial Analysis and Risk Management
Loan Processing and Credit Assessment
Client Relationship Management
Regulatory Compliance (AML, KYC)
Sales of Financial Products (Mortgages, Investments, Credit Lines)
Problem Solving and Decision-Making
Microsoft Office Suite (Excel, PowerPoint, Word)
Banking Software (Core Banking, Loan Management Systems)
Certifications
Certified Financial Services Auditor (CFSA) – Institute of Internal Auditors
Anti-Money Laundering (AML) Certification – Association of Certified Financial Crime Specialists
Achievements
Received the “Employee of the Year” award at ABC Bank for outstanding performance in client management and loan portfolio growth (2022).
Successfully led a project that reduced loan application turnaround times by 15%, enhancing customer satisfaction.
Languages
English (Fluent)
Spanish (Conversational)
References
Available upon request.

"""
# Classify the sample resume defined above and print the predicted role.
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: Customer Success Manager


In [14]:
# Example Usage
resume_file = """AFFAN GOHAR 
 
 Bahawalpur,Pakistan | Artificial Intellegence 
affangohar.fg@gmail.com  |  +923017206516 |  
linkedin.com/in/affan-gohar/
 Skilled in building and deploying machine learning models with a strong emphasis on data preprocessing
 and visualization. Proficient in transforming raw data into actionable insights, optimizing algorithms, and
 integrating models into production environments. Experienced in end-to-end model development, from data
 cleaning  to model evaluation and deployment.
 SKILLS
 Python
 Data Cleaning
 AREA OF EXPERTISE
 Data Preprocessing
 Data Visualization
 Feature Engineering
 Model Deployment
 Data Science:
 Proficient in data analysis, statistical modeling, and data visualization.
 Experienced in developing predictive models and interpreting data insights.
 Machine Learning:
 Skilled in designing, training, and evaluating machine learning models.
 Knowledgeable in supervised and unsupervised learning techniques.
 Proficient in feature engineering and model optimization to enhance predictive accuracy.
 PROJECTS
 Survival Prediction on Titanic Dataset
 Conducted an analysis and built a machine learning model to predict passenger survival on the
 Titanic using the well-known Titanic dataset.
 The project involved thorough data cleaning, including handling missing values and encoding
 categorical variables, followed by  to enhance model performance. 
Diabetes Prediction using Machine Learning
 Designed and implemented a machine learning model to predict the onset of diabetes based on
 patient health metrics.
 The project involved extensive data cleaning,  and the application of
 classification algorithms
 Heart Disease Prediction using Machine Learning
 Developed a machine learning model to predict the likelihood of heart disease based on patient
 data.
 The project involved extensive data cleaning,  and the application of
 classification algorithms
 Movie Recommendation System Using Netflix Data
 Developed a movie recommendation system utilizing Netflix data to suggest personalized content to
 users. 
The project involved text analysis and feature extraction from movie descriptions, reviews, and
 metadata.
 House Price Prediction using Machine Learning
 Developed a comprehensive predictive model to estimate house prices using a variety of regression
 algorithms, including linear regression, decision trees, random forest, and gradient boosting.
EDUCATION
 Bachelor of Science in Artificial Inteligence
 The Islamia University of Bahawalpur
 Intermediate in Computer Science (ICS)
 Punjab Group of Colleges
 Matriculation in Computer Science
 FG Abbas Public School
 PROFESSIONAL CERTIFICATIONS
 Feb 2024 -  Mar 2028
 May 2020 -  Jun 2023
 Apr 2018 - April  2020
 Python
 Issued by Kaggle
 Acquired hands-on experience in Python programming, focusing on practical coding skills and
 problem-solving techniques.
 Python for Data Science
 Issued by Codanics
 Gained proficiency in Python programming specifically tailored for data science, including data
 manipulation, analysis, and visualization techniques.
 Google Soft Skills Communication
 Issued by Google
 Completed modules on networking, effective communication, problem solving, critical thinking, and
 time management, enhancing overall professional interaction and personal efficiency.
 Introduction to AI for Youth
 Issued by Intel and The Commonwealth
 Completed a comprehensive program on artificial intelligence fundamentals, including machine
 learning concepts, AI applications, and ethical considerations.
 ADDITIONAL INFORMATION
 Libraries: Pandas, Numpy, Matplotlib, Seaborn, Sci-kit Learn
 Languages: English, Urdu
 REFERENCES ARE AVAILABLE ON REQUEST
"""
# Classify the sample resume defined above and print the predicted role.
# NOTE(review): the resume literal in this cell embeds what look like real
# contact details (e-mail, phone number); consider redacting before sharing.
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: Data Scientist


In [15]:
import pickle
# Persist the fitted classifier and vectorizer next to the notebook.
# The context managers close each file as soon as it is written; the previous
# inline open() calls left both handles open for the garbage collector.
# NOTE: pickle files can execute code when loaded - only load these from a
# trusted location.
with open('rf_classifier_job_recommendation.pkl', 'wb') as model_file:
    pickle.dump(rf_classifier, model_file)
with open('tfidf_vectorizer_job_recommendation.pkl', 'wb') as vectorizer_file:
    pickle.dump(tfidf_vectorizer, vectorizer_file)