In [None]:
# Mount Google Drive (Colab only) so that paths under /content/drive/ used
# later in the notebook resolve to files stored on Drive.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import pandas as pd

In [None]:
# Location of the raw jobs dataset on the mounted Drive.
data_path = '/content/drive/MyDrive/7th-project/data/jobs_dataset_with_features.csv'

# Parser settings: rows that cannot be parsed are skipped rather than
# aborting the whole load, and the file is decoded as UTF-8.
read_options = dict(engine='python', on_bad_lines='skip', encoding='utf-8')
df = pd.read_csv(data_path, **read_options)

# Preview the first rows.
df.head()

Unnamed: 0,Role,Features
0,Social Media Manager,5 to 15 Years Digital Marketing Specialist M.T...
1,Frontend Web Developer,"2 to 12 Years Web Developer BCA HTML, CSS, Jav..."
2,Quality Control Manager,0 to 12 Years Operations Manager PhD Quality c...
3,Wireless Network Engineer,4 to 11 Years Network Engineer PhD Wireless ne...
4,Conference Manager,1 to 12 Years Event Manager MBA Event planning...


In [None]:
# (rows, columns) of the loaded frame.
df.shape

(1615940, 2)

In [None]:
# Number of rows per job role, to see how balanced the label column is.
df['Role'].value_counts()

Unnamed: 0_level_0,count
Role,Unnamed: 1_level_1
Interaction Designer,20580
Network Administrator,17470
User Interface Designer,14036
Social Media Manager,13945
User Experience Designer,13935
...,...
Inventory Control Specialist,3342
Budget Analyst,3335
Clinical Nurse Manager,3324
Social Science Researcher,3321


In [None]:
# Roles with fewer than `min_count` rows are removed from the data set.
min_count = 6500
role_counts = df['Role'].value_counts()
dropped_classes = role_counts.loc[role_counts < min_count].index

# Keep every row whose role is not among the dropped ones, and renumber
# the index from zero.
keep_mask = ~df['Role'].isin(dropped_classes)
filtered_df = df.loc[keep_mask].reset_index(drop=True)

# Per-role counts after filtering.
filtered_df['Role'].value_counts()

Unnamed: 0_level_0,count
Role,Unnamed: 1_level_1
Interaction Designer,20580
Network Administrator,17470
User Interface Designer,14036
Social Media Manager,13945
User Experience Designer,13935
...,...
Benefits Coordinator,6839
Research Analyst,6830
Administrative Coordinator,6803
IT Support Specialist,6799


In [None]:
# Number of distinct roles left after filtering.
print(filtered_df['Role'].nunique())

61


In [None]:
# Work on a reproducible random sample of 10,000 filtered rows (seed 42).
# NOTE: this rebinds `df`; from here on the name no longer refers to the
# full frame loaded from the CSV.
df = filtered_df.sample(n=10000,random_state=42)
df.head()

Unnamed: 0,Role,Features
263157,Sales Account Manager,0 to 12 Years Account Manager MBA Account mana...
200995,Event Planner,0 to 13 Years Event Coordinator M.Com Event pl...
5247,Inside Sales Representative,3 to 10 Years Sales Representative B.Tech Sale...
403186,User Interface Designer,2 to 14 Years UX/UI Designer MBA UI design pri...
433977,Quality Assurance Analyst,0 to 12 Years Software Tester BCA Quality assu...


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multiclass import OneVsRestClassifier
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Model input is the free-text 'Features' column; the label is 'Role'.
X = df['Features']
y = df['Role']

In [None]:
# Hold out 20% of the sample for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Fit the TF-IDF vectorizer on the training text only, then apply the same
# fitted vectorizer to the test text so both share one feature space.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [None]:
def model_training(model,X_train,X_test,y_train,y_test):
  """Train a one-vs-rest classifier and print its test-set metrics.

  Args:
    model: Estimator class (not an instance); it is called with no
      arguments to build the base estimator wrapped by OneVsRestClassifier.
    X_train: Training feature matrix.
    X_test: Test feature matrix.
    y_train: Training labels.
    y_test: Test labels.

  Returns:
    None. The accuracy and the classification report are printed.
  """
  ovr_classifier = OneVsRestClassifier(model())
  ovr_classifier.fit(X_train, y_train)
  predictions = ovr_classifier.predict(X_test)

  print(f" {ovr_classifier} Results:")

  accuracy = accuracy_score(y_test, predictions)
  print(f"Accuracy: {accuracy:.4f}")

  report = classification_report(y_test, predictions)
  print(f"Classification Report:\n{report}")

In [None]:
# Evaluate a random forest (wrapped one-vs-rest inside model_training) on the TF-IDF features.
# NOTE(review): the recorded run reports accuracy 1.0000 — worth confirming that
# identical 'Features' rows do not appear in both the train and test splits.
model_training(RandomForestClassifier,
               X_train=X_train_tfidf,
               X_test=X_test_tfidf,
               y_train=y_train,
               y_test=y_test)

 OneVsRestClassifier(estimator=RandomForestClassifier()) Results:
Accuracy: 1.0000
Classification Report:
                                precision    recall  f1-score   support

             Account Executive       1.00      1.00      1.00        24
    Administrative Coordinator       1.00      1.00      1.00        23
             Automation Tester       1.00      1.00      1.00        32
             Backend Developer       1.00      1.00      1.00        44
          Benefits Coordinator       1.00      1.00      1.00        26
 Business Intelligence Analyst       1.00      1.00      1.00        29
   Client Relationship Manager       1.00      1.00      1.00        22
               Content Creator       1.00      1.00      1.00        34
            Content Strategist       1.00      1.00      1.00        22
      Customer Success Manager       1.00      1.00      1.00        42
   Customer Support Specialist       1.00      1.00      1.00        32
                  Data Analys

In [None]:
# Same evaluation with a k-nearest-neighbours base estimator, on the same split and features.
model_training(KNeighborsClassifier,
               X_train=X_train_tfidf,
               X_test=X_test_tfidf,
               y_train=y_train,
               y_test=y_test)

 OneVsRestClassifier(estimator=KNeighborsClassifier()) Results:
Accuracy: 1.0000
Classification Report:
                                precision    recall  f1-score   support

             Account Executive       1.00      1.00      1.00        24
    Administrative Coordinator       1.00      1.00      1.00        23
             Automation Tester       1.00      1.00      1.00        32
             Backend Developer       1.00      1.00      1.00        44
          Benefits Coordinator       1.00      1.00      1.00        26
 Business Intelligence Analyst       1.00      1.00      1.00        29
   Client Relationship Manager       1.00      1.00      1.00        22
               Content Creator       1.00      1.00      1.00        34
            Content Strategist       1.00      1.00      1.00        22
      Customer Success Manager       1.00      1.00      1.00        42
   Customer Support Specialist       1.00      1.00      1.00        32
                  Data Analyst 

In [None]:
# Fit a plain KNeighborsClassifier (default settings, no one-vs-rest wrapper)
# on the TF-IDF training matrix; this is the model used for the resume
# predictions and pickled at the end of the notebook.
knn_classifier = KNeighborsClassifier()
knn_classifier.fit(X_train_tfidf,y_train)

In [None]:
import re
def cleanResume(txt):
    """Normalize free-form resume text before it is vectorized.

    The steps run in this order:
      1. remove URLs (anything starting with ``http`` up to the next whitespace),
      2. remove the standalone tokens ``RT`` and ``cc``,
      3. remove hashtags and @-mentions,
      4. replace ASCII punctuation with spaces,
      5. replace non-ASCII characters with spaces,
      6. collapse every run of whitespace into a single space.

    Args:
        txt: Raw resume text (str).

    Returns:
        The cleaned text (str). A single leading or trailing space may
        remain where removed content sat at either end of the input.
    """
    # No trailing-whitespace requirement, so a URL at the very end of the
    # text is removed too.
    cleanText = re.sub(r'http\S+', ' ', txt)
    # Word boundaries keep "cc"/"RT" inside real words intact
    # (e.g. "Account", "successful", "ART"); only standalone tokens go.
    cleanText = re.sub(r'\b(?:RT|cc)\b', ' ', cleanText)
    # Hashtags: also matched at end of text and when directly adjacent.
    cleanText = re.sub(r'#\S+', ' ', cleanText)
    cleanText = re.sub(r'@\S+', ' ', cleanText)
    # Raw literal: same characters as before, without invalid-escape warnings.
    cleanText = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText


# Predict the job category for a piece of resume text
def job_recommendation(resume_text,model):
    """Return the category that `model` predicts for `resume_text`.

    The text is cleaned with cleanResume, vectorized with the module-level
    `tfidf_vectorizer`, and passed to `model.predict`; the first (only)
    prediction is returned.

    Args:
        resume_text: Raw resume text (str).
        model: Fitted classifier exposing `predict`.

    Returns:
        The predicted category for the resume.
    """
    cleaned_text = cleanResume(resume_text)
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = tfidf_vectorizer.transform([cleaned_text])
    predictions = model.predict(features)
    return predictions[0]

In [None]:
# Example usage: a sample designer resume is passed through
# job_recommendation with the fitted KNN classifier.
resume_file = """Objective:
A creative and detail-oriented Designer with a passion for visual communication and brand identity seeking opportunities to leverage design skills in a dynamic and collaborative environment.

Education:
- Bachelor of Fine Arts in Graphic Design, XYZ College, GPA: 3.7/4.0
- Diploma in Web Design, ABC Institute, GPA: 3.9/4.0

Skills:
- Proficient in Adobe Creative Suite (Photoshop, Illustrator, InDesign)
- Strong understanding of typography, layout, and color theory
- Experience in both print and digital design
- Ability to conceptualize and execute design projects from concept to completion
- Excellent attention to detail and time management skills

Experience:
Graphic Designer | XYZ Design Studio
- Created visually appealing graphics for various marketing materials, including brochures, flyers, and social media posts
- Collaborated with clients to understand their design needs and deliver creative solutions that align with their brand identity
- Worked closely with the marketing team to ensure consistency in brand messaging across all platforms

Freelance Designer
- Designed logos, branding materials, and website layouts for small businesses and startups
- Managed multiple projects simultaneously while meeting tight deadlines and maintaining quality standards
- Established and maintained strong client relationships through clear communication and exceptional service

Projects:
- Rebranding Campaign for XYZ Company: Led a team to redesign the company's logo, website, and marketing collateral, resulting in a 30% increase in brand recognition
- Packaging Design for ABC Product Launch: Developed eye-catching packaging designs for a new product line, contributing to a successful launch and positive customer feedback

Certifications:
- Adobe Certified Expert (ACE) in Adobe Illustrator
- Responsive Web Design Certification from Udemy

Languages:
- English (Native)
- Spanish (Intermediate)
"""
# Clean, vectorize and classify the resume text, then show the predicted role.
predicted_category = job_recommendation(resume_file,knn_classifier)
print("Predicted Category:", predicted_category)

Predicted Category: User Interface Designer


In [None]:
# Second example: a sample banking/finance resume scored with the same
# KNN classifier. `resume_file` and `predicted_category` are rebound here.
resume_file = """Objective:
Dedicated and results-oriented Banking professional with a strong background in financial analysis and customer service seeking opportunities to contribute to a reputable financial institution. Eager to leverage expertise in risk management, investment strategies, and relationship building to drive business growth and client satisfaction.

Education:
- Bachelor of Business Administration in Finance, XYZ University, GPA: 3.8/4.0
- Certified Financial Analyst (CFA) Level I Candidate

Skills:
- Proficient in financial modeling and analysis using Excel, Bloomberg Terminal, and other financial software
- Extensive knowledge of banking products and services, including loans, mortgages, and investment products
- Strong understanding of regulatory compliance and risk management practices in the banking industry
- Excellent communication and interpersonal skills, with a focus on building rapport with clients and colleagues
- Ability to work efficiently under pressure and adapt to changing market conditions

Experience:
Financial Analyst | ABC Bank
- Conducted financial analysis and risk assessment for corporate clients, including credit analysis, financial statement analysis, and cash flow modeling
- Developed customized financial solutions to meet clients' needs and objectives, resulting in increased revenue and client retention
- Collaborated with cross-functional teams to identify new business opportunities and optimize existing processes

Customer Service Representative | DEF Bank
- Provided exceptional customer service to bank clients, addressing inquiries, resolving issues, and promoting banking products and services
- Processed transactions accurately and efficiently, including deposits, withdrawals, and account transfers
- Educated customers on various banking products and services, helping them make informed financial decisions

Internship | GHI Investments
- Assisted portfolio managers with investment research and analysis, including industry and company-specific research, financial modeling, and performance analysis
- Prepared investment presentations and reports for clients, highlighting investment opportunities and performance metrics
- Conducted market research and analysis to identify trends and opportunities in the financial markets

Certifications:
- Certified Financial Planner (CFP)
- Series 7 and Series 63 Securities Licenses

Languages:
- English (Native)
- Spanish (Proficient)

"""
# Clean, vectorize and classify the resume text, then show the predicted role.
predicted_category = job_recommendation(resume_file,knn_classifier)
print("Predicted Category:", predicted_category)

Predicted Category: Financial Analyst


In [None]:
import os
import pickle

# Destination folder on the mounted Drive for the serialized artifacts
folder_name = '/content/drive/MyDrive/7th-project/model'

# Make sure the destination exists; an already-present folder is not an error
os.makedirs(folder_name, exist_ok=True)

# Persist the fitted KNN classifier together with the TF-IDF vectorizer it
# was trained with; both are needed to score new text later.
artifacts = {
    'knn_classifier_job_recommendation.pkl': knn_classifier,
    'tfidf_vectorizer_job_recommendation.pkl': tfidf_vectorizer,
}
for file_name, artifact in artifacts.items():
    with open(os.path.join(folder_name, file_name), 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

print(f"Files saved successfully in the '{folder_name}' folder.")

Files saved successfully in the '/content/drive/MyDrive/7th-project/model' folder.
