In [1]:
# If you're running locally and don't have these libraries installed, you can uncomment these
# !pip install pyresparser Flask numpy pandas nltk sklearn ftfy spacy python-docx

# Downloading NLTK data
import nltk
nltk.download('stopwords')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ACER\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
from pyresparser import ResumeParser
from docx import Document
import numpy as np
import pandas as pd
import re
from ftfy import fix_text
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import spacy


In [3]:
stopw = set(stopwords.words('english'))  # Load English stopwords


In [4]:
# Assuming 'job_final.csv' is present in the working directory
df = pd.read_csv('job_final.csv')

# Clean job descriptions by removing stopwords and short words (<3 characters)
df['test'] = df['Job_Description'].apply(lambda x: ' '.join([word for word in str(x).split() if len(word) > 2 and word not in stopw]))

# Print to verify
print(df['Location'])


0         Bengaluru
1         Bengaluru
2         Bengaluru
3         Bengaluru
4         Bengaluru
           ...     
1919      Bengaluru
1920      Bengaluru
1921      Bengaluru
1922      Bengaluru
1923      Bengaluru
Name: Location, Length: 1924, dtype: object


In [16]:
file_path = 'test.pdf'

# Open and process the resume document (simulating file upload)
try:
    # Simply parse the resume without specifying custom NLP models
    data = ResumeParser(file_path).get_extracted_data()
    print("PDF processed successfully")
    
except Exception as e:
    print("Error processing PDF document:", e)
    data = {}

# Extract skills from the resume if parsing was successful
resume = data.get('skills', [])
print("Skills extracted:", resume)




PDF processed successfully
Skills extracted: ['C', 'Retention', 'Technical', 'Security', 'Analysis', 'Oracle', 'Mysql', 'English', 'Programming', 'Database', 'Jira', 'Github', 'C++', 'Tensorflow', 'Cloud', 'Apex', 'Communication', 'Access', 'System', 'Engineering', 'Python', 'Interactive', 'Algorithms', 'Json', 'Opencv', 'Operating systems', 'Sql', 'Video', 'Java', 'Linux', 'Consulting', 'Queries', 'Javascript']


In [13]:
# Prepare resume skills as a single string
skills = []
skills.append(' '.join(word for word in resume))
org_name_clean = skills

# Define a function to generate n-grams from text (used later for vectorization)
def ngrams(string, n=3):
    string = fix_text(string)
    string = string.encode("ascii", errors="ignore").decode()
    string = string.lower()
    chars_to_remove = [")", "(", ".", "|", "[", "]", "{", "}", "'"]
    rx = '[' + re.escape(''.join(chars_to_remove)) + ']'
    string = re.sub(rx, '', string)
    string = string.replace('&', 'and')
    string = string.replace(',', ' ')
    string = string.replace('-', ' ')
    string = string.title()
    string = re.sub(' +', ' ', string).strip()
    string = ' ' + string + ' '
    string = re.sub(r'[,-./]|\sBD', r'', string)
    ngrams = zip(*[string[i:] for i in range(n)])
    return [''.join(ngram) for ngram in ngrams]

# Vectorize the resume skills using TF-IDF
vectorizer = TfidfVectorizer(min_df=1, analyzer=ngrams, lowercase=False)
tfidf = vectorizer.fit_transform(org_name_clean)
print('Vectorizing completed...')


Vectorizing completed...


In [14]:
# Function to find nearest neighbors based on the resume's skills
def getNearestN(query):
    queryTFIDF_ = vectorizer.transform(query)
    distances, indices = nbrs.kneighbors(queryTFIDF_)
    return distances, indices

# Fit the Nearest Neighbors model with the job descriptions
nbrs = NearestNeighbors(n_neighbors=1, n_jobs=-1).fit(tfidf)

# Prepare job descriptions for matching
unique_org = df['test'].values
distances, indices = getNearestN(unique_org)

# Output the nearest matches
matches = []
for i, j in enumerate(indices):
    dist = round(distances[i][0], 2)
    temp = [dist]
    matches.append(temp)

# Convert the match results to a DataFrame for easy handling
matches = pd.DataFrame(matches, columns=['Match confidence'])
df['match'] = matches['Match confidence']
df1 = df.sort_values('match')
df2 = df1[['Position', 'Company', 'Location', 'url']].head(10).reset_index()

# Clean up Location column
df2['Location'] = df2['Location'].str.replace(r'[^\x00-\x7F]', '', regex=True)
df2['Location'] = df2['Location'].str.replace("â€“", "")

# Display the top 10 matched jobs
df2


Unnamed: 0,index,Position,Company,Location,url
0,1195,Advance Analytics Consultant,Premier Farnell,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...
1,1342,Advance Analytics Consultant,Farnell element14,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...
2,685,Data Analyst,Infratab,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...
3,1028,Data Analyst,Goalreify,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...
4,1149,"Data Analytics- Interns, Data Analysts",Bengaluru,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...
5,931,Web Analytics Manager (4-8 Years) for an On-De...,Zyoin,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...
6,824,Data Scientists,Bloom Consulting Services,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...
7,1255,"Data Scientist, Global Data Science Center of ...",Visa,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...
8,724,"Data Analyst, 2 - Marketing",AllianceData,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...
9,804,Data Engineer,ExpertEase,Bengaluru,https://www.glassdoor.co.in/partner/jobListing...


In [15]:
# Extract unique locations for the dropdown (if needed in an application)
dropdown_locations = sorted(df2['Location'].unique())

# Create a list of jobs to display
job_list = []
for index, row in df2.iterrows():
    job_list.append({
        'Position': row['Position'],
        'Company': row['Company'],
        'Location': row['Location'],
        'Apply Link': row['url']
    })

# Display the list of job matches
job_list


[{'Position': 'Advance Analytics Consultant',
  'Company': 'Premier Farnell',
  'Location': 'Bengaluru',
  'Apply Link': 'https://www.glassdoor.co.in/partner/jobListing.htm?pos=2118&ao=3949&s=58&guid=0000016baeaed05a9c74761e75f813aa&src=GD_JOB_AD&t=SR&extid=1&exst=OL&ist=&ast=OL&vt=w&slr=true&cs=1_fab29092&cb=1562003821206&jobListingId=3214630510'},
 {'Position': 'Advance Analytics Consultant',
  'Company': 'Farnell element14',
  'Location': 'Bengaluru',
  'Apply Link': 'https://www.glassdoor.co.in/partner/jobListing.htm?pos=2122&ao=437149&s=58&guid=0000016baeaed05a9c74761e75f813aa&src=GD_JOB_AD&t=SR&extid=1&exst=OL&ist=&ast=OL&vt=w&slr=true&ea=1&cs=1_64be5100&cb=1562003821209&jobListingId=3214862080'},
 {'Position': 'Data Analyst',
  'Company': 'Infratab',
  'Location': '  Bengaluru',
  'Apply Link': 'https://www.glassdoor.co.in/partner/jobListing.htm?pos=2029&ao=437149&s=58&guid=0000016b6ff275d3928b9519225ec86e&src=GD_JOB_AD&t=SR&extid=1&exst=OL&ist=&ast=OL&vt=w&slr=true&cs=1_28fbc5c