Last updated: 10/30/23 by portilla@gmail.com

In [None]:
!pip install ibm-watson

In [None]:
!pip install PyPDF2
!pip install python-dotenv

### **Import the IBM Watson SDK for Python, which is used to make the API calls, and a package for parsing PDF resumes.**

In [None]:
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, CategoriesOptions, KeywordsOptions, SentimentOptions
from PyPDF2 import PdfReader

In [None]:
import pandas as pd
import json
import os

### **Load the environment file containing the API key and service URL. Keep this file private so that nobody can access your cloud instance without authorization. You need to create an `nlu.env` file that defines two environment variables: `IAM_KEY` and `SERVICE_URL`.**

In [None]:
from dotenv import load_dotenv
# Load the variables defined in the local nlu.env file so the credentials can
# be read with os.getenv afterwards. The value echoed under this cell is the
# return value of load_dotenv.
load_dotenv('nlu.env')

True

In [None]:
# Fetch the Watson NLU credentials from the process environment; each value
# is None when the corresponding variable is not set.
IAM_KEY = os.environ.get('IAM_KEY')
SERVICE_URL = os.environ.get('SERVICE_URL')

In [None]:
# Authenticate with the IAM API key, create the NLU client pinned to API
# version 2020-08-01, and point it at this service instance's URL.
authenticator = IAMAuthenticator(IAM_KEY)
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2020-08-01',
    authenticator=authenticator,
)
natural_language_understanding.set_service_url(SERVICE_URL)

### **Use keyword extraction on the resume. Provide your own PDF file and change the path below accordingly.**

In [None]:
# Extract the text of the resume PDF and ask Watson NLU for its top keywords.
RESUME_PDF_PATH = './JSmith-Resume.pdf'  # change this to point at your own resume

reader = PdfReader(RESUME_PDF_PATH)
# Join every page (not only the first) so multi-page resumes are analysed in
# full; `or ''` guards against a page that yields no extractable text.
resume_text = '\n'.join(page.extract_text() or '' for page in reader.pages)
if not resume_text.strip():
    # Fail early with a clear message instead of sending empty text to the API
    # (e.g. a scanned, image-only PDF).
    raise ValueError(f'No extractable text found in {RESUME_PDF_PATH!r}')

# Request up to 20 keywords, without per-keyword emotion or sentiment scores.
response = natural_language_understanding.analyze(
    text=resume_text,
    features=Features(
        keywords=KeywordsOptions(emotion=False, sentiment=False, limit=20)
    ),
).get_result()

### **The keywords for an example job description were already provided in class. They are stored in the JSON file loaded below.**

In [None]:
# Load the pre-computed job-description keywords from the JSON file.
# The explicit UTF-8 encoding keeps decoding independent of the platform's
# default locale encoding.
with open('./23-03008-cloud-keywords.json', encoding='utf-8') as f:
    job_desc = json.load(f)

### **Extract the keywords from the JSON response returned by the API call**

In [None]:
# Display the raw NLU result to inspect its structure before extracting keywords.
response

{'usage': {'text_units': 1, 'text_characters': 3374, 'features': 1},
 'language': 'en',
 'keywords': [{'text': 'key role', 'relevance': 0.610587, 'count': 2},
  {'text': 'Google Cloud Platform APIs', 'relevance': 0.604594, 'count': 1},
  {'text': 'images of skin diseases', 'relevance': 0.558629, 'count': 1},
  {'text': 'Teaching Assistant', 'relevance': 0.548107, 'count': 1},
  {'text': 'Image Augmentation', 'relevance': 0.546106, 'count': 1},
  {'text': 'Datacenter Scale Computing', 'relevance': 0.543593, 'count': 1},
  {'text': 'PID controller', 'relevance': 0.542825, 'count': 1},
  {'text': 'Graduate Student Researcher Aug.',
   'relevance': 0.540336,
   'count': 1},
  {'text': 'guidance algorithms', 'relevance': 0.539484, 'count': 1},
  {'text': 'integration of an Intel Realsense',
   'relevance': 0.5363,
   'count': 1},
  {'text': 'JWT-based authentication system',
   'relevance': 0.534445,
   'count': 1},
  {'text': 'RESTful APIs', 'relevance': 0.533885, 'count': 1},
  {'text': '

In [None]:
def extract_keywords(text):
    """Return the 'text' value of every entry in ``text['keywords']``.

    ``text`` is a mapping holding a 'keywords' list of dicts, such as the
    result of an NLU keyword analysis. The order of the entries is preserved.
    """
    phrases = []
    for entry in text['keywords']:
        phrases.append(entry['text'])
    return phrases

# Pull the keyword phrases out of both payloads: job description first, then resume.
job_description_keywords, resume_keywords = map(extract_keywords, (job_desc, response))

### **Use a simple TF-IDF vectorizer to vectorize the resume and job description keywords, then compute the cosine similarity between the two vectors**

In [None]:
# Score how similar the resume keywords are to the job-description keywords.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Flatten each keyword list into one space-separated pseudo-document.
doc1 = ' '.join(job_description_keywords)
doc2 = ' '.join(resume_keywords)

# Fit a single TF-IDF model over both documents: row 0 is the job
# description, row 1 is the resume.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform([doc1, doc2])

# Cosine similarity between the two rows; the result is a 1x1 array.
job_vector, resume_vector = tfidf_matrix[0], tfidf_matrix[1]
similarity_score = cosine_similarity(job_vector, resume_vector)

similarity_score

array([[0.25558804]])