In [12]:
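### Part 1: OpenCV - Extract Text from Image
# NOTE: the OpenCV/Tesseract text extraction itself happens further down (cv2.imread +
# pytesseract); this first cell only sets up the spaCy / scikit-learn text-classification baseline.
# Import Libraries & Setup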
import os
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
import random

# Load spaCy's small English pipeline ("en_core_web_sm"). The resulting `nlp` object is
# the module-level pipeline that preprocess() calls on every CV text.
nlp = spacy.load("en_core_web_sm")


In [2]:
#Load CV Files
# Function to load CV text files
def load_cv_files(folder_path):
    """Read every ``.txt`` file in ``folder_path`` and return their contents.

    Files are visited in sorted filename order so the returned list is the
    same on every run and platform (``os.listdir`` makes no ordering
    guarantee, and later cells refer to CVs by their position: CV1, CV2, ...).
    Entries that end in ``.txt`` but are not regular files are skipped.

    Args:
        folder_path: Directory containing the CV text files.

    Returns:
        list[str]: One UTF-8 decoded string per ``.txt`` file, ordered by
        filename.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist (from ``os.listdir``).
    """
    cv_texts = []
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, filename)
        # A directory named "something.txt" would make open() fail; skip it.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, 'r', encoding='utf-8') as f:
            cv_texts.append(f.read())
    return cv_texts

# Read all sample CVs from the "cv_samples" folder (relative to the working directory)
# and report how many were found.
cv_texts = load_cv_files("cv_samples")
loaded_count = len(cv_texts)
print(f"✅ Loaded {loaded_count} CVs.")


✅ Loaded 3 CVs.


In [3]:
def preprocess(text):
    """Normalise raw text into a space-separated string of lowercase lemmas.

    The text is run through the module-level spaCy pipeline ``nlp``. Tokens
    that are stop words or not purely alphabetic (punctuation, numbers, ...)
    are dropped; the lemma of each remaining token is lowercased.

    Args:
        text: Raw CV text.

    Returns:
        str: The kept lemmas joined by single spaces ("" if none survive).
    """
    kept_lemmas = []
    for tok in nlp(text):
        if tok.is_stop or not tok.is_alpha:
            continue
        kept_lemmas.append(tok.lemma_.lower())
    return " ".join(kept_lemmas)

# Apply preprocess() to each loaded CV, keeping the same order as cv_texts
processed_texts = list(map(preprocess, cv_texts))


In [4]:

# NOTE: these labels are random placeholders, not ground truth -- a model trained on
# them cannot learn anything meaningful about personality. A dedicated, seeded RNG is
# used so that at least the same labels (and therefore the same model) are produced on
# every run instead of changing each time the cell executes.
possible_labels = ["Introvert", "Extrovert"]
label_rng = random.Random(42)
labels = [label_rng.choice(possible_labels) for _ in processed_texts]

for i, label in enumerate(labels, start=1):
    print(f"CV{i} → Assigned Personality: {label}")


CV1 → Assigned Personality: Introvert
CV2 → Assigned Personality: Extrovert
CV3 → Assigned Personality: Extrovert


In [5]:
# Convert texts to numerical features using TF-IDF:
# first learn the vocabulary and IDF weights from the preprocessed CVs, then encode
# those same CVs as the TF-IDF feature matrix X.
vectorizer = TfidfVectorizer()
vectorizer.fit(processed_texts)
X = vectorizer.transform(processed_texts)


In [7]:
#Training a Machine Learning Model (Random Forest)
# random_state fixes the forest's internal randomness so re-running the notebook
# produces the same fitted model and the same prediction for a given CV.
# NOTE: a later cell rebinds the name `model` to a PPO agent; the ML-based
# predict_personality() reads this global, so call it before that cell runs.
model = RandomForestClassifier(random_state=42)
model.fit(X, labels)

print("✅ Model trained successfully!")


✅ Model trained successfully!


In [8]:
# Sample CV text used to try out the trained classifier; edit this string to test
# another candidate. It is passed to predict_personality() below.
new_cv = """
Ananya is a creative and dynamic project manager with 7 years of experience in handling client projects and software teams. 
She is very communicative, enthusiastic, and enjoys motivating her team members. 
She has worked at Infosys and Cognizant and enjoys event planning and group brainstorming.
"""

def predict_personality(cv_text):
    """Predict a personality label for one CV with the trained classifier.

    The text goes through the same steps as the training data: preprocess(),
    then the already-fitted module-level ``vectorizer``, then the module-level
    ``model``.

    Note: a later cell defines another function with this same name, which
    replaces this one once that cell has run.

    Args:
        cv_text: Raw CV text.

    Returns:
        The first (and only) element of ``model.predict`` for this text.
    """
    features = vectorizer.transform([preprocess(cv_text)])
    predictions = model.predict(features)
    return predictions[0]

# Classify the sample CV defined above and show the resulting label
predicted = predict_personality(new_cv)
print("🧠 Predicted Personality:", predicted)


🧠 Predicted Personality: Extrovert


In [9]:
pip install pytesseract opencv-python


Defaulting to user installation because normal site-packages is not writeable
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (19 kB)
Collecting numpy<2.3.0,>=2 (from opencv-python)
  Downloading numpy-2.2.6-cp312-cp312-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 0.0/60.8 kB ? eta -:--:--
     ------ --------------------------------- 10.2/60.8 kB ? eta -:--:--
     ------------------- ------------------ 30.7/60.8 kB 660.6 kB/s eta 0:00:01
     ------------------------------- ------ 51.2/60.8 kB 525.1 kB/s eta 0:00:01
     -------------------------------------- 60.8/60.8 kB 460.0 kB/s eta 0:00:00
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Downloading opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl (39.0 MB)
   ---------------------------------------- 0.0/39.0 MB ? eta -:--:--
   ---------------------------

  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mediapipe 0.10.21 requires numpy<2, but you have numpy 2.2.6 which is incompatible.
contourpy 1.2.0 requires numpy<2.0,>=1.20, but you have numpy 2.2.6 which is incompatible.
numba 0.59.1 requires numpy<1.27,>=1.22, but you have numpy 2.2.6 which is incompatible.
pywavelets 1.5.0 requires numpy<2.0,>=1.22.4, but you have numpy 2.2.6 which is incompatible.
streamlit 1.32.0 requires numpy<2,>=1.19.3, but you have numpy 2.2.6 which is incompatible.


In [2]:
pip install numpy==1.26.4


Defaulting to user installation because normal site-packages is not writeable
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.6
    Uninstalling numpy-2.2.6:
      Successfully uninstalled numpy-2.2.6
Successfully installed numpy-1.26.4
Note: you may need to restart the kernel to use updated packages.


  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.


In [2]:
# Smoke test after the (re)installs above: if any of these imports fails, the cell
# raises before the confirmation message is printed.
import numpy as np
import pandas as pd
import cv2
import pytesseract

print("✅ All libraries are working!")


✅ All libraries are working!


In [3]:
import cv2
import pytesseract


In [4]:
# Tell pytesseract where the Tesseract executable lives. Set the TESSERACT_CMD
# environment variable to override the location on machines where Tesseract is not
# installed in the default Windows directory; without it the original path is used.
# Relies on `os`, imported in the notebook's first setup cell.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)


In [5]:
#Reading the image and extracting the text
image_path = 'cv_image.jpg'
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file -- it returns None, which
# would otherwise surface later as a confusing error inside pytesseract.
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_path!r}")

# OpenCV loads pixels in BGR channel order while pytesseract expects RGB, so convert
# before running OCR. `image` itself is left untouched.
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
text_from_image = pytesseract.image_to_string(rgb_image)

print("📄 Extracted Text from CV Image:")
print(text_from_image)


📄 Extracted Text from CV Image:
IMAGE NOT INCLUDED

ANNE
ROBERTSON

WEB DESIGNER

JOB EXPERIENCE_

2019 - Present

CONTACT Enter Your JOb Possition Here

Company Name

United States. elit sed eiusmod tempor indunt labore dolore magna
aliqua Utenim ad minim veamaquis nostrud exercit

9 Street Address 0, New York, Lorem ipsum dolor sithis amet, consectetur adipisicing

+0123 456 7890 ullamco aliquip commodo consequat.

urgmail@name.com

www.urwebsitename.com 2016 - 2019
Enter Your JOb Possition Here
Company Name

Lorem ipsum dolor sithis amet, consectetur adipisicing
EDUCATION_ elit sed eilusmod tempor indunt laboredolore magna

aliqua Utenim ad minim veamaquis nostrud exercit

. . ullamco aliquip commodo consequat.
Creative Graphic Arts -_ q

University Name Here
2019-2020

Bachelor Of Science SKI LLS_

University Name Here

2015-1019 Photoshop eeees After Iffect esses
Illustrator eeoee Adobe XD eece

Indesign eevee Powerpoint ee8ee8

REFERENCE_

Indesign eoce

M David Parker
Senior Web

In [10]:
### Part 2: NLP - Analyze Text
#Analyze the Extracted Text 
import spacy
from textblob import TextBlob

# (Re)load the small English spaCy pipeline used for entity recognition
nlp = spacy.load("en_core_web_sm")

# Run the pipeline once over the OCR output; entities are read from the result
doc = nlp(text_from_image)

# 🔍 Named Entity Recognition: one "text -> label" line per detected entity
print("\n🔍 Named Entities Detected (Person, Org, Location, etc.):")
for entity in doc.ents:
    print(f"{entity.text} -> {entity.label_}")

# 🧠 Sentiment Analysis using TextBlob: read the sentiment once, then report
# its polarity and subjectivity scores
blob = TextBlob(text_from_image)
sentiment = blob.sentiment
print("\n💬 Sentiment Analysis:")
print("Polarity     :", sentiment.polarity)
print("Subjectivity :", sentiment.subjectivity)


NameError: name 'text_from_image' is not defined

In [7]:
# Personality Trait Prediction
def predict_personality(cv_text):
    """Flag Big Five traits whose cue words appear in a CV text.

    Each trait has two cue words. A trait is reported as present when any word
    in the text *starts with* one of its cues (case-insensitive), so "teams",
    "helpful" and "collaborated" count, while words that merely contain a cue
    -- "irresponsible", "disorganized", "unhelpful", "distress", "steam" -- do
    not. (Plain substring tests wrongly flagged those.)

    Note: this definition reuses the name of the earlier classifier-based
    predict_personality() and replaces it once this cell has run.

    Args:
        cv_text: Raw CV text. ``None`` or an empty string yields all-False.

    Returns:
        dict[str, bool]: Keys "Extroversion", "Conscientiousness", "Openness",
        "Agreeableness", "Neuroticism" (in that order), each True if a cue
        word was found.
    """
    import re  # local import keeps this cell runnable on its own

    trait_keywords = {
        "Extroversion": ("team", "collaborate"),
        "Conscientiousness": ("organized", "responsible"),
        "Openness": ("creative", "innovative"),
        "Agreeableness": ("help", "support"),
        "Neuroticism": ("stress", "pressure"),
    }

    # Lowercase once instead of once per keyword test.
    lowered = (cv_text or "").lower()

    personality_traits = {}
    for trait, keywords in trait_keywords.items():
        # \b anchors the cue to the start of a word; the end is left open so
        # inflected forms ("teams", "supportive") still match.
        pattern = r"\b(?:" + "|".join(re.escape(word) for word in keywords) + ")"
        personality_traits[trait] = re.search(pattern, lowered) is not None

    return personality_traits

# Apply the keyword-based trait detector to the OCR text and list each trait
# with a present / not-detected marker
traits = predict_personality(text_from_image)
print("\n🧬 Predicted Personality Traits:")
for trait, present in traits.items():
    status = '✔️ Present' if present else '❌ Not Detected'
    print(f"{trait}: {status}")



🧬 Predicted Personality Traits:
Extroversion: ❌ Not Detected
Conscientiousness: ❌ Not Detected
Openness: ✔️ Present
Agreeableness: ❌ Not Detected
Neuroticism: ❌ Not Detected


In [11]:
# Part 3: Reinforcement Learning - Predict Traits
#Import required Libraries
import gym
from gym import spaces
import numpy as np
from stable_baselines3 import PPO


In [2]:
#Creating a Custom CV Personality Environment
class CVEnv(gym.Env):
    """Single-step toy environment: guess the largest of 5 random features.

    The observation is a vector of 5 random values in [0, 1); the action is one
    of 5 discrete choices, and the "correct" choice is the index of the largest
    observation value.

    Note: a later cell defines another class named ``CVEnv``, which replaces
    this one once that cell has run.
    """

    def __init__(self):
        super(CVEnv, self).__init__()
        self.observation_space = spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)  # 5 sample features
        self.action_space = spaces.Discrete(5)  # Predict one of 5 traits
        self.state = None
        self.target_trait = None
        # Draw the initial state through reset() so the sampling logic lives in one place.
        self.reset()

    def reset(self):
        """Draw a fresh random observation and return it."""
        # np.random.rand yields float64; cast so the observation matches the
        # float32 dtype declared in observation_space.
        self.state = np.random.rand(5).astype(np.float32)
        self.target_trait = np.argmax(self.state)  # Assume most prominent trait is correct
        return self.state

    def step(self, action):
        """Score ``action`` against the target trait and end the episode.

        Returns:
            tuple: ``(observation, reward, done, info)`` where reward is 1.0
            for the correct trait and -0.1 otherwise, and ``done`` is always True.
        """
        reward = 1.0 if action == self.target_trait else -0.1
        done = True  # Single-step episode
        info = {}
        return self.state, reward, done, info


In [4]:
import gym
import numpy as np
from gym import spaces

#  Defining Custom Environment
class CVEnv(gym.Env):
    """Single-step toy environment that rewards matching a fixed trait vector.

    Observations are 4 random values in [0, 1) and carry no information about
    the target; the action is a 5-element binary vector, and the reward is the
    number of positions where it agrees with ``TARGET_TRAITS``.
    """

    # Fixed target the agent is rewarded for reproducing: only openness = 1, rest = 0.
    TARGET_TRAITS = np.array([0, 0, 1, 0, 0])

    def __init__(self):
        super(CVEnv, self).__init__()

        # Define observation space
        self.observation_space = spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)

        # Define action space
        self.action_space = spaces.MultiBinary(5)

        # Give step() a valid observation even if it is called before reset().
        self.state = np.zeros(4, dtype=np.float32)

    def reset(self):
        """Return a fresh random observation as a simulation."""
        # np.random.rand yields float64; cast so the observation matches the
        # float32 dtype declared in observation_space.
        self.state = np.random.rand(4).astype(np.float32)
        return self.state

    def step(self, action):
        """Score ``action`` against the target and end the episode.

        Returns:
            tuple: ``(observation, reward, done, info)``; reward is the count
            (0-5, as a float) of positions where ``action`` equals
            ``TARGET_TRAITS``, and ``done`` is always True.
        """
        # np.asarray makes the comparison element-wise even when the action
        # arrives as a plain list (list == list would yield a single bool).
        reward = float(np.sum(np.asarray(action) == self.TARGET_TRAITS))
        done = True  # End after one step (simplified)
        return self.state, reward, done, {}

    def render(self, mode='human'):
        """Rendering is not implemented for this environment."""
        pass


In [5]:
# Instantiate the custom environment; this `env` is what the PPO agent is trained on
# and what the test cell resets to obtain an observation.
env = CVEnv()


In [6]:
from stable_baselines3 import PPO

# Train a PPO agent on the custom environment. A fixed seed makes the training run
# repeatable. NOTE: this rebinds `model`, which earlier held the RandomForestClassifier
# read by the classifier-based predict_personality().
model = PPO("MlpPolicy", env, verbose=1, seed=42)
model.learn(total_timesteps=10000)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 2.38     |
| time/              |          |
|    fps             | 742      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1          |
|    ep_rew_mean          | 2.9        |
| time/                   |            |
|    fps                  | 472        |
|    iterations           | 2          |
|    time_elapsed         | 8          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.12236838 |
|    clip_fraction        | 0.594      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.36      |
|    explained_variance   | -0.0319    |
|    learning_rate        | 0.0003     |
|   

<stable_baselines3.ppo.ppo.PPO at 0x25eb5244a90>

In [7]:
#Testing the model
# deterministic=True returns the policy's most likely action instead of sampling one,
# so the printed prediction does not change between runs of this cell. The second
# return value is named explicitly rather than `_`, which IPython uses for the last output.
obs = env.reset()
action, _state = model.predict(obs, deterministic=True)
print("Predicted Traits:", action)


Predicted Traits: [0. 0. 1. 0. 0.]


In [8]:
# Persist the trained PPO agent to disk under the name "cv_personality_model"
model.save("cv_personality_model")


In [9]:
# Reload the saved agent from disk, rebinding `model`. No environment is passed to
# load() here.
from stable_baselines3 import PPO
model = PPO.load("cv_personality_model")
