# POC Model

### Imports

In [31]:
import os
from openai import AzureOpenAI
from langchain_openai import AzureChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
import json
from langchain_community.document_loaders import PyPDFLoader
import datetime
from docx import Document
from dotenv import load_dotenv



### Inits

In [32]:
api_key = os.getenv("AZURE_OPENAI_API_KEY")
endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

In [33]:
load_dotenv()
llm = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2024-12-01-preview",
    deployment_name="gpt-4o",
    model="gpt-4o",  # Ensure function calling support
)

### Load PDF

In [34]:


# Load the JSON file
file_path = "../resume_data.json" 

# Open and parse the JSON file
with open(file_path, 'r') as file:
    structured_data = json.load(file)


### Structure file

In [35]:
current_date = datetime.datetime.now().date()

with open('../data/json_schema.json', 'r') as file:
    json_schema = json.load(file)

In [36]:
messages = [
	SystemMessage(
		content="You are an AI that evaluates resume sections and provides ratings and feedback."
	),
	HumanMessage(
		content=(
			f"Using the following structured resume data in JSON format:\n\n{json.dumps(structured_data, indent=2)}\n\n"
			"For each section (Profile, Skills, Experience), return:\n"
			"- 3 things done poorly.\n"
			"- 3 things done well.\n"
			"- A rating out of 10.\n"
			"- Flag if the rating is under 6.\n\n"
			"Return the output as valid JSON."
		)
	),
]


response = llm.invoke(messages)
print("Raw LLM Response:", response.content)


if not response.content.strip():
	print("Error: Response content is empty.")
	evaluation_feedback = {}
else:
	# Parse the response
	try:
		evaluation_feedback = json.loads(response.content)
		print("Parsed JSON:", json.dumps(evaluation_feedback, indent=2))  # Debugging print
	except json.JSONDecodeError as e:
		print("Error decoding JSON:", e)
		evaluation_feedback = {}

# Define sections and weights
sections = ["Profile", "Skills", "Experience"]
weights = {"Profile": 0.4, "Skills": 0.3, "Experience": 0.3}


ratings = {}
for section in sections:
	if section in evaluation_feedback.get("evaluation", {}):
		ratings[section] = evaluation_feedback["evaluation"][section].get("rating", 0)
	else:
		print(f"Warning: Section '{section}' is missing in the response.")
		ratings[section] = 0  # Default to 0 for nowwww
print("Ratings:", ratings)  # Debugging print


overall_score = sum(ratings[section] * weights[section] for section in sections)
print("Overall Score Calculation:", overall_score)

# Add the overall score to the feedback
evaluation_feedback["overall_score"] = round(overall_score, 2)

# Print the updated feedback
print(json.dumps(evaluation_feedback, indent=2))


Raw LLM Response: ```json
{
  "evaluation": {
    "profile": {
      "things_done_poorly": [
        "The profile is overly dense, making it hard to quickly grasp key information at a glance.",
        "Some details about soft skills or leadership style (e.g., communication, adaptability) are missing, despite leadership roles being highlighted.",
        "It lacks specific measurable impacts of contributions, such as quantitative metrics to illustrate outcomes (e.g., percentage improvement, reduction in time, etc.)."
      ],
      "things_done_well": [
        "The profile provides a clear overview of technical expertise and relevant work experience.",
        "It effectively connects academic achievements with professional and research accomplishments.",
        "Complex roles and projects are described in a way that highlights innovation and relevance to the NLP and machine learning fields."
      ],
      "rating": 8,
      "flag": false
    },
    "skills": {
      "things_done_po

### Experience

In [24]:
unsep_json = {"experience": {
    "json_schema": {
        "name": "formatted_experience",
        "description": "Formats the experience section of a resume into structured JSON without separating CGI Experience and Other Experience.",
        "parameters": {
            "type": "object",
            "properties": {
                "experience": {
                    "type": "array",
                    "description": "List of all job experiences.",
                    "items": {
                        "type": "object",
                        "properties": {
                            "company": {
                                "type": "string",
                                "description": "Company name"
                            },
                            "sector": {
                                "type": "string",
                                "description": "Industry of the type of work (e.g., Healthcare, Financial services)"
                            },
                            "job_title": {
                                "type": "string",
                                "description": "Job title"
                            },
                            "start_date": {
                                "type": "string",
                                "description": "Start date (MM/YY)"
                            },
                            "end_date": {
                                "type": "string",
                                "description": "End date (MM/YY) or 'Present'"
                            },
                            "responsibilities": {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                },
                                "description": "Key responsibilities in bullet points"
                            },
                            "technologies": {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                },
                                "description": "List of relevant technologies used"
                            }
                        }
                    }
                }
            },
            "required": [
                "experience"
            ]
        }
    }
}
}

In [25]:
EXPERIENCE_SP = "You are an AI that reformats and structures job experience data from resumes into a JSON format."

EXPERIENCE_HP = """
Using the following experience section in JSON format:
{text_input}

Reformat it into the following structured JSON format:
{json_dump}

Reformat the experience section into a structured JSON format, ensuring the following:
- Clearly include fields for 'Company Name', 'Job Title', 'Dates of Employment', 'Responsibilities', and 'Technologies Used'.
- Format job titles and dates as follows: 'Senior Consultant - Data Scientist (11/24 to Present)'.
- Rewrite responsibilities into clear, action-based bullet points.
- Include a 'Technology' field listing relevant technologies used.
- Ensure consistency, readability, and completeness.
"""



In [26]:
# Define the messages for the LLM
text_input = pdf_text
messages = [
    SystemMessage(
        content=EXPERIENCE_SP
    ),
    HumanMessage(
        content=(
            EXPERIENCE_HP.format(
                text_input=text_input,
                json_dump=json.dumps(
                    unsep_json["experience"]["json_schema"], indent=2
                ),
            )
        )
    ),
]

response = llm.invoke(messages, functions=[unsep_json["experience"]["json_schema"]])
structured_data = response.additional_kwargs["function_call"]["arguments"]
json_structured_data = json.loads(structured_data)

In [32]:
EXPERIENCE_SP2 = "You are an AI that reformats and structures job experience data from resumes into a JSON format with clearly separated 'CGI Experience' and 'Other Experience' sections."

EXPERIENCE_HP2 = """
Using the following experience section in JSON format:
{text_input}

Reformat it into the following structured JSON format, explicitly separating CGI Experience and Other Experience:

Ensure the following:
- Place a job under 'cgi_experience' only if the job clearly indicates that the work was performed at CGI or that the candidate was employed by CGI. This should be evident if the employer or client name explicitly includes 'CGI' (e.g., 'CGI', 'CGI Inc.', 'CGI Americas'). Do not classify a job as CGI Experience if the connection to CGI is merely tangential or if the job was performed for a CGI client without direct employment.
** IMPORTANT - If no CGI-related jobs are present, leave the 'cgi_experience' section empty. **
- All other jobs should be placed under 'other_experience'.

For CGI experience format:
- client_or_sector: Use client name if available (e.g., "Bank of America"), otherwise use sector (e.g., "Financial Services")
- position_title (keep original job title)
- dates (formatted as "MM/YY to MM/YY" or "MM/YY to Present")
- responsibilities (as action-based bullet points)
- technology (as an array of technologies used)

For other experience format:
- company (company name)
- position_title (keep original job title)
- dates (formatted as "MM/YY to MM/YY" or "MM/YY to Present")
- responsibilities (as action-based bullet points)
- technology (as an array of technologies used)

Rewrite responsibilities into clear, action-based bullet points if needed.
"""

# Define the structured output schema
structured_output_schema = {
    "name": "format_experience",
    "description": "Formats and separates job experience data into CGI and Other experience sections",
    "parameters": {
        "type": "object",
        "properties": {
            "cgi_experience": {
                "type": "array",
                "description": "Experience specifically at CGI",
                "items": {
                    "type": "object",
                    "properties": {
                        "client_or_sector": {"type": "string", "description": "Name of the client OR industry sector"},
                        "position_title": {"type": "string", "description": "Job title"},
                        "dates": {"type": "string", "description": "Employment dates"},
                        "responsibilities": {"type": "array", "items": {"type": "string"}},
                        "technology": {"type": "array", "items": {"type": "string"}}
                    }
                }
            },
            "other_experience": {
                "type": "array",
                "description": "Experience at companies other than CGI",
                "items": {
                    "type": "object",
                    "properties": {
                        "company": {"type": "string", "description": "Company name"},
                        "position_title": {"type": "string", "description": "Job title"},
                        "dates": {"type": "string", "description": "Employment dates"},
                        "responsibilities": {"type": "array", "items": {"type": "string"}},
                        "technology": {"type": "array", "items": {"type": "string"}}
                    }
                }
            }
        },
        "required": ["cgi_experience", "other_experience"]
    }
}

In [33]:
messages = [
    SystemMessage(content=EXPERIENCE_SP2),
    HumanMessage(
        content=(
            EXPERIENCE_HP2.format(
                text_input=json.dumps(json_structured_data, indent=2),
                json_dump=json.dumps(
                    structured_output_schema, indent=2
                ),
            )
        )
    ),
]
response = llm.invoke(messages, functions=[structured_output_schema])
structured_data = response.additional_kwargs["function_call"]["arguments"]
json_structured_data = json.loads(structured_data)



In [34]:
json_structured_data

{'cgi_experience': [],
 'other_experience': [{'company': 'Amazon',
   'position_title': 'Language Engineer I',
   'dates': '11/25 to Present',
   'responsibilities': ['Collaborated with scientists and engineers to design APIs, evaluate LLM performance, and develop scalable solutions for language data production and analysis.',
    'Engineered prompts for generative AI, automated workflows, and performed data analysis using Python and scripting tools.',
    'Managed customer-facing ML and deterministic models, resolved production issues, deployed Alexa language updates, and tested new features with modeling tools.'],
   'technology': ['Python', 'Scripting tools', 'Generative AI']},
  {'company': 'TEKsystems at Meta',
   'position_title': 'Machine Learning Engineer',
   'dates': '02/24 to 09/24',
   'responsibilities': ['Performed extensive feature replacement for ads-based models to ensure privacy compliance while retaining neutral metrics.',
    'Implemented innovative feature-replacem

In [36]:
GENERAL_EXPERIENCE_SP = "You are an AI that reformats and structures job experience data from resumes into a JSON format."

GENERAL_EXPERIENCE_HP = """
Using the following experience section in JSON format:
{text_input}

Reformat it into the following structured JSON format:
{json_dump}

Reformat the experience section into a structured JSON format, ensuring the following:
- Clearly include fields for 'Company Name', 'Job Title', 'Dates of Employment', 'Responsibilities', and 'Technologies Used'.
- Format job titles and dates as follows: 'Senior Consultant - Data Scientist (11/24 to Present)'.
- Rewrite responsibilities into clear, action-based bullet points.
- Include a 'Technology' field listing relevant technologies used.
- Ensure consistency, readability, and completeness.
"""

SEP_EXPERIENCE_SP = "You are an AI that reformats and structures job experience data from resumes into a JSON format with clearly separated 'CGI Experience' and 'Other Experience' sections."

SEP_EXPERIENCE_HP2 = """
Using the following experience section in JSON format:
{text_input}

Reformat it into the following structured JSON format, explicitly separating CGI Experience and Other Experience:

Ensure the following:
- Place a job under 'cgi_experience' only if the job clearly indicates that the work was performed at CGI or that the candidate was employed by CGI. This should be evident if the employer or client name explicitly includes 'CGI' (e.g., 'CGI', 'CGI Inc.', 'CGI Americas'). Do not classify a job as CGI Experience if the connection to CGI is merely tangential or if the job was performed for a CGI client without direct employment.
** IMPORTANT - If no CGI-related jobs are present, leave the 'cgi_experience' section empty. **
- All other jobs should be placed under 'other_experience'.

For CGI experience format:
- client_or_sector: Use client name if available (e.g., "Bank of America"), otherwise use sector (e.g., "Financial Services")
- position_title (keep original job title)
- dates (formatted as "MM/YY to MM/YY" or "MM/YY to Present")
- responsibilities (as action-based bullet points)
- technology (as an array of technologies used)

For other experience format:
- company (company name)
- position_title (keep original job title)
- dates (formatted as "MM/YY to MM/YY" or "MM/YY to Present")
- responsibilities (as action-based bullet points)
- technology (as an array of technologies used)

Rewrite responsibilities into clear, action-based bullet points if needed.
"""

In [38]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnablePassthrough


# Define your prompts
general_experience_prompt = ChatPromptTemplate.from_messages([
    ("system", GENERAL_EXPERIENCE_SP),
    ("human", GENERAL_EXPERIENCE_HP)
])

separated_experience_prompt = ChatPromptTemplate.from_messages([
    ("system", SEP_EXPERIENCE_SP),
    ("human", SEP_EXPERIENCE_HP2)
])

# Define your chains with function calling
def create_function_chain(prompt_template, function_schema):
    openai_function = convert_to_openai_function(function_schema)
    chain = prompt_template | llm.bind(
        functions=[openai_function], 
        function_call={"name": function_schema["name"]}
    )
    return chain

# Create the chains
general_experience_chain = create_function_chain(
    general_experience_prompt, 
    general_experience_schema
)

# For the second chain, we need to format the input JSON properly
def format_for_second_chain(data):
    if isinstance(data, str):
        # If the output is a string (JSON), parse it
        return {"text_input": data}
    elif isinstance(data, dict) and "function_call" in data.additional_kwargs:
        # If it's already a response with function call
        args = data.additional_kwargs["function_call"]["arguments"]
        return {"text_input": args}
    else:
        # Otherwise, just convert to JSON string
        return {"text_input": json.dumps(data, indent=2)}

separated_experience_chain = create_function_chain(
    separated_experience_prompt,
    separated_experience_schema
)

# Build the pipeline using the | operator
pipeline = (
    general_experience_chain 
    | (lambda x: json.loads(x.additional_kwargs["function_call"]["arguments"])) 
    | (lambda x: {"text_input": json.dumps(x, indent=2)})
    | separated_experience_chain
    | (lambda x: json.loads(x.additional_kwargs["function_call"]["arguments"]))
)

# Example usage with proper input formatting
def process_resume(pdf_text):
    return pipeline.invoke({"text_input": pdf_text, "json_dump": json.dumps(general_experience_schema, indent=2)})

# Run the pipeline
result = process_resume(pdf_text)

NameError: name 'general_experience_schema' is not defined

In [40]:
# with langchain:

from langchain.chains import SequentialChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate


# First prompt template for extracting experience
experience_extraction_prompt = PromptTemplate(
    input_variables=["text_input"],
    template="""
    Extract all job experience from the following resume text:
    {text_input}
    
    Format each job with company, job title, dates, responsibilities, and technologies used.
    """
)

# Second prompt template for separating CGI and other experience
experience_formatting_prompt = PromptTemplate(
    input_variables=["text_input"],
    template="""
    Using the following experience section in JSON format:
    {text_input}
    
    Reformat it into structured JSON format, explicitly separating CGI Experience and Other Experience:
    
    Ensure the following:
    - Place a job under 'cgi_experience' only if the job clearly indicates that the work was performed at CGI or that the candidate was employed by CGI. This should be evident if the employer or client name explicitly includes 'CGI'.
    - If no CGI-related jobs are present, leave the 'cgi_experience' section empty.
    - All other jobs should be placed under 'other_experience'.
    
    For CGI experience format:
    - client_or_sector: Use client name if available, otherwise use sector
    - position_title (keep original job title)
    - dates (formatted as "MM/YY to MM/YY" or "MM/YY to Present")
    - responsibilities (as action-based bullet points)
    - technology (as an array of technologies used)
    
    For other experience format:
    - company (company name)
    - position_title (keep original job title)
    - dates (formatted as "MM/YY to MM/YY" or "MM/YY to Present")
    - responsibilities (as action-based bullet points)
    - technology (as an array of technologies used)
    """
)

# Create the chains
extract_chain = LLMChain(
    llm=llm,
    prompt=experience_extraction_prompt,
    output_key="extracted_experience"
)

format_chain = LLMChain(
    llm=llm,
    prompt=experience_formatting_prompt,
    output_key="formatted_experience"
)

# Combine into sequential chain
sequential_chain = SequentialChain(
    chains=[extract_chain, format_chain],
    input_variables=["text_input"],
    output_variables=["formatted_experience"]
)

# Use the chain
result = sequential_chain({"text_input": pdf_text})
print(result["formatted_experience"])

```json
{
  "cgi_experience": [],
  "other_experience": [
    {
      "company": "Amazon",
      "position_title": "Language Engineer I",
      "dates": "11/25 to Present",
      "responsibilities": [
        "Collaborated with scientists and engineers to design APIs, evaluate LLM performance, and develop scalable solutions for language data production and analysis.",
        "Engineered prompts for generative AI, automated workflows, and performed data analysis using Python and scripting tools.",
        "Managed customer-facing ML and deterministic models, resolved production issues, deployed Alexa language updates, and tested new features with modeling tools."
      ],
      "technology": ["Python", "Scripting Tools"]
    },
    {
      "company": "TEKsystems at Meta",
      "position_title": "Machine Learning Engineer",
      "dates": "02/24 to 09/24",
      "responsibilities": [
        "Performed extensive feature replacement for ads-based models to ensure privacy compliance whil

In [30]:
# json_structured_data

### Profile

In [None]:
### Professional Summary
messages = [
    SystemMessage(
        content="You are an AI that takes structured resumes in JSON format and writes a compelling, professional summary of the applicant."
    ),
    HumanMessage(
        content=(
            "Using the following structured resume data in JSON format:\n\n"
            f"{structured_data}\n\n"
            "Write a well-crafted, three-paragraph professional profile of the applicant in the third person. "
            "Keep a good balance of detailed and concise. Do not use AI-isms"
            "Incorporate their professional summary, work experience, education, skills, certifications, and any notable achievements. "
            "Highlight their expertise, impact, and technical skills, ensuring the profile flows naturally and is engaging."
        )
    ),
]


response = llm.invoke(messages, functions=[json_schema])

profile = response.content.strip()

In [100]:
# Years of Experience
messages = [
    SystemMessage(
        content="You are an AI that takes in a professional summary and determines the applicants years of experience."
    ),
    HumanMessage(
        content=(
            "Using the following professional summary:\n\n"
            f"{profile}\n\n"
            "Write a very concise header desribing their experience in the following format:\n <X> years experience in <X_category>\nex: 5 years of experience in Software Development"
        )
    ),
]


# Call the LLM with function calling enabled
response = llm.invoke(messages, functions=[json_schema])

years_exp = response.content.strip()

In [101]:
def call_llm(overall, section, text_input=pdf_text):
    messages = [
        SystemMessage(
            content=overall[section]['system_prompt']
        ),
        HumanMessage(
            content=(overall[section]['human_prompt'].format(text_input=text_input, json_dump=json.dumps(overall[section]['json_schema'], indent=2)))
        ), 
    ]
    response = llm.invoke(messages, functions=[overall[section]['json_schema']])
    structured_data = response.additional_kwargs["function_call"]["arguments"]
    json_structured_data = json.loads(structured_data)
    
    return json_structured_data

In [102]:
with open("overall.json", 'r') as file:
    overall = json.load(file)

In [103]:
res_dict = dict()

print("Loading...")
for key in overall.keys():
    res_dict[key] = call_llm(overall, key, pdf_text)
    print(f"\t>> Completed key: {key}")


Loading...
	>> Completed key: experience
	>> Completed key: volunteer
	>> Completed key: other_sections
	>> Completed key: skills_summary


# Formatting to doc

In [10]:
def replicate_section(doc, start_tag, end_tag, replacements, times_to_repeat):
    """
    Duplicates the section between start_tag and end_tag (inclusive), replacing the tags with specified values.
    """
   
    para_group = []
    inside_section = False
    end_para = None
    
    for paragraph in doc.paragraphs:
        if start_tag in paragraph.text:
            inside_section = True
            paragraph.text = paragraph.text.replace(f"{start_tag}", "")
        if inside_section:
            para_group.append(paragraph)
        if end_tag in paragraph.text and inside_section:
            end_para = paragraph
            inside_section = False
            paragraph.text = paragraph.text.replace(f"{end_tag}", "")
            break 
    
    end_para = end_para._p
    for i in range(times_to_repeat):
        new_para_lst = []
        for paragraph in para_group:
            paragraph.text = paragraph.text.replace(f"{start_tag}", "")
            paragraph.text = paragraph.text.replace(f"{end_tag}", "")
            
            new_paragraph = doc.add_paragraph()
            new_paragraph.alignment = paragraph.alignment
            new_paragraph.style = paragraph.style
            

            new_paragraph.text = paragraph.text    
            new_para_lst.append(new_paragraph)
            
        for para in new_para_lst:
            end_para.addnext(para._p)
            end_para = para._p

# def replicate_row(doc, key, res_dict_skills):
    




def replace_text_in_docx(doc, replacements):
    """
    Replaces text in a DOCX file, and replicates sections based on the specified start and end tags.
    """
    for key, value in replacements:
        for paragraph in doc.paragraphs:
        
            if key in paragraph.text:
                if isinstance(value, list):
                    paragraph.style = 'ListBullet'
                    x_par = paragraph._p
                    if len(value) > 1:
                        paragraph.text = paragraph.text.replace(key, value[0])
                        value = value[1:]
                        for bp in value[::-1]:
                            para = doc.add_paragraph(bp, style='ListBullet')
                            x_par.addnext(para._p)
                else:
                    paragraph.text = paragraph.text.replace(key, value)
                break
                
        
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        if key in paragraph.text:
                            if isinstance(value, list):
                                paragraph.style = 'ListBullet'
                                x_par = paragraph._p
                                paragraph.text = paragraph.text.replace(key, value[0])
                                value = value[1:]
                                for bp in value:
                                    para = doc.add_paragraph(bp, style='ListBullet')
                                    x_par.addnext(para._p)
                            else:
                                paragraph.text = paragraph.text.replace(key, value)
                            break
                        

def handle_skills_summary(doc, replacements, res_dict_skills):
    for key, value in replacements:
        for table in doc.tables:
            for row in table.rows:
                if any(key in para.text for cell in row.cells for para in cell.paragraphs):
                    cleaned_key = key.replace("{", "").replace("}", "")
                    times_to_repeat = len(res_dict_skills[cleaned_key]) - 1
                    for _ in range(times_to_repeat):
                        new_row = table.add_row()
                        for i, new_cell in enumerate(new_row.cells):
                            old_para = row.cells[i].paragraphs[0]
                            new_cell.text = old_para.text
                            new_cell.paragraphs[0].style = old_para.style
                        row._tr.addnext(new_row._tr)


def replace_text_in_table(doc, replacements, res_dict):
    """
    Replaces text in a DOCX file, and replicates sections based on the specified start and end tags.
    """
    for key, value in replacements:
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    replacement_made = False
                    for paragraph in cell.paragraphs:
                        if key in paragraph.text:
                            paragraph.text = paragraph.text.replace(key, value)
                            replacement_made = True
                            break
                    if replacement_made:
                        break  
                if replacement_made:
                    break 
            if replacement_made:
                break  

        

In [11]:
structured_data

{'contact': {'name': 'ANANYA APPARAJU',
  'email': 'ananya.apparaju@gmail.com',
  'phone': '',
  'location': '',
  'linkedin': '',
  'website': ''},
 'professional_summary': 'Passionate about solving complex problems in natural language processing and information retrieval. Skilled in machine learning and deep learning techniques. Experienced in Python, Java, and C++. Committed to continuous learning and driving innovation in language technology.',
 'education': [{'degree': 'Master of Data Science',
   'field_of_study': 'Computational Linguistics',
   'institution': 'University of British Columbia',
   'location': 'Vancouver, BC',
   'graduation_year': '06/2023'},
  {'degree': 'Bachelor of Technology',
   'field_of_study': 'Electronics and Communications Engineering',
   'institution': 'Jawaharlal Nehru Technological University',
   'location': 'Hyderabad, Telangana, India',
   'graduation_year': '11/2020'}],
 'experience': [{'job_title': 'Language Engineer I',
   'company': 'Amazon',


In [80]:
structured_data[]


In [106]:
# ed_list
certs = [f"{i['name']}, {i['issuing_organization']}" for i in structured_data['certifications']]
certs

['Lean Six Sigma Green Belt, Six Sigma Global Institute',
 'Certified SAFe 6 Practitioner, Scaled Agile Inc.',
 'SAP Student Recognition Award, SAP University Alliances, Dalhousie University',
 'Lean Six Sigma Yellow Belt, Government of Nova Scotia']

In [None]:
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
from docx.shared import Pt  # Import Pt from docx.shared
from copy import deepcopy


def generate_resume():
    # Input/Output
    input_filename = "../data/resume_sample.docx"
    output_filename = "POC_UPDATE.docx"
    doc = Document(input_filename)
    
    full_name = structured_data['contact']['name']
    cgi_title = "Consultant"
    sector = "Health Services"
    replacements = [
        ("{full_name}", full_name),
        ("{cgi_title}", cgi_title),
        ("{years_exp}", years_exp),
        ("{professional_profile}", profile),
        ("{industry}", res_dict['other_sections']['industry_experience']),
        ("{tech_specs}", res_dict['other_sections']['technical_specializations']),
        ("{expertise}", res_dict['other_sections']['areas_of_expertise']),
        ("{languages}", res_dict['other_sections']['languages']),
        ("{environment}", res_dict['other_sections']['environments']),
        ("{tools}", res_dict['other_sections']['tools_and_software'])
    ]
    
    if 'certifications' in structured_data:
        certs = [f"{i['name']}, {i['issuing_organization']}" for i in structured_data['certifications']]
        replacements["{certs}"] = certs
    
    # CGI Experience
    cgi_exp =  res_dict['experience']['cgi_experience']
    for exp in cgi_exp:
        exp = {k: v for k, v in exp.items() if k in ['sector', 'job_title', 'start_date', 'end_date', 'responsibilities']}
        for key, value in exp.items():
            replacements.append( ("{" + key + "}", value) )
    times_to_repeat = len(cgi_exp) -1
    
    replicate_section(doc, "{begin_cgi_exp}", "{end_cgi_exp}", replacements, times_to_repeat)

    # Other Experience
    o_exp =  res_dict['experience']['other_experience']
    for exp in o_exp:
        exp = {k: v for k, v in exp.items() if k in ['company', 'job_title', 'start_date', 'end_date', 'responsibilities']}
        for key, value in exp.items():
            replacements.append( ("{" + key + "}", value) )
    times_to_repeat = len(o_exp) -1
    replicate_section(doc, "{begin_other_exp}", "{end_other_exp}", replacements, times_to_repeat)

    # Skills summary
    table_reps = []
    for key, value in res_dict['skills_summary'].items():
        table_reps.append( ("{" + key + "}", value) )
    ed_list = [f"{el['degree']}, {el['field_of_study']} - {el['institution']}" for el in structured_data['education']]
    replacements.append( ("{education_entry}", ed_list) )


    replace_text_in_docx(doc, replacements)
    handle_skills_summary(doc, table_reps, res_dict['skills_summary'])

    replacements = []
    for key, value in res_dict['skills_summary'].items():
        replacements.extend( [ ("{" + key + "}", v['skill']) for v in value] )
        replacements.extend( [ ("{num_years}", str(v['years_of_experience'])) for v in value] )
        replacements.extend( [ ("{skill_level}", str(v['skill_level'])) for v in value] )

    replace_text_in_table(doc, replacements,  res_dict['skills_summary'])

    
    doc.save(output_filename)
    print(f"Updated document saved as: {output_filename}")
generate_resume()

PackageNotFoundError: Package not found at 'resume_sample.docx'