# In [34]:
import json
import os
from concurrent.futures import ThreadPoolExecutor

import openai
import pandas as pd
import pdfplumber

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# written into the source; the original placeholder is kept as the fallback.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'your api key')



def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file, handed to ``pdfplumber.open``.

    Returns:
        A single string made of each page's extracted text, joined with no
        separator. A page whose ``extract_text()`` yields nothing contributes
        an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may return None for a page without extractable text
        # (e.g. a scanned image); `or ''` avoids a TypeError on concatenation.
        return ''.join(page.extract_text() or '' for page in pdf.pages)


def _stringify_field_value(value):
    """Render a decoded JSON value as the plain string stored in the result dict."""
    if isinstance(value, str):
        return value.strip()
    if value is None:
        return ''
    if isinstance(value, (list, tuple)):
        return ', '.join(_stringify_field_value(item) for item in value)
    if isinstance(value, dict):
        # Nested objects are kept as compact JSON text so the cell stays a string.
        return json.dumps(value, ensure_ascii=False)
    return str(value)


def _clean_field_token(token):
    """Strip whitespace, trailing commas and one pair of matching quotes."""
    token = token.strip().rstrip(',').strip()
    if len(token) >= 2 and token[0] == token[-1] and token[0] in '"\'':
        token = token[1:-1].strip()
    return token


def _parse_field_response(reply):
    """Convert the model's reply text into a flat ``{field: value}`` dict of strings."""
    # The prompt asks for a dictionary, so first try to decode the outermost
    # {...} span as JSON (this also skips any Markdown code fence around it).
    start, end = reply.find('{'), reply.rfind('}')
    if 0 <= start < end:
        try:
            decoded = json.loads(reply[start:end + 1])
        except ValueError:
            decoded = None
        if isinstance(decoded, dict):
            return {
                str(key).strip(): _stringify_field_value(value)
                for key, value in decoded.items()
            }

    # Fallback: treat every "key: value" line as one field, removing bullet
    # markers, quotes and trailing commas that would otherwise pollute the
    # keys and values.
    fields = {}
    for line in reply.splitlines():
        key, separator, value = line.partition(':')
        if not separator:
            continue
        key = _clean_field_token(key.lstrip(' \t-*'))
        if key:
            fields[key] = _clean_field_token(value)
    return fields


def extract_fields_with_gpt(resume_text):
    """Ask the chat model to extract structured fields from resume text.

    Sends a fixed extraction prompt followed by ``resume_text`` to
    ``gpt-3.5-turbo`` through ``openai.ChatCompletion.create`` and parses the
    first choice's message content.

    Args:
        resume_text: Plain text of a resume.

    Returns:
        A dict mapping each field name found in the reply to its value as a
        string. If the reply contains a JSON object it is decoded directly;
        otherwise each ``key: value`` line becomes one entry. The dict is
        empty when nothing parseable is returned.
    """
    prompt = """
    You are an AI bot designed to act as a professional for parsing Resumes. Extract the following fields from the resume:
    - Name
    - Contact details: as in the Resume
    - University
    - Year of Study
    - Course
    - Discipline
    - CGPA/Percentage
    - Key Skills: in minimum words
    - Gen AI Experience Score (1-10)
    - AI/ML Experience Score (1-10)
    - Total Resume score -
    Provide the output as a dictionary, with each field as a key and its respective value as the value.
    """

    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface —
    # confirm the installed openai version still provides it.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": prompt + resume_text}],
        max_tokens=1000
    )

    # Guard against a missing/None content so parsing never fails on it.
    reply = response['choices'][0]['message']['content'] or ''
    return _parse_field_response(reply)


def process_resume(resume_path):
    """Run the pipeline for one resume: extract the PDF text, then parse its fields.

    Args:
        resume_path: Path of the resume PDF.

    Returns:
        Whatever ``extract_fields_with_gpt`` returns for the extracted text.
    """
    return extract_fields_with_gpt(extract_text_from_pdf(resume_path))


def process_batch(resume_paths):
    """Process several resumes on a thread pool.

    Args:
        resume_paths: Iterable of resume PDF paths.

    Returns:
        A list with one ``process_resume`` result per path, in input order.
    """
    with ThreadPoolExecutor() as pool:
        # map() yields results in the same order as resume_paths.
        parsed = pool.map(process_resume, resume_paths)
        return list(parsed)


def save_to_excel(data, output_path):
    """Write the extracted records to an Excel file.

    Args:
        data: Records to tabulate (a list of dictionaries in this script).
        output_path: Destination path of the Excel file.
    """
    # Build the table and write it in one step, leaving out the index column.
    pd.DataFrame(data).to_excel(output_path, index=False)




# In [35]:
# Input resumes: two individually named files plus resume-sample-2 … resume-sample-12.
resume_paths = [
    'CV_ANIKET (1).pdf',
    'Resume_1.pdf',
    *(f'resume-sample-{number}.pdf' for number in range(2, 13)),
]
output_path = 'Output.xlsx'

# Parse every resume, then write all extracted records to the Excel file.
data = process_batch(resume_paths)
save_to_excel(data, output_path)