In [4]:
import os
import pdfplumber
import psycopg2
import pandas as pd
import time
import xml.etree.ElementTree as ET
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI
from docx import Document 
from win32com import client 

# Keyword arguments handed to psycopg2.connect().
# Every value can be overridden through the conventional libpq environment
# variables so credentials do not have to live in the source file; the
# fallbacks reproduce the values this script used previously.
DB_CONFIG = {
    "dbname": os.environ.get("PGDATABASE", "postgres"),
    "user": os.environ.get("PGUSER", "postgres"),
    # NOTE: the fallback password is for local development only; set
    # PGPASSWORD for any real database.
    "password": os.environ.get("PGPASSWORD", "12345"),
    "host": os.environ.get("PGHOST", "localhost"),
    # Environment values are strings, so normalise the port to an int.
    "port": int(os.environ.get("PGPORT", "5432")),
}

def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF, each page followed by a newline.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A single string with the pages' text in document order.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can come back as None for a page without extractable
        # text (e.g. a scanned image); treat that as an empty page instead of
        # failing the whole document with a TypeError on None + str.
        return "".join(f"{page.extract_text() or ''}\n" for page in pdf.pages)

def extract_text_from_docx(docx_path):
    """Return the paragraph texts of a .docx file joined by newlines.

    Args:
        docx_path: Path of the .docx file to read.

    Returns:
        One string containing each paragraph's text, separated by "\\n".
    """
    paragraphs = Document(docx_path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

def extract_text_from_doc(doc_path):
    """Return the text of a legacy .doc file by automating Microsoft Word.

    Args:
        doc_path: Path of the .doc file to read.

    Returns:
        The document's content text as reported by Word.

    Raises:
        Whatever the COM layer raises, e.g. when Word is not installed
        ("Invalid class string") or the file cannot be opened.
    """
    word = client.Dispatch("Word.Application")
    try:
        # Word resolves relative paths against its own working directory,
        # not this process's, so always hand it an absolute path.
        doc = word.Documents.Open(os.path.abspath(doc_path))
        try:
            return doc.Content.Text
        finally:
            # Close without saving: this function only reads the resume.
            doc.Close(False)
    finally:
        # Always shut Word down, even on failure, so no orphaned
        # WINWORD process is left behind.
        word.Quit()

def extract_text_from_xml(xml_path):
    """Return the text of all elements in an XML file, joined by spaces.

    Args:
        xml_path: Path of the XML file to parse.

    Returns:
        The non-empty ``.text`` of the root and every descendant element,
        in document order, separated by single spaces.
    """
    document_root = ET.parse(xml_path).getroot()
    fragments = []
    for node in document_root.iter():
        # Elements without text (None or "") contribute nothing.
        if node.text:
            fragments.append(node.text)
    return " ".join(fragments)

def extract_text_from_folder(folder_path):
    """Extract text from each supported file directly inside a folder.

    Files are dispatched on their (case-insensitive) extension: .pdf, .docx,
    .doc and .xml. Anything else is ignored. A file whose extraction raises
    is reported on stdout and skipped; files yielding no text are omitted.

    Args:
        folder_path: Directory whose entries should be processed.

    Returns:
        Dict mapping file name (not full path) to the extracted text.
    """
    results = {}
    for entry in os.listdir(folder_path):
        full_path = os.path.join(folder_path, entry)
        lowered = entry.lower()

        try:
            if lowered.endswith(".pdf"):
                content = extract_text_from_pdf(full_path)
            elif lowered.endswith(".docx"):
                content = extract_text_from_docx(full_path)
            elif lowered.endswith(".doc"):
                content = extract_text_from_doc(full_path)
            elif lowered.endswith(".xml"):
                content = extract_text_from_xml(full_path)
            else:
                content = None
        except Exception as e:
            print(f"Error extracting text from {entry}: {e}")
            continue

        # Keep only files that actually produced some text.
        if content:
            results[entry] = content

    return results

# Folder holding the resumes to ingest. Set RESUME_FOLDER to point the script
# at a different directory; the default is the location used originally.
folder_path = os.environ.get(
    "RESUME_FOLDER", r"C:\Users\Divya_prasath\Downloads\Profiles 1"
)
# Maps each readable file name to its extracted text.
text_files = extract_text_from_folder(folder_path)

# The API key is read from the environment so that it never lives in source
# control or shared notebooks. Any key that was previously committed in this
# file should be treated as compromised and revoked.
_google_api_key = os.environ.get("GOOGLE_API_KEY")
if not _google_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before running this script."
    )

# Gemini chat model used for the extraction; temperature 0 keeps the output
# as repeatable as the service allows.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=_google_api_key,
)

# Schema of the fields requested from the model for each resume; it is passed
# to JsonOutputParser to generate the format instructions for the prompt.
# NOTE(review): documented with comments rather than a class docstring on
# purpose — a docstring would presumably end up in the generated schema and
# therefore in the prompt; confirm before adding one.
class Resume(BaseModel):
    # Each description below is part of the schema sent to the model.
    name: str = Field(description="name from resume")
    phone: str = Field(description="phone number from resume")
    email: str = Field(description="email from resume")
    # Stored later in the "skills" column of the database table.
    skill: str = Field(description="skill from resume")

# Parses the model reply as JSON and supplies the format instructions that
# describe the Resume schema.
parser = JsonOutputParser(pydantic_object=Resume)

# Prompt layout: fixed instruction, then the parser's format instructions,
# then the resume text supplied as "query".
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=(
        "Extract name, phone, email, skills from the given text resume.\n"
        "{format_instructions}\n"
        "{query}\n"
    ),
)

# prompt -> model -> JSON parser
chain = prompt | llm | parser

def _extract_with_retry(runnable, text, filename, max_attempts=3, base_delay=2):
    """Invoke *runnable* on *text*, retrying with exponential backoff.

    Args:
        runnable: Object whose ``invoke({"query": text})`` performs the extraction.
        text: Resume text to send.
        filename: Name used in error messages only.
        max_attempts: Total number of tries before giving up.
        base_delay: Base of the backoff; waits base_delay**attempt seconds
            (2s, 4s, ... with the defaults) between attempts.

    Returns:
        The runnable's result, or None when every attempt failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            return runnable.invoke({"query": text})
        except Exception as e:
            print(f"Error processing {filename}: {e}")
            # Back off only when another attempt follows; sleeping after the
            # final failure would just delay the next file.
            if attempt < max_attempts:
                time.sleep(base_delay ** attempt)
    return None


details = []
failed_files = []
for filename, text in text_files.items():
    extracted_details = _extract_with_retry(chain, text, filename)
    if extracted_details is None:
        failed_files.append(filename)
        continue
    details.append(extracted_details)
    # Short pause between successful calls to stay under the API rate limit.
    time.sleep(1)

# Make dropped resumes visible instead of letting them vanish silently.
if failed_files:
    print(
        f"Gave up on {len(failed_files)} file(s) after retries: "
        + ", ".join(failed_files)
    )

df = pd.DataFrame(details)

def load_data_to_postgres(df, db_config):
    """Insert every row of *df* into the ``public.resumeh`` table.

    All rows are written in one transaction: either every row is committed
    or, on any error, none is. Errors are reported on stdout rather than
    raised, so a failed load does not abort the calling script.

    Args:
        df: DataFrame providing ``name``, ``phone``, ``email`` and ``skill``
            columns.
        db_config: Keyword arguments forwarded to ``psycopg2.connect``.
    """
    insert_sql = """
        INSERT INTO public.resumeh (name, phone_number, email_id, skills)
        VALUES (%s, %s, %s, %s)
    """
    # Pre-bind so the cleanup below is safe even when connect() itself fails.
    connection = None
    try:
        connection = psycopg2.connect(**db_config)
        with connection.cursor() as cursor:
            for _, row in df.iterrows():
                cursor.execute(
                    insert_sql,
                    (row["name"], row["phone"], row["email"], row["skill"]),
                )
        connection.commit()
        print("Data loaded successfully into the PostgreSQL table.")
    except Exception as error:
        print(f"Error: {error}")
        # Discard the partial transaction explicitly.
        if connection is not None:
            connection.rollback()
    finally:
        if connection is not None:
            connection.close()

# Persist the extracted rows using the connection settings in DB_CONFIG.
load_data_to_postgres(df, DB_CONFIG)


Error extracting text from Naukri_JAYAKUMART[2y_5m].doc: (-2147221005, 'Invalid class string', None, None)
Error extracting text from Naukri_MrRamesh[12y_0m].doc: (-2147221005, 'Invalid class string', None, None)


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_DeepikaN[5y_0m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_DheepanRaj[3y_0m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_DheepanRaj[3y_0m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_JisinJoseph[1y_9m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_JisinJoseph[1y_9m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_JisinJoseph[1y_9m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_NagarjunaP[11y_0m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_RabinsinghR[3y_5m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_RajaPinnoju[3y_0m] 2.pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_SibinDhas[1y_4m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_SibinDhas[1y_4m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_SibinDhas[1y_4m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_VIGNESHP[5y_0m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_VigneshV[2y_6m].pdf: 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Error processing Naukri_VigneshV[2y_6m].pdf: 429 Resource has been exhausted (e.g. check quota).
Data loaded successfully into the PostgreSQL table.
