# In [2]:  (Jupyter notebook cell prompt left over from the export)
import streamlit as st
import pandas as pd
import json
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import UnstructuredURLLoader
from azureai import AzureAI
from appconfig import AppConfig

class JobProfileMatcher:
    """Store job descriptions and scraped profiles in Chroma and match them.

    Job descriptions (from a CSV) and profiles (scraped from URLs and
    structured by the LLM) are flattened to the same text layout by
    ``preprocess_jd`` and embedded into a single Chroma collection. Each
    stored entry carries a ``doc_type`` metadata field so that
    ``match_profiles`` can restrict its search to profiles; without that tag
    a job description stored in step 1 would simply match itself.
    """

    # Metadata tag used to tell stored job descriptions apart from stored
    # profiles, since both live in the same collection.
    _DOC_TYPE_KEY = "doc_type"
    _DOC_TYPE_JD = "jd"
    _DOC_TYPE_PROFILE = "profile"

    def __init__(self):
        """Create the LLM client, the embedding model and the Chroma store."""
        # Initialize configuration and clients
        self.config = AppConfig()
        self.azure_ai = AzureAI(self.config)
        self.llm = self.azure_ai.get_client()
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        self.chroma_db_name = "jd_chroma_db"
        self.chroma_persist_dir = "chroma_db"

        # The previous code branched on whether the persist directory already
        # had content but made the identical constructor call in both
        # branches, so a single call is equivalent.
        self.chroma_db = Chroma(
            collection_name=self.chroma_db_name,
            embedding_function=self.embeddings,
            persist_directory=self.chroma_persist_dir
        )

    @staticmethod
    def _is_missing(value):
        """Return True for None/NaN-like scalars (e.g. empty CSV cells)."""
        # is_scalar guards against lists/dicts, for which pd.isna would
        # return an array instead of a single bool.
        return bool(pd.api.types.is_scalar(value) and pd.isna(value))

    @staticmethod
    def _read_jd_file(jd_file):
        """Read the job-description CSV from a path or a file-like object."""
        # A file-like object may have been read before (the same upload is
        # used for storing and for matching); rewind so the read starts at
        # the beginning.
        if hasattr(jd_file, "seek"):
            jd_file.seek(0)
        return pd.read_csv(jd_file)

    @staticmethod
    def _parse_llm_json(response):
        """Parse the LLM reply as JSON, tolerating a Markdown code fence.

        Raises:
            json.JSONDecodeError: if the (unfenced) text is not valid JSON.
        """
        text = response.strip()
        if text.startswith("```"):
            # Drop the opening fence line ("```" or "```json") ...
            first_newline = text.find("\n")
            text = text[first_newline + 1:] if first_newline != -1 else ""
            # ... and the closing fence, if present.
            text = text.rstrip()
            if text.endswith("```"):
                text = text[:-3]
        return json.loads(text)

    @staticmethod
    def _sanitize_metadata(details):
        """Return a copy of *details* reduced to simple scalar values.

        Missing values are dropped and non-scalar values (lists, nested
        objects) are serialized to JSON strings.
        NOTE(review): assumes the vector store only accepts str/int/float/bool
        metadata values - confirm against the Chroma documentation.
        """
        clean = {}
        for key, value in details.items():
            if JobProfileMatcher._is_missing(value):
                continue
            if isinstance(value, (str, int, float, bool)):
                clean[str(key)] = value
            else:
                clean[str(key)] = json.dumps(value, ensure_ascii=False, default=str)
        return clean

    def preprocess_jd(self, row):
        """Flatten one job description / profile into the text that is embedded.

        Args:
            row: any mapping-like object with ``.get`` (a ``dict`` or a
                ``pandas.Series``) holding the keys Title, Description,
                Skills, Location, Experience and Salary.

        Returns:
            A multi-line string with one ``Field: value`` line per key.
            Absent keys and missing values (None/NaN) are rendered as "N/A".
        """
        def field(name):
            value = row.get(name, 'N/A')
            # An empty CSV cell is present as NaN, so the .get default alone
            # would render it as "nan".
            return 'N/A' if self._is_missing(value) else value

        combined_text = f"""
        Title: {field('Title')}
        Description: {field('Description')}
        Skills: {field('Skills')}
        Location: {field('Location')}
        Experience: {field('Experience')}
        Salary: {field('Salary')}
        """
        return combined_text.strip()

    def extract_profile_details(self, profile_text):
        """Ask the LLM to structure scraped page text as JSON.

        Args:
            profile_text: raw text scraped from a page.

        Returns:
            The parsed JSON value returned by the LLM. If the reply is not
            valid JSON an error is shown and the Streamlit script is stopped.
        """
        # Define the LLM and prompt for profile extraction
        prompt = PromptTemplate(
            input_variables=["profile_text"],
            template="""
            The scraped text is from the career's page of a website.
            Your job is to extract the job postings and return them in JSON format containing the
            following keys:
            Title, Description, Skills, Location, Experience, Salary.
            Profile Text:
            {profile_text}
            """
        )

        chain = LLMChain(llm=self.llm, prompt=prompt)
        response = chain.run(profile_text)

        try:
            return self._parse_llm_json(response)
        except json.JSONDecodeError:
            st.error("Error: LLM response is not valid JSON. Please check the response format.")
            st.stop()

    def store_job_descriptions(self, jd_file):
        """Embed every row of the job-description CSV into the Chroma store.

        Args:
            jd_file: path or file-like object containing the CSV.
        """
        jd_df = self._read_jd_file(jd_file)

        for _, row in jd_df.iterrows():
            title = row.get("Title", "N/A")
            metadata = {
                "Title": "N/A" if self._is_missing(title) else str(title),
                self._DOC_TYPE_KEY: self._DOC_TYPE_JD,
            }
            # add_texts expects one metadata dict per text, i.e. a list.
            self.chroma_db.add_texts([self.preprocess_jd(row)], metadatas=[metadata])

        self.chroma_db.persist()
        st.success("Job Descriptions stored in Chroma DB!")

    def process_profiles(self, profile_links):
        """Scrape each URL, structure it with the LLM and store the result.

        Args:
            profile_links: iterable of URL strings; blank entries are skipped.

        Any failure for a URL is reported and stops the Streamlit script.
        """
        for link in profile_links:
            link = link.strip()
            if not link:
                continue
            try:
                loader = UnstructuredURLLoader(urls=[link])
                documents = loader.load()
                profile_text = " ".join(doc.page_content for doc in documents)
                profile_details = self.extract_profile_details(profile_text)

                # The prompt asks for "job postings", so the reply may be a
                # single JSON object or a list of them.
                if isinstance(profile_details, list):
                    postings = profile_details
                else:
                    postings = [profile_details]

                for posting in postings:
                    if not isinstance(posting, dict):
                        raise ValueError("LLM response is not a JSON object with the expected keys")
                    metadata = self._sanitize_metadata(posting)
                    metadata[self._DOC_TYPE_KEY] = self._DOC_TYPE_PROFILE
                    # add_texts expects one metadata dict per text, i.e. a list.
                    self.chroma_db.add_texts([self.preprocess_jd(posting)], metadatas=[metadata])
            except Exception as e:
                st.error(f"Error processing URL {link}: {e}")
                st.stop()

        self.chroma_db.persist()
        st.success("Profiles stored in Chroma DB!")

    def match_profiles(self, jd_file):
        """Find the closest stored profiles for every job description.

        Args:
            jd_file: path or file-like object containing the CSV.

        Returns:
            A list of dicts with the keys "JD", "Matched Profile Title" and
            "Matching Percentage" - up to three entries per CSV row.
        """
        jd_df = self._read_jd_file(jd_file)
        results = []

        for _, row in jd_df.iterrows():
            jd_text = self.preprocess_jd(row)
            # Returns (document, relevance score) pairs; the filter keeps the
            # stored job descriptions themselves out of the candidates.
            matches = self.chroma_db.similarity_search_with_relevance_scores(
                jd_text,
                k=3,
                filter={self._DOC_TYPE_KEY: self._DOC_TYPE_PROFILE},
            )
            for document, score in matches:
                # Relevance scores are nominally in [0, 1]; clamp so a score
                # slightly outside that range is not shown as a negative or
                # >100% match.
                match_percent = min(max(score, 0.0), 1.0) * 100
                metadata = document.metadata or {}
                results.append({
                    "JD": jd_text,
                    "Matched Profile Title": metadata.get("Title", "Unknown"),
                    "Matching Percentage": f"{match_percent:.2f}%"
                })

        return results

# Streamlit App
# Three-step UI: store job descriptions, scrape/store profiles, then match.
# Streamlit re-runs this script on every interaction, so each button press
# executes exactly one of the three actions below.
st.title("Job Profile Matcher")
matcher = JobProfileMatcher()

# Step 1: Upload CSV File
st.header("Step 1: Upload Job Descriptions (CSV)")
jd_file = st.file_uploader("Upload JD CSV File", type="csv")
if jd_file and st.button("Store Job Descriptions"):
    matcher.store_job_descriptions(jd_file)

# Step 2: Enter Profile Links
st.header("Step 2: Provide Profile Links")
MAX_PROFILE_LINKS = 4  # limit promised by the text-area label below
raw_links = st.text_area("Enter up to 4 profile links (one per line)")
# Drop blank / whitespace-only lines so they are not treated as URLs.
profile_links = [line.strip() for line in raw_links.splitlines() if line.strip()]
if len(profile_links) > MAX_PROFILE_LINKS:
    st.warning(f"Only the first {MAX_PROFILE_LINKS} profile links will be processed.")
    profile_links = profile_links[:MAX_PROFILE_LINKS]
if profile_links and st.button("Process Profiles"):
    matcher.process_profiles(profile_links)

# Step 3: Match Profiles
st.header("Step 3: Match Profiles with JDs")
if jd_file and st.button("Match Profiles"):
    results = matcher.match_profiles(jd_file)
    if not results:
        # Make an empty result visible instead of rendering nothing.
        st.info("No matching profiles found.")
    for result in results:
        st.write(result)




KUBERNETES_SERVICE_PORT_HTTPS=
NODE_MAX_SPACE_SIZE=4096
KUBERNETES_SERVICE_PORT=
no_proxy=localhost,127.0.0.1,github.com,.github.com,.npmjs.org,.yarnpkg.com,npm.sap.com,.maven.apache.org,.repo-cache.svc.cluster.local
SAP_UI_BOOTSTRAP_URL=https://sapui5.hana.ondemand.com
CF_API_ENDPOINT=https://api.cf.eu10.hana.ondemand.com
HOSTNAME=workspaces-ws-c9pr6-deployment-7699d4cd86-vx9v5
INTERNAL_LANDSCAPE=internalFalse
SUBACCOUNT_ID=d79438d3-3873-4da0-9769-5ba543ad9894
WING_EXT_INIT_PHASE_FIN_FILES=/extbin/simple-ext-installer.fin
NODE_OPTIONS=--max-old-space-size=4096
SIMPLE_EXTENSION_METADATA=eyAiaW50ZXJuYWxBcGlWZXJzaW9uIjogMSwgIm5hbWUiOiAiYmFzaWMtdG9vbHMiLCAibmFtZXNwYWNlIjogImJhc2ljLXRvb2xzIiwgIm5wbUNvbmZpZyI6IHsicmVnaXN0cmllcyI6eyJiYXMtZGV2IjoiaHR0cHM6Ly9jb21tb24ucmVwb3NpdG9yaWVzLmNsb3VkLnNhcC9hcnRpZmFjdG9yeS9hcGkvbnBtL2RldngtbnBtLWxpdmUvIn19LCAidnNjb2RlRXh0ZW5zaW9ucyI6IFt7ICJuYW1lIjogIkBiYXMtZGV2L2FwcC1zdHVkaW8tdG9vbGtpdCIsICJ2ZXJzaW9uUmFuZ2UiOiAiMC40OC4wIiwgInNvdXJjZSI6ICJucG0iLCAiR1VOIj

