In [1]:
import streamlit as st
import pandas as pd
# from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from appconfig import AppConfig
from azureai import AzureAI

# Create instances of AppConfig and AzureAI.
# `config` is built with no arguments and handed to AzureAI; both classes come
# from project-local modules (appconfig / azureai) whose behavior is not visible here.
config = AppConfig()
# Module-level client wrapper; extract_profile_details() calls azure_ai.get_client() on it.
azure_ai = AzureAI(config)


KUBERNETES_SERVICE_PORT_HTTPS=
NODE_MAX_SPACE_SIZE=4096
KUBERNETES_SERVICE_PORT=
no_proxy=localhost,127.0.0.1,github.com,.github.com,.npmjs.org,.yarnpkg.com,npm.sap.com,.maven.apache.org,.repo-cache.svc.cluster.local
SAP_UI_BOOTSTRAP_URL=https://sapui5.hana.ondemand.com
CF_API_ENDPOINT=https://api.cf.eu10.hana.ondemand.com
HOSTNAME=workspaces-ws-v52kq-deployment-554475b856-qfswh
INTERNAL_LANDSCAPE=internalFalse
SUBACCOUNT_ID=d79438d3-3873-4da0-9769-5ba543ad9894
WING_EXT_INIT_PHASE_FIN_FILES=/extbin/simple-ext-installer.fin
NODE_OPTIONS=--max-old-space-size=4096
SIMPLE_EXTENSION_METADATA=eyAiaW50ZXJuYWxBcGlWZXJzaW9uIjogMSwgIm5hbWUiOiAiYmFzaWMtdG9vbHMiLCAibmFtZXNwYWNlIjogImJhc2ljLXRvb2xzIiwgIm5wbUNvbmZpZyI6IHsicmVnaXN0cmllcyI6eyJiYXMtZGV2IjoiaHR0cHM6Ly9jb21tb24ucmVwb3NpdG9yaWVzLmNsb3VkLnNhcC9hcnRpZmFjdG9yeS9hcGkvbnBtL2RldngtbnBtLWxpdmUvIn19LCAidnNjb2RlRXh0ZW5zaW9ucyI6IFt7ICJuYW1lIjogIkBiYXMtZGV2L2FwcC1zdHVkaW8tdG9vbGtpdCIsICJ2ZXJzaW9uUmFuZ2UiOiAiMC40OC4wIiwgInNvdXJjZSI6ICJucG0iLCAiR1VOIj

In [1]:
import os
import pandas as pd
from langchain_community.embeddings  import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Function to preprocess and combine JD fields
def preprocess_jd(row):
    """Render the six job-description fields of ``row`` as one labelled text block.

    ``row`` is anything indexable by the keys Title, Description, Skills,
    Location, Experience and Salary (e.g. a pandas Series or a dict). Each
    field is emitted on its own line as ``<Label>: <value>`` with a four-space
    indent; the block starts with a newline and ends with a newline followed
    by four spaces. A missing key raises whatever ``row[key]`` raises.
    """
    labels = ("Title", "Description", "Skills", "Location", "Experience", "Salary")
    pad = "    "
    rendered_lines = [f"{pad}{label}: {row[label]}" for label in labels]
    return "\n" + "\n".join(rendered_lines) + "\n" + pad

# Embedding model shared by everything written to / queried from the vector store
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Chroma collection name and on-disk persistence directory
chroma_db_name = "jd_chroma_db"
chroma_persist_dir = "chroma_db"
chroma_db = Chroma(
    collection_name=chroma_db_name,
    embedding_function=embeddings,
    persist_directory=chroma_persist_dir,
)

def extract_profile_details(profile_text):
    """Ask the LLM to extract structured fields from ``profile_text`` and parse the reply.

    Args:
        profile_text: Raw text to send to the model inside the prompt.

    Returns:
        The parsed model reply (expected to be a dict with the keys Title,
        Description, Skills, Location, Experience and Salary; the exact shape
        depends on what the model returns).

    Raises:
        ValueError / SyntaxError: if the reply is neither valid JSON nor a
            plain Python literal.
    """
    # Local imports keep this block self-contained without touching the file's import cell.
    import ast
    import json

    # Define the LLM and prompt for profile extraction
    llm = azure_ai.get_client()
    prompt = PromptTemplate(
        input_variables=["profile_text"],
        template="""
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the
        following keys:
        Title, Description, Skills, Location, Experience, Salary.
        Profile Text:
        {profile_text}
        """
    )

    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(profile_text)

    payload = response.strip()
    # Models frequently wrap JSON in a Markdown code fence (``` or ```json); drop it.
    if payload.startswith("```"):
        fenced_lines = payload.splitlines()[1:]
        if fenced_lines and fenced_lines[-1].strip().startswith("```"):
            fenced_lines = fenced_lines[:-1]
        payload = "\n".join(fenced_lines).strip()

    # SECURITY: the original used eval() on the model's reply, which executes
    # arbitrary code from untrusted output and also fails on valid JSON such as
    # true/false/null. Parse as JSON first; fall back to literal_eval (literals
    # only, no code execution) so Python-style dict replies still work.
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        return ast.literal_eval(payload)

def _to_chroma_metadata(details, doc_type):
    """Return a flat metadata dict for Chroma, tagged with ``doc_type``.

    Chroma only accepts str/int/float/bool metadata values, so anything else
    (lists, nested dicts, None) is converted to a string.
    """
    metadata = {}
    for key, value in dict(details).items():
        if isinstance(value, (str, int, float, bool)):
            metadata[str(key)] = value
        else:
            metadata[str(key)] = "" if value is None else str(value)
    metadata["doc_type"] = doc_type
    return metadata


def main():
    """Interactive workflow: load JDs from CSV, store JDs and profiles, then match them.

    Steps (all driven by ``input()`` prompts):
      1. Read a CSV of job descriptions and build a combined text per row.
      2. Optionally store the JDs in the module-level ``chroma_db``.
      3. Collect up to four profile links.
      4. Extract details for each profile via ``extract_profile_details`` and
         store them in ``chroma_db``.
      5. Optionally print, for each JD, the top matching stored profiles with
         a relevance percentage.

    Returns None; exits early (after printing a message) when the file is
    missing, lacks required columns, is empty, or no profile link is given.
    """
    print("Welcome to Job Profile Matcher")

    # Step 1: Upload CSV file
    jd_file_path = input("Enter the path to the JD CSV file: ").strip()

    if not os.path.exists(jd_file_path):
        print("File not found. Please check the path and try again.")
        return

    # Load JD file
    jd_df = pd.read_csv(jd_file_path)
    print("Job Descriptions Loaded:")
    print(jd_df.head())

    # preprocess_jd indexes these columns directly; fail with a clear message
    # instead of a KeyError from inside DataFrame.apply.
    required_columns = ["Title", "Description", "Skills", "Location", "Experience", "Salary"]
    missing_columns = [col for col in required_columns if col not in jd_df.columns]
    if missing_columns:
        print(f"The JD file is missing required columns: {', '.join(missing_columns)}")
        return
    if jd_df.empty:
        print("The JD file contains no rows. Exiting.")
        return

    # Combine JD columns for vectorization
    jd_df["Combined"] = jd_df.apply(preprocess_jd, axis=1)

    # Step 2: Store JDs in Chroma DB
    store_jds = input("Do you want to store these JDs in the vector database? (yes/no): ").strip().lower()
    if store_jds == "yes":
        # add_texts expects `metadatas` as a list with one dict per text; adding
        # everything in one call also lets the embedder work in a single batch.
        jd_texts = jd_df["Combined"].tolist()
        jd_metadatas = [
            {"Title": str(title), "doc_type": "jd"} for title in jd_df["Title"]
        ]
        chroma_db.add_texts(jd_texts, metadatas=jd_metadatas)

        # Persist the database
        chroma_db.persist()
        print("Job Descriptions stored in Chroma DB!")

    # Step 3: Input profile links
    print("Enter the profile links (up to 4). Leave blank to stop.")
    profile_links = []
    for i in range(4):
        link = input(f"Profile Link {i + 1}: ").strip()
        if link:
            profile_links.append(link)
        else:
            break

    if not profile_links:
        print("No profiles entered. Exiting.")
        return

    # Step 4: Extract details and store profiles in Chroma DB
    for link in profile_links:
        # Simulate profile scraping by extracting details using LLMChain
        profile_text = f"Extracted profile text from {link}"
        profile_details = extract_profile_details(profile_text)

        # Combine extracted details into a single text block
        combined_profile_text = preprocess_jd(pd.Series(profile_details))

        # Store in Chroma DB, tagged as a profile so JD queries can exclude stored JDs
        chroma_db.add_texts(
            [combined_profile_text],
            metadatas=[_to_chroma_metadata(profile_details, "profile")],
        )

    chroma_db.persist()
    print("Profiles stored in Chroma DB!")

    # Step 5: Match profiles with JDs
    match_profiles = input("Do you want to match profiles with the stored JDs? (yes/no): ").strip().lower()
    if match_profiles == "yes":
        matching_results = []
        for _, row in jd_df.iterrows():
            jd_text = row["Combined"]
            # Returns (Document, relevance) pairs with relevance normally in [0, 1].
            # The filter keeps JDs stored in the same collection from matching themselves.
            matches = chroma_db.similarity_search_with_relevance_scores(
                jd_text,
                k=3,  # Adjust k as needed
                filter={"doc_type": "profile"},
            )
            for doc, score in matches:
                match_percent = score * 100  # Normalize score to percentage
                matching_results.append((jd_text, doc.metadata.get("Title", "Unknown"), match_percent))

        # Display results
        for jd_text, title, percent in matching_results:
            print(f"JD: {jd_text}")
            print(f"Matched Profile Title: {title}")
            print(f"Matching Percentage: {percent:.2f}%")

# Entry point: start the interactive workflow when this code is executed directly
# (the guard is also true when the cell is run in a notebook kernel).
if __name__ == "__main__":
    main()


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


ImportError: Could not import sentence_transformers python package. Please install it with `pip install sentence-transformers`.