In [1]:
# Import necessary libraries and modules
import os
import uuid

import pandas as pd
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq

# Initialize the LLM (ChatGroq) with a specific model.
# The API key is read from the GROQ_API_KEY environment variable so the
# credential never lives in the notebook or in version control.
# NOTE(review): the key that used to be hard-coded here must be considered
# leaked and should be revoked/rotated in the Groq console.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    # Fail early with an actionable message instead of an opaque auth error
    # on the first request.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "export it before running this script."
    )

llm = ChatGroq(
    temperature=0,  # Temperature controls response randomness
    groq_api_key=groq_api_key,  # API key for authentication (from environment)
    model="llama-3.3-70b-versatile",  # The specific LLM model to use
)

# Fetch the Nike job page and keep the text of the last document the loader returns
loader = WebBaseLoader("https://careers.nike.com/data-engineer/job/R-51131")
page_data = loader.load()[-1].page_content

# Prompt text that asks the model to turn the scraped page into JSON job data.
# `{page_data}` is the only template variable.
EXTRACT_TEMPLATE = """
    ### SCRAPED TEXT FROM WEBSITE:
    {page_data}
    
    ### INSTRUCTION:
    The scraped text is from the career's page of a website.
    Your job is to extract the job postings and return them in JSON format containing the 
    following keys: `company name`, `role`, `experience`, `skills` and `description`.
    Only return the valid JSON.
    ### VALID JSON (NO PREAMBLE):    
    """

prompt_extract = PromptTemplate.from_template(EXTRACT_TEMPLATE)

# Run the extraction chain: fill the prompt with the page text and send it to the LLM
chain_extract = prompt_extract | llm
res = chain_extract.invoke(input={'page_data': page_data})

# Parse the model's reply into Python objects
json_parser = JsonOutputParser()
job = json_parser.parse(res.content)

# The prompt asks for "job postings" (plural), so the model may answer with a
# JSON array instead of a single object. The rest of the script treats `job`
# as one mapping (e.g. `'skills' in job`), which would silently find nothing
# on a list. Normalize to the first posting; an empty array becomes an empty
# mapping so downstream lookups simply report that nothing was extracted.
if isinstance(job, list):
    job = job[0] if job else {}

# Load roles.csv for tech stack data (assumed columns: Role, Techstack, Links)
df = pd.read_csv("roles.csv")


def _normalize_skills(raw_skills):
    """Return the job's skills as stripped, lower-case, non-empty strings.

    Accepts a comma-separated string or a list/tuple/set of items; any other
    value is treated as a single skill. Items are converted with ``str()`` so
    non-string entries do not raise, ``None`` entries are skipped, and blank
    entries are dropped so they cannot match a Techstack cell that merely has
    a trailing comma.
    """
    if isinstance(raw_skills, str):
        candidates = raw_skills.split(',')
    elif isinstance(raw_skills, (list, tuple, set)):
        candidates = raw_skills
    else:
        candidates = [raw_skills]
    cleaned = (str(item).strip().lower() for item in candidates if item is not None)
    return [skill for skill in cleaned if skill]


def skill_match(techstack_value):
    """Return True if any job skill appears in a comma-separated Techstack cell.

    Entries are compared exactly after stripping and lower-casing (no
    substring matching). Non-string cells, such as the NaN pandas produces for
    an empty CSV field, never match instead of raising AttributeError.
    """
    if not isinstance(techstack_value, str):
        return False
    tech_skills = {ts.strip().lower() for ts in techstack_value.split(',')}
    return not tech_skills.isdisjoint(job_skills)


# `job` is expected to be a mapping; anything else is treated as "no skills".
raw_skills = job.get('skills') if isinstance(job, dict) else None
job_skills = _normalize_skills(raw_skills) if raw_skills else []

if raw_skills:
    # Keep the rows whose Techstack shares at least one entry with the job skills
    matched_df = df[df['Techstack'].apply(skill_match)]

    # Summarize the distinct matched Techstack cells as one comma-separated string
    if not matched_df.empty:
        matched_skills = ", ".join(matched_df['Techstack'].unique())
    else:
        matched_skills = "No specific matched skills found"
else:
    matched_skills = "No skills extracted from job posting"

# (Optional) If you want to add CSV data to ChromaDB for further querying, you can use it here.
# [The ChromaDB code is omitted for brevity.]

# Prompt text requesting four plain-text outputs (cover letter, cold email,
# hiring-manager message, LinkedIn DM). Template variables: `{job_description}`
# and `{matched_skills}` (the skills matched against roles.csv).
MULTI_OUTPUT_TEMPLATE = """
    ### JOB DESCRIPTION:
    {job_description}
    
    ### ADDITIONAL INFORMATION:
    Based on my portfolio, the following skills and technologies from my roles are a strong match for this role: {matched_skills}.
    
    ### INSTRUCTIONS:
    You are Divya Dadi, a Master of Computer Science student at Rice University with extensive software and data engineering experience.
    Your resume includes roles as a Software Engineer at Affekta LLC, a Software Engineer Intern at American Unit Inc, and an Assistant System Engineer at Tata Consultancy Services.
    Using the job description, your resume details, and the matched skills above, generate the following outputs in plain text with clear headings:
    
    1. Cover Letter:
       A formal cover letter to the hiring manager explaining how your skills and experience align with the job requirements.
    
    2. Cold Email:
       A concise cold email to the hiring manager, highlighting your interest in the role and your key qualifications.
    
    3. Message to Hiring Manager:
       A brief message designed to catch the hiring manager's attention, suitable for email or messaging.
    
    4. LinkedIn DM:
       A personable direct message intended for a recruiter on LinkedIn, introducing yourself and expressing your interest in the position.
    
    Include your LinkedIn profile: https://www.linkedin.com/in/divya-dadi-9a2539172/
    
    ### OUTPUT:
    
    """

prompt_multi = PromptTemplate.from_template(MULTI_OUTPUT_TEMPLATE)

# Pipe the multi-output prompt into the LLM and run it on the parsed job
chain_multi = prompt_multi | llm
multi_inputs = {"job_description": str(job), "matched_skills": matched_skills}
res_multi = chain_multi.invoke(multi_inputs)

# Show the generated plain-text documents
print(res_multi.content)


USER_AGENT environment variable not set, consider setting it to identify your requests.


### Cover Letter:
Dear Hiring Manager,

I am excited to apply for the role at your esteemed organization, as advertised. With a strong foundation in computer science and extensive experience in software and data engineering, I am confident that my skills and expertise align with the job requirements.

As a Master of Computer Science student at Rice University, I have developed a solid understanding of computer science principles and a passion for innovative technologies. My professional experience as a Software Engineer at Affekta LLC, Software Engineer Intern at American Unit Inc, and Assistant System Engineer at Tata Consultancy Services has equipped me with the skills to design, develop, and deploy scalable software solutions.

Throughout my career, I have demonstrated my ability to work collaboratively in teams, communicate effectively with stakeholders, and adapt to new technologies and challenges. My experience in software engineering has taught me the importance of attention to 