In [1]:
%%writefile app.py
import html
import time
import warnings
from urllib.parse import quote

import numpy as np
import pandas as pd
import streamlit as st
from streamlit_option_menu import option_menu
from streamlit_extras.add_vertical_space import add_vertical_space
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
warnings.filterwarnings('ignore')

def streamlit_config():
    """Set up the Streamlit page and draw its static header.

    Applies the page title and wide layout, injects the CSS overrides for
    the body background and the header bar, then renders the centered
    heading followed by the banner image.
    """
    st.set_page_config(layout="wide", page_title='Smart Resume Parser')

    # CSS overrides: body background colour and a transparent header bar
    css_overrides = """
    <style>
    body {
        background-color: #20B2AA;
    }
    [data-testid="stHeader"] {
        background: rgba(0,0,0,0);
    }
    </style>
    """
    st.markdown(css_overrides, unsafe_allow_html=True)

    # centered page heading
    heading_html = '<h1 style="text-align: center;">Smart Resume Parser</h1>'
    st.markdown(heading_html, unsafe_allow_html=True)

    # banner image, loaded from a remote URL
    banner_url = "https://www.recruitbpm.com/blog/wp-content/uploads/2023/04/Resume-Parser-Software.png"
    st.image(banner_url, caption="", use_container_width=True)
    
class resume_analyzer:
    """Resume analysis helpers built on PDF text extraction and OpenAI models.

    Every member is a static method, so the class is used purely as a
    namespace, e.g. ``resume_analyzer.resume_summary(pdf, openai_api_key)``.
    """

    @staticmethod
    def pdf_to_chunks(pdf):
        """Extract the text of ``pdf`` and split it into overlapping chunks.

        Args:
            pdf: The uploaded resume; passed unchanged to ``PdfReader``.

        Returns:
            The list of chunks produced by ``RecursiveCharacterTextSplitter``
            (``chunk_size=700``, ``chunk_overlap=200``). The list is empty
            when no text could be extracted.
        """
        pdf_reader = PdfReader(pdf)

        # `or ""` guards against a page for which extract_text() yields
        # nothing; join avoids repeated string concatenation.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

        # Split the long text into small chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=200, length_function=len)
        return text_splitter.split_text(text=text)

    @staticmethod
    def openai(openai_api_key, chunks, analyze):
        """Answer ``analyze`` from the chunks most similar to it.

        The chunks are embedded into a FAISS index, the 3 chunks most similar
        to ``analyze`` are retrieved, and a 'stuff' question-answering chain
        running ``gpt-3.5-turbo`` is asked ``analyze`` over those chunks.

        Args:
            openai_api_key: Key used for both the embedding and chat models.
            chunks: List of text chunks to index.
            analyze: Prompt used both as the similarity query and as the
                question for the chain.

        Returns:
            The chain's response.

        Raises:
            ValueError: If ``chunks`` is empty (for example a resume with no
                extractable text), since there is nothing to index.
        """
        # Fail early with a readable message instead of letting the vector
        # store fail on an empty input list.
        if not chunks:
            raise ValueError('No readable text was found in the resume. Please upload a text-based PDF.')

        # OpenAI service for embedding
        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
        vectorstores = FAISS.from_texts(chunks, embedding=embeddings)

        # Search for the most similar chunks based on the query
        docs = vectorstores.similarity_search(query=analyze, k=3)

        # Create an OpenAI object, using ChatGPT 3.5 Turbo model
        llm = ChatOpenAI(model='gpt-3.5-turbo', api_key=openai_api_key)
        chain = load_qa_chain(llm=llm, chain_type='stuff')

        # Run the question-answering pipeline
        return chain.run(input_documents=docs, question=analyze)

    @staticmethod
    def summary_prompt(query_with_chunks):
        """Build the summarization prompt around ``query_with_chunks``."""
        return f''' need to detailed summarization of below resume and finally conclude them

                    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                    {query_with_chunks}
                    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                    '''

    @staticmethod
    def summarize_skills_query(summary):
        """Build the prompt asking for the key skills found in ``summary``."""
        return f'''Identify and list the key skills mentioned in the following resume summary:

                """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                {summary}
                """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
            '''

    @staticmethod
    def recommend_skills_query(summary):
        """Build the prompt asking for existing and recommended skills."""
        return f"Based on the resume, identify skills the candidate possesses and recommend additional skills for career growth:\n{summary}"

    @staticmethod
    def recommend_courses_query(summary):
        """Build the prompt asking for course and certification suggestions."""
        return f"Suggest and courses or certifications to enhance the skills mentioned in the resume and improve the candidate's career prospects and list them:\n{summary}"

    @staticmethod
    def resume_tips_query(summary):
        """Build the prompt asking for resume improvement tips."""
        return f"List professional tips to improve this resume for better presentation and relevance in simple points:\n{summary}"

    @staticmethod
    def _summary_from_chunks(pdf_chunks, openai_api_key):
        """Summarize resume chunks that have already been extracted.

        The chunk list itself is interpolated into the summary prompt, so the
        prompt contains the list's string representation.
        """
        summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks)
        return resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary_prompt)

    @staticmethod
    def resume_summary(pdf, openai_api_key):
        """Return a model-written summary of the resume in ``pdf``."""
        pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
        return resume_analyzer._summary_from_chunks(pdf_chunks, openai_api_key)

    @staticmethod
    def resume_strength(pdf, openai_api_key):
        """Return a model-written analysis of the resume's strengths.

        The PDF is parsed once; the summary and the final answer both reuse
        the same chunks.
        """
        pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
        summary = resume_analyzer._summary_from_chunks(pdf_chunks, openai_api_key)
        strength_prompt = f'''need to detailed analysis and explain of the strength of below resume and finally conclude them

                    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                    {summary}
                    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                    '''
        return resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=strength_prompt)

    @staticmethod
    def resume_weakness(pdf, openai_api_key):
        """Return a model-written analysis of weaknesses and how to fix them.

        The PDF is parsed once; the summary and the final answer both reuse
        the same chunks.
        """
        pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
        summary = resume_analyzer._summary_from_chunks(pdf_chunks, openai_api_key)
        weakness_prompt = f'''need to detailed analysis and explain of the weakness of below resume and how to improve make a better resume.

                    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                    {summary}
                    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                    '''
        return resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=weakness_prompt)

    @staticmethod
    def job_title_suggestion(pdf, openai_api_key):
        """Return model-suggested job roles for the resume.

        The PDF is parsed once; the summary and the final answer both reuse
        the same chunks.
        """
        pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
        summary = resume_analyzer._summary_from_chunks(pdf_chunks, openai_api_key)
        job_title_prompt = f'''what are the job roles i can apply to based on the below resume?

                    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                    {summary}
                    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                    '''
        return resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=job_title_prompt)

    @staticmethod
    def skills_and_courses(pdf, openai_api_key):
        """Return ``(skills, recommended_skills, courses)`` for the resume.

        The resume is summarized once and each of the three questions is then
        answered with the summary as the only indexed chunk.
        """
        summary = resume_analyzer.resume_summary(pdf, openai_api_key)

        skills_query = resume_analyzer.summarize_skills_query(summary)
        skills_result = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=[summary], analyze=skills_query)

        recommended_skills_query = resume_analyzer.recommend_skills_query(summary)
        recommended_skills_result = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=[summary], analyze=recommended_skills_query)

        courses_query = resume_analyzer.recommend_courses_query(summary)
        courses = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=[summary], analyze=courses_query)

        return skills_result, recommended_skills_result, courses

    @staticmethod
    def resume_tips(pdf, openai_api_key):
        """Return model-written tips for improving the resume.

        The tips question is answered with the resume summary as the only
        indexed chunk.
        """
        summary = resume_analyzer.resume_summary(pdf, openai_api_key)
        tips_query = resume_analyzer.resume_tips_query(summary)
        return resume_analyzer.openai(openai_api_key=openai_api_key, chunks=[summary], analyze=tips_query)


class linkedin_scraper:
    """Scrape LinkedIn job listings with Selenium and show them in Streamlit.

    Every member is a static method, so the class is used purely as a
    namespace, e.g. ``linkedin_scraper.main()``.
    """

    @staticmethod
    def webdriver_setup():
        """Create and return a headless Chrome WebDriver with a maximized window."""
        options = webdriver.ChromeOptions()
        for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
            options.add_argument(flag)

        driver = webdriver.Chrome(options=options)
        driver.maximize_window()
        return driver

    @staticmethod
    def get_userinput():
        """Render the search form and return the values entered.

        Returns:
            tuple: ``(job_title_input, job_location, job_count, submit)``
            where ``job_title_input`` is the list of non-blank,
            comma-separated titles (``[]`` when the box is empty),
            ``job_location`` is the stripped location text, ``job_count`` is
            the number input's value and ``submit`` is the form button state.
        """
        add_vertical_space(2)
        with st.form(key='linkedin_scarp'):

            add_vertical_space(1)
            col1, col2, col3 = st.columns([0.5, 0.3, 0.2], gap='medium')
            with col1:
                raw_titles = st.text_input(label='Job Title')
                # ''.split(',') is [''], never []; drop blank entries so an
                # empty box is reported as an empty list.
                job_title_input = [title.strip() for title in raw_titles.split(',') if title.strip()]
            with col2:
                job_location = st.text_input(label='Job Location', value='United States').strip()
            with col3:
                job_count = st.number_input(label='Job Count', min_value=1, value=1, step=1)

            # Submit Button
            add_vertical_space(1)
            submit = st.form_submit_button(label='Submit')
            add_vertical_space(1)

        return job_title_input, job_location, job_count, submit

    @staticmethod
    def build_url(job_title, job_location):
        """Build the LinkedIn job-search URL for the given titles.

        Args:
            job_title: List of job-title strings; they are combined into one
                comma-separated ``keywords`` query value.
            job_location: Not used when building the URL (the ``geoId`` in
                the link is fixed); kept for interface compatibility.

        Returns:
            str: The search URL with the keywords percent-encoded.
        """
        # Collapse inner whitespace and skip blank titles.
        terms = [' '.join(title.split()) for title in job_title]
        joined = ', '.join(term for term in terms if term)

        # Percent-encode everything so characters such as '&', '+' or '#'
        # cannot break the query string.
        keywords = quote(joined, safe='')
        return f"https://www.linkedin.com/jobs/search/?currentJobId=4061304622&geoId=103644278&keywords={keywords}&origin=JOB_SEARCH_PAGE_SEARCH_BUTTON&refresh=true"

    @staticmethod
    def open_link(driver, link, max_attempts=10):
        """Load ``link``, retrying until the expected page element is present.

        Args:
            driver: Selenium WebDriver used to load the page.
            link: URL to open.
            max_attempts: Maximum number of page loads before giving up.

        Raises:
            NoSuchElementException: If the marker element is still missing
                after ``max_attempts`` loads.
        """
        for _ in range(max_attempts):
            driver.get(link)
            driver.implicitly_wait(5)
            time.sleep(3)
            try:
                # The element being present indicates the page loaded correctly
                driver.find_element(by=By.CSS_SELECTOR, value='span.switcher-tabs__placeholder-text.m-auto')
                return
            except NoSuchElementException:
                # Retry loading the page
                continue

        # Bounded retries: give up instead of looping forever.
        raise NoSuchElementException(f'Page did not load correctly after {max_attempts} attempts: {link}')

    @staticmethod
    def link_open_scrolldown(driver, link, job_count):
        """Open the search page and scroll to the end ``job_count`` times.

        On every pass the sign-in modal is dismissed and the 'See more jobs'
        button is clicked when either can be found.
        """
        # Open the Link in LinkedIn
        linkedin_scraper.open_link(driver, link)

        for _ in range(job_count):

            # Simulate clicking the Page Up button
            body = driver.find_element(by=By.TAG_NAME, value='body')
            body.send_keys(Keys.PAGE_UP)

            # Dismiss the sign-in modal dialog; best-effort because the
            # modal is not always there.
            try:
                driver.find_element(by=By.CSS_SELECTOR,
                                    value="button[data-tracking-control-name='public_jobs_contextual-sign-in-modal_modal_dismiss']>icon>svg").click()
            except Exception:
                pass

            # Scroll down the page to the end
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            driver.implicitly_wait(2)

            # Click on See More Jobs button if present; best-effort as well
            try:
                driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
                driver.implicitly_wait(5)
            except Exception:
                pass

    @staticmethod
    def job_title_filter(scrap_job_title, user_job_title_input):
        """Return the scraped title when it matches any user title, else NaN.

        A user title matches when every one of its whitespace-separated words
        occurs (case-insensitively, as a substring) in the scraped title.
        Blank user titles never match, and a non-string scraped title (such
        as NaN from a missing cell) yields NaN.
        """
        if not isinstance(scrap_job_title, str):
            return np.nan

        scraped = scrap_job_title.lower().strip()
        for wanted in user_job_title_input:
            words = wanted.lower().split()
            # `words` must be non-empty: all() of nothing is True and would
            # make a blank title match every job.
            if words and all(word in scraped for word in words):
                return scrap_job_title

        return np.nan

    @staticmethod
    def scrap_company_data(driver, job_title_input, job_location):
        """Scrape the listing page into a filtered DataFrame.

        Returns:
            pandas.DataFrame: Columns 'Company Name', 'Job Title', 'Location'
            and 'Website URL', keeping only rows whose title matches
            ``job_title_input`` and whose location contains ``job_location``
            (case-insensitive); the index is reset.
        """
        # Scraping the company data
        company = driver.find_elements(by=By.CSS_SELECTOR, value='h4[class="base-search-card__subtitle"]')
        company_name = [element.text for element in company]

        location = driver.find_elements(by=By.CSS_SELECTOR, value='span[class="job-search-card__location"]')
        company_location = [element.text for element in location]

        title = driver.find_elements(by=By.CSS_SELECTOR, value='h3[class="base-search-card__title"]')
        job_title = [element.text for element in title]

        url = driver.find_elements(by=By.XPATH, value='//a[contains(@href, "/jobs/")]')
        website_url = [element.get_attribute('href') for element in url]

        # Series align on position and pad with NaN, so lists of different
        # (or zero) length still combine; incomplete rows are dropped below.
        df = pd.DataFrame({
            'Company Name': pd.Series(company_name, dtype=object),
            'Job Title': pd.Series(job_title, dtype=object),
            'Location': pd.Series(company_location, dtype=object),
            'Website URL': pd.Series(website_url, dtype=object),
        })

        # Keep the job title only when it matches a user title, else NaN
        df['Job Title'] = df['Job Title'].apply(lambda x: linkedin_scraper.job_title_filter(x, job_title_input))

        # Keep the location only when it contains the user location, else NaN
        wanted_location = job_location.lower()
        df['Location'] = df['Location'].apply(
            lambda x: x if isinstance(x, str) and wanted_location in x.lower() else np.nan)

        # Drop null values and reset index
        df = df.dropna()
        df.reset_index(drop=True, inplace=True)

        return df

    @staticmethod
    def scrap_job_description(driver, df, job_count):
        """Visit job links and attach their descriptions to ``df``.

        Links are visited in order until ``job_count`` non-empty, non-duplicate
        descriptions have been collected. Rows that were not visited, or
        whose description is unavailable or a duplicate, are dropped.

        Returns:
            pandas.DataFrame: The kept rows with an added 'Job Description'
            column and a reset index.
        """
        # One entry per visited row; None marks an unavailable description
        job_description = []
        description_count = 0

        for link in df['Website URL'].tolist():
            data = None
            try:
                # Open the Link in LinkedIn
                linkedin_scraper.open_link(driver, link)

                # Click on Show More Button
                driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
                driver.implicitly_wait(5)
                time.sleep(1)

                # Get Job Description (IndexError when nothing is found)
                description = driver.find_elements(by=By.CSS_SELECTOR, value='div[class="show-more-less-html__markup relative overflow-hidden"]')
                data = description[0].text

            # Best-effort per link: any failure marks the row unavailable
            except Exception:
                data = None

            # Check description length and duplicate
            if data is not None and len(data.strip()) > 0 and data not in job_description:
                job_description.append(data)
                description_count += 1
            else:
                job_description.append(None)

            # Stop once the requested number of descriptions is reached
            if description_count == job_count:
                break

        # Keep only the visited rows; copy so the column assignment below
        # does not write into a slice of the caller's frame.
        df = df.iloc[:len(job_description), :].copy()

        # Add the descriptions and drop the rows without one
        df['Job Description'] = job_description
        df = df.dropna()
        df.reset_index(drop=True, inplace=True)
        return df

    @staticmethod
    def display_data_userinterface(df_final):
        """Render each job posting in ``df_final``, or a 'no match' notice."""
        add_vertical_space(1)
        if len(df_final) == 0:
            st.markdown('<h5 style="text-align: center;color: orange;">No Matching Jobs Found</h5>',
                        unsafe_allow_html=True)
            return

        for i in range(len(df_final)):
            st.markdown(f'<h3 style="color: orange;">Job Posting Details : {i+1}</h3>', unsafe_allow_html=True)
            st.write(f"Company Name : {df_final.iloc[i, 0]}")
            st.write(f"Job Title    : {df_final.iloc[i, 1]}")
            st.write(f"Location     : {df_final.iloc[i, 2]}")
            st.write(f"Website URL  : {df_final.iloc[i, 3]}")

            with st.expander(label='Job Desription'):
                st.write(df_final.iloc[i, 4])
            add_vertical_space(3)

    @staticmethod
    def main():
        """Run the LinkedIn job-search flow inside the Streamlit page.

        Shows the form and, once it is submitted with a title and a location,
        scrapes and displays the matching jobs. Any exception is shown on the
        page, and the WebDriver is always closed. Returns ``None``.
        """
        # Initially set driver to None so `finally` knows whether to quit it
        driver = None

        try:
            job_title_input, job_location, job_count, submit = linkedin_scraper.get_userinput()
            add_vertical_space(2)

            if submit:
                if job_title_input != [] and job_location != '':

                    with st.spinner('Chrome Webdriver Setup Initializing...'):
                        driver = linkedin_scraper.webdriver_setup()

                    with st.spinner('Loading More Job Listings...'):

                        # Build URL based on the user's job titles
                        link = linkedin_scraper.build_url(job_title_input, job_location)

                        # Open the link in LinkedIn and scroll down the page
                        linkedin_scraper.link_open_scrolldown(driver, link, job_count)

                    with st.spinner('scraping Job Details...'):

                        # Scrape the company name, location, job title and URL
                        df = linkedin_scraper.scrap_company_data(driver, job_title_input, job_location)

                        # Scrape the job descriptions
                        df_final = linkedin_scraper.scrap_job_description(driver, df, job_count)

                    # Display the data in the user interface
                    linkedin_scraper.display_data_userinterface(df_final)

                # Submitted with an empty job title
                elif job_title_input == []:
                    st.markdown('<h5 style="text-align: center;color: orange;">Job Title is Empty</h5>',
                                unsafe_allow_html=True)

                # Submitted with an empty job location
                elif job_location == '':
                    st.markdown('<h5 style="text-align: center;color: orange;">Job Location is Empty</h5>',
                                unsafe_allow_html=True)

        except Exception as e:
            add_vertical_space(2)
            # Escape the message: it is inserted into raw HTML
            st.markdown(f'<h5 style="text-align: center;color: orange;">{html.escape(str(e))}</h5>',
                        unsafe_allow_html=True)

        finally:
            if driver:
                driver.quit()


# Streamlit Configuration Setup
streamlit_config()
add_vertical_space(2)

# Sidebar for navigation
with st.sidebar:
    add_vertical_space(4)
    option = option_menu(menu_title='', options=['Summary', 'Strength', 'Weakness', 'Job Titles', 'Skills & Courses', 'Resume Tips', 'LinkedIn Jobs'],
                         icons=['house-fill', 'database-fill', 'pass-fill', 'list-ul', 'tools', 'clipboard-check','linkedin'])

    # User Upload the Resume
    add_vertical_space(1)
    pdf = st.file_uploader(label='Upload Your Resume', type='pdf')
    add_vertical_space(1)

    # Enter OpenAI API Key
    col1, col2 = st.columns([0.6, 0.4])
    with col1:
        openai_api_key = st.text_input(label='Enter OpenAI API Key', type='password')
    add_vertical_space(2)

    # Submit Button. NOTE: its value is stored but not consulted below;
    # processing runs whenever a resume and an API key are both present.
    submit = st.button(label='Submit')
    add_vertical_space(1)


def _render_section(heading, content):
    """Write an orange section heading followed by its content."""
    st.markdown(f'<h4 style="color: orange;">{heading}</h4>', unsafe_allow_html=True)
    st.write(content)


if pdf is not None and openai_api_key != '':
    try:
        with st.spinner('Processing...'):

            # Run only the section selected in the sidebar
            if option == 'Summary':
                _render_section('Summary:', resume_analyzer.resume_summary(pdf, openai_api_key))

            elif option == 'Strength':
                _render_section('Strength:', resume_analyzer.resume_strength(pdf, openai_api_key))

            elif option == 'Weakness':
                _render_section('Weakness and Suggestions:', resume_analyzer.resume_weakness(pdf, openai_api_key))

            elif option == 'Job Titles':
                _render_section('Job Titles:', resume_analyzer.job_title_suggestion(pdf, openai_api_key))

            elif option == 'Skills & Courses':
                skills_result, recommended_skills_result, courses = resume_analyzer.skills_and_courses(pdf, openai_api_key)

                # Display skills you have
                _render_section('Skills You Have:', skills_result)

                # Display recommended skills
                _render_section('Recommended Skills:', recommended_skills_result)

                # Display courses recommended
                _render_section('Recommended Courses:', courses)

            elif option == 'Resume Tips':
                _render_section('Resume Tips:', resume_analyzer.resume_tips(pdf, openai_api_key))

            elif option == 'LinkedIn Jobs':
                # linkedin_scraper.main() renders its own form and results and
                # returns None, so show the heading first and do not write
                # the return value to the page.
                st.markdown('<h4 style="color: orange;">LinkedIn Jobs:</h4>', unsafe_allow_html=True)
                linkedin_scraper.main()

    except Exception as e:
        st.error(f"Error: {str(e)}")

elif pdf is None:
    st.markdown('<h5 style="text-align: center;color: orange;">Please Upload Your Resume</h5>', unsafe_allow_html=True)

elif openai_api_key == '':
    st.markdown('<h5 style="text-align: center;color: orange;">Please Enter OpenAI API Key</h5>', unsafe_allow_html=True)


Overwriting app.py


In [None]:
!streamlit run app.py

[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8502[0m
[34m  Network URL: [0m[1mhttp://10.0.0.43:8502[0m
[0m

>> from langchain.embeddings import OpenAIEmbeddings

with new imports of:

>> from langchain_community.embeddings import OpenAIEmbeddings
You can use the langchain cli to **automatically** upgrade many imports. Please see documentation here <https://python.langchain.com/docs/versions/v0_2/>
  from langchain.embeddings.openai import OpenAIEmbeddings

>> from langchain.vectorstores import FAISS

with new imports of:

>> from langchain_community.vectorstores import FAISS
You can use the langchain cli to **automatically** upgrade many imports. Please see documentation here <https://python.langchain.com/docs/versions/v0_2/>
  from langchain.vectorstores import FAISS

`from langchain_community.chat_models import ChatOpenAI`.

To install langchain-community run `pip install -U langchain-community`.

`from