<a href="https://colab.research.google.com/github/Yuvakrishna782/Resume_Screener/blob/main/AI_project2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install spacy pandas streamlit
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl (400.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.7/400.7 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
import spacy
import pandas as pd
import streamlit as st
from collections import Counter
import re

In [None]:
# spaCy pipeline shared by every call to extract_skills; loaded once per kernel.
nlp = spacy.load("en_core_web_lg")

# Job title -> keywords a resume is scored against. Keywords are lowercase
# because extract_skills lowercases the resume text before parsing it.
JOB_KEYWORDS = {
    "Data Scientist": [
        "python",
        "machine learning",
        "data analysis",
        "sql",
        "statistics",
    ],
    "AI Engineer": [
        "python",
        "deep learning",
        "tensorflow",
        "pytorch",
        "nlp",
    ],
    "Software Engineer": [
        "python",
        "java",
        "c++",
        "software development",
        "algorithms",
    ],
    "DevOps Engineer": [
        "aws",
        "docker",
        "kubernetes",
        "ci/cd",
        "terraform",
    ],
}

In [None]:
def extract_skills(text):
    """Collect candidate skill strings from a piece of text.

    The text is lowercased and parsed with the module-level ``nlp`` pipeline.
    Two kinds of strings are gathered:

    * noun chunks made of at most three whitespace-separated words, and
    * individual tokens tagged ``NOUN`` or ``PROPN`` that are longer than
      two characters.

    Args:
        text: String to analyse.

    Returns:
        list: The distinct strings found, in no particular order.
    """
    parsed = nlp(text.lower())

    # Short noun phrases first, then single nouns / proper nouns.
    candidates = [
        phrase.text
        for phrase in parsed.noun_chunks
        if len(phrase.text.split()) <= 3
    ]
    candidates.extend(
        word.text
        for word in parsed
        if word.pos_ in ["NOUN", "PROPN"] and len(word.text) > 2
    )

    # Deduplicate; ordering of the result is not meaningful.
    return list(set(candidates))

In [None]:
def calculate_score(resume_text, job_title):
    """Score a resume by how many of the job's keywords it covers.

    A keyword counts as covered when it contains, or is contained in, at least
    one string returned by ``extract_skills``. Each keyword is counted at most
    once, so repeating a single skill many times in a resume cannot inflate
    the score.

    Args:
        resume_text: Resume text, forwarded to ``extract_skills``.
        job_title: Key into ``JOB_KEYWORDS``.

    Returns:
        int: Rounded percentage (0-100) of the job's keywords that are covered.
        0 when ``job_title`` is unknown or has no keywords.
    """
    keywords = JOB_KEYWORDS.get(job_title)
    if not keywords:
        # Unknown title, or an empty keyword list (which would otherwise
        # cause a division by zero below).
        return 0

    skills = extract_skills(resume_text)

    # Iterate over keywords (not skills) so the numerator can never exceed
    # the denominator and the score reflects keyword coverage.
    covered = sum(
        1
        for kw in keywords
        if any(kw in skill or skill in kw for skill in skills)
    )
    return round(covered / len(keywords) * 100)

In [None]:
def analyze_resumes(df, job_title, top_n):
    """Score every resume in ``df`` for a job title and return the best ones.

    Args:
        df: DataFrame whose rows provide ``str_resume`` (resume text) and
            ``application_name`` (a string; a ``.pdf`` suffix is removed).
        job_title: Key into ``JOB_KEYWORDS``.
        top_n: Maximum number of rows to return.

    Returns:
        pandas.DataFrame: Columns ``Name``, ``Score``, ``Matched Keywords`` and
        ``Resume`` (first 500 characters followed by ``...``), sorted by
        ``Score`` descending and truncated to ``top_n`` rows. An empty frame
        with the same columns is returned when ``df`` has no rows.

    Raises:
        KeyError: If ``df`` has rows and ``job_title`` is not in
            ``JOB_KEYWORDS``.
    """
    columns = ["Name", "Score", "Matched Keywords", "Resume"]
    if len(df) == 0:
        # Nothing to score; keep the return type stable for callers.
        return pd.DataFrame(columns=columns)

    keywords = JOB_KEYWORDS[job_title]
    results = []

    for _, row in df.iterrows():
        resume_text = row['str_resume']
        score = calculate_score(resume_text, job_title)
        skills = extract_skills(resume_text)

        # A keyword matches when it contains, or is contained in, any skill.
        matched_keywords = [
            kw
            for kw in keywords
            if any(kw in skill or skill in kw for skill in skills)
        ]

        results.append({
            "Name": row['application_name'].replace(".pdf", ""),
            "Score": score,
            # dict.fromkeys de-duplicates while keeping keyword order stable.
            "Matched Keywords": ", ".join(dict.fromkeys(matched_keywords)),
            "Resume": resume_text[:500] + "...",
        })

    # Build, sort and truncate only after ALL rows have been processed.
    results_df = pd.DataFrame(results, columns=columns)
    return results_df.sort_values("Score", ascending=False).head(top_n)

In [None]:
def clean_resume_text(text):
    """Normalise raw resume text before skill extraction.

    Every character that is not a word character, whitespace, basic
    punctuation (``. , ; : ! ? -``) or one of ``+ / #`` is replaced by a
    space; runs of whitespace are then collapsed to a single space and the
    result is stripped.

    ``+``, ``/`` and ``#`` are preserved because skill names depend on them:
    ``JOB_KEYWORDS`` contains "c++" and "ci/cd", which could never match a
    resume once those characters were removed (the same applies to names
    such as "c#").

    Args:
        text: Raw resume text. Non-string values (e.g. NaN from pandas) are
            treated as missing.

    Returns:
        str: The cleaned text, or ``""`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ""
    text = re.sub(r'[^\w\s.,;:!?\-+/#]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [None]:
def main():
    """Streamlit entry point for the resume screener.

    Lets the user upload a CSV, pick a job title and a shortlist size, then
    runs ``analyze_resumes`` and shows the ranked candidates with a CSV
    download. The CSV must provide the ``application_name`` and
    ``str_resume`` columns. Any failure while processing the file is reported
    through ``st.error`` instead of propagating.
    """
    st.title("AI Resume Screener")
    st.write("Upload a CSV file containing resumes to analyze and shortlist candidates.")
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    if uploaded_file is None:
        return

    try:
        df = pd.read_csv(uploaded_file)

        # Fail with a readable message instead of a bare KeyError later on.
        required_columns = ("application_name", "str_resume")
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            st.error(f"CSV is missing required column(s): {', '.join(missing)}")
            return

        total_resumes = len(df)
        if total_resumes == 0:
            st.warning("The uploaded CSV contains no resumes.")
            return

        df['str_resume'] = df['str_resume'].apply(clean_resume_text)
        job_title = st.selectbox("Select Job Title", list(JOB_KEYWORDS.keys()))

        if total_resumes > 1:
            top_n = st.slider("Number of candidates to shortlist",
                              min_value=1,
                              max_value=total_resumes,
                              value=min(5, total_resumes))
        else:
            # A 1..1 slider offers no choice, and Streamlit's slider expects
            # min_value to be below max_value, so skip the widget entirely.
            top_n = 1

        if st.button("Analyze Resumes"):
            with st.spinner("Analyzing resumes..."):
                results_df = analyze_resumes(df, job_title, top_n)

            st.success("Analysis Complete!")
            st.write(f"Top {top_n} candidates for {job_title}:")
            for idx, row in results_df.iterrows():
                with st.expander(f"{row['Name']} (Score: {row['Score']})"):
                    st.write(f"**Matched Keywords:** {row['Matched Keywords']}")
                    st.write("**Resume Excerpt:**")
                    st.write(row['Resume'])
            csv = results_df.to_csv(index=False).encode('utf-8')
            st.download_button(
                label="Download Results as CSV",
                data=csv,
                file_name=f"shortlisted_candidates_{job_title}.csv",
                mime='text/csv'
            )

    except Exception as e:
        # Deliberately broad: surface any parsing/analysis failure in the UI.
        st.error(f"Error processing file: {str(e)}")

if __name__ == "__main__":
    main()

2025-05-30 06:38:00.768 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


In [None]:
%%writefile resume_screener.py
# NOTE(review): this cell has no code body, so it creates resume_screener.py
# without any app code. Later %%writefile cells overwrite the same path.

Writing resume_screener.py


In [None]:
!pip install streamlit pyngrok



In [None]:
%%writefile resume_screener.py
# Minimal smoke-test app: renders a single title and nothing else.
# NOTE(review): a later %%writefile cell overwrites this same file.
import streamlit as st

st.title("Hello Streamlit")


Overwriting resume_screener.py


In [None]:
%%writefile resume_screener.py
# Stub Streamlit app: shows a title and one line of text.
# NOTE(review): this is the file launched by `streamlit run resume_screener.py`.
# It does not contain the screener functions defined in the notebook cells
# (extract_skills, calculate_score, analyze_resumes, main), so the public
# tunnel serves only this stub -- confirm whether that is intended.
import streamlit as st

st.title(" Resume Screener")
st.write("Streamlit app running in Colab using ngrok tunnel.")


Overwriting resume_screener.py


In [None]:
!pip install -q streamlit pyngrok
!pkill -f ngrok

from pyngrok import ngrok
ngrok.set_auth_token("2xnsoTS9MDZ4WYw56Numc3O3TXZ_2atkkUK99HpLVRv3oqpXW")
ngrok.kill()
public_url = ngrok.connect("http://localhost:8501")
print(" Streamlit app is live at:", public_url)
!streamlit run resume_screener.py &


✅ Streamlit app is live at: NgrokTunnel: "https://ae97-34-125-142-206.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.125.142.206:8501[0m
[0m
