In [18]:
!pip install gradio nltk scikit-learn sentence-transformers pypdf faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m23.8/23.8 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.2


### Imports

In [19]:
import pandas as pd
import numpy as np
import gradio as gr
import nltk
import faiss
from collections import Counter

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem.porter import PorterStemmer
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader

nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

### Load Data

In [20]:
data = pd.read_csv(
    '/content/drive/MyDrive/PROJECTS & WORKS/course_recommendation/Coursera.csv'
)

data = data[
    ['Course Name', 'Difficulty Level', 'Course URL',
     'Course Description', 'Skills']
]

data.fillna('', inplace=True)


In [21]:
data.head()

Unnamed: 0,Course Name,Difficulty Level,Course URL,Course Description,Skills
0,Write A Feature Length Screenplay For Film Or ...,Beginner,https://www.coursera.org/learn/write-a-feature...,Write a Full Length Feature Film Script In th...,Drama Comedy peering screenwriting film D...
1,Business Strategy: Business Model Canvas Analy...,Beginner,https://www.coursera.org/learn/canvas-analysis...,"By the end of this guided project, you will be...",Finance business plan persona (user experien...
2,Silicon Thin Film Solar Cells,Advanced,https://www.coursera.org/learn/silicon-thin-fi...,This course consists of a general presentation...,chemistry physics Solar Energy film lambda...
3,Finance for Managers,Intermediate,https://www.coursera.org/learn/operational-fin...,"When it comes to numbers, there is always more...",accounts receivable dupont analysis analysis...
4,Retrieve Data using Single-Table SQL Queries,Beginner,https://www.coursera.org/learn/single-table-sq...,In this course youÔøΩll learn how to effectively...,Data Analysis select (sql) database manageme...


In [22]:
data.shape

(3522, 5)

In [23]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3522 entries, 0 to 3521
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Course Name         3522 non-null   object
 1   Difficulty Level    3522 non-null   object
 2   Course URL          3522 non-null   object
 3   Course Description  3522 non-null   object
 4   Skills              3522 non-null   object
dtypes: object(5)
memory usage: 137.7+ KB


In [24]:
data.describe()

Unnamed: 0,Course Name,Difficulty Level,Course URL,Course Description,Skills
count,3522,3522,3522,3522,3522
unique,3416,5,3424,3397,3424
top,Google Cloud Platform Fundamentals: Core Infra...,Beginner,https://www.coursera.org/learn/gcp-fundamentals,This course introduces you to important concep...,Google Cloud Platform Big Data Cloud Infrast...
freq,8,1444,8,8,8


In [25]:
data.isnull().sum()

Unnamed: 0,0
Course Name,0
Difficulty Level,0
Course URL,0
Course Description,0
Skills,0


### Cleaning & Tags

In [26]:
def clean_text(text):
    return (
        text.lower()
            .replace(' ', ',')
            .replace(':', '')
            .replace('(', '')
            .replace(')', '')
    )

for col in ['Course Name', 'Course Description', 'Skills']:
    data[col] = data[col].apply(clean_text)

data['tags'] = (
    data['Course Name']
    + data['Difficulty Level']
    + data['Course Description']
    + data['Skills']
)


### Final Dataframe

In [27]:
df = data[['Course Name', 'Course URL', 'Difficulty Level', 'tags']]
df['Course Name'] = df['Course Name'].str.replace(',', ' ')
df.rename(columns={
    'Course Name': 'course_name',
    'Course URL': 'course_url',
    'Difficulty Level': 'difficulty'
}, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Course Name'] = df['Course Name'].str.replace(',', ' ')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={


### Content Engine

In [13]:
ps = PorterStemmer()

df['tags'] = df['tags'].apply(
    lambda x: " ".join(ps.stem(w) for w in x.split())
)

cv = CountVectorizer(max_features=8000, stop_words='english')
content_vectors = cv.fit_transform(df['tags']).toarray()
content_sim = cosine_similarity(content_vectors)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(


### Semantic Embeddings (Core Represntation: BERT)

In [28]:
bert = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = bert.encode(df['tags'], show_progress_bar=True)

# Normalize for FAISS cosine similarity
faiss.normalize_L2(embeddings)

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Batches:   0%|          | 0/111 [00:00<?, ?it/s]

### Faiss Index (Scalable Search)

In [29]:
dim = embeddings.shape[1]
faiss_index = faiss.IndexFlatIP(dim)   # Inner Product = cosine (normalized)
faiss_index.add(embeddings)


### Resume Skill Extraction

In [30]:
SKILLS = set("""
python java c++ sql machine learning deep learning ai data science nlp
computer vision tensorflow pytorch pandas numpy statistics cloud aws
azure gcp docker kubernetes finance marketing
""".split())

def read_resume(file):
    if file is None:
        return ""
    if file.name.endswith(".pdf"):
        reader = PdfReader(file)
        return " ".join(page.extract_text() or "" for page in reader.pages)
    return file.read().decode("utf-8")

def extract_skills(text):
    text = text.lower()
    return ", ".join(sorted({s for s in SKILLS if s in text}))


### User Feedback Memory (Real System)

In [31]:
user_feedback = []   # in-memory (interview-safe explanation)

def store_feedback(course_name, liked=True):
    user_feedback.append({
        "course": course_name,
        "liked": liked
    })


### Final Recommender (FAISS + Explainable)

In [32]:
def ultimate_recommend(resume_file, difficulty, top_n):
    resume_text = read_resume(resume_file)
    skills = extract_skills(resume_text)

    if not skills:
        return "‚ùå No skills detected from resume."

    query_emb = bert.encode([skills])
    faiss.normalize_L2(query_emb)

    scores, indices = faiss_index.search(query_emb, top_n * 3)

    results = df.iloc[indices[0]].copy()

    if difficulty != "All":
        results = results[results['difficulty'] == difficulty]

    results = results.head(top_n)

    output = f"## üìÑ Resume-Based Recommendations\n\n"
    output += f"**Extracted skills:** {skills}\n\n"

    for _, row in results.iterrows():
        output += (
            f"### [{row.course_name}]({row.course_url})\n"
            f"- Difficulty: **{row.difficulty}**\n"
            f"- Why: semantic match with resume skills\n\n"
        )

    return output


### Learning Path (FAISS-powered)

In [36]:
def ultimate_learning_path(resume_file):
    resume_text = read_resume(resume_file)
    skills = extract_skills(resume_text)

    if not skills:
        return "‚ùå No skills detected."

    query_emb = bert.encode([skills])
    faiss.normalize_L2(query_emb)

    scores, indices = faiss_index.search(query_emb, 30)
    temp = df.iloc[indices[0]].copy()

    output = f"# üß≠ Learning Path\n\n**Skills:** {skills}\n\n"

    for level in ['Beginner', 'Intermediate', 'Advanced']:
        subset = temp[temp['difficulty'] == level].head(2)
        if not subset.empty:
            output += f"## {level}\n"
            for _, row in subset.iterrows():
                output += f"- [{row.course_name}]({row.course_url})\n"
            output += "\n"

    return output


### Gradio (UI)

In [38]:
def analytics():
    if not user_feedback:
        return "No user feedback collected yet."

    counts = Counter([f["course"] for f in user_feedback if f["liked"]])
    output = "## üìä User Interaction Analytics\n\n"

    for course, cnt in counts.most_common(5):
        output += f"- {course}: {cnt} likes\n"

    return output

with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# üß† Ultimate Resume-Aware Course Recommendation System")
    gr.Markdown(
        "Semantic ‚Ä¢ Scalable ‚Ä¢ Explainable ‚Ä¢ Feedback-Driven ‚Ä¢ Built in Colab"
    )

    with gr.Tab("üìÑ Resume ‚Üí Courses"):
        resume = gr.File(label="Upload Resume (PDF or TXT)")
        diff = gr.Dropdown(
            ["All"] + sorted(df['difficulty'].unique()),
            value="All"
        )
        topn = gr.Slider(3, 10, value=5)
        out = gr.Markdown()

        gr.Button("üöÄ Recommend").click(
            ultimate_recommend,
            inputs=[resume, diff, topn],
            outputs=out
        )

    with gr.Tab("üß≠ Learning Path"):
        resume_lp = gr.File(label="Upload Resume")
        lp_out = gr.Markdown()

        gr.Button("üìö Generate Path").click(
            ultimate_learning_path,
            inputs=resume_lp,
            outputs=lp_out
        )

    with gr.Tab("üìä Analytics"):
        analytics_out = gr.Markdown()
        gr.Button("üìà View Analytics").click(
            analytics,
            outputs=analytics_out
        )


  with gr.Blocks(theme=gr.themes.Soft()) as app:


In [39]:
app.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fb29c515e494c6a044.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


