## Build My First Recommendation Engine

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## Explanation: This dataset contains 10 students and their areas of interest. The interest keywords are what we use to calculate similarity.

In [31]:
 # Create a dataset of students with their interests
data = dict(
    Student=[
        "Amit",
        "Sneha",
        "Ravi",
        "Priya",
        "Vikram",
        "Meena",
        "Kunal",
        "Sara",
        "Neha",
        "Anuj",
    ],
    # Each entry is one comma-separated string of interests, aligned by
    # position with the Student list above.
    Interests=[
        "Machine Learning, Python, AI",
        "Web Development, HTML, CSS, JavaScript",
        "Blockchain, Cryptocurrency, Security",
        "Data Science, Python, Statistics",
        "Mobile App Development, Android, Kotlin",
        "Cybersecurity, Networking, Ethical Hacking",
        "Deep Learning, AI, Neural Networks",
        "Frontend, ReactJS, CSS, HTML",
        "Cloud Computing, AWS, DevOps",
        "Backend Development, NodeJS, MongoDB",
    ],
)

# One row per student, with the columns 'Student' and 'Interests'
df = pd.DataFrame(data)
df

Unnamed: 0,Student,Interests
0,Amit,"Machine Learning, Python, AI"
1,Sneha,"Web Development, HTML, CSS, JavaScript"
2,Ravi,"Blockchain, Cryptocurrency, Security"
3,Priya,"Data Science, Python, Statistics"
4,Vikram,"Mobile App Development, Android, Kotlin"
5,Meena,"Cybersecurity, Networking, Ethical Hacking"
6,Kunal,"Deep Learning, AI, Neural Networks"
7,Sara,"Frontend, ReactJS, CSS, HTML"
8,Neha,"Cloud Computing, AWS, DevOps"
9,Anuj,"Backend Development, NodeJS, MongoDB"


## TF-IDF Vectorization: it converts text into numerical form.

In [32]:
 # Initialize the TF-IDF Vectorizer
# Learn the vocabulary and IDF weights from every student's interests text
vectorizer = TfidfVectorizer().fit(df['Interests'])

# Encode the same texts with the fitted vectorizer: one row per student
tfidf_matrix = vectorizer.transform(df['Interests'])

# The second dimension is the size of the learned vocabulary (unique tokens)
print("TF-IDF Matrix Shape:", tfidf_matrix.shape)

TF-IDF Matrix Shape: (10, 35)


In [61]:
 # Example input interest
# Comma-separated interests of the person we want recommendations for
user_input = "Python,Machine Learning,Deep Learning"

# Encode the query with the already-fitted vectorizer so that it shares the
# dataset's vocabulary and IDF weights (no re-fitting happens here)
user_tfidf = vectorizer.transform(raw_documents=[user_input])
user_tfidf

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 4 stored elements and shape (1, 35)>

## Cosine similarity gives a score between 0 and 1 for non-negative TF-IDF vectors (higher means more similar)

In [62]:
 # Compute cosine similarity between input and dataset
# Compare the single query vector against every student's vector
similarity_scores = cosine_similarity(X=user_tfidf, Y=tfidf_matrix)

# Display the resulting scores, one per student
similarity_scores

array([[0.75124576, 0.        , 0.        , 0.15808715, 0.        ,
        0.        , 0.48952487, 0.        , 0.        , 0.        ]])

## We get the top 3 students with the most similar interests, based on the cosine score

In [63]:
# How many recommendations to show (previously a hard-coded 3 in both the
# slice and the header text)
TOP_N = 3

# Rank students from most to least similar. Sorting the negated scores with a
# stable sort keeps students with equal scores in their original DataFrame
# order, so the ranking is reproducible (a reversed default argsort gives no
# such guarantee for ties, and most scores here tie at 0).
ranked_indices = (-similarity_scores[0]).argsort(kind='stable')

# Keep the best TOP_N, then drop anyone with zero similarity: a student who
# shares no vocabulary with the query is not a meaningful recommendation.
top_indices = ranked_indices[:TOP_N]
top_indices = top_indices[similarity_scores[0][top_indices] > 0]
print("Top indices:", top_indices)

# Display the recommendations
print(f"Top {TOP_N} Recommended Students/Courses:")
if len(top_indices) == 0:
    print("No students share any interest with the given input.")
for i in top_indices:
    score = similarity_scores[0][i]
    student = df.iloc[i]  # df.iloc[i] gives the i-th row of the DataFrame
    print(f"{student['Student']} ➤ Interests: {student['Interests']} (Score:{score})")

Top indices: [0 6 3]
Top 3 Recommended Students/Courses:
Amit ➤ Interests: Machine Learning, Python, AI (Score:0.7512457562087003)
Kunal ➤ Interests: Deep Learning, AI, Neural Networks (Score:0.4895248674965103)
Priya ➤ Interests: Data Science, Python, Statistics (Score:0.15808714974561422)


## By manual matching

In [64]:
# Split the query on commas, then trim and lower-case each piece; collecting
# the pieces in a set removes duplicates
user_keywords = {token.strip().lower() for token in user_input.split(',')}
print("User Keywords:", user_keywords)

User Keywords: {'deep learning', 'machine learning', 'python'}


In [66]:
# Create a new column for match score
def calculate_match_score(row, keywords=None):
    """Count how many keywords appear in one comma-separated interests string.

    Matching is exact on whole comma-separated phrases after trimming
    whitespace and lower-casing, so 'Machine Learning' matches
    'machine learning' but not 'learning'.

    Parameters
    ----------
    row : str
        Comma-separated interests, e.g. 'Data Science, Python, Statistics'.
    keywords : iterable of str, optional
        Keywords to look for. When omitted, the notebook-level
        ``user_keywords`` set is used, so existing single-argument calls
        such as ``df['Interests'].apply(calculate_match_score)`` still work.

    Returns
    -------
    int
        Number of distinct keywords found among the interests.
    """
    if keywords is None:
        # Fall back to the notebook-level keyword set
        keywords = user_keywords

    wanted = {keyword.strip().lower() for keyword in keywords}
    interests = {part.strip().lower() for part in row.split(',')}

    # Stray commas ("Python,, AI" or a trailing ",") produce empty tokens;
    # they must never count as a shared interest.
    wanted.discard('')
    interests.discard('')

    return len(wanted & interests)  # Intersection count

# Score every student's interests string and store the counts in a new column
df['Match_Score'] = df['Interests'].map(calculate_match_score)

In [71]:
# Order students from highest to lowest keyword-match count
df_sorted = df.sort_values('Match_Score', ascending=False)

# Print the three best matches as a plain-text table without the index
print("Top 3 Recommended Students/Courses  (Manual Matching):")
print(df_sorted.head(3)[['Student', 'Interests', 'Match_Score']].to_string(index=False))

Top 3 Recommended Students/Courses  (Manual Matching):
Student                          Interests  Match_Score
   Amit       Machine Learning, Python, AI            2
  Priya   Data Science, Python, Statistics            1
  Kunal Deep Learning, AI, Neural Networks            1
