In [20]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


In [10]:
# Mock data for aspirants
aspirants = pd.DataFrame({
    'aspirant_id': [1, 2, 3, 4, 5],
    'preferred_subject': ['Legal Reasoning', 'English', 'Logical Reasoning', 'GK & Current Affairs', 'English'],
    'target_college': ['NLSIU', 'NLU Delhi', 'NALSAR', 'NLU Jodhpur', 'NLU Bhopal'],
    'prep_level': ['Beginner', 'Intermediate', 'Advanced', 'Intermediate', 'Beginner'],
    'learning_style': ['Visual', 'Reading', 'Video-based', 'Reading', 'Visual']
})

# Expanded Mentor data
mentors = pd.DataFrame({
    'mentor_id': [101, 102, 103, 104, 105, 106],
    'name': ['Aanya', 'Kabir', 'Riya', 'Dev', 'Ishaan', 'Meera'],
    'expert_subject': ['Legal Reasoning', 'English', 'Logical Reasoning', 'GK & Current Affairs', 'English', 'Legal Reasoning'],
    'college': ['NLSIU', 'NLU Delhi', 'NALSAR', 'NLU Jodhpur', 'NLU Bhopal', 'NLU Patna'],
    'teaching_style': ['Visual', 'Reading', 'Video-based', 'Reading', 'Visual', 'Reading']
})

print(" Aspirants:\n")
print(aspirants)

print("\n Mentors:\n")
print(mentors)

 Aspirants:

   aspirant_id     preferred_subject target_college    prep_level  \
0            1       Legal Reasoning          NLSIU      Beginner   
1            2               English      NLU Delhi  Intermediate   
2            3     Logical Reasoning         NALSAR      Advanced   
3            4  GK & Current Affairs    NLU Jodhpur  Intermediate   
4            5               English     NLU Bhopal      Beginner   

  learning_style  
0         Visual  
1        Reading  
2    Video-based  
3        Reading  
4         Visual  

 Mentors:

   mentor_id    name        expert_subject      college teaching_style
0        101   Aanya       Legal Reasoning        NLSIU         Visual
1        102   Kabir               English    NLU Delhi        Reading
2        103    Riya     Logical Reasoning       NALSAR    Video-based
3        104     Dev  GK & Current Affairs  NLU Jodhpur        Reading
4        105  Ishaan               English   NLU Bhopal         Visual
5        106   Meera

In [11]:
aspirants.head(5)

Unnamed: 0,aspirant_id,preferred_subject,target_college,prep_level,learning_style
0,1,Legal Reasoning,NLSIU,Beginner,Visual
1,2,English,NLU Delhi,Intermediate,Reading
2,3,Logical Reasoning,NALSAR,Advanced,Video-based
3,4,GK & Current Affairs,NLU Jodhpur,Intermediate,Reading
4,5,English,NLU Bhopal,Beginner,Visual


In [12]:
mentors.head(6)

Unnamed: 0,mentor_id,name,expert_subject,college,teaching_style
0,101,Aanya,Legal Reasoning,NLSIU,Visual
1,102,Kabir,English,NLU Delhi,Reading
2,103,Riya,Logical Reasoning,NALSAR,Video-based
3,104,Dev,GK & Current Affairs,NLU Jodhpur,Reading
4,105,Ishaan,English,NLU Bhopal,Visual
5,106,Meera,Legal Reasoning,NLU Patna,Reading


In [13]:
aspirants.describe()

Unnamed: 0,aspirant_id
count,5.0
mean,3.0
std,1.581139
min,1.0
25%,2.0
50%,3.0
75%,4.0
max,5.0


In [14]:
mentors.describe()

Unnamed: 0,mentor_id
count,6.0
mean,103.5
std,1.870829
min,101.0
25%,102.25
50%,103.5
75%,104.75
max,106.0


In [19]:
asp = aspirants.copy()
men = mentors.copy()

men = men.rename(columns={'expert_subject': 'preferred_subject','college': 'target_college','teaching_style': 'learning_style'})

asp['type'] = 'aspirant'
men['type'] = 'mentor'

# The combined DataFrame should include the 'type' column
combined = pd.concat([asp, men], ignore_index=True)

# Use the combined DataFrame with the 'type' column for one-hot encoding
encoded = pd.get_dummies(combined[['preferred_subject', 'target_college', 'prep_level', 'learning_style']])  # Only include relevant columns for encoding

# Now you can separate them using the 'type' column
asp_encoded = encoded[combined['type'] == 'aspirant'].reset_index(drop=True)
men_encoded = encoded[combined['type'] == 'mentor'].reset_index(drop=True)

print("✅ Preprocessing done!")
print("\nAspirant features:\n", asp_encoded.head())
print("\nMentor features:\n", men_encoded.head())

✅ Preprocessing done!

Aspirant features:
    preferred_subject_English  preferred_subject_GK & Current Affairs  \
0                      False                                   False   
1                       True                                   False   
2                      False                                   False   
3                      False                                    True   
4                       True                                   False   

   preferred_subject_Legal Reasoning  preferred_subject_Logical Reasoning  \
0                               True                                False   
1                              False                                False   
2                              False                                 True   
3                              False                                False   
4                              False                                False   

   target_college_NALSAR  target_college_NLSIU  target_colleg

In [22]:
# Calculating cosine similarity between each aspirant and each mentor
similarity_matrix = cosine_similarity(asp_encoded, men_encoded)

# Showing top 3 mentor recommendations for each aspirant
for i, scores in enumerate(similarity_matrix):
    top_3 = np.argsort(scores)[-3:][::-1]  # Get indices of top 3 mentors
    print(f"\n Aspirant {aspirants.loc[i, 'aspirant_id']} Recommendations:")
    for rank, idx in enumerate(top_3, 1):
        mentor_name = mentors.loc[idx, 'name']
        sim_score = scores[idx]
        print(f"   {rank}. {mentor_name} (Score: {sim_score:.2f})")



 Aspirant 1 Recommendations:
   1. Aanya (Score: 0.87)
   2. Meera (Score: 0.29)
   3. Ishaan (Score: 0.29)

 Aspirant 2 Recommendations:
   1. Kabir (Score: 0.87)
   2. Meera (Score: 0.29)
   3. Ishaan (Score: 0.29)

 Aspirant 3 Recommendations:
   1. Riya (Score: 0.87)
   2. Meera (Score: 0.00)
   3. Ishaan (Score: 0.00)

 Aspirant 4 Recommendations:
   1. Dev (Score: 0.87)
   2. Meera (Score: 0.29)
   3. Kabir (Score: 0.29)

 Aspirant 5 Recommendations:
   1. Ishaan (Score: 0.87)
   2. Aanya (Score: 0.29)
   3. Kabir (Score: 0.29)
