In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load every alumni profile; these rows are the candidates that get ranked later.
alumni_csv_path = '/content/sample_data/alumni_profiles_final.csv'
alumni_data = pd.read_csv(alumni_csv_path)

In [None]:
# Load the student profiles that recommendations are generated for.
student_csv_path = '/content/sample_data/student_profiles_final.csv'
student_data = pd.read_csv(student_csv_path)

In [None]:
# Inspect the record stored at position 100 (0-based), showing every column.
student_data.iloc[100, :]

Unnamed: 0,100
profile_id,101
name,person 101
sector,Technology & IT
skills,python
interests,
current_industry,


In [None]:
# Hand-built profile for one extra student. Only these four fields are supplied;
# any other column of the student table is left unset for this person.
new_student = pd.Series(
    {
        'profile_id': '101',
        'name': 'person 101',
        'sector': 'Technology & IT',
        'skills': 'python',
    }
)

In [None]:
# Append the hand-built student only when that profile_id is not in the table yet.
# Without this guard the cell is not idempotent: every re-run (or a CSV that
# already contains the row) adds another copy of the same profile.
# IDs are compared as strings because the new row stores profile_id as text.
already_present = (
    student_data['profile_id'].astype(str).eq(str(new_student['profile_id'])).any()
)
if not already_present:
    # to_frame().T turns the Series into a one-row frame whose columns are the
    # Series labels; columns the new row lacks are filled with NaN by concat.
    student_data = pd.concat(
        [student_data, new_student.to_frame().T], ignore_index=True
    )

In [None]:
# Copy of the student table without the row labelled 5; student_data is unchanged.
student = student_data.drop(index=5)

In [None]:
# Show the first five rows of the filtered student table as plain text.
print(student.head(5))

  profile_id      name                         sector  \
0          1  Person 1        Government & Non-Profit   
1          2  Person 2           Hospitality & Travel   
2          3  Person 3              Finance & Banking   
3          4  Person 4  Agriculture & Food Production   
4          5  Person 5    Engineering & Manufacturing   

                                              skills  \
0  Grant Writing, Humanitarian Aid, Public Policy...   
1  Reservation Management, Hotel Operations, Hosp...   
2  Equity Research, Financial Modeling, Quantitat...   
3            Rural Development, Livestock Management   
4  Product Design, Thermodynamics, Project Schedu...   

                                           interests        current_industry  
0          Public Policy, Environmental Conservation              Non-Profit  
1    Luxury Travel, Cruise Tourism, Cultural Tourism        Event Management  
2  Portfolio Management, Stock Market, Cryptocurr...       Wealth Management  
3  A

In [None]:
# 0-based row position of the student to generate recommendations for.
selected_position = 3
selected_student = student_data.iloc[selected_position]

In [None]:
def compute_sector_similarity(student_sector, alumni_sector):
    """Return 1 when the two sector values compare equal, otherwise 0."""
    if student_sector == alumni_sector:
        return 1
    return 0

In [None]:
def compute_text_similarity(student_feature, alumni_feature):
    """Score one student's text against every alumni text with TF-IDF cosine similarity.

    The TF-IDF vocabulary is fitted on the alumni texts only (English stop words
    removed); the student text is then projected into that same space.

    Parameters
    ----------
    student_feature : str or missing value
        Text describing the student. A missing value (None/NaN) is treated as
        empty text instead of making the vectorizer raise.
    alumni_feature : iterable of str
        One text per alumni profile. Missing entries are treated as empty text.

    Returns
    -------
    numpy.ndarray
        1-D array holding one similarity per alumni entry, in input order.

    Raises
    ------
    ValueError
        Propagated from TfidfVectorizer when the alumni texts yield an empty
        vocabulary (e.g. every entry is empty or consists only of stop words).
    """
    # TfidfVectorizer rejects NaN documents, so normalise missing values first.
    alumni_texts = pd.Series(alumni_feature).fillna('').astype(str)
    if not isinstance(student_feature, str):
        student_feature = '' if pd.isna(student_feature) else str(student_feature)

    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(alumni_texts)
    # The student text is wrapped in a list because transform expects a collection.
    student_tfidf = tfidf.transform([student_feature])
    # cosine_similarity returns a (1, n_alumni) matrix; flatten it to 1-D.
    return cosine_similarity(student_tfidf, tfidf_matrix).flatten()

In [None]:
# Print both column indexes, one per line, to confirm the two tables line up.
print(alumni_data.columns, student_data.columns, sep='\n')

Index(['profile_id', 'name', 'sector', 'skills', 'interests',
       'current_industry'],
      dtype='object')
Index(['profile_id', 'name', 'sector', 'skills', 'interests',
       'current_industry'],
      dtype='object')


In [None]:
# Exact-match sector signal: 1 for alumni in the student's sector, 0 otherwise.
target_sector = selected_student['sector']
sector_similarities = alumni_data['sector'].apply(
    lambda alumni_sector: compute_sector_similarity(target_sector, alumni_sector)
)

# Text-based signals: one score per alumni row for each free-text field.
interest_similarities = compute_text_similarity(
    student_feature=selected_student['interests'],
    alumni_feature=alumni_data['interests'],
)
skill_similarities = compute_text_similarity(
    student_feature=selected_student['skills'],
    alumni_feature=alumni_data['skills'],
)
industry_similarities = compute_text_similarity(
    student_feature=selected_student['current_industry'],
    alumni_feature=alumni_data['current_industry'],
)

In [None]:
# Weight given to each similarity signal in the combined score.
SECTOR_WEIGHT = 0.4
INTEREST_WEIGHT = 0.3
INDUSTRY_WEIGHT = 0.2
SKILL_WEIGHT = 0.1

final_scores = (
    SECTOR_WEIGHT * sector_similarities
    + INTEREST_WEIGHT * interest_similarities
    + INDUSTRY_WEIGHT * industry_similarities
    + SKILL_WEIGHT * skill_similarities
)

# argsort orders ascending; reversing it puts the highest-scoring alumni first.
top_indices = final_scores.argsort()[::-1]
recommendations = alumni_data.iloc[top_indices]

# Keep only the descriptive columns for the ten best matches.
display_columns = ['name', 'sector', 'skills', 'interests', 'current_industry']
top_10_recommendations = recommendations[display_columns].head(10)

In [None]:
# Header naming the student, followed by the table of the ten best matches.
report_header = f"Top 10 Recommended Alumni for {selected_student['name']}:\n"
print(report_header, top_10_recommendations)

Top 10 Recommended Alumni for Person 4:
              name                         sector  \
1911  Person 1912  Agriculture & Food Production   
1172  Person 1173  Agriculture & Food Production   
1440  Person 1441  Agriculture & Food Production   
1230  Person 1231  Agriculture & Food Production   
135    Person 136  Agriculture & Food Production   
990    Person 991  Agriculture & Food Production   
901    Person 902  Agriculture & Food Production   
1489  Person 1490  Agriculture & Food Production   
963    Person 964  Agriculture & Food Production   
1790  Person 1791  Agriculture & Food Production   

                                                 skills  \
1911  Agribusiness, Post-Harvest Management, Organic...   
1172  Animal Husbandry, Organic Farming, Farm Manage...   
1440            Dairy Farming, Agricultural Engineering   
1230          Agricultural Engineering, Food Processing   
135   Fisheries Management, Horticulture, Irrigation...   
990   Agricultural Engineering, 

In [None]:
# Read the student CSV again into a separate frame and preview its first ten rows.
raw_students_path = '/content/sample_data/student_profiles_final.csv'
df = pd.read_csv(raw_students_path)
print(df.head(10))

   profile_id       name                         sector  \
0           1   Person 1        Government & Non-Profit   
1           2   Person 2           Hospitality & Travel   
2           3   Person 3              Finance & Banking   
3           4   Person 4  Agriculture & Food Production   
4           5   Person 5    Engineering & Manufacturing   
5           6   Person 6     Real Estate & Construction   
6           7   Person 7          Media & Entertainment   
7           8   Person 8             Energy & Utilities   
8           9   Person 9        Consumer Goods & Retail   
9          10  Person 10  Agriculture & Food Production   

                                              skills  \
0  Grant Writing, Humanitarian Aid, Public Policy...   
1  Reservation Management, Hotel Operations, Hosp...   
2  Equity Research, Financial Modeling, Quantitat...   
3            Rural Development, Livestock Management   
4  Product Design, Thermodynamics, Project Schedu...   
5  Constructio