In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Names of the five CSV inputs, one per dataset. They are bare relative
# names, so they resolve against the notebook's working directory.
(
    books_csv,
    country_sports_csv,
    societies_csv,
    sports_csv,
    volunteer_programs_csv,
) = (
    'books.csv',
    'country_sports.csv',
    'societies.csv',
    'sports.csv',
    'volunteer_programs.csv',
)

In [3]:
# Read every CSV into its own DataFrame. The files are read in the order
# listed, and the unpacking binds each result to its matching *_df name.
(
    books_df,
    country_sports_df,
    societies_df,
    sports_df,
    volunteer_programs_df,
) = map(
    pd.read_csv,
    (
        books_csv,
        country_sports_csv,
        societies_csv,
        sports_csv,
        volunteer_programs_csv,
    ),
)

In [4]:
# Distinct values found across the three classification columns of the book
# catalogue, flattened into a plain Python list.
academic_interests = (
    books_df[
        [
            'classification_level_1',
            'classification_level_2',
            'classification_level_3',
        ]
    ]
    .stack()                    # one long Series holding all three columns
    .reset_index(drop=True)     # discard the (row, column) MultiIndex
    .unique()                   # keep the first occurrence of each value
    .tolist()
)

In [5]:
def _join_text_columns(frame, columns):
    """Concatenate ``columns`` of ``frame`` row by row, separated by one space.

    Missing values are replaced with an empty string before joining, so a
    missing field contributes nothing but its separator.
    """
    joined = frame[columns[0]].fillna('')
    for column in columns[1:]:
        joined = joined + " " + frame[column].fillna('')
    return joined


# Build one free-text "document" per row; these feed the TF-IDF vectorizers.
books_df['combined'] = _join_text_columns(
    books_df,
    ['book_title', 'book_author',
     'classification_level_1', 'classification_level_2', 'classification_level_3'],
)

societies_df['combined'] = _join_text_columns(
    societies_df,
    ['society_name', 'society_type', 'society_description', 'society_keywords'],
)

sports_df['combined'] = _join_text_columns(
    sports_df,
    ['sport_name', 'sport_description', 'sport_keywords'],
)

volunteer_programs_df['combined'] = _join_text_columns(
    volunteer_programs_df,
    ['program_name', 'program_description', 'program_keywords'],
)

In [6]:
def _fit_tfidf_similarity(documents):
    """Fit an English-stop-word TF-IDF model on ``documents``.

    Returns a ``(vectorizer, tfidf_matrix, similarity)`` triple, where
    ``similarity`` is the pairwise cosine similarity between all documents.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    matrix = vectorizer.fit_transform(documents)
    return vectorizer, matrix, cosine_similarity(matrix)


# One independent model per dataset; row i of each similarity matrix
# corresponds to row i of the DataFrame it was built from.
tfidf_books, tfidf_matrix_books, cosine_sim_books = _fit_tfidf_similarity(
    books_df['combined']
)

tfidf_societies, tfidf_matrix_societies, cosine_sim_societies = _fit_tfidf_similarity(
    societies_df['combined']
)

tfidf_sports, tfidf_matrix_sports, cosine_sim_sports = _fit_tfidf_similarity(
    sports_df['combined']
)

(
    tfidf_volunteer_programs,
    tfidf_matrix_volunteer_programs,
    cosine_sim_volunteer_programs,
) = _fit_tfidf_similarity(volunteer_programs_df['combined'])

In [7]:
def _most_similar_rows(frame, name_column, name, cosine_sim, num_recommendation, result_columns):
    """Return the rows of ``frame`` most similar to the row named ``name``.

    Args:
        frame: Catalogue DataFrame; row position i must correspond to
            row/column i of ``cosine_sim``.
        name_column: Column of ``frame`` holding the item names.
        name: Name of the item to find neighbours for (first match wins).
        cosine_sim: Square pairwise-similarity matrix for ``frame``.
        num_recommendation: Maximum number of rows to return; values below
            zero are treated as zero.
        result_columns: Columns of ``frame`` to include in the result.

    Returns:
        A DataFrame with ``result_columns``, ordered from most to least similar.

    Raises:
        IndexError: If no row of ``frame`` has ``name`` in ``name_column``.
    """
    # Work with row *positions*, not index labels: the similarity matrix and
    # ``.iloc`` below are both positional.
    hits = frame[name_column].eq(name).to_numpy(dtype=bool, na_value=False).nonzero()[0]
    if len(hits) == 0:
        raise IndexError(f"{name!r} was not found in column {name_column!r}")
    query_pos = int(hits[0])

    # Drop the queried row explicitly instead of assuming it sorts first:
    # an identical (or all-zero) row with a smaller position would otherwise
    # push the item itself into its own recommendations.
    scored = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[query_pos])
        if pos != query_pos
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)

    top_positions = [pos for pos, _ in scored[:max(num_recommendation, 0)]]
    return frame.iloc[top_positions][result_columns]


def get_book_recommendations(book_title, cosine_sim=cosine_sim_books, num_recommendation=3):
    """Return the books most similar to ``book_title``.

    Args:
        book_title: Exact ``book_title`` value of the reference book.
        cosine_sim: Pairwise similarity matrix aligned with ``books_df`` rows.
        num_recommendation: Maximum number of books to return.

    Returns:
        DataFrame with columns ``book_title`` and ``book_author``.

    Raises:
        IndexError: If ``book_title`` is not present in ``books_df``.
    """
    return _most_similar_rows(
        books_df, 'book_title', book_title, cosine_sim, num_recommendation,
        ['book_title', 'book_author'],
    )


def get_society_recommendations(society_name, cosine_sim=cosine_sim_societies, num_recommendation=3):
    """Return the societies most similar to ``society_name``.

    Args:
        society_name: Exact ``society_name`` value of the reference society.
        cosine_sim: Pairwise similarity matrix aligned with ``societies_df`` rows.
        num_recommendation: Maximum number of societies to return.

    Returns:
        DataFrame with columns ``society_name`` and ``society_type``.

    Raises:
        IndexError: If ``society_name`` is not present in ``societies_df``.
    """
    return _most_similar_rows(
        societies_df, 'society_name', society_name, cosine_sim, num_recommendation,
        ['society_name', 'society_type'],
    )


def get_sport_recommendations(sport_name, cosine_sim=cosine_sim_sports, num_recommendation=3):
    """Return the sports most similar to ``sport_name``.

    Args:
        sport_name: Exact ``sport_name`` value of the reference sport.
        cosine_sim: Pairwise similarity matrix aligned with ``sports_df`` rows.
        num_recommendation: Maximum number of sports to return.

    Returns:
        DataFrame with columns ``sport_name`` and ``sport_description``.

    Raises:
        IndexError: If ``sport_name`` is not present in ``sports_df``.
    """
    return _most_similar_rows(
        sports_df, 'sport_name', sport_name, cosine_sim, num_recommendation,
        ['sport_name', 'sport_description'],
    )


def get_volunteer_program_recommendations(program_name, cosine_sim=cosine_sim_volunteer_programs, num_recommendation=3):
    """Return the volunteer programmes most similar to ``program_name``.

    Args:
        program_name: Exact ``program_name`` value of the reference programme.
        cosine_sim: Pairwise similarity matrix aligned with
            ``volunteer_programs_df`` rows.
        num_recommendation: Maximum number of programmes to return.

    Returns:
        DataFrame with columns ``program_name`` and ``program_description``.

    Raises:
        IndexError: If ``program_name`` is not present in ``volunteer_programs_df``.
    """
    return _most_similar_rows(
        volunteer_programs_df, 'program_name', program_name, cosine_sim, num_recommendation,
        ['program_name', 'program_description'],
    )


In [8]:
def _prompt_for_selection(prompt, options, exactly_one=False):
    """Ask for 1-based menu numbers until the answer is valid.

    Args:
        prompt: Text passed to ``input``.
        options: Menu entries, in the order they were printed.
        exactly_one: When True, only a single number is accepted.

    Returns:
        The chosen entries of ``options`` in the order the user typed them.
        An empty list is returned without prompting when ``options`` is empty.
    """
    if not options:
        return []

    while True:
        tokens = [token.strip() for token in input(prompt).split(',')]
        tokens = [token for token in tokens if token]  # tolerate stray/trailing commas
        try:
            numbers = [int(token) for token in tokens]
        except ValueError:
            print("Invalid input: please enter whole numbers only.")
            continue

        if exactly_one and len(numbers) != 1:
            print("Invalid input: please enter exactly one number.")
            continue
        if not numbers:
            print("Invalid input: please enter at least one number.")
            continue

        # Reject 0 and negatives explicitly: after the 1-based -> 0-based shift
        # they would wrap around and silently pick entries from the end of the list.
        if any(number < 1 or number > len(options) for number in numbers):
            print(f"Invalid input: numbers must be between 1 and {len(options)}.")
            continue

        return [options[number - 1] for number in numbers]


def ask_user_preferences():
    """Interactively collect the user's country, sports and interests.

    Each question prints a numbered menu and reads the answer with ``input``.
    Invalid answers (non-numeric text, numbers outside the menu) are reported
    and the question is asked again instead of raising. The "top 3" wording
    of the prompts is not enforced; any number of valid choices is accepted.

    Returns:
        A tuple ``(country, preferred_sports, academic_interests,
        extracurricular_interests)``: the chosen country followed by three
        lists of the chosen menu entries.
    """
    # Asking user for their country of origin
    countries = country_sports_df['country'].unique().tolist()
    print("Please select your country of origin from the following list:")
    for i, country in enumerate(countries, 1):
        print(f"{i}. {country}")

    country = _prompt_for_selection(
        "Enter the number corresponding to your country: ", countries, exactly_one=True
    )[0]

    # Asking user for their preferred sport from the country sports dataset
    print("Please select your preferred sport(s) from the following list:")
    listed_sports = country_sports_df.loc[
        country_sports_df['country'] == country, 'preferred_sport'
    ].iloc[0]
    # A missing cell is read as a non-string value; treat it as "no sports listed".
    if isinstance(listed_sports, str):
        preferred_sports_list = [
            sport.strip() for sport in listed_sports.split(',') if sport.strip()
        ]
    else:
        preferred_sports_list = []
    for i, sport in enumerate(preferred_sports_list, 1):
        print(f"{i}. {sport}")

    preferred_sports = _prompt_for_selection(
        "Enter the numbers of your preferred sports, separated by commas: ",
        preferred_sports_list,
    )

    # Creating a list of academic activities based on book classification levels
    academic_activities = (
        books_df[['classification_level_1', 'classification_level_2', 'classification_level_3']]
        .stack()
        .dropna()  # keep blank cells out of the menu however stack() treats missing values
        .reset_index(drop=True)
        .unique()
        .tolist()
    )
    print("Please select your top 3 academic interests from the following list:")
    for i, activity in enumerate(academic_activities, 1):
        print(f"{i}. {activity}")

    academic_interests = _prompt_for_selection(
        "Enter the numbers of your top 3 academic interests, separated by commas: ",
        academic_activities,
    )

    # Creating a list of extracurricular activities
    extracurricular_activities = (
        list(societies_df['society_name'].unique())
        + list(volunteer_programs_df['program_name'].unique())
    )
    print("Please select your top 3 extracurricular interests from the following list:")
    for i, activity in enumerate(extracurricular_activities, 1):
        print(f"{i}. {activity}")

    extracurricular_interests = _prompt_for_selection(
        "Enter the numbers of your top 3 extracurricular interests, separated by commas: ",
        extracurricular_activities,
    )

    return country, preferred_sports, academic_interests, extracurricular_interests


In [9]:
def _interleave_unique(candidate_lists, limit):
    """Merge several ranked lists round-robin, dropping duplicates, up to ``limit`` items.

    Taking one item from each list in turn lets every list contribute before
    any list contributes a second item.
    """
    from itertools import zip_longest

    if limit <= 0:
        return []

    exhausted = object()  # filler for lists shorter than the longest one
    picked = []
    seen = set()
    for round_items in zip_longest(*candidate_lists, fillvalue=exhausted):
        for item in round_items:
            if item is exhausted or item in seen:
                continue
            seen.add(item)
            picked.append(item)
            if len(picked) == limit:
                return picked
    return picked


def get_recommendations():
    """Run the questionnaire and build up to three recommendations per category.

    Sports, societies and volunteer programmes are recommended by similarity
    to each selected item; selections that are not present in the matching
    catalogue are skipped. Books are recommended when any of their three
    classification levels equals a selected academic interest. Within each
    category the per-interest results are interleaved and de-duplicated
    before the cap is applied, so the first interest cannot take every slot.

    Returns:
        dict with keys ``'books'``, ``'societies'``, ``'sports'`` and
        ``'volunteer_programs'``, each mapping to a list of at most three names.
    """
    _country, preferred_sports, academic_interests, extracurricular_interests = ask_user_preferences()

    max_per_category = 3
    classification_columns = [
        'classification_level_1', 'classification_level_2', 'classification_level_3'
    ]

    # Build the lookup sets once instead of recomputing unique() per interest.
    known_sports = set(sports_df['sport_name'].dropna())
    known_societies = set(societies_df['society_name'].dropna())
    known_programs = set(volunteer_programs_df['program_name'].dropna())

    # Preferred sports come from the country dataset and may be missing from
    # the sports catalogue; skip those instead of failing on the lookup.
    sport_candidates = [
        get_sport_recommendations(sport)['sport_name'].tolist()
        for sport in preferred_sports
        if sport in known_sports
    ]

    # Academic interests are drawn from all three classification levels, so
    # a book matches when any level equals the interest.
    book_candidates = [
        books_df.loc[
            books_df[classification_columns].eq(interest).any(axis=1), 'book_title'
        ].tolist()
        for interest in academic_interests
    ]

    # Extracurricular interests mix society names and programme names.
    society_candidates = [
        get_society_recommendations(interest)['society_name'].tolist()
        for interest in extracurricular_interests
        if interest in known_societies
    ]
    program_candidates = [
        get_volunteer_program_recommendations(interest)['program_name'].tolist()
        for interest in extracurricular_interests
        if interest in known_programs
    ]

    # Ensure maximum of 3 recommendations for each category
    return {
        'books': _interleave_unique(book_candidates, max_per_category),
        'societies': _interleave_unique(society_candidates, max_per_category),
        'sports': _interleave_unique(sport_candidates, max_per_category),
        'volunteer_programs': _interleave_unique(program_candidates, max_per_category),
    }

# Example usage
# Runs the interactive questionnaire (this blocks on input()) and keeps the
# resulting dict of recommendations; each category is printed in a later cell.
recommendations = get_recommendations()
print("Here are some recommendations for you:")


Please select your country of origin from the following list:
1. Afghanistan
2. Albania
3. Algeria
4. American Samoa
5. Andorra
6. Angola
7. Anguilla
8. Antigua and Barbuda
9. Argentina
10. Armenia
11. Australia
12. Austria
13. Azerbaijan
14. Bahamas
15. Bahrain
16. Bangladesh
17. Barbados
18. Belarus
19. Belgium
20. Bhutan
21. Bolivia
22. Bosnia and Herzegovina
23. Botswana
24. Brazil
25. Brunei
26. Bulgaria
27. Burkina Faso
28. Burundi
29. Cambodia
30. Cameroon
31. Canada
32. Cape Verde
33. Central African Republic
34. Chad
35. Chile
36. China
37. Colombia
38. Comoros
39. Costa Rica
40. Croatia
41. Cuba
42. Cyprus
43. Czech Republic
44. Denmark
45. Djibouti
46. Dominican Republic
47. DR Congo
48. Ecuador
49. Egypt
50. El Salvador
51. Equatorial Guinea
52. Eritrea
53. Estonia
54. Eswatini
55. Ethiopia
56. Fiji
57. Finland
58. France
59. French Guiana
60. Gabon
61. Gambia
62. Georgia
63. Germany
64. Ghana
65. Gibraltar
66. Greece
67. Grenada
68. Guadeloupe
69. Guatemala
70. Guinea
71. 

Enter the numbers of your top 3 academic interests, separated by commas: 1,3,9
Please select your top 3 extracurricular interests from the following list:
1. 57-10 Architecture Society
2. Academic and Social Society of the Postgraduate Law School
3. Academic Supply Chain
4. Biomed Society
5. Ceramics Society
6. Christian Union Society
7. Computing Society
8. Contemporary Art Practice
9. Diagnostic Radiography (D-Radz)
10. Drama Society
11. Engineering Society
12. Entrepreneurship and Innovation Society
13. Feminism Society
14. Friends of Medecins Sans Frontieres Society
15. Gaming Society
16. Ice Skating Society
17. Indian Society
18. Islamic Society
19. Jo Taylor
20. Kpop Society
21. La Sociedad
22. Law Society
23. Midwifery Society
24. Motorsport Society
25. Network (ESN)
26. Nigerian Student Society
27. Nursing Society
28. Occupational Therapy
29. Paintball Society
30. Pakistan Society
31. Paramedic Society
32. Pharmacy Society
33. Physiotherapy Society
34. Research Students Associa

In [10]:
# Show the recommended book titles (get_recommendations keeps at most three).
print("Books:", recommendations['books'])

Books: ['"1900.6b-2022 - IEEE Standard for Spectrum Sensing Interfaces and Data Structures for Dynamic Spectrum Access and Other Advanced Radio Communication Systems Amendment 2 : Spectrum Database Interfaces" ', '"8802-1BA-2023 - ISO/IEC/IEEE International Standard--Information technology -- Telecommunications and information exchange between systems -- Local and metropolitan area networks -- Specific requirements -- Part 1BA : Audio video bridging (AVB) systems" ', '"A subtle and mysterious machine" : the medical world of Walter Charleton (1619-1707) ']


In [11]:
# Show the recommended society names; the list is empty when none of the
# selected extracurricular interests was a society.
print("Societies:", recommendations['societies'])

Societies: []


In [12]:
# Show the recommended sport names (get_recommendations keeps at most three).
print("Sports:", recommendations['sports'])

Sports: ['Hockey', 'Lacrosse', 'Karate']


In [13]:
# Show the recommended volunteer programme names; the list is empty when none
# of the selected extracurricular interests was a volunteer programme.
print("Volunteer Programs:", recommendations['volunteer_programs'])

Volunteer Programs: ['Asthma and Allergy Foundation', 'International Voluntary Service', 'Macmillan cancer support']
