# 12. Testing ways to generate module recommendations for students

In [None]:
import pandas as pd

from google.colab import drive

# Attach Google Drive to the Colab runtime. force_remount=True requests a
# fresh mount even when the mount point is already in use.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point, force_remount=True)

# Read the reviews CSV (primary/secondary emotion labels, going by its file
# name) from Drive into a DataFrame.
reviews_csv_path = "/content/drive/MyDrive/final_processed_primary_secondary_emotion.csv"
df_reviews = pd.read_csv(reviews_csv_path)

Mounted at /content/drive


# Idea behind Module Recommendation for Users
* users submit preferences based on questions:
    * how do you want to feel at the end of the module?
        * maps to modules that match emotions most recently
    * What aspects of the module do you care about?
        * user selects topics:
          * {'Exams', 'Workload', 'Assignments', 'Lectures', 'Grading', 'Course Engagement and Course Feelings(Sentiment)', 'Course Structure', 'Utility and Usefulness', 'Learning Outcomes', 'Course Material and Understanding', 'Instructor', 'Course Material'}
        * the user is recommended the top 3 modules whose emotions and sentiment have been most positive recently or consistently (for example, over the most recent 6 months), identified by the number that each emotion is mapped to
    * What are you interested in learning about?
      * users can select the topics that they are interested in learning about, based on how modules have been categorised:
        1. "Programming": ["Python", "Programming", "Object-Oriented", "C++", "Code"],
        2. "Machine Learning": ["Machine Learning", "Data Science", "Algorithm", "Big Data"],
        3. "Cybersecurity": ["Security", "Information Security"],
        4. "Data Analysis": ["Data", "Visualization", "Statistics", "SQL", "Analytics"],
        5. "Quantum Computing": ["Quantum"],
        6. "Business": ["Business", "Management", "Lean Six Sigma", "Models"],
        7. "Other": []  # Default category for uncategorized entries

In [None]:
# Gather every distinct label found in the per-row lists of
# merge_df["topic_summary"], then report how many there are.
# NOTE(review): merge_df is not created in any cell visible above — confirm it
# exists before this cell runs on a fresh kernel.
unique_items = set()
for topic_list in merge_df["topic_summary"]:
    unique_items.update(topic_list)
print(f"Number of unique items: {len(unique_items)}")
print(unique_items)


Number of unique items: 12
{'Exams', 'Workload', 'Assignments', 'Lectures', 'Grading', 'Course Engagement and Course Feelings(Sentiment)', 'Course Structure', 'Utility and Usefulness', 'Learning Outcomes', 'Course Material and Understanding', 'Instructor', 'Course Material'}


In [None]:
# Show the distinct values of the "name" column (the module titles) so the
# subject-category keywords can be checked against real names.
df_reviews["name"].unique()

array(['Machine Learning',
       'Programming for Everybody (Getting Started with Python)',
       'Programming Languages, Part A', 'The Data Scientist’s Toolbox',
       'Using Databases with Python', 'Using Python to Access Web Data',
       'Introduction to Data Science in Python', 'Python Basics',
       'Algorithmic Toolbox',
       'Information Security: Context and Introduction',
       'SQL for Data Science',
       'Fundamentals of Visualization with Tableau',
       'Computational Thinking for Problem Solving',
       'Programming Fundamentals',
       'Applied Plotting, Charting & Data Representation in Python',
       'Introduction to Big Data', 'Applied Machine Learning in Python',
       'Capstone: Retrieving, Processing, and Visualizing Data with Python',
       'Python Functions, Files, and Dictionaries',
       'Object-Oriented Data Structures in C++',
       'Python and Statistics for Financial Analysis',
       'Data Analytics for Lean Six Sigma',
       'Python Pro

# Script to demonstrate module recommendation

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

# Sample DataFrame (replace with your actual dataset).
# Ten hand-written review rows, one list entry per row in every column, used
# only to demonstrate the recommender below.
modules = pd.DataFrame({
    # Free-text review; not read by the scoring code in this cell.
    "reviews": [
        "Fantastic introduction to machine learning concepts.",
        "Very engaging Python course, helped me get started with programming.",
        "Covers the basics of cybersecurity, but the assignments are tough.",
        "Loved the visualization techniques taught in this course.",
        "The instructor was great, but the workload was intense.",
        "Quantum computing explained very clearly.",
        "Useful for learning about business strategies and digital transformation.",
        "The SQL lessons were detailed and easy to follow.",
        "Good introduction to algorithms, though it requires strong math skills.",
        "Great for learning about data analysis and visualization."
    ],
    # ISO date strings; converted to datetime right after the frame is built.
    "date_reviews": [
        "2023-11-01", "2023-10-15", "2023-09-20", "2023-08-05", "2023-07-10",
        "2023-06-25", "2023-05-30", "2023-04-18", "2023-03-12", "2023-02-05"
    ],
    "rating": [4.8, 4.5, 4.0, 4.9, 3.8, 4.2, 4.7, 4.6, 4.3, 4.9],
    "course_id": [
        "machine-learning", "python-basics", "cybersecurity-basics",
        "data-visualization", "intense-workload", "quantum-computing",
        "business-strategies", "sql-course", "algorithmic-toolbox", "data-analysis"
    ],
    "month": [
        "2023-11", "2023-10", "2023-09", "2023-08", "2023-07",
        "2023-06", "2023-05", "2023-04", "2023-03", "2023-02"
    ],
    "departments": [
        "Computer Science", "Computer Science", "Cybersecurity",
        "Data Analysis", "General Studies", "Quantum Computing",
        "Business", "Data Analysis", "Computer Science", "Data Analysis"
    ],
    # Module title; recommend_modules searches it for the category keywords.
    "name": [
        "Machine Learning", "Programming for Everybody", "Introduction to Cybersecurity",
        "Visualization Techniques", "High-Intensity Course", "Quantum Computing Basics",
        "Digital Business Models", "SQL for Data Science", "Algorithmic Toolbox", "Data Analytics for Beginners"
    ],
    "institution": [
        "Stanford University", "University of Michigan", "MIT",
        "University of Washington", "Harvard University", "Caltech",
        "INSEAD", "UC Davis", "University of Illinois", "London School of Economics"
    ],
    "sentiment_from_rating": [
        "positive", "positive", "neutral", "positive", "neutral",
        "positive", "positive", "positive", "neutral", "positive"
    ],
    "emotion_label": [
        "joy", "joy", "fear", "joy", "sadness",
        "joy", "joy", "joy", "fear", "joy"
    ],
    "sentiment_label_from_review": [
        "positive", "positive", "neutral", "positive", "neutral",
        "positive", "positive", "positive", "neutral", "positive"
    ],
    "BART_Topic": [
        ["Course Material", "Instructor"], ["Learning Outcomes"], ["Assignments"],
        ["Visualization", "Learning Outcomes"], ["Workload"],
        ["Course Structure"], ["Business Strategies", "Course Engagement"],
        ["Course Material"], ["Algorithms", "Exams"], ["Data Analysis"]
    ],
    "LDA_topic_3": [
        "Learning Outcomes", "Learning Outcomes", "Assignments",
        "Learning Outcomes", "Workload", "Course Structure",
        "Business Strategies", "Learning Outcomes", "Algorithms", "Data Analysis"
    ],
    "LDA_topic_4": [
        "Learning Outcomes", "Course Material", "Assignments",
        "Visualization", "Workload", "Course Structure",
        "Business Strategies", "Learning Outcomes", "Algorithms", "Visualization"
    ],
    # List of topic labels per review; recommend_modules intersects it with the
    # aspects the user selected.
    "topic_summary": [
        ["Learning Outcomes", "Course Material", "Instructor"],
        ["Learning Outcomes"], ["Assignments", "Course Material"],
        ["Visualization", "Learning Outcomes"], ["Workload", "Instructor"],
        ["Course Structure"], ["Business Strategies", "Course Engagement"],
        ["Learning Outcomes", "Course Material"], ["Algorithms", "Exams"],
        ["Data Analysis", "Visualization"]
    ],
    # Emotion offered as the answer options of question 1 and compared with the
    # user's choice in recommend_modules.
    "primary_emotion_label": [
        "joy", "joy", "fear", "joy", "sadness",
        "joy", "joy", "joy", "fear", "joy"
    ]
})

# Convert date_reviews to datetime for time-series filtering, so the dates can
# be compared with the six-month cutoff computed in recommend_modules.
modules["date_reviews"] = pd.to_datetime(modules["date_reviews"])

# User Preferences
def get_user_preferences():
    """Interactively ask the three preference questions and return the answers.

    The questions are, in order: (1) one target emotion, chosen from the
    distinct values of ``modules["primary_emotion_label"]``; (2) one or more
    module aspects, entered as comma-separated menu numbers; (3) one subject
    category.

    Returns:
        tuple: ``(selected_emotion, selected_aspects, keywords)`` where
        ``selected_aspects`` is a list of aspect labels and ``keywords`` is the
        keyword list of the chosen category (empty for "Other").

    Raises:
        ValueError: if an answer is not a whole number, or is outside the
            numbered options shown for that question.
    """

    def pick(raw_answer, options):
        """Map a 1-based menu number typed by the user onto ``options``."""
        number = int(raw_answer.strip())  # non-numeric text raises ValueError
        if not 1 <= number <= len(options):
            # Without this guard "0" or "-2" would index from the end of the
            # list and silently select an option the user never chose.
            raise ValueError(
                f"Choice {number} is out of range; enter a number from 1 to {len(options)}."
            )
        return options[number - 1]

    print("Welcome to the Module Recommendation System!")

    # Q1: How do you want to feel at the end of the module?
    emotion_options = modules["primary_emotion_label"].unique()
    print("\n1. How do you want to feel at the end of the module?")
    for i, emotion in enumerate(emotion_options, 1):
        print(f"{i}. {emotion}")
    selected_emotion = pick(input("Select an emotion (e.g., 1 for joy): "), emotion_options)

    # Q2: What aspects of the module do you care about?
    # The labels must equal the topic labels stored in the data character for
    # character, because recommend_modules matches them by set intersection.
    # The data label has no space before "(Sentiment)".
    aspects = [
        "Exams", "Workload", "Assignments", "Lectures", "Grading",
        "Course Engagement and Course Feelings(Sentiment)", "Course Structure",
        "Utility and Usefulness", "Learning Outcomes",
        "Course Material and Understanding", "Instructor", "Course Material"
    ]
    print("\n2. What aspects of the module do you care about? (Select multiple by separating with commas)")
    for i, aspect in enumerate(aspects, 1):
        print(f"{i}. {aspect}")
    aspect_answer = input("Enter the numbers of the aspects (e.g., 1,3,5): ")
    selected_aspects = [pick(token, aspects) for token in aspect_answer.split(",")]

    # Q3: What are you interested in learning about?
    categories = {
        "Programming": ["Python", "Programming", "Object-Oriented", "C++", "Code"],
        "Machine Learning": ["Machine Learning", "Data Science", "Algorithm", "Big Data"],
        "Cybersecurity": ["Security", "Information Security"],
        "Data Analysis": ["Data", "Visualization", "Statistics", "SQL", "Analytics"],
        "Quantum Computing": ["Quantum"],
        "Business": ["Business", "Management", "Lean Six Sigma", "Models"],
        "Other": []  # Default category
    }
    category_names = list(categories)
    print("\n3. What are you interested in learning about?")
    for i, category in enumerate(category_names, 1):
        print(f"{i}. {category}")
    selected_category = pick(input("Select a category (e.g., 1 for Programming): "), category_names)

    return selected_emotion, selected_aspects, categories[selected_category]

# Filter and Score Modules Based on Preferences
def recommend_modules(selected_emotion, selected_aspects, selected_category,
                      catalogue=None, reference_date=None, top_n=3):
    """Score each module row against the user's preferences and return the best.

    The score of a row is
    ``2 * emotion_match + 1 * recent + 3 * aspect_match + 2 * category_match``:

    * ``emotion_match`` is 1 when ``primary_emotion_label`` equals the chosen emotion;
    * ``recent`` is 1 when ``date_reviews`` is within six months of the reference date;
    * ``aspect_match`` counts the chosen aspects present in ``topic_summary``;
    * ``category_match`` is true when any keyword occurs (case-sensitively) in ``name``.

    Args:
        selected_emotion: emotion label the user wants to feel.
        selected_aspects: iterable of aspect labels the user cares about.
        selected_category: iterable of keywords for the chosen subject category.
        catalogue: DataFrame to score; defaults to the notebook-level
            ``modules`` frame. It is never modified.
        reference_date: anything ``pd.Timestamp`` accepts; the six-month
            window ends here. Defaults to the current time.
        top_n: maximum number of rows to return (default 3).

    Returns:
        pandas.DataFrame: up to ``top_n`` rows with a positive score, highest
        first, carrying the helper columns ``emotion_match``, ``recent``,
        ``aspect_match``, ``category_match`` and ``score``.
    """
    source = modules if catalogue is None else catalogue
    # Score a copy: writing the helper columns into the shared frame would
    # leave stale columns behind after every call.
    scored = source.copy()

    now = pd.Timestamp(datetime.now() if reference_date is None else reference_date)
    six_months_ago = now - pd.DateOffset(months=6)
    wanted_aspects = set(selected_aspects)
    keywords = list(selected_category)

    scored["emotion_match"] = (scored["primary_emotion_label"] == selected_emotion).astype(int)
    scored["recent"] = (scored["date_reviews"] >= six_months_ago).astype(int)
    scored["aspect_match"] = scored["topic_summary"].apply(
        lambda topics: len(wanted_aspects.intersection(topics))
    )
    scored["category_match"] = scored["name"].apply(
        lambda name: any(keyword in name for keyword in keywords)
    )

    # Combine scores to rank modules
    scored["score"] = (
        scored["emotion_match"] * 2 +   # Weight: Emotion match is prioritized
        scored["recent"] * 1 +          # Weight: Recent modules have medium importance
        scored["aspect_match"] * 3 +    # Weight: Aspects alignment has higher weight
        scored["category_match"] * 2    # Weight: Category match is prioritized
    )

    # A stable sort keeps rows with equal scores in catalogue order, so the
    # same inputs always give the same recommendations.
    ranked = scored[scored["score"] > 0].sort_values(
        by="score", ascending=False, kind="mergesort"
    )
    return ranked.head(top_n)

# Main Function to Run the Recommendation System, by priority
def main():
    """Run one interactive session: gather preferences, rank modules, print the outcome."""
    emotion, aspects, category_keywords = get_user_preferences()
    top_picks = recommend_modules(emotion, aspects, category_keywords)

    if not top_picks.empty:
        shown_columns = ["name", "institution", "rating", "primary_emotion_label", "score"]
        print("\nRecommended Modules:")
        print(top_picks[shown_columns])
        return

    print("\nNo modules found matching your preferences. Try broadening your criteria.")

# Start the interactive session when this code is executed directly.
if __name__ == "__main__":
    main()


Welcome to the Module Recommendation System!

1. How do you want to feel at the end of the module?
1. joy
2. fear
3. sadness
Select an emotion (e.g., 1 for joy): 1

2. What aspects of the module do you care about? (Select multiple by separating with commas)
1. Exams
2. Workload
3. Assignments
4. Lectures
5. Grading
6. Course Engagement and Course Feelings (Sentiment)
7. Course Structure
8. Utility and Usefulness
9. Learning Outcomes
10. Course Material and Understanding
11. Instructor
12. Course Material
Enter the numbers of the aspects (e.g., 1,3,5): 1, 12, 6

3. What are you interested in learning about?
1. Programming
2. Machine Learning
3. Cybersecurity
4. Data Analysis
5. Quantum Computing
6. Business
7. Other
Select a category (e.g., 1 for Programming): 4

Recommended Modules:
                       name               institution  rating  \
7      SQL for Data Science                  UC Davis     4.6   
0          Machine Learning       Stanford University     4.8   
3  Visualiz

Module Recommendations Generator:

Input:
* a list of saved/favourited modules, recommended by default.
* Ask what topic students are interested in learning about (e.g., programming)
* Ask what aspects (identified by the topic modelling) that they care about
* Ask how important the experiences of past cohorts are to their module selection (i.e., sentiment)
* Recommend modules that:
1. align with topic interests, module performance in different categories, sentiment over last 6 months maybe.
2. Are in saved / similar to saved but also meet the requirements of 1.

Users can select the topics that they are interested in learning about, based on how modules have been categorised:

* "Programming": ["Python", "Programming", "Object-Oriented", "C++", "Code"],
* "Machine Learning": ["Machine Learning", "Data Science", "Algorithm", "Big Data"],
* "Cybersecurity": ["Security", "Information Security"],
* "Data Analysis": ["Data", "Visualization", "Statistics", "SQL", "Analytics"],
* "Quantum Computing": ["Quantum"],
* "Business": ["Business", "Management", "Lean Six Sigma", "Models"],
* "Other": [] # Default category for uncategorized entries

Topics that may be prioritised by students include:
1. 'Exams'
2. 'Workload'
3. 'Assignments'
4. 'Lectures'
5. 'Grading'
6. 'Course Engagement and Course Feelings(Sentiment)'
7. 'Course Structure'
8. 'Utility and Usefulness'
9. 'Learning Outcomes'
10. 'Course Material and Understanding'
11. 'Instructor'
12. 'Course Material'

Relevant columns for the recommendations from the data include:
1. reviews
2. date_reviews
3. rating
4. course_id
5. month
6. name
7. sentiment_from_rating
8. sentiment_from_review
9. emotion_label
10. topic (simplified to 1, since in reality, may have multiple labels in a list)


Logic:
The shortlist of recommended modules should be updated as follows. Initially it contains the saved modules. Then we ask the user what subject area they are interested in and add all modules from that subject area to the shortlist. Next we ask the user "How important is the experience/feelings of past cohorts of a module to you?"; if they select very important (1), we remove modules from the shortlist that don't have "positive" as the sentiment_from_rating or sentiment_from_review and don't have an emotion (where emotions should be mapped to numbers) above 0 or 1. Finally, we ask the user what aspects of the module they care about, and remove any modules in the shortlist that are about the topic they specified where the review is "negative" and the emotion weight is < 0. So this is basically an eliminatory approach: using this hierarchical structure, subject interest is prioritised.

In [None]:
import pandas as pd
from datetime import datetime

# Sample DataFrame.
# Ten hand-written review rows, one list entry per row in every column, used
# to demonstrate the shortlist-based recommender below.
modules = pd.DataFrame({
    "reviews": [
        "Fantastic introduction to machine learning concepts.",
        "Very engaging Python course, helped me get started with programming.",
        "Covers the basics of cybersecurity, but the assignments are tough.",
        "Loved the visualization techniques taught in this course.",
        "The instructor was great, but the workload was intense.",
        "Quantum computing explained very clearly.",
        "Useful for learning about business strategies and digital transformation.",
        "The SQL lessons were detailed and easy to follow.",
        "Good introduction to algorithms, though it requires strong math skills.",
        "Great for learning about data analysis and visualization."
    ],
    # ISO date strings; converted to datetime right after the frame is built.
    "date_reviews": [
        "2023-11-01", "2023-10-15", "2023-09-20", "2023-08-05", "2023-07-10",
        "2023-06-25", "2023-05-30", "2023-04-18", "2023-03-12", "2023-02-05"
    ],
    "rating": [4.8, 4.5, 4.0, 4.9, 3.8, 4.2, 4.7, 4.6, 4.3, 4.9],
    "course_id": [
        "machine-learning", "python-basics", "cybersecurity-basics",
        "data-visualization", "intense-workload", "quantum-computing",
        "business-strategies", "sql-course", "algorithmic-toolbox", "data-analysis"
    ],
    "departments": [
        "Computer Science", "Computer Science", "Cybersecurity",
        "Data Analysis", "General Studies", "Quantum Computing",
        "Business", "Data Analysis", "Computer Science", "Data Analysis"
    ],
    # Module title; searched for the subject-area keywords.
    "name": [
        "Machine Learning", "Programming for Everybody", "Introduction to Cybersecurity",
        "Visualization Techniques", "High-Intensity Course", "Quantum Computing Basics",
        "Digital Business Models", "SQL for Data Science", "Algorithmic Toolbox", "Data Analytics for Beginners"
    ],
    # Both sentiment columns must be "positive" for a module to survive the
    # "very important" feelings filter.
    "sentiment_from_rating": [
        "positive", "positive", "neutral", "positive", "neutral",
        "positive", "positive", "positive", "neutral", "positive"
    ],
    "sentiment_from_review": [
        "positive", "positive", "neutral", "positive", "neutral",
        "positive", "positive", "positive", "neutral", "positive"
    ],
    # Translated to a signed score through emotion_mapping in the filters.
    "emotion_label": [
        "joy", "joy", "fear", "joy", "sadness",
        "joy", "joy", "joy", "fear", "joy"
    ],
    "primary_emotion_label": [
        "joy", "joy", "fear", "joy", "sadness",
        "joy", "joy", "joy", "fear", "joy"
    ],
})

# Convert date_reviews to datetime for time-series filtering
modules["date_reviews"] = pd.to_datetime(modules["date_reviews"])

# Pre-defined emotion mapping: emotion label -> signed score. The filters
# treat scores above 0 as positive and scores below 0 as negative.
emotion_mapping = {"joy": 2, "neutral": 1, "sadness": -1, "fear": -2}

# User Preferences Function
def get_user_preferences(saved_modules, modules):
    """Build a recommendation shortlist by asking three questions in a fixed order.

    The shortlist starts as the saved modules. Step 1 adds every module whose
    name contains a keyword of the chosen subject areas. Step 2 (only when the
    user answers "very important") keeps modules that have a row with positive
    rating sentiment, positive review sentiment and a positive emotion score.
    Step 3 keeps modules that have at least one row whose emotion score is not
    negative; names missing from ``modules`` are dropped by that step.

    Args:
        saved_modules: list of saved module names; it is not modified.
        modules: DataFrame with the columns ``name``, ``sentiment_from_rating``,
            ``sentiment_from_review`` and ``emotion_label``.

    Returns:
        set: names of the recommended modules.

    Raises:
        ValueError: if a menu answer is not a whole number or is outside the
            numbered options shown.

    NOTE(review): the aspects chosen in step 3 are collected but not applied —
    the final filter only looks at emotion scores. The notes in this notebook
    describe an aspect-specific removal; confirm which behaviour is intended.
    """

    def parse_choices(raw_answer, options):
        """Turn "1, 3" into the corresponding entries of ``options`` (1-based)."""
        chosen = []
        for token in raw_answer.split(","):
            number = int(token.strip())  # non-numeric text raises ValueError
            if not 1 <= number <= len(options):
                # "0" or a negative number would otherwise index from the end.
                raise ValueError(
                    f"Choice {number} is out of range; enter numbers from 1 to {len(options)}."
                )
            chosen.append(options[number - 1])
        return chosen

    shortlist = list(saved_modules)

    # Step 1: Ask user about subject area (allow multiple selection)
    categories = {
        "Programming": ["Programming", "Python", "Code"],
        "Machine Learning": ["Machine Learning", "Algorithms", "Big Data"],
        "Cybersecurity": ["Security"],
        "Data Analysis": ["Data", "SQL", "Visualization"],
        "Quantum Computing": ["Quantum"],
        "Business": ["Business", "Management"],
        "Other": []
    }
    category_names = list(categories)
    print("\n1. What subject area(s) are you interested in learning about? (Select multiple by separating with commas)")
    for i, category in enumerate(category_names, 1):
        print(f"{i}. {category}")
    selected_categories = parse_choices(
        input("Enter the numbers of the subject areas (e.g., 1,3,5): "), category_names
    )

    names = modules["name"]
    has_name = names.notna()
    lowered_names = names.fillna("").astype(str).str.lower()

    def matches_any(keywords):
        """Boolean mask: the module name contains any keyword, ignoring case."""
        lowered = [keyword.lower() for keyword in keywords]
        return lowered_names.apply(
            lambda name: any(keyword in name for keyword in lowered)
        ).astype(bool)

    all_keywords = [keyword for words in categories.values() for keyword in words]

    # Add modules from selected subject areas to the shortlist.
    # A category without keywords ("Other") stands for uncategorised modules,
    # i.e. names matching none of the listed keywords. Joining an empty keyword
    # list into a regex gives "", which matches every name.
    for category in selected_categories:
        keywords = categories[category]
        mask = matches_any(keywords) if keywords else ~matches_any(all_keywords)
        shortlist.extend(names[mask & has_name].tolist())

    # Step 2: Importance of experience/feelings
    importance_options = ["very important", "important", "neutral", "unimportant"]
    print("\n2. How important is the experience/feelings of past cohorts of a module to you?")
    for i, importance in enumerate(importance_options, 1):
        print(f"{i}. {importance}")
    selected_importance = int(input("Select an importance level (e.g., 1 for very important): "))

    emotion_scores = modules["emotion_label"].map(emotion_mapping)

    if selected_importance == 1:  # If very important
        well_received = set(names[
            (modules["sentiment_from_rating"] == "positive") &
            (modules["sentiment_from_review"] == "positive") &
            (emotion_scores > 0)
        ])
        shortlist = [module for module in shortlist if module in well_received]

    # Step 3: Ask user about aspects of the module they care about (allow multiple selection)
    aspects = [
        "Exams", "Workload", "Assignments", "Lectures", "Grading",
        "Course Engagement and Course Feelings (Sentiment)", "Course Structure",
        "Utility and Usefulness", "Learning Outcomes",
        "Course Material and Understanding", "Instructor"
    ]
    print("\n3. What aspects of the module do you care about? (Select multiple by separating with commas)")
    for i, aspect in enumerate(aspects, 1):
        print(f"{i}. {aspect}")
    selected_aspects = parse_choices(
        input("Enter the numbers of the aspects (e.g., 1,3,5): "), aspects
    )

    # Keep modules that have a row whose emotion score is not negative
    # (labels missing from emotion_mapping count as not negative).
    not_negative = set(names[~(emotion_scores < 0)])
    shortlist = [module for module in shortlist if module in not_negative]

    return set(shortlist)

# Main Function
def main():
    """Demo run: start from three saved modules and print the resulting shortlist."""
    saved_modules = [
        "Machine Learning",
        "Programming for Everybody",
        "Algorithmic Toolbox",
    ]
    recommendations = get_user_preferences(saved_modules, modules)

    print("\nRecommended Modules:")
    for module_name in recommendations:
        print(f"- {module_name}")

# Start the interactive session when this code is executed directly.
if __name__ == "__main__":
    main()



1. What subject area(s) are you interested in learning about? (Select multiple by separating with commas)
1. Programming
2. Machine Learning
3. Cybersecurity
4. Data Analysis
5. Quantum Computing
6. Business
7. Other
Enter the numbers of the subject areas (e.g., 1,3,5): 1,2,4

2. How important is the experience/feelings of past cohorts of a module to you?
1. very important
2. important
3. neutral
4. unimportant
Select an importance level (e.g., 1 for very important): 1

3. What aspects of the module do you care about? (Select multiple by separating with commas)
1. Exams
2. Workload
3. Assignments
4. Lectures
5. Grading
6. Course Engagement and Course Feelings (Sentiment)
7. Course Structure
8. Utility and Usefulness
9. Learning Outcomes
10. Course Material and Understanding
11. Instructor
Enter the numbers of the aspects (e.g., 1,3,5): 2,3,9

Recommended Modules:
- Visualization Techniques
- Machine Learning
- Programming for Everybody
- Data Analytics for Beginners
- SQL for Data Sci

# How does the module recommender work?

**Approach**

A filtering approach is used, based on the user's answers to each question:
* Any module that is in the user's list of saved modules is added to the shortlist of module recommendations.
*   Users are asked what is the most important to them when deciding to pick a module out of:
  1.   The Feelings and experiences of past cohorts (mainly sentiment and emotion data).
  2.   The Subject Area and what they are interested in learning about, where each module is categorised into different subject areas, e.g., 'Programming' and 'CyberSecurity'
  3. Aspects of the module (e.g., workload, exams, grading, learning outcomes etc.)
*   Based on this, the recommended modules are processed and filtered considering the user's priority. For example,

  1.   Say the user picks a priority order (2,1,3). Then, modules of that subject area will be added to the shortlist, and any modules not in that subject area will be removed. Following this, Users will be asked how important (1) is to them, and if it is important to them, then any modules currently in the shortlist under a threshold of positive sentiment and emotion will be removed. Finally, users will be asked which (3) are most important to them, and if there are any modules in the shortlist with a lot of negative reviews / negative emotions and they are labelled under that aspect, this will be removed from the shortlist.
  2.   This leaves the user with a recommendation of modules aligned with their preferences.

expansions??

In [None]:
def get_user_preferences(saved_modules, modules):
    """Ask the user to rank the three criteria, then apply the filters in that order.

    Args:
        saved_modules: list of saved module names; the shortlist starts as a
            copy of it.
        modules: DataFrame of module review rows, passed on to each filter.

    Returns:
        The shortlist returned by the last filter step.

    Raises:
        ValueError: if the ranking is not exactly the numbers 1, 2 and 3, each
            used once and separated by commas. Previously any other answer
            (for example a single "1") failed later with an unbound-variable
            error.
    """
    shortlist = saved_modules.copy()

    # Menu number -> filter step. The names are resolved when this function
    # runs, so the filter functions may be defined further down the cell.
    steps = {
        1: filter_by_feelings_and_emotions,
        2: filter_by_subject_area,
        3: filter_by_aspects,
    }

    # Step 1: Ask user to prioritize what's most important to them
    print("\nOut of the following, what is most important to you? (Rank them 1, 2, 3)")
    print("1. Feelings and experiences of past cohorts (reviews, sentiment, emotion)")
    print("2. Subject area (What you’re interested in learning about)")
    print("3. Aspects of the module (e.g., workload, exams, grading, etc.)")
    # The prompt now shows the comma-separated form that the parser expects.
    raw_ranking = input(
        "Enter your ranking as three comma-separated numbers, most important first (e.g., 2,1,3): "
    )
    user_priority = [int(item.strip()) for item in raw_ranking.split(",")]

    if sorted(user_priority) != sorted(steps):
        raise ValueError(
            "Ranking must contain each of 1, 2 and 3 exactly once, e.g. '2,1,3'; "
            f"got {raw_ranking!r}."
        )

    # Step 2: Process according to the priority
    for number in user_priority:
        shortlist = steps[number](shortlist, modules)

    return shortlist

def filter_by_feelings_and_emotions(shortlist, modules):
    """Optionally restrict the shortlist to modules that past cohorts liked.

    The user picks an importance level from 1 to 4. Only level 1 ("very
    important") filters: a name is kept when ``modules`` has a row for it with
    positive rating sentiment, positive review sentiment and an emotion score
    above 0 according to ``emotion_mapping``. Any other level returns the
    shortlist unchanged.
    """
    print("\nHow important are the feelings/experiences of past cohorts to you?")
    importance_options = ["very important", "important", "neutral", "unimportant"]
    for position, label in enumerate(importance_options, start=1):
        print(f"{position}. {label}")
    selected_importance = int(input("Select an importance level (e.g., 1 for very important): "))

    # Anything other than "very important" leaves the shortlist as it is.
    if selected_importance != 1:
        return shortlist

    rating_positive = modules["sentiment_from_rating"] == "positive"
    review_positive = modules["sentiment_from_review"] == "positive"
    emotion_positive = modules["emotion_label"].map(emotion_mapping) > 0
    well_received = modules[rating_positive & review_positive & emotion_positive]["name"].tolist()

    return [name for name in shortlist if name in well_received]

def filter_by_subject_area(shortlist, modules):
    """Ask for subject areas and add every matching module to the shortlist.

    A module matches a subject area when its name contains one of the area's
    keywords, ignoring case. "Other" has no keywords and stands for
    uncategorised modules: names that contain none of the keywords of any
    area. (Joining an empty keyword list into a regex gives "", which matches
    every name and would add the whole catalogue.)

    Args:
        shortlist: list of module names gathered so far; it is not modified.
        modules: DataFrame with a ``name`` column.

    Returns:
        list: the shortlist followed by the newly matched names, without
        duplicates and in first-seen order.

    Raises:
        ValueError: if an answer is not a whole number or is outside the
            numbered options shown.
    """
    # Ask user about subject area(s)
    categories = {
        "Programming": ["Programming", "Python", "Code"],
        "Machine Learning": ["Machine Learning", "Algorithms", "Big Data"],
        "Cybersecurity": ["Security"],
        "Data Analysis": ["Data", "SQL", "Visualization"],
        "Quantum Computing": ["Quantum"],
        "Business": ["Business", "Management"],
        "Other": []
    }
    category_names = list(categories)
    print("\nWhat subject area(s) are you interested in learning about? (Select multiple by separating with commas)")
    for i, category in enumerate(category_names, 1):
        print(f"{i}. {category}")
    answer = input("Enter the numbers of the subject areas (e.g., 1,3,5): ")

    selected_categories = []
    for token in answer.split(","):
        number = int(token.strip())  # non-numeric text raises ValueError
        if not 1 <= number <= len(category_names):
            # "0" or a negative number would otherwise index from the end.
            raise ValueError(
                f"Choice {number} is out of range; enter numbers from 1 to {len(category_names)}."
            )
        selected_categories.append(category_names[number - 1])

    names = modules["name"]
    has_name = names.notna()  # rows without a name can never be recommended
    lowered_names = names.fillna("").astype(str).str.lower()

    def matches_any(keywords):
        """Boolean mask: the module name contains any keyword, ignoring case."""
        lowered = [keyword.lower() for keyword in keywords]
        return lowered_names.apply(
            lambda name: any(keyword in name for keyword in lowered)
        ).astype(bool)

    all_keywords = [keyword for words in categories.values() for keyword in words]

    # Collect modules from the selected subject areas.
    additions = []
    for category in selected_categories:
        keywords = categories[category]
        mask = matches_any(keywords) if keywords else ~matches_any(all_keywords)
        additions.extend(names[mask & has_name].tolist())

    # dict.fromkeys drops duplicates and keeps first-seen order, so the
    # result is the same on every run.
    return list(dict.fromkeys(list(shortlist) + additions))

def filter_by_aspects(shortlist, modules):
    """Ask which module aspects matter, then drop modules with only negative emotions.

    A name stays on the shortlist when ``modules`` has at least one row for it
    whose emotion score (``emotion_label`` looked up in ``emotion_mapping``) is
    not below 0. Labels missing from the mapping count as not negative. Names
    that do not occur in ``modules`` are removed.

    Args:
        shortlist: list of module names gathered so far; it is not modified.
        modules: DataFrame with the columns ``name`` and ``emotion_label``.

    Returns:
        list: the remaining module names, in their original order.

    Raises:
        ValueError: if an answer is not a whole number or is outside the
            numbered options shown.

    NOTE(review): the selected aspects are collected but not applied — the
    filter only looks at emotion scores. The notes in this notebook describe
    an aspect-specific removal; confirm which behaviour is intended.
    """
    # Ask user about aspects of the module they care about (allow multiple selection)
    aspects = [
        "Exams", "Workload", "Assignments", "Lectures", "Grading",
        "Course Engagement and Course Feelings (Sentiment)", "Course Structure",
        "Utility and Usefulness", "Learning Outcomes",
        "Course Material and Understanding", "Instructor"
    ]
    print("\nWhat aspects of the module do you care about? (Select multiple by separating with commas)")
    for i, aspect in enumerate(aspects, 1):
        print(f"{i}. {aspect}")
    answer = input("Enter the numbers of the aspects (e.g., 1,3,5): ")

    selected_aspects = []
    for token in answer.split(","):
        number = int(token.strip())  # non-numeric text raises ValueError
        if not 1 <= number <= len(aspects):
            # "0" or a negative number would otherwise index from the end.
            raise ValueError(
                f"Choice {number} is out of range; enter numbers from 1 to {len(aspects)}."
            )
        selected_aspects.append(aspects[number - 1])

    # Keep names that have a row whose emotion score is not negative.
    emotion_scores = modules["emotion_label"].map(emotion_mapping)
    acceptable = set(modules.loc[~(emotion_scores < 0), "name"])
    return [module for module in shortlist if module in acceptable]

def main():
    """Demo run: feed three saved modules through the prioritised filters and print the result."""
    saved_modules = [
        "Machine Learning",
        "Programming for Everybody",
        "Algorithmic Toolbox",
    ]

    # Collect the user's answers and apply the filters in the order they chose.
    shortlisted_modules = get_user_preferences(saved_modules, modules)

    # Print one bullet per recommended module.
    print("\nRecommended Modules:")
    for module_name in shortlisted_modules:
        print(f"- {module_name}")

# Start the interactive session when this code is executed directly.
if __name__ == "__main__":
    main()



Out of the following, what is most important to you? (Rank them 1, 2, 3)
1. Feelings and experiences of past cohorts (reviews, sentiment, emotion)
2. Subject area (What you’re interested in learning about)
3. Aspects of the module (e.g., workload, exams, grading, etc.)
Enter your ranking (e.g., 1 for feelings, 2 for subject area, 3 for aspects): 2,1,3

What subject area(s) are you interested in learning about? (Select multiple by separating with commas)
1. Programming
2. Machine Learning
3. Cybersecurity
4. Data Analysis
5. Quantum Computing
6. Business
7. Other
Enter the numbers of the subject areas (e.g., 1,3,5): 1,2,4

How important are the feelings/experiences of past cohorts to you?
1. very important
2. important
3. neutral
4. unimportant
Select an importance level (e.g., 1 for very important): 1

What aspects of the module do you care about? (Select multiple by separating with commas)
1. Exams
2. Workload
3. Assignments
4. Lectures
5. Grading
6. Course Engagement and Course Feel