# Recommendation Functions
- This notebook contains several different types of recommender functions that recommend Quora questions to relevant users who haven't answered them before.


**Function Arguments**:

- `topics`: The set of topics or items for recommendations.
- `feedback`: User feedback data.
- `answers`: The answer data related to each topic.
- `n`: Number of top recommendations to return (default is 5).

In [1]:
import pandas as pd
# Load the three input tables from the working directory; each CSV is
# indexed by its 'Question' column.
topics = pd.read_csv(r"./topics.csv", index_col = 'Question')
feedback = pd.read_csv(r"./feedback.csv", index_col = 'Question')
answers = pd.read_csv(r"./answers.csv", index_col = 'Question')

In [2]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import math

#Defining the function to calculate cosine_similarity for all functions using the sklearn implementation
def calculate_cosine_similarity(vector1, matrix):
    """Return the cosine similarity between ``vector1`` and every row of ``matrix``.

    The vector is wrapped in a list so it is passed to the pairwise
    ``cosine_similarity`` call as a single-row input; the only row of the
    resulting similarity matrix is returned.
    """
    single_row = [vector1]
    similarity_rows = cosine_similarity(single_row, matrix)
    return similarity_rows[0]

#Defining the function to create the recommendations dictionary

def initialize_recommendations(feedback, n):
    """Build the placeholder recommendations for every user.

    Each column label of ``feedback`` becomes a key whose value is a fresh
    dict mapping the ranks 1..n to NaN.
    """
    recommendations = {}
    for user in feedback.columns:
        recommendations[user] = dict.fromkeys(range(1, n + 1), math.nan)
    return recommendations

#Defining the function to sort the cosine_similarities

def get_sorted_indices(cosine_similarities):
    """Return the positions of ``cosine_similarities`` ordered from highest to lowest value."""
    ascending_order = np.argsort(cosine_similarities)
    # argsort is ascending, so reverse it to put the best match first
    return np.flip(ascending_order, axis=0)

#Defining the function to get the top n recommendations filtered by whether the user is interested in that question or not.
def get_top_recommendations(user, feedback, topics, cosine_similarities, n=5):
    """Get the top n recommendations for a user.

    Questions are visited from most to least similar, and the first ``n``
    that the user has not yet given feedback on are kept. For the special
    user name 'super_user' no question is filtered out.

    Args:
        user: Column label of the user in ``feedback``, or 'super_user'.
        feedback: DataFrame of feedback values (questions x users); its index
            supplies the question labels used as keys of the result.
        topics: Not used by this function; kept so existing callers keep working.
        cosine_similarities: One similarity per question, in the same
            positional order as the rows of ``feedback``.
        n: Maximum number of recommendations to return.

    Returns:
        dict mapping question label -> similarity, most similar first.
    """
    top_recommendations = {}

    # Without this guard the length check below never equals n when n <= 0,
    # so every candidate question would be returned instead of none.
    if n <= 0:
        return top_recommendations

    # Positions in cosine_similarities correspond to rows of feedback
    question_labels = feedback.index

    for position in get_sorted_indices(cosine_similarities):
        question = question_labels[position]

        # If this is not super_user, skip questions that already have feedback
        # from this user; we don't want to recommend those again.
        if user != 'super_user' and feedback.loc[question, user] != 0:
            continue

        top_recommendations[question] = cosine_similarities[position]
        if len(top_recommendations) == n:  # stop once n recommendations are found
            break

    return top_recommendations


#Defining the simply unary user profile calculations

def user_profile_simply_unary(feedback, topics):
    """Build user profiles as the product of the transposed feedback matrix and the topics matrix."""
    feedback_by_user = feedback.values.T
    topic_matrix = topics.values
    return feedback_by_user @ topic_matrix


#Defining the normalization of topic (for unit weight and idf)

def normalize_topic(topic):
    """Normalize a single topic vector so its values sum to 1.

    Args:
        topic: The topic weights of one question (e.g. a row of the topics
            table). Anything that supports ``.sum()`` and division works.

    Returns:
        ``topic`` divided by its total. A vector whose total is zero cannot
        be normalized; it is returned as all zeros (float) instead of the
        NaN/inf values a division by zero would produce, so it simply
        contributes nothing downstream.
    """
    topic_total = topic.sum()

    # Only a scalar total can be tested for zero; when a whole table is passed
    # the total is one value per column and plain division is kept.
    if np.ndim(topic_total) == 0 and topic_total == 0:
        return topic * 0.0

    return topic / topic_total

#Defining the user_profile calculation for unit_weight

def user_profile_unit_weight(feedback, topics_normalized):
    """Build user profiles as the product of the transposed feedback matrix and the normalized topics matrix."""
    users_by_questions = feedback.values.transpose()
    return np.dot(users_by_questions, topics_normalized.values)

#Defining function to calculate IDF

def calculate_idf(topics):
    """Calculate the IDF (inverse document frequency) of each topic.

    The slides used log so we also use log (natural log here, even though the
    calculation in the slides used log10).

    Args:
        topics: Table of topic weights with one row per question and one
            column per topic.

    Returns:
        One value per topic: ``log(number of questions / number of questions
        with a non-zero weight for the topic)``. A topic that no question
        uses gets an IDF of 0 rather than the infinity a division by zero
        would give, because infinity turns into NaN as soon as it is
        multiplied by a zero weight.
    """
    document_frequency = (topics != 0).sum(axis=0)
    unused_topics = document_frequency == 0

    # Division by a zero frequency is handled just below, so silence the warning
    with np.errstate(divide="ignore", invalid="ignore"):
        idf_values = np.log(topics.shape[0] / document_frequency)

    idf_values[unused_topics] = 0.0
    return idf_values

 #Defining the function to create the user profile following the slides for IDF (inverse document frequency)
def generate_user_profiles_idf(feedback, topics):
    """Generate user profiles using the IDF weighting from the slides.

    Every question's topic weights are normalized to sum to 1 and scaled by
    the IDF of each topic; a user's profile is the sum of these weighted topic
    vectors over all questions, each multiplied by the user's feedback value
    (so questions with feedback 0 contribute nothing).

    Args:
        feedback: DataFrame of feedback values (questions x users).
        topics: DataFrame of topic weights (questions x topics), with its
            rows in the same positional order as ``feedback``.

    Returns:
        Float DataFrame with one row per user (columns of ``feedback``) and
        one column per topic.
    """
    idf_values = calculate_idf(topics)

    # Normalize each question once instead of once per (user, question) pair
    normalized_topics = topics.apply(normalize_topic, axis=1)
    weighted_topics = normalized_topics * idf_values

    # A question without topic weights or a topic no question uses can yield
    # NaN/inf here; such entries carry no information, so they contribute 0
    # instead of turning whole user profiles into NaN.
    weighted_topics = weighted_topics.replace([np.inf, -np.inf], np.nan).fillna(0.0)

    # (users x questions) . (questions x topics) -> (users x topics)
    profiles = feedback.values.T.dot(weighted_topics.values)

    return pd.DataFrame(profiles, index=feedback.columns, columns=topics.columns, dtype=float)

# This is the code to recommend using non-personalized methods
def non_personalized_recommendation(feedback, n=5):
    """Recommend the n questions with the highest overall feedback score.

    A question's score is its net feedback (sum over all users, so negative
    feedback lowers it) divided by the total amount of feedback given on all
    questions (sum of absolute values).

    Args:
        feedback: DataFrame of feedback values (questions x users).
        n: Number of questions to recommend.

    Returns:
        dict mapping question label -> score, highest score first. When no
        feedback exists at all every score is 0 (instead of NaN from a
        division by zero), so the first n questions are returned.
    """
    total_feedback_count = feedback.abs().sum(axis=0).sum()
    net_feedback_per_question = feedback.sum(axis=1)

    if total_feedback_count == 0:
        # Nothing to rank on: keep well-defined zero scores rather than NaN
        scores = net_feedback_per_question * 0.0
    else:
        scores = net_feedback_per_question / total_feedback_count

    # nlargest already returns the scores sorted from highest to lowest
    return scores.nlargest(n).to_dict()

In [3]:
#Function that weighs the content based and collaborative based recommendations 
def weight_recommendations(user, content_based, collaborative, n=5, weight_content_based=0.6):
    """Blend content-based and collaborative recommendations for one user.

    Args:
        user: Key of the user in both recommendation dicts.
        content_based: dict user -> {item: score} from the content-based method.
        collaborative: dict user -> {item: score} from the collaborative method.
        n: Number of top items to keep.
        weight_content_based: Weight of the content-based score; the
            collaborative score gets ``1 - weight_content_based``. The default
            of 0.6 means 60% content-based and 40% collaborative.

    Returns:
        dict mapping item -> blended score, highest first, with at most n
        entries. An item missing from one method scores 0 for that method.
        Ties keep a deterministic order: content-based items first, then
        collaborative items, each in the order they were given.
    """
    def _real_scores(recommendations):
        # NaN scores are "no recommendation" placeholders, not real items
        return {
            item: score
            for item, score in recommendations.items()
            if not (isinstance(score, float) and math.isnan(score))
        }

    cb_scores = _real_scores(content_based[user])
    cf_scores = _real_scores(collaborative[user])
    weight_collaborative = 1 - weight_content_based

    # dict.fromkeys de-duplicates while keeping insertion order, unlike a set
    items = dict.fromkeys([*cb_scores, *cf_scores])

    # Weighted average of both scores for every item
    scores = {
        item: (weight_content_based * cb_scores.get(item, 0))
        + (weight_collaborative * cf_scores.get(item, 0))
        for item in items
    }

    # sorted() is stable, so equal scores keep the insertion order from above
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)

    return dict(ranked[:max(n, 0)])

# Here we get recommendations based on what similar users like, using a user-user similarity matrix.

def get_collaborative_recommendations(feedback, topics, n=5):
    """Generate recommendations per user from what the most similar users rated.

    Args:
        feedback: DataFrame of feedback values (questions x users).
        topics: Not used by this function; kept so existing callers keep working.
        n: Used both as the number of similar users to consider and as the
            number of recommendations to return per user.

    Returns:
        dict user -> {question: mean feedback of the similar users}, limited
        to questions the user has no feedback on, highest mean first.
    """
    # Calculate user-user similarity matrix
    user_similarity = cosine_similarity(feedback.T)
    user_similarity_df = pd.DataFrame(user_similarity, index=feedback.columns, columns=feedback.columns)

    prediction = {}

    for user in feedback.columns:
        # Remove the user explicitly instead of skipping the first sorted entry:
        # the user is not guaranteed to sort first (a user without feedback has
        # similarity 0 to themselves, and another user can tie at 1.0).
        other_users = user_similarity_df[user].drop(user)
        top_n_users = other_users.sort_values(ascending=False).index[:n]

        if len(top_n_users) == 0:
            # No other user to learn from
            prediction[user] = {}
            continue

        # Average feedback of the similar users per question, best first
        similar_users_items = feedback[top_n_users].mean(axis=1).sort_values(ascending=False)

        # Keep only questions the user has not interacted with yet
        not_interacted = feedback.index[feedback[user] == 0]
        recommended_items = similar_users_items[similar_users_items.index.isin(not_interacted)]

        # Each item's score is its average score from the similar users
        prediction[user] = recommended_items[:n].to_dict()

    return prediction

# Here we define a function that creates a super user by combining the profiles of all other users whose feedback sum is not 0. Each user's contribution is weighted by how much feedback they've given (users with more feedback data matter more).
def generate_super_user_profile(feedback, user_profiles,n=5):
    """Create a super user profile: a weighted average of all user profiles.

    Each user's weight is their share of all feedback given (sum of absolute
    feedback values), so users who provide more feedback count more and users
    without feedback do not count at all.

    Args:
        feedback: DataFrame of feedback values (questions x users).
        user_profiles: DataFrame of user profiles (users x topics); every
            user in its index must be a column of ``feedback``.
        n: Not used by this function; kept so existing callers keep working.

    Returns:
        2D numpy array of shape (1, number of topics). When no feedback has
        been given at all the profile is all zeros (instead of NaN from a
        division by zero).
    """
    # 1: Total feedback per user (absolute values, so dislikes count too)
    total_feedback = feedback.abs().sum()
    grand_total = total_feedback.sum()

    # 2: Normalize against the total feedback of all users
    if grand_total == 0:
        weights = total_feedback * 0.0
    else:
        weights = total_feedback / grand_total

    # 3: Weighted sum of the profiles; .loc raises KeyError for an unknown user
    user_weights = weights.loc[user_profiles.index].to_numpy(dtype=float)
    profile_matrix = user_profiles.to_numpy(dtype=float)
    super_user_profile = user_weights @ profile_matrix

    # 4: Return the super user profile as a 2D matrix with a single row
    return super_user_profile.reshape(1, -1)

# Summary of the `simply_unary` Function

The `simply_unary` function is designed to generate personalized item recommendations for users using a content-based filtering approach. The function creates user profiles using a simply unary method, meaning it considers only the occurrence of user interactions, but not the weight of the interactions.

## Detailed Function Operations:

1. **User Profiles Generation**: User profiles are generated using the `user_profile_simply_unary` function. The user profile is built only considering whether an interaction occurred, without considering the weight or frequency of the interactions.

2. **Recommendations Initialization**: A dictionary named `prediction` is initialized to store the recommendations. This initialization is carried out using the `initialize_recommendations` function.

3. **Recommendations Calculation**: For each user profile, the following steps are executed:
   - If the user has not provided any feedback (feedback values are all zero), the function skips to the next user.
   - If the user has provided feedback, the function calculates the cosine similarity between the user's profile and the topics using the `calculate_cosine_similarity` function.
   - Then, the top `n` recommendations for the user are determined using the `get_top_recommendations` function.

4. **Return Recommendations**: The function returns the `prediction` dictionary. Each key-value pair in this dictionary corresponds to a user and their corresponding item recommendations, respectively.

The `simply_unary` function provides a straightforward, occurrence-based approach to user profiling in content-based recommendation systems. It doesn't factor in the frequency or weight of the user interactions.




In [4]:
def simply_unary(topics, feedback, answers, n = 5):
    """Recommend questions per user from simply unary user profiles.

    Users without any non-zero feedback keep the NaN placeholder entry
    created by ``initialize_recommendations``.
    """
    profiles = user_profile_simply_unary(feedback, topics)
    prediction = initialize_recommendations(feedback, n)

    topic_matrix = topics.values
    feedback_matrix = feedback.values

    # Row i of the profiles belongs to column i of the feedback table
    for position, (user, profile) in enumerate(zip(feedback.columns, profiles)):
        if np.any(feedback_matrix[:, position]):
            similarities = calculate_cosine_similarity(profile, topic_matrix)
            prediction[user] = get_top_recommendations(user, feedback, topics, similarities, n)

    return prediction

In [5]:
# Run the simply unary recommender on the loaded tables, asking for 5 recommendations per user.
simply_unary(topics, feedback, answers, n = 5)

{'User 1': {12: 0.5039526306789697,
  9: 0.4629100498862757,
  3: 0.25197631533948484,
  18: 0.1543033499620919,
  11: 0.0},
 'User 2': {14: 0.5833333333333333,
  13: 0.5833333333333333,
  20: 0.41666666666666663,
  18: 0.35355339059327373,
  15: 0.33333333333333337},
 'User 3': {14: 0.2182178902359924,
  19: 0.19518001458970663,
  11: 0.12598815766974242,
  20: 0.0,
  2: 0.0},
 'User 4': {1: nan, 2: nan, 3: nan, 4: nan, 5: nan}}

# Summary of the `unit_weight` Function

The `unit_weight` function generates personalized item recommendations for users based on a content-based filtering approach. Here, the user profiles are created using the unit weight method, where all items are treated as equally important.

## Detailed Function Operations:

1. **Topics Normalization**: The topics are normalized using the `normalize_topic` function. The normalized topics are stored in `topics_normalized`.

2. **User Profiles Generation**: User profiles are generated using the `user_profile_unit_weight` function, which considers all items as equally important. 

3. **Recommendations Initialization**: A dictionary named `prediction` is initialized to store the recommendations, using the `initialize_recommendations` function.

4. **Recommendations Calculation**: For each user profile, the following steps are executed:
   - If the user has not provided any feedback (feedback values are all zero), the function skips to the next user.
   - If the user has provided feedback, cosine similarity between the user's profile and the topics is calculated. The `calculate_cosine_similarity` function is used for this purpose.
   - The top `n` recommendations for the user are then determined using the `get_top_recommendations` function.

5. **Return Recommendations**: The function returns the `prediction` dictionary where each key-value pair corresponds to a user and their corresponding item recommendations.

The `unit_weight` function is a content-based recommendation system where the importance of each item is considered equal in user profile generation. It ensures that no particular item is favored due to frequency or other characteristics.


In [6]:
def unit_weight(topics, feedback, answers, n = 5):
    """Recommend questions per user from unit weight user profiles.

    Profiles are built from row-normalized topics, while the similarities are
    computed against the raw topics matrix. Users without any non-zero
    feedback keep the NaN placeholder entry created by
    ``initialize_recommendations``.
    """
    # Normalize every question's topic weights (row-wise)
    topics_normalized = topics.apply(normalize_topic, axis=1)

    user_profiles = user_profile_unit_weight(feedback, topics_normalized)
    prediction = initialize_recommendations(feedback, n)

    for column_position, user in enumerate(feedback.columns):
        # Nothing to personalize on when the user gave no feedback
        if not feedback.values[:, column_position].any():
            continue

        profile = user_profiles[column_position]
        similarities = calculate_cosine_similarity(profile, topics.values)
        prediction[user] = get_top_recommendations(user, feedback, topics, similarities, n)

    return prediction

In [7]:
# Run the unit weight recommender on the loaded tables, asking for 5 recommendations per user.
unit_weight(topics, feedback, answers, n = 5)

{'User 1': {12: 0.573440594836071,
  9: 0.44537266138789555,
  3: 0.32867936533287,
  18: 0.29977006054954514,
  15: 0.12112539577787733},
 'User 2': {13: 0.6055546454048788,
  14: 0.5891883036371793,
  20: 0.3982476496806861,
  18: 0.3471825374147067,
  8: 0.3327822826098884},
 'User 3': {14: 0.19943100880436643,
  19: 0.10192943828752511,
  11: 0.09869275424396537,
  10: 0.0,
  18: 0.0},
 'User 4': {1: nan, 2: nan, 3: nan, 4: nan, 5: nan}}

# Summary of the `idf` Function

The `idf` function is designed for personalized item recommendations for users. The function utilizes a content-based filtering approach, using cosine similarity between normalized topics and user profiles generated using IDF (Inverse Document Frequency).

## Detailed Function Operations:

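1. **Topics Normalization**: Each question's topic weights are normalized using the `normalize_topic` function (this happens inside `generate_user_profiles_idf` while the user profiles are built).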

2. **Recommendations Initialization**: A dictionary called `prediction` is initialized for storing the recommendations. This is achieved using the `initialize_recommendations` function.

3. **User Profiles Generation**: User profiles are created with the `generate_user_profiles_idf` function that takes feedback and topics as arguments.

4. **Recommendations Calculation**: For each user, the following steps are performed:
   - If the user has not provided any feedback (user profile is zero), the function skips to the next user.
   - If the user has provided feedback, cosine similarity between the user's profile and the topics is calculated. Then, top `n` recommendations for the user are determined using the `get_top_recommendations` function.

5. **Return Recommendations**: The function returns a dictionary named `prediction`, where each key-value pair corresponds to a user and their corresponding item recommendations, respectively.

In the `idf` function, recommendations are made based on individual user's profile similarity to the topics, employing content-based filtering with IDF.


In [8]:
def idf(topics, feedback, answers, n = 5):
    """Recommend questions per user from IDF-weighted user profiles.

    Args:
        topics: DataFrame of topic weights (questions x topics).
        feedback: DataFrame of feedback values (questions x users).
        answers: Not used by this function; kept for a uniform signature.
        n: Number of recommendations per user.

    Returns:
        dict user -> {question: cosine similarity}. A user whose profile is
        all zeros keeps the NaN placeholder entry created by
        ``initialize_recommendations``.
    """
    # Recommendations
    prediction = initialize_recommendations(feedback, n)

    # User profiles (topic normalization happens while they are built, so no
    # separate normalized copy of the topics is needed here)
    user_profiles = generate_user_profiles_idf(feedback, topics)

    topic_matrix = topics.values

    for user in feedback.columns:
        user_profile = user_profiles.loc[user]

        # Skip users whose profile is empty (no usable feedback)
        if np.count_nonzero(user_profile) == 0:
            continue

        # Calculate cosine similarities
        cosine_similarities = calculate_cosine_similarity(user_profile, topic_matrix)

        # Get top recommendations and add them to the dictionary
        prediction[user] = get_top_recommendations(user, feedback, topics, cosine_similarities, n)

    return prediction

In [9]:
# Run the IDF recommender on the loaded tables, asking for 5 recommendations per user.
idf(topics, feedback, answers, n = 5)

{'User 1': {12: 0.6220964030285525,
  9: 0.3607507382998098,
  3: 0.235026926619344,
  18: 0.18160089887636754,
  15: 0.07215622694738955},
 'User 2': {13: 0.44450958478739583,
  14: 0.42657220389526124,
  7: 0.26367435403517137,
  20: 0.22113044829984108,
  18: 0.18894325836321604},
 'User 3': {14: 0.15331890699084647,
  19: 0.08118849750532613,
  11: 0.05338958795650429,
  10: 0.01583125298640302,
  18: 0.0},
 'User 4': {1: nan, 2: nan, 3: nan, 4: nan, 5: nan}}

# Summary of the `switch` Function

The `switch` function generates personalized and non-personalized recommendations for users based on their feedback. This function combines content-based filtering and a fallback non-personalized recommendation approach.

## Detailed Function Operations:

1. **User Profiles Generation**: User profiles are generated from the provided feedback and topics using the `generate_user_profiles_idf` function.

2. **Recommendations Calculation**: For each user in the feedback data, the function performs the following steps:
   - If a user has **no feedback**, a non-personalized recommendation is generated with the `non_personalized_recommendation` function and added to the predictions dictionary. The function then moves to the next user.
   - If a user has provided feedback, their user profile is used to calculate cosine similarities between the user's profile and the topics. These similarities are then used to get the top `n` recommendations for that user with the `get_top_recommendations` function.

3. **Return Recommendations**: The function returns a dictionary named `prediction` where each key-value pair corresponds to a user and their corresponding item recommendations.

The `switch` function uses non-personalized recommendations as a fallback for new users or users who have not given any feedback, addressing the cold start problem. For users with feedback, it uses content-based filtering employing cosine similarity measures.


In [10]:
def switch(topics, feedback, answers, n=5):
    """Recommend per user, switching to a non-personalized fallback for cold starts.

    Args:
        topics: DataFrame of topic weights (questions x topics).
        feedback: DataFrame of feedback values (questions x users).
        answers: Not used by this function; kept for a uniform signature.
        n: Number of recommendations per user.

    Returns:
        dict user -> {question: score}. Users with feedback get content-based
        (IDF profile) recommendations scored by cosine similarity; users
        without any feedback get the non-personalized recommendations.
    """
    # Initialize recommendations
    prediction = {}

    # User profiles
    user_profiles = generate_user_profiles_idf(feedback, topics)
    topic_matrix = topics.values

    # The fallback is the same for every cold-start user: compute it at most once
    fallback = None

    for user in feedback.columns:
        # A user has no feedback when all values are 0. Absolute values are
        # summed so that likes and dislikes that cancel each other out are
        # not mistaken for "no feedback".
        if feedback[user].abs().sum() == 0:
            if fallback is None:
                fallback = non_personalized_recommendation(feedback, n)
            prediction[user] = dict(fallback)  # own copy per user
            continue

        user_profile = user_profiles.loc[user]

        # Calculate cosine similarities
        cosine_similarities = calculate_cosine_similarity(user_profile, topic_matrix)

        # Get top recommendations and add them to the dictionary
        prediction[user] = get_top_recommendations(user, feedback, topics, cosine_similarities, n)

    return prediction

In [11]:
# Run the switching recommender on the loaded tables, asking for 5 recommendations per user.
switch(topics, feedback, answers, n=5)

{'User 1': {12: 0.6220964030285525,
  9: 0.3607507382998098,
  3: 0.235026926619344,
  18: 0.18160089887636754,
  15: 0.07215622694738955},
 'User 2': {13: 0.44450958478739583,
  14: 0.42657220389526124,
  7: 0.26367435403517137,
  20: 0.22113044829984108,
  18: 0.18894325836321604},
 'User 3': {14: 0.15331890699084647,
  19: 0.08118849750532613,
  11: 0.05338958795650429,
  10: 0.01583125298640302,
  18: 0.0},
 'User 4': {4: 0.0625, 5: 0.0625, 6: 0.0625, 8: 0.0625, 17: 0.0625}}

# Summary of the `hybrid` Function

The `hybrid` function performs a hybrid recommendation by combining content-based and collaborative filtering methods. It recommends items for each user based on user profiles, a super user profile, and user feedback.


## Detailed Function Operations:

1. **User Profiles Generation**: User profiles are generated using the `generate_user_profiles_idf` function that utilizes user feedback and topics.

2. **Super User Profile Generation**: A super user profile is generated with `generate_super_user_profile` function. This profile acts as a default option for users who have not given any feedback (the cold start problem).

3. **Recommendations Calculation**: For each user, the function checks whether they've given any feedback:
   - For users with **no feedback** (cold start condition), the cosine similarity is calculated between the super user profile and topics. Then, top `n` recommendations for the super user are determined using the cosine similarity values. These recommendations are also utilized for the users with no feedback.
   - For users with feedback, the function calculates top `n` recommendations using both methods: **content-based** (via `idf` function) and **collaborative filtering** (via `get_collaborative_recommendations` function). The recommendations from both methods are then weighted using the `weight_recommendations` function.

4. **Return Recommendations**: The function returns a dictionary named `prediction` where each key-value pair corresponds to a user and their corresponding item recommendations, respectively.

The hybrid approach in `hybrid` function helps to tackle the cold start problem (new users with no feedback) and leverages both individual user preferences and similarities among different users to make recommendations.


In [12]:
def hybrid(topics, feedback, answers, n = 5):
    """Recommend per user by blending content-based and collaborative methods.

    Args:
        topics: DataFrame of topic weights (questions x topics).
        feedback: DataFrame of feedback values (questions x users).
        answers: Passed through to ``idf``; not used directly here.
        n: Number of recommendations per user.

    Returns:
        dict user -> {question: score}. Users with feedback get the weighted
        blend of the ``idf`` and collaborative recommendations; users without
        any feedback (cold start) get the recommendations of the super user
        profile.
    """
    user_profiles = generate_user_profiles_idf(feedback, topics)
    # Generate super user profile
    super_user_profile = generate_super_user_profile(feedback, user_profiles)

    # These results do not depend on the user being processed, so each one is
    # computed at most once (on first use) instead of once per user.
    content_based = None
    collaborative = None
    super_user_recommendations = None

    prediction = {}
    for user in feedback.columns:
        # Cold start: the user has no feedback at all. Absolute values are
        # summed so that likes and dislikes that cancel each other out are
        # not mistaken for "no feedback".
        if feedback[user].abs().sum() == 0:
            if super_user_recommendations is None:
                # Calculate cosine similarities for super user
                cosine_similarity_super_user = calculate_cosine_similarity(super_user_profile[0], topics.values)

                # Use the cosine similarity to get top N recommendations for the super user
                super_user_recommendations = get_top_recommendations('super_user', feedback, topics, cosine_similarity_super_user, n)

            # For users with no feedback, recommend the same as for the super user
            prediction[user] = dict(super_user_recommendations)  # own copy per user
        else:
            if content_based is None:
                content_based = idf(topics, feedback, answers, n)
                collaborative = get_collaborative_recommendations(feedback, topics, n)

            # Weight the recommendations from both methods, keeping the requested n
            prediction[user] = weight_recommendations(user, content_based, collaborative, n)

    return prediction

In [13]:
# Run the hybrid recommender on the loaded tables, asking for 5 recommendations per user.
hybrid(topics, feedback, answers, n = 5)

{'User 1': {12: 0.3732578418171315,
  9: 0.2164504429798859,
  3: 0.14101615597160638,
  4: 0.13333333333333333,
  5: 0.13333333333333333},
 'User 2': {13: 0.2667057508724375,
  14: 0.2559433223371567,
  7: 0.1582046124211028,
  5: 0.13333333333333333,
  6: 0.13333333333333333},
 'User 3': {4: 0.13333333333333333,
  6: 0.13333333333333333,
  17: 0.13333333333333333,
  14: 0.09199134419450787,
  19: 0.048713098503195676},
 'User 4': {14: 0.49151396704795686,
  18: 0.3457474079582128,
  17: 0.2770199027616932,
  2: 0.2770199027616932,
  5: 0.22431530360921015}}