In [None]:
# Input: Low-Dimensional Data Demo

In [116]:
# Define rating history
user_ratings = [
    [0, 3, 0, 5, 0, 0, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 2, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 3, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 5, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 4, 5, 5, 5, 0, 0, 0],  # User 5
    [0, 3, 0, 5, 0, 0, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 2, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 0, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 5, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 4, 5, 5, 5, 0, 0, 0],  # User 5
    [0, 3, 0, 5, 0, 3, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 5, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 3, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 5, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 4, 5, 2, 5, 0, 0, 0],  # User 5
    [0, 3, 0, 5, 0, 0, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 4, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 3, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 1, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 3, 5, 0, 5, 0, 0, 0],  # User 5
]

# List of movies (for reference)
movies = ['Book 0', 'Book 1', 'Book 2', 'Book 3', 'Book 4', 'Book 5', 'Book 6', 'Book 7', 'Book 8', 'Book 9']

# Input ratings for a new user (0 represents missing values)
user_ratings_input = [5, 0, 0, 0, 0, 0, 0, 0, 0, 2]

# Collaborative Filtering using Genetic Algorithm

### With Hyperparameter Tuning

In [None]:
import numpy as np
import itertools

class CollaborativeFiltering:
    def __init__(self, user_ratings):
        self.user_ratings = user_ratings

    def cosine_similarity(self, a, b):
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

    def average_rating(self, user_ratings):
        ratings = [rating for rating in user_ratings if rating != 0]
        return np.mean(ratings)

    def calculate_predicted_rating_aws(self, target_user_ratings, target_item_index, user_ratings):
        average_rating_target_user = self.average_rating(target_user_ratings)

        total_weighted_rating = 0
        total_similarity = 0

        for user_ratings in user_ratings:
            if user_ratings[target_item_index] != 0:
                similarity = self.cosine_similarity(target_user_ratings, user_ratings)
                average_rating_user_j = self.average_rating(user_ratings)
                adjusted_rating = user_ratings[target_item_index] - average_rating_user_j
                total_weighted_rating += similarity * adjusted_rating
                total_similarity += abs(similarity)

        if total_similarity != 0:
            predicted_rating = average_rating_target_user + (total_weighted_rating / total_similarity)
        else:
            predicted_rating = average_rating_target_user

        return predicted_rating

class GeneticAlgorithm:
    def __init__(self, user_ratings_input, user_ratings):
        self.user_ratings_input = user_ratings_input
        self.user_ratings = user_ratings

    def possible_solutions(self):
        missing_items = [index for index, rating in enumerate(self.user_ratings_input) if rating == 0]
        all_recommendations = list(itertools.combinations(missing_items, 3))
        return all_recommendations

    def calculate_fitness(self, solution):
        total_rating = 0
        cf = CollaborativeFiltering(self.user_ratings)
        for item in solution:
            predicted_rating = cf.calculate_predicted_rating_aws(self.user_ratings_input, item, self.user_ratings)
            total_rating += predicted_rating
        return total_rating

    def selection(self, population, n):
        ranked_population = sorted(population, key=lambda x: self.calculate_fitness(x), reverse=True)
        return ranked_population[:n]

    def crossover(self, parent1, parent2):
        crossover_point = len(parent1) // 2
        child = parent1[:crossover_point] + parent2[crossover_point:]
        return child

    def mutation(self, solution):
        mutation_point = np.random.randint(len(solution))
        new_item = np.random.randint(len(self.user_ratings_input))
        solution[mutation_point] = new_item
        return solution

    def genetic_algorithm(self, population_size=100, generations=100, selection_size=6):
        all_recommendations = self.possible_solutions()
        population = [list(solution) for solution in all_recommendations]

        for generation in range(generations):
            # Selection
            selected_solutions = self.selection(population, selection_size)

            # Crossover
            children = []
            for i in range(len(selected_solutions)):
                child = self.crossover(selected_solutions[i], selected_solutions[(i + 1) % len(selected_solutions)])
                children.append(child)

            # Mutation
            for i in range(len(children)):
                if np.random.random() < 0.1:  # Mutation probability: 0.1
                    children[i] = self.mutation(children[i])

            # New population
            population = selected_solutions + children

        # Get the best solution from the final population
        best_solution = max(population, key=lambda x: self.calculate_fitness(x))

        return best_solution

# Function to optimize the model's performance (tuning hyperparameters)

class HyperparameterTuner:
    def __init__(self, user_ratings_input, user_ratings):
        self.user_ratings_input = user_ratings_input
        self.user_ratings = user_ratings

    def tune_genetic_algorithm(self, population_sizes, generations, selection_sizes):
        results = []
        for pop_size in population_sizes:
            for gen in generations:
                for sel_size in selection_sizes:
                    ga = GeneticAlgorithm(self.user_ratings_input, self.user_ratings)
                    solution = ga.genetic_algorithm(population_size=pop_size, generations=gen, selection_size=sel_size)
                    score = ga.calculate_fitness(solution)
                    results.append({
                        'Population Size': pop_size,
                        'Generations': gen,
                        'Selection Size': sel_size,
                        'Best Solution': solution,
                        'Score': score
                    })

        df_results = pd.DataFrame(results)
        df_results = df_results.sort_values(by='Score', ascending=False).reset_index(drop=True)
        return df_results

# ... (Rest of the code)

# Hyperparameter tuning
tuner = HyperparameterTuner(user_ratings_input, user_ratings)
population_sizes = [50, 100, 200]
generations = [50, 100, 150]
selection_sizes = [4, 6, 8]
df_results = tuner.tune_genetic_algorithm(population_sizes, generations, selection_sizes)

# Print the results in a DataFrame
print("Hyperparameter Tuning Results:")
print(df_results)


### Without Hyperparameter Tuning

In [None]:
# Cosine similarity function (you can use your existing implementation)
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Function to calculate the average rating for a user (excluding missing values)
def average_rating(user_ratings):
    ratings = [rating for rating in user_ratings if rating != 0]
    return np.mean(ratings)

# Function to calculate the Adjusted Weighted Sum predicted rating for a target item
def calculate_predicted_rating_aws(target_user_ratings, target_item_index, user_ratings):
    average_rating_target_user = average_rating(target_user_ratings)

    total_weighted_rating = 0
    total_similarity = 0

    for user_ratings in user_ratings:
        if user_ratings[target_item_index] != 0:
            similarity = cosine_similarity(target_user_ratings, user_ratings)
            average_rating_user_j = average_rating(user_ratings)
            adjusted_rating = user_ratings[target_item_index] - average_rating_user_j
            total_weighted_rating += similarity * adjusted_rating
            total_similarity += abs(similarity)

    if total_similarity != 0:
        predicted_rating = average_rating_target_user + (total_weighted_rating / total_similarity)
    else:
        predicted_rating = average_rating_target_user

    return predicted_rating

# Genetic Algorithm

# Generate Initial Population: all possible solutions
import itertools

def possible_solutions(user_ratings_input):
    missing_items = [index for index, rating in enumerate(user_ratings_input) if rating == 0]
    all_recommendations = list(itertools.combinations(missing_items, 3))
    return all_recommendations

# Fitness Function: Calculate the fitness (total rating) of each solution
def calculate_fitness(solution, user_ratings_input, user_ratings):
    total_rating = 0
    for item in solution:
        predicted_rating = calculate_predicted_rating_aws(user_ratings_input, item, user_ratings)
        total_rating += predicted_rating
    return total_rating

# Selection: Select the top n individuals (recommendations) based on their fitness
def selection(population, n, user_ratings_input, user_ratings):
    ranked_population = sorted(population, key=lambda x: calculate_fitness(x, user_ratings_input, user_ratings), reverse=True)
    return ranked_population[:n]

# Crossover: Perform crossover to create new solutions (recommendations) from the selected individuals
def crossover(parent1, parent2):
    crossover_point = len(parent1) // 2
    child = parent1[:crossover_point] + parent2[crossover_point:]
    return child

# Mutation: Apply mutation to introduce diversity in the population
def mutation(solution):
    mutation_point = np.random.randint(len(solution))
    new_item = np.random.randint(len(user_ratings_input))
    solution[mutation_point] = new_item
    return solution

# Genetic Algorithm Function
def genetic_algorithm(user_ratings_input, user_ratings, population_size=100, generations=100, selection_size=6):
    all_recommendations = possible_solutions(user_ratings_input)
    population = [list(solution) for solution in all_recommendations]

    for generation in range(generations):
        # Selection
        selected_solutions = selection(population, selection_size, user_ratings_input, user_ratings)

        # Crossover
        children = []
        for i in range(len(selected_solutions)):
            child = crossover(selected_solutions[i], selected_solutions[(i + 1) % len(selected_solutions)])
            children.append(child)

        # Mutation
        for i in range(len(children)):
            if np.random.random() < 0.1:  # Mutation probability: 0.1
                children[i] = mutation(children[i])

        # New population
        population = selected_solutions + children

    # Get the best solution from the final population
    best_solution = max(population, key=lambda x: calculate_fitness(x, user_ratings_input, user_ratings))

    return best_solution

# Run the genetic algorithm
best_recommendation = genetic_algorithm(user_ratings_input, user_ratings)
recommended_movies = [movies[item] for item in best_recommendation]

print("Recommended Books:", recommended_movies)

# Input: High-Dimensional Demo

### Input Transformed Dataset

In [28]:
import pandas as pd
user_ratings = pd.read_csv("../Mai - Demo #1/dataset/output.csv")

In [3]:
user_ratings.head(2)

Unnamed: 0,userID,1,3,6,47,50,70,101,110,151,...,147662,148166,149011,152372,158721,160341,160527,160836,163937,163981
0,539,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0
1,550,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0


In [4]:
import random
# Create an array with 1 row, m columns and random values representing ratings
no_movies = len(user_ratings.columns) + 1
user_ratings_input= []
for i in range(1,no_movies):
  rand = random.randrange(0, 5, 1)
  user_ratings_input.append(rand)
user_ratings_input

[2,
 2,
 2,
 1,
 2,
 2,
 1,
 1,
 2,
 3,
 3,
 1,
 3,
 2,
 2,
 2,
 4,
 0,
 0,
 4,
 1,
 3,
 2,
 3,
 0,
 1,
 1,
 3,
 0,
 4,
 3,
 4,
 1,
 1,
 2,
 2,
 0,
 2,
 0,
 0,
 3,
 2,
 0,
 4,
 4,
 0,
 0,
 2,
 3,
 1,
 2,
 3,
 0,
 4,
 1,
 1,
 1,
 1,
 1,
 0,
 2,
 0,
 1,
 1,
 4,
 2,
 2,
 2,
 3,
 0,
 2,
 3,
 1,
 2,
 4,
 3,
 3,
 1,
 1,
 3,
 0,
 0,
 2,
 1,
 4,
 3,
 4,
 3,
 4,
 4,
 4,
 4,
 2,
 1,
 4,
 3,
 1,
 0,
 0,
 1,
 4,
 3,
 3,
 4,
 4,
 3,
 3,
 2,
 1,
 0,
 0,
 3,
 4,
 4,
 0,
 1,
 0,
 4,
 4,
 4,
 3,
 4,
 1,
 3,
 4,
 1,
 4,
 2,
 0,
 4,
 2,
 4,
 1,
 1,
 2,
 1,
 0,
 0,
 4,
 3,
 4,
 4,
 2,
 3,
 4,
 0,
 1,
 1,
 2,
 4,
 4,
 4,
 4,
 4,
 0,
 3,
 3,
 1,
 3,
 1,
 3,
 1,
 4,
 3,
 4,
 0,
 3,
 2,
 2,
 2,
 0,
 2,
 4,
 0,
 0,
 2,
 3,
 0,
 2,
 0,
 4,
 3,
 0,
 1,
 1,
 2,
 1,
 4,
 0,
 1,
 1,
 4,
 2,
 0,
 1,
 4,
 3,
 3,
 4,
 2,
 1,
 1,
 0,
 0,
 2,
 3,
 3,
 3,
 0,
 1,
 4,
 4,
 3,
 0,
 4,
 2,
 3,
 0,
 3,
 2,
 0,
 0,
 1,
 1,
 1,
 4,
 1,
 2,
 2,
 1,
 4,
 4,
 1,
 4,
 2,
 4,
 3,
 4,
 1,
 0,
 1,
 4,
 0,
 1,
 4,
 0,
 4,
 0,
 0,
 2,


### Run GA

In [29]:
# Define rating history
user_ratings = [
    [0, 3, 0, 5, 0, 0, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 2, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 3, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 5, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 4, 5, 5, 5, 0, 0, 0],  # User 5
    [0, 3, 0, 5, 0, 0, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 2, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 0, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 5, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 4, 5, 5, 5, 0, 0, 0],  # User 5
    [0, 3, 0, 5, 0, 3, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 5, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 3, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 5, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 4, 5, 2, 5, 0, 0, 0],  # User 5
    [0, 3, 0, 5, 0, 0, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 4, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 3, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 1, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 3, 5, 0, 5, 0, 0, 0],  # User 5
]

# Input ratings for a new user (0 represents missing values)
user_ratings_input = [5, 0, 0, 0, 0, 0, 0, 0, 0, 2]

# List of books (for reference)
books = ['Book 0', 'Book 1', 'Book 2', 'Book 3', 'Book 4', 'Book 5', 'Book 6', 'Book 7', 'Book 8', 'Book 9']

# Semantic Features
genres = ['action', 'comedy', 'adventure', 'sci-fi', 'crime', 'horror', 'war']
actors = ['Author 1', 'Author 2', 'Author 3']



In [55]:
# Use features as rows for semantic list & random binary values
# = 1 means the book is in that genre or it is written by that author
semantic = pd.DataFrame(index=genres + actors, columns=books)
semantic.loc[genres] = [[random.randint(0, 1) for _ in range(len(books))] for _ in range(len(genres))]
semantic.loc[actors] = [[random.randint(0, 1) for _ in range(len(books))] for _ in range(len(actors))]

semantic

Unnamed: 0,Book 0,Book 1,Book 2,Book 3,Book 4,Book 5,Book 6,Book 7,Book 8,Book 9
action,0,1,1,1,0,0,1,0,1,0
comedy,0,0,0,0,0,0,0,0,1,0
adventure,1,1,1,0,0,0,1,1,1,0
sci-fi,1,0,0,1,1,0,0,1,0,1
crime,0,1,1,0,1,1,0,1,0,0
horror,1,0,1,0,0,1,1,0,1,1
war,1,0,1,1,1,1,0,1,0,1
Author 1,0,1,0,0,1,0,1,1,1,1
Author 2,0,0,0,1,0,0,1,1,1,1
Author 3,1,0,0,1,0,0,1,1,1,1


In [61]:
semantic.values.tolist()

[[0, 1, 1, 0, 1, 1, 1, 1, 0, 1],
 [0, 0, 1, 1, 1, 0, 0, 1, 0, 1],
 [0, 1, 0, 1, 1, 0, 1, 1, 1, 1],
 [1, 0, 0, 1, 1, 0, 0, 0, 1, 0],
 [0, 1, 0, 0, 0, 1, 0, 0, 1, 1],
 [1, 0, 1, 0, 1, 0, 1, 0, 1, 1],
 [0, 1, 0, 1, 0, 1, 1, 1, 1, 0],
 [0, 1, 0, 1, 1, 0, 0, 1, 0, 1],
 [1, 1, 1, 0, 0, 1, 1, 1, 0, 0],
 [1, 1, 1, 1, 0, 0, 0, 1, 0, 0]]

In [58]:
# Use features as rows for semantic list & random binary values
# = 1 means the book is in that genre or it is written by that author
semantic = pd.DataFrame(index=genres + actors, columns=books)
semantic.loc[genres] = [[random.randint(0, 1) for _ in range(len(books))] for _ in range(len(genres))]
semantic.loc[actors] = [[random.randint(0, 1) for _ in range(len(books))] for _ in range(len(actors))]


In [98]:
import numpy as np
import pandas as pd
import random
import itertools

# Define rating history
user_ratings = [
    [0, 3, 0, 5, 0, 0, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 2, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 3, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 5, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 4, 5, 5, 5, 0, 0, 0],  # User 5
    [0, 3, 0, 5, 0, 0, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 2, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 0, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 5, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 4, 5, 5, 5, 0, 0, 0],  # User 5
    [0, 3, 0, 5, 0, 3, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 5, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 3, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 5, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 4, 5, 2, 5, 0, 0, 0],  # User 5
    [0, 3, 0, 5, 0, 0, 4, 5, 0, 2],  # User 0
    [0, 0, 3, 4, 5, 0, 4, 0, 3, 0],  # User 1
    [3, 1, 1, 3, 5, 5, 0, 4, 0, 0],  # User 2
    [0, 5, 0, 0, 0, 1, 1, 5, 5, 2],  # User 3
    [0, 0, 0, 5, 0, 5, 0, 0, 1, 0],  # User 4
    [2, 0, 0, 3, 5, 0, 5, 0, 0, 0],  # User 5
]

# Input ratings for a new user (0 represents missing values)
user_ratings_input = [0, 5, 3, 2, 0, 0, 1, 0, 0, 2]

# List of books (for reference)
books = ['Book 0', 'Book 1', 'Book 2', 'Book 3', 'Book 4', 'Book 5', 'Book 6', 'Book 7', 'Book 8', 'Book 9']

# Semantic Features
genres = ['action', 'comedy', 'adventure', 'sci-fi', 'crime', 'horror', 'war']
actors = ['Author 1', 'Author 2', 'Author 3']

# Semantic matrix
semantic = np.array([
    [0, 1, 1, 0, 1, 1, 1, 1, 0, 1],
    [0, 0, 1, 1, 1, 0, 0, 1, 0, 1],
    [0, 1, 0, 1, 1, 0, 1, 1, 1, 1],
    [1, 0, 0, 1, 1, 0, 0, 0, 1, 0],
    [0, 1, 0, 0, 0, 1, 0, 0, 1, 1],
    [1, 0, 1, 0, 1, 0, 1, 0, 1, 1],
    [0, 1, 0, 1, 0, 1, 1, 1, 1, 0],
    [0, 1, 0, 1, 1, 0, 0, 1, 0, 1],
    [1, 1, 1, 0, 0, 1, 1, 1, 0, 0],
    [1, 1, 1, 1, 0, 0, 0, 1, 0, 0]
])


In [100]:
no_rec = 3
missing_items = [index for index, rating in enumerate(user_ratings_input) if rating == 0]
all_recommendations = list(itertools.combinations(missing_items, no_rec))
all_recommendations

[(0, 4, 5),
 (0, 4, 7),
 (0, 4, 8),
 (0, 5, 7),
 (0, 5, 8),
 (0, 7, 8),
 (4, 5, 7),
 (4, 5, 8),
 (4, 7, 8),
 (5, 7, 8)]

In [131]:
# Constants
POPULATION_SIZE = 50
MUTATION_RATE = 0.1
NUM_GENERATIONS = 6
no_rec = 3


def calculate_semrating(solution):
    semrating = 0
    for i in range(len(solution)):
        for j in range(i + 1, len(solution)):
            semrating += semantic[solution[i]][solution[j]]
    return semrating

def possible_solutions(user_ratings_input, no_rec):
    missing_items = [index for index, rating in enumerate(user_ratings_input) if rating == 0]
    all_recommendations = list(itertools.combinations(missing_items, no_rec))
    return all_recommendations

def generate_initial_population(all_recommendations, semratings):
    total_semrating = sum(semratings)
    probabilities = [semrating / total_semrating for semrating in semratings]
    population = random.choices(all_recommendations, probabilities, k=POPULATION_SIZE)
    return population

def calculate_total_rating(solution):
    total_rating = 0
    for i in range(len(solution)):
        total_rating += user_ratings[solution[i]][i]
    return total_rating

def selection(population, fitness_scores):
    selected_population = []
    total_fitness = sum(fitness_scores)
    probabilities = [fitness_score / total_fitness for fitness_score in fitness_scores]
    for _ in range(POPULATION_SIZE):
        selected_solution = random.choices(population, probabilities)[0]
        selected_population.append(selected_solution)
    return selected_population


def crossover(parent1, parent2):
    crossover_point = random.randint(1, len(parent1) - 1)
    child1 = list(parent1[:crossover_point]) + list(parent2[crossover_point:])
    child2 = list(parent2[:crossover_point]) + list(parent1[crossover_point:])

    child1 = remove_duplicates(child1)
    child2 = remove_duplicates(child2)

    return child1, child2

def mutation(solution, all_recommendations):
    mutated_solution = solution.copy()
    for i in range(len(solution)):
        if random.random() < MUTATION_RATE:
            possible_items = all_recommendations[i % len(all_recommendations)]
            mutated_solution[i] = random.choice(possible_items)

    mutated_solution = remove_duplicates(mutated_solution)

    return mutated_solution


def remove_duplicates(solution):
    seen = set()
    return [x for x in solution if not (x in seen or seen.add(x))]


def genetic_algorithm(user_ratings_input, no_rec):
    all_recommendations = possible_solutions(user_ratings_input, no_rec)
    semratings = [calculate_semrating(solution) for solution in all_recommendations]
    population = generate_initial_population(all_recommendations, semratings)

    for generation in range(NUM_GENERATIONS):
        fitness_scores = [calculate_total_rating(solution) for solution in population]

        selected_population = selection(population, fitness_scores)

        next_population = []
        for _ in range(POPULATION_SIZE // 2):
            parent1, parent2 = random.sample(selected_population, 2)
            child1, child2 = crossover(parent1, parent2)
            mutated_child1 = mutation(child1, all_recommendations)
            mutated_child2 = mutation(child2, all_recommendations)
            next_population.extend([mutated_child1, mutated_child2])

        population = next_population

    best_solution = max(population, key=calculate_total_rating)
    return best_solution


Best Solution:
['Book 8', 'Book 0', 'Book 7']


In [None]:
# Run the genetic algorithm
no_rec = 5  # Number of items in each recommendation list
best_solution = genetic_algorithm(user_ratings_input, no_rec)

# Print the best solution
recommendations = [books[i] for i in best_solution]
print("Best Solution:")
print(recommendations)

### Without added Semantic

In [123]:
# Run CF using GA

import numpy as np
import itertools
import pandas as pd
import random

class CollaborativeFiltering:
    def __init__(self, user_ratings):
        self.user_ratings = user_ratings

    def cosine_similarity(self, a, b):
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

    def average_rating(self, user_ratings):
        ratings = [rating for rating in user_ratings if rating != 0]
        return np.mean(ratings)

    def calculate_predicted_rating_aws(self, target_user_ratings, target_item_index, user_ratings):
        average_rating_target_user = self.average_rating(target_user_ratings)
        total_weighted_rating = 0
        total_similarity = 0

        for user_ratings in user_ratings:
            if user_ratings[target_item_index] != 0:
                similarity = self.cosine_similarity(target_user_ratings, user_ratings)
                average_rating_user_j = self.average_rating(user_ratings)
                adjusted_rating = user_ratings[target_item_index] - average_rating_user_j
                total_weighted_rating += similarity * adjusted_rating
                total_similarity += abs(similarity)

        if total_similarity != 0:
            predicted_rating = average_rating_target_user + (total_weighted_rating / total_similarity)
        else:
            predicted_rating = average_rating_target_user

        return predicted_rating

class GeneticAlgorithm:
    def __init__(self, user_ratings_input, user_ratings, no_rec):
        self.user_ratings_input = user_ratings_input
        self.user_ratings = user_ratings
        self.no_rec = no_rec

    def possible_solutions(self):
        missing_items = [index for index, rating in enumerate(self.user_ratings_input) if rating == 0]
        all_recommendations = list(itertools.combinations(missing_items, self.no_rec))
        return all_recommendations

    def calculate_fitness(self, solution):
        total_rating = 0
        cf = CollaborativeFiltering(self.user_ratings)
        for item in solution:
            predicted_rating = cf.calculate_predicted_rating_aws(self.user_ratings_input, item, self.user_ratings)
            total_rating += predicted_rating
        return total_rating

    def selection(self, population, n):
        ranked_population = sorted(population, key=lambda x: self.calculate_fitness(x), reverse=True)
        return ranked_population[:n]

    def crossover(self, parent1, parent2):
        crossover_point = len(parent1) // 2
        child = parent1[:crossover_point] + parent2[crossover_point:]
        return child

    def mutation(self, solution):
        mutation_point = np.random.randint(len(solution))
        missing_items = [index for index, rating in enumerate(self.user_ratings_input) if rating == 0]
        
        new_item = random.choice(missing_items)
        for new_item in solution:
            new_item = random.choice(missing_items)
        solution[mutation_point] = new_item
        
        return solution

    def genetic_algorithm(self, population_size, generations, selection_size):
        all_recommendations = self.possible_solutions()
        population = [list(solution) for solution in all_recommendations]

        for generation in range(generations):
            # Selection
            selected_solutions = self.selection(population, selection_size)

            # Crossover
            children = []
            for i in range(len(selected_solutions)):
                child = self.crossover(selected_solutions[i], selected_solutions[(i + 1) % len(selected_solutions)])
                children.append(child)

            # Mutation
            for i in range(len(children)):
                if np.random.random() < 0.1:  # Mutation probability: 0.1
                    children[i] = self.mutation(children[i])

            # New population
            population = selected_solutions + children

        # Get the best solution from the final population
        best_solution = max(population, key=lambda x: self.calculate_fitness(x))

        return best_solution

# Function to optimize the model's performance (tuning hyperparameters)

class HyperparameterTuner:
    def __init__(self, user_ratings_input, user_ratings, no_rec):
        self.user_ratings_input = user_ratings_input
        self.user_ratings = user_ratings
        self.no_rec = no_rec

    def tune_genetic_algorithm(self, population_sizes, generations, selection_sizes, no_rec):
        results = []
        for pop_size in population_sizes:
            for gen in generations:
                for sel_size in selection_sizes:
                    ga = GeneticAlgorithm(self.user_ratings_input, self.user_ratings, self.no_rec)
                    solution = ga.genetic_algorithm(population_size=pop_size, generations=gen, selection_size=sel_size)
                    score = ga.calculate_fitness(solution)
                    results.append({
                        'Population Size': pop_size,
                        'Generations': gen,
                        'Selection Size': sel_size,
                        'Best Solution': solution,
                        'Score': score
                    })

        df_results = pd.DataFrame(results)
        df_results = df_results.sort_values(by='Score', ascending=False).reset_index(drop=True)
        return df_results

In [134]:
# Hyperparameter tuning
tuner = HyperparameterTuner(user_ratings_input, user_ratings, no_rec)
population_sizes = [10, 20]
generations = [4, 6]
selection_sizes = [2,4]
no_rec = 3
results = tuner.tune_genetic_algorithm(population_sizes, generations, selection_sizes, no_rec)

# Print the results in a DataFrame
print("Hyperparameter Tuning Results:")
print(results)

Hyperparameter Tuning Results:
   Population Size  Generations  Selection Size Best Solution      Score
0               10            4               4     [4, 4, 8]  14.928948
1               20            6               4     [4, 4, 8]  14.928948
2               10            4               2     [4, 5, 8]  14.408806
3               10            6               2     [4, 5, 8]  14.408806
4               10            6               4     [4, 5, 8]  14.408806
5               20            4               2     [4, 5, 8]  14.408806
6               20            4               4     [4, 5, 8]  14.408806
7               20            6               2     [4, 5, 8]  14.408806


In [119]:
missing_items = [index for index, rating in enumerate(user_ratings_input) if rating == 0]
all_recommendations = list(itertools.combinations(missing_items, no_rec))
all_recommendations

[(1, 2, 3, 4),
 (1, 2, 3, 5),
 (1, 2, 3, 6),
 (1, 2, 3, 7),
 (1, 2, 3, 8),
 (1, 2, 4, 5),
 (1, 2, 4, 6),
 (1, 2, 4, 7),
 (1, 2, 4, 8),
 (1, 2, 5, 6),
 (1, 2, 5, 7),
 (1, 2, 5, 8),
 (1, 2, 6, 7),
 (1, 2, 6, 8),
 (1, 2, 7, 8),
 (1, 3, 4, 5),
 (1, 3, 4, 6),
 (1, 3, 4, 7),
 (1, 3, 4, 8),
 (1, 3, 5, 6),
 (1, 3, 5, 7),
 (1, 3, 5, 8),
 (1, 3, 6, 7),
 (1, 3, 6, 8),
 (1, 3, 7, 8),
 (1, 4, 5, 6),
 (1, 4, 5, 7),
 (1, 4, 5, 8),
 (1, 4, 6, 7),
 (1, 4, 6, 8),
 (1, 4, 7, 8),
 (1, 5, 6, 7),
 (1, 5, 6, 8),
 (1, 5, 7, 8),
 (1, 6, 7, 8),
 (2, 3, 4, 5),
 (2, 3, 4, 6),
 (2, 3, 4, 7),
 (2, 3, 4, 8),
 (2, 3, 5, 6),
 (2, 3, 5, 7),
 (2, 3, 5, 8),
 (2, 3, 6, 7),
 (2, 3, 6, 8),
 (2, 3, 7, 8),
 (2, 4, 5, 6),
 (2, 4, 5, 7),
 (2, 4, 5, 8),
 (2, 4, 6, 7),
 (2, 4, 6, 8),
 (2, 4, 7, 8),
 (2, 5, 6, 7),
 (2, 5, 6, 8),
 (2, 5, 7, 8),
 (2, 6, 7, 8),
 (3, 4, 5, 6),
 (3, 4, 5, 7),
 (3, 4, 5, 8),
 (3, 4, 6, 7),
 (3, 4, 6, 8),
 (3, 4, 7, 8),
 (3, 5, 6, 7),
 (3, 5, 6, 8),
 (3, 5, 7, 8),
 (3, 6, 7, 8),
 (4, 5, 6, 7),
 (4, 5, 6,