In [None]:
import pandas as pd
import random
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam as AdamLegacy
from collections import defaultdict
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns
import warnings
from scipy.spatial.distance import cosine
import seaborn as sns
from keras.regularizers import l1, l2

In [None]:
def add_average_ratings(movies, ratings):
    """
    Attach each movie's mean rating to the movies table.

    Parameters
    ----------
    movies : pandas.DataFrame
        Must contain a ``MovieID`` column.
    ratings : pandas.DataFrame
        Must contain ``MovieID`` and ``Rating`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns of ``movies`` plus ``AverageRating``.
        Movies that have no rating at all receive the default value 2.5.
        The ``movies`` argument itself is not modified.
    """
    average_ratings = ratings.groupby('MovieID')['Rating'].mean()
    movies = movies.merge(average_ratings, on='MovieID', how='left')
    movies = movies.rename(columns={"Rating": "AverageRating"})
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # a column selection: that form is chained assignment, which pandas
    # deprecates and which silently does nothing under Copy-on-Write.
    movies['AverageRating'] = movies['AverageRating'].fillna(2.5)
    return movies


In [None]:
def load_movielens_1m_from_files():
    """
    Load the data into three separate tables.

    Reads ``movies.dat``, ``ratings.dat`` and ``users.dat`` (in that order)
    from the current working directory. All three files are ``::``-separated,
    have no header row and are decoded as latin-1.

    Returns
    -------
    tuple
        ``(movies, ratings, users)``; ``movies`` has been passed through
        ``add_average_ratings`` before being returned.
    """
    # (file name, column names) for each table, in read order.
    table_specs = (
        ("movies.dat", ["MovieID", "Title", "Genres"]),
        ("ratings.dat", ["UserID", "MovieID", "Rating", "Timestamp"]),
        ("users.dat", ["UserID", "Gender", "Age", "Occupation", "Zip-code"]),
    )

    movies, ratings, users = (
        pd.read_csv(
            file_name,
            sep="::",
            header=None,
            names=column_names,
            engine="python",  # the multi-character separator needs the python engine
            encoding="latin-1",
        )
        for file_name, column_names in table_specs
    )

    movies = add_average_ratings(movies, ratings)
    return movies, ratings, users


In [None]:
def plot_expert_weights(weights_history):
    """
    Plot how the weight of each expert evolves over the rounds.

    ``weights_history`` is indexed as ``weights_history[:, k]``, so it must be
    2-D with at least four columns; column ``k`` is drawn with the k-th
    label/colour pair below.
    """
    series_styles = (
        ('Epsilon-Greedy (e=0.95)', 'blue'),
        ('Red Neuronal', 'orange'),
        ('Filtro Colaborativo basado en usuarios', 'green'),
        ('Mejor brazo', 'red'),
    )
    for column, (label, color) in enumerate(series_styles):
        plt.plot(weights_history[:, column], label=label, color=color)

    plt.legend()
    plt.xlabel('Ronda')
    plt.ylabel('Pesos de los expertos')
    plt.show()

In [None]:
def plot_rewards(real_rewards, fake_rewards):
    """
    Draw the real and the fake reward series, each in its own figure,
    and then the running average via ``plot_average_rewards``.
    """
    panels = (
        (real_rewards, 'Real Rewards', 'blue'),
        (fake_rewards, 'Fake Rewards', 'red'),
    )
    for series, caption, line_color in panels:
        plt.figure(figsize=(10,5), dpi=100)
        plt.plot(series, label=caption, color=line_color)
        plt.legend()
        plt.xlabel('Ronda')
        plt.ylabel(caption)
        plt.show()

    plot_average_rewards(real_rewards, fake_rewards)


def plot_average_rewards(real_rewards, fake_rewards):
    """
    Plot the running mean of the real rewards, round by round.

    ``fake_rewards`` is accepted but not used by this function.
    """
    round_numbers = np.arange(1, len(real_rewards) + 1)
    running_mean = np.cumsum(real_rewards) / round_numbers

    plt.figure(figsize=(10,5), dpi=100)
    plt.plot(running_mean, label='Recompensa media', color='blue')
    plt.xlabel('Ronda')
    plt.ylabel('Recompensas medias')
    plt.legend()
    plt.show()


def plot_reward_histogram(realrewards, fakerewards):
    """
    Histogram (10 bins) of the rewards that were obtained.

    ``fakerewards`` is accepted but not used by this function.
    """
    plt.figure()
    plt.hist(realrewards, bins=10, alpha=0.5, label="Recompensas")
    plt.title('Histograma de recompensas')
    plt.xlabel('Recompensa')
    plt.ylabel('Frecuencia')
    plt.legend(loc='upper right')
    plt.show()

In [None]:
def plot_violin_rewards_by_category(reward_data):
    """
    Violin plot of the ``Reward`` column for every value of ``Category``.
    """
    plt.figure(figsize=(18, 10))
    sns.violinplot(data=reward_data, x='Category', y='Reward')
    plt.xlabel('Categoría')
    plt.ylabel('Recompensa')
    plt.title('Gráfico de violín de recompensas por brazo')
    plt.show()

def plot_scatterplot_age_vs_rewards(reward_data):
    """
    Scatter plot of ``Reward`` against ``Age`` with one x tick per distinct age.

    Side effect: the ``Age`` column of ``reward_data`` is converted to ``int``
    in place, so the caller's frame is modified.
    """
    plt.figure(figsize=(18, 10))
    reward_data['Age'] = reward_data['Age'].astype(int)
    sns.scatterplot(data=reward_data, x='Age', y='Reward')

    distinct_ages = sorted(reward_data['Age'].unique())
    plt.xticks(ticks=distinct_ages, labels=distinct_ages)

    plt.title('Scatterplot de Edad vs Recompensa')
    plt.xlabel('Edad')
    plt.ylabel('Recompensa')
    plt.show()



def plot_stacked_area(reward_by_context_category):
    """
    Stacked area chart of the frame, drawn with the pandas plotting API.
    """
    reward_by_context_category.plot(kind='area', stacked=True, figsize=(10, 7))
    plt.xlabel('Tiempo')
    plt.ylabel('Número de recomendaciones')
    plt.title('Recomendaciones de películas a lo largo del tiempo')
    plt.show()

def plot_contour_chart(reward_by_context_category):
    """
    Filled 2-D kernel density plot of ``Reward`` against ``Age``.
    """
    sns.kdeplot(
        data=reward_by_context_category,
        x="Age",
        y="Reward",
        fill=True,
    )
    plt.title('Gráfico de contorno de Edad vs Recompensa')
    plt.show()


def plot_boxplot_rewards_by_gender(reward_by_context_category):
    """
    Box plot of the ``Reward`` column, one box per ``Gender`` value.
    """
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=reward_by_context_category, x='Gender', y='Reward')
    plt.title('Boxplot de Recompensas por Género')
    plt.show()

def plot_barplot_avg_rewards_by_age(reward_by_context_category):
    """
    Bar plot of the mean reward for every age group.

    Parameters
    ----------
    reward_by_context_category : pandas.DataFrame
        Must contain the columns ``Age`` and ``Reward``.
    """
    avg_rewards = reward_by_context_category.groupby('Age')['Reward'].mean().reset_index()
    plt.figure(figsize=(10, 6))
    # The aggregated frame keeps the source column names ('Age', 'Reward').
    # Referring to translated names here made seaborn fail because those
    # columns do not exist, so the translations now go on the axis labels.
    sns.barplot(x='Age', y='Reward', data=avg_rewards)
    plt.xlabel('Edad')
    plt.ylabel('Recompensa')
    plt.title('Barplot de Recompensas Medias por Grupo de Edad')
    plt.show()

def plot_densityplot_rewards_by_gender(reward_by_context_category):
    """
    Overlay one reward density curve per distinct ``Gender`` value.
    """
    plt.figure(figsize=(10, 6))
    gender_column = reward_by_context_category['Gender']
    for gender in gender_column.unique():
        rewards_for_gender = reward_by_context_category[gender_column == gender]['Reward']
        sns.kdeplot(rewards_for_gender, label=gender)
    plt.title('Densityplot de Recompensas por Género')
    plt.legend()
    plt.show()

In [None]:
def plot_cumulative_rewards(cumulative_rewards):
    """
    Helper that draws the cumulative reward series on a new figure.

    The figure is created and decorated but ``plt.show()`` is not called here.
    """
    plt.figure()
    plt.plot(cumulative_rewards)
    plt.title("Evolución de la recompensa acumulativa a lo largo del tiempo")
    plt.xlabel("Ronda")
    plt.ylabel("Recompensa acumulativa")


def plot_heatmap(reward_by_context_category):
    """
    Draw one reward heat map per distinct ``Age`` value.

    For each age the rows are pivoted so that (Gender, Occupation) pairs form
    the index, categories form the columns and the cells hold the ``Reward``
    aggregated by ``pivot_table`` (its default aggregation).
    """
    for age_range in reward_by_context_category['Age'].unique():
        same_age = reward_by_context_category['Age'] == age_range
        heatmap_data = reward_by_context_category[same_age].pivot_table(
            values='Reward',
            index=['Gender', 'Occupation'],
            columns='Category',
        )

        plt.figure(figsize=(18, 10))
        sns.heatmap(
            heatmap_data,
            annot=True,
            cmap='coolwarm',
            linewidths=.5,
            fmt=".2f",
            cbar_kws={'label': 'Recompensa media'},
        )
        plt.title(f'Mapa de calor de recompensas por categorías y contexto (Edad: {age_range})')
        plt.xlabel('Categoría')
        plt.ylabel('Contexto (Género, Ocupación)')
        plt.show()

In [None]:
def create_user_subset(users, num_userstrain, num_usersexp4, random_state=None):
    """
    Shuffle the users table and split it into two disjoint subsets.

    Parameters
    ----------
    users : pandas.DataFrame
        Table to split; it is not modified.
    num_userstrain : int
        Number of rows for the first (training) subset.
    num_usersexp4 : int
        Number of rows for the second subset, taken right after the first.
    random_state : optional
        Forwarded to ``DataFrame.sample``. Pass a fixed value to make the
        shuffle reproducible; the default ``None`` keeps the previous
        behaviour of a different shuffle on every call.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_users, exp4_users)``, both with a fresh 0..n-1 index.
    """
    shuffled_users = users.sample(frac=1, random_state=random_state).reset_index(drop=True)

    train_end = num_userstrain
    exp4_end = num_userstrain + num_usersexp4
    train_users = shuffled_users.iloc[:train_end].reset_index(drop=True)
    exp4_users = shuffled_users.iloc[train_end:exp4_end].reset_index(drop=True)

    return train_users, exp4_users


In [None]:
class Hedge:
    """
    Multiplicative-weights (Hedge) learner over a fixed set of experts.

    Every expert starts with weight 1. After each round the weight of expert
    ``e`` is multiplied by ``(1 - eta) ** cost[e]`` and the probabilities are
    the normalised weights.

    The weights are tracked in log space and re-centred on the largest one at
    every update, so the probabilities stay finite no matter how many updates
    are applied or how large the costs are.

    Attributes
    ----------
    num_experts : number of experts.
    eta : learning rate; expected to satisfy ``0 <= eta < 1``.
    weights : current weights scaled so that the largest one equals 1.
    probabilities : ``weights`` normalised to sum to 1.
    """

    def __init__(self, num_experts, eta):
        self.num_experts = num_experts
        self.eta = eta
        self._log_weights = np.zeros(num_experts)
        self.weights = np.ones(num_experts)
        self.probabilities = self.weights / np.sum(self.weights)

    def get_probabilities(self):
        """Return the current probability vector over the experts."""
        return self.probabilities

    def update_weights(self, costs):
        """
        Apply one multiplicative update.

        Parameters
        ----------
        costs : array-like of length ``num_experts``
            Cost charged to each expert this round. NaN entries are treated
            as a cost of 0.
        """
        costs = np.asarray(costs, dtype=float)
        costs = np.where(np.isnan(costs), 0.0, costs)

        with np.errstate(divide='ignore', invalid='ignore'):
            log_decay = np.log1p(-self.eta)
            # A zero cost must leave the weight untouched even when
            # log_decay is -inf (eta == 1), where 0 * -inf would be NaN.
            log_update = np.where(costs == 0, 0.0, costs * log_decay)

        log_weights = self._log_weights + log_update
        peak = np.max(log_weights)
        if np.isfinite(peak):
            # Re-centre so the best expert has log-weight 0 (weight 1).
            log_weights = log_weights - peak
        else:
            # Degenerate state (all weights vanished or NaN): start over
            # from the uniform distribution instead of propagating NaN.
            log_weights = np.zeros(self.num_experts)

        self._log_weights = log_weights
        # Fresh arrays are assigned on purpose: callers may keep references
        # to earlier probability vectors (e.g. to record a history).
        self.weights = np.exp(log_weights)
        self.probabilities = self.weights / np.sum(self.weights)


In [None]:
def add_random_reward(movie_avg_rating):
    """
    Add randomness to a reward.

    Gaussian noise with mean -0.5 and standard deviation 0.5 is added to
    ``movie_avg_rating`` and the result is clipped to the range [1, 5].

    Parameters
    ----------
    movie_avg_rating : float or array-like
        Average rating(s) to perturb. Plain Python floats, NumPy scalars and
        arrays are all accepted.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The noisy rating(s), limited to [1, 5].
    """
    noisy_rating = movie_avg_rating + np.random.normal(-0.5, 0.5)
    # np.clip works for any numeric input, whereas the ``.clip`` method only
    # exists on NumPy objects and fails for a built-in float.
    return np.clip(noisy_rating, 1, 5)

In [None]:
def preprocess_user_data(user):
    """
    Turn a user record into the numeric context vector.

    Parameters
    ----------
    user : mapping-like (e.g. a pandas Series)
        Must provide ``"Gender"``, ``"Age"`` and ``"Occupation"``.
        It is only read, never modified.

    Returns
    -------
    numpy.ndarray
        ``float32`` vector ``[gender, age, occupation]`` where gender is 1 for
        ``'M'`` and 0 for any other value.
    """
    gender_flag = 1 if user["Gender"] == 'M' else 0
    # Build the vector from local values instead of writing the encoded
    # gender back into ``user``, so the caller's record keeps its content.
    raw_context = np.array([gender_flag, user["Age"], user["Occupation"]], dtype=object)
    return raw_context.astype(np.float32)

In [None]:
def get_reward(user, chosen_category, movies, ratings, user_movies_dict, bbdd, user_moviesrewards_dict):
    """
    Pick a not-yet-recommended movie of the chosen category and return its reward.

    The reward comes from one of two sources:

    1. Ratings table: the first movie (in ``ratings`` row order) that the user
       has rated, that lists the chosen category among its genres and that is
       not yet in ``user_movies_dict``. The reward is ``Rating / 5`` and
       ``bbdd`` becomes ``True``.
    2. Fallback: if no such movie exists, a random movie whose ``Genres``
       value equals ``chosen_category`` exactly and that is not yet recorded
       for the user. The reward is ``add_random_reward(AverageRating) / 5``.

    Parameters
    ----------
    user : mapping-like with a ``"UserID"`` entry.
    chosen_category : str or iterable of str
        Genre name, or several names (a movie matching any of them counts).
    movies : DataFrame with ``MovieID``, ``Genres`` ('|'-separated) and
        ``AverageRating``.
    ratings : DataFrame with ``UserID``, ``MovieID`` and ``Rating``.
    user_movies_dict : dict mapping user id to the movie ids already used;
        updated for this user when a movie is chosen.
    bbdd : bool
        Returned unchanged unless the reward came from the ratings table.
    user_moviesrewards_dict : dict mapping user id to a list of
        ``{"MovieID", "Reward"}`` records; a record is appended when a movie
        is chosen.

    Returns
    -------
    tuple
        ``(reward, user_movies_dict, bbdd, categories)``. ``reward`` is -1
        when no movie could be chosen. ``categories`` holds the genres of the
        chosen movie for a ratings-table reward and is empty otherwise.
    """
    user_id = user["UserID"]
    seen_movie_ids = user_movies_dict[user_id]

    # Compare whole genre names. Joining the characters of a single genre
    # string with '|' produces a regular expression that matches individual
    # letters and therefore almost every movie.
    if isinstance(chosen_category, str):
        wanted_genres = {chosen_category}
    else:
        wanted_genres = set(chosen_category)
    genre_match = movies["Genres"].apply(
        lambda genres: not wanted_genres.isdisjoint(genres.split('|'))
    ).astype(bool)
    category_movie_ids = movies.loc[genre_match, "MovieID"]

    user_ratings = ratings[ratings["UserID"] == user_id]
    candidates = user_ratings[
        user_ratings["MovieID"].isin(category_movie_ids)
        & ~user_ratings["MovieID"].isin(seen_movie_ids)
    ]

    reward = -1
    categories = []
    chosen_movie = None

    if not candidates.empty:
        chosen_movie = candidates.iloc[0]
        reward = chosen_movie["Rating"] / 5
        bbdd = True
        movie_row = movies.loc[movies['MovieID'] == chosen_movie["MovieID"]]
        categories = movie_row['Genres'].values[0].split('|')
    else:
        # NOTE(review): the fallback keeps the original exact-equality filter,
        # i.e. only single-genre movies qualify -- confirm this is intended.
        category_movies = movies[movies["Genres"] == (chosen_category)]
        user_unwatched_movies = category_movies[~category_movies["MovieID"].isin(seen_movie_ids)]
        if not user_unwatched_movies.empty:
            chosen_movie = user_unwatched_movies.sample().iloc[0]
            reward = add_random_reward(chosen_movie["AverageRating"]) / 5

    if chosen_movie is not None and reward != -1:
        user_movies_dict[user_id] = np.append(seen_movie_ids, chosen_movie["MovieID"])
        user_moviesrewards_dict[user_id].append({"MovieID": chosen_movie["MovieID"], "Reward": reward})

    return reward, user_movies_dict, bbdd, categories

In [None]:
class Exp4:
    """
    Exp4-style aggregator that follows the advice of several experts.

    A ``Hedge`` learner keeps one weight per expert. Each round every expert
    proposes an arm (``sampleArmExperts``), one arm is drawn
    (``sampleExpert``) and, once the reward is known, the experts that
    proposed the played arm are charged an importance-weighted cost
    (``update``).

    Experts must provide ``select_arm(context)`` and
    ``get_arm_probability(arm, context)``.
    """

    def __init__(self, experts, eta, gamma, num_arms):
        """
        Parameters
        ----------
        experts : list of expert objects.
        eta : learning rate handed to ``Hedge``.
        gamma : probability of playing a uniformly random arm instead of an
            expert's proposal.
        num_arms : number of arms.
        """
        self.experts = experts
        self.num_experts = len(experts)
        # Arms are indices, so they are stored as integers.
        self.chosenArms = np.zeros(len(experts), dtype=int)
        self.num_arms = num_arms
        self.hedge = Hedge(self.num_experts, eta)
        self.gamma = gamma

    def sampleExpert(self):
        """
        Draw the arm to play.

        An expert is sampled from the Hedge distribution; with probability
        ``1 - gamma`` its stored proposal is returned, otherwise a uniformly
        random arm.
        """
        et = np.random.choice(self.num_experts, p=self.hedge.get_probabilities())

        if np.random.random() < (1 - self.gamma):
            at = self.chosenArms[et]
        else:
            at = np.random.randint(self.num_arms)

        return at

    def sampleArmExperts(self, context):
        """Ask every expert for its arm in ``context`` and store the proposals."""
        for e in range(self.num_experts):
            self.chosenArms[e] = self.experts[e].select_arm(context)

    def update(self, reward, at, context):
        """
        Charge the experts after arm ``at`` produced ``reward``.

        The cost ``1 - reward`` is divided by the probability of having played
        ``at`` and assigned to each expert whose stored proposal equals
        ``at``; all other experts get cost 0.
        """
        cost = 1 - reward
        expert_weights = self.hedge.get_probabilities()

        mixture = 0.0
        for e in range(self.num_experts):
            arm_probability = np.asarray(
                self.experts[e].get_arm_probability(at, context), dtype=float
            )
            if arm_probability.ndim > 0:
                # The expert returned a whole distribution; keep the entry
                # that belongs to the played arm.
                arm_probability = arm_probability[int(at)]
            mixture += arm_probability * expert_weights[e]

        # Probability of playing ``at`` under sampleExpert: follow the experts
        # with probability 1 - gamma, otherwise pick uniformly among the arms.
        pt = (1 - self.gamma) * mixture + self.gamma / self.num_arms
        if np.isnan(pt):
            pt = 0.1

        fake_costs = np.zeros(self.num_experts)
        for e in range(self.num_experts):
            if at == self.chosenArms[e]:
                # The small constant avoids a division by zero when pt == 0.
                fake_costs[e] = cost / (pt + 1e-10)
        self.hedge.update_weights(fake_costs)

    def get_weights(self):
        """Return the current probability assigned to each expert."""
        return self.hedge.get_probabilities()


In [None]:
class EpsilonGreedy:
    """
    Context-free epsilon-greedy bandit.

    With probability ``epsilon`` a uniformly random arm is explored;
    otherwise the arm with the highest running mean reward is exploited.
    The ``context`` arguments are accepted for interface compatibility with
    the other experts and are ignored.
    """

    def __init__(self, epsilon, num_arms):
        self.epsilon = epsilon
        self.num_arms = num_arms
        self.counts = np.zeros(num_arms)  # pulls per arm
        self.values = np.zeros(num_arms)  # running mean reward per arm

    def select_arm(self, context):
        """Return the index of the arm to play."""
        if np.random.random() > self.epsilon:
            return np.argmax(self.values)
        else:
            return np.random.randint(self.num_arms)

    def update(self, context, arm, reward):
        """Fold ``reward`` into the running mean of ``arm``."""
        self.counts[arm] += 1
        n = self.counts[arm]
        value = self.values[arm]
        new_value = ((n - 1) / n) * value + (1 / n) * reward
        self.values[arm] = new_value

    def get_arm_probabilities(self, context):
        """
        Return the per-arm running mean rewards.

        NOTE(review): despite the name these are value estimates, not
        probabilities; kept as is because callers may rely on it.
        """
        return self.values

    def get_arm_probability(self, arm, context):
        """
        Return the probability (a scalar) that ``arm`` is selected.

        * ``epsilon == 0``: 1 for the greedy arm, 0 for every other arm.
        * no pulls recorded yet: the uniform probability ``1 / num_arms``.
        * otherwise ``epsilon / num_arms``, plus ``1 - epsilon`` for the
          greedy arm.
        """
        is_greedy_arm = arm == np.argmax(self.values)
        if self.epsilon == 0:
            return 1 if is_greedy_arm else 0
        if np.sum(self.counts) == 0:
            # A single number is returned here; handing back the whole
            # uniform vector would break callers that expect a scalar.
            return 1 / self.num_arms
        explore = self.epsilon / self.num_arms
        exploit = (1 - self.epsilon) if is_greedy_arm else 0
        return exploit + explore

In [None]:
class NeuralNetworkExpert:
    """
    Expert that maps a context vector to an arm with a small dense network.

    The network ends in a softmax over the arms; the expert always proposes
    the arm with the highest predicted score.
    """

    def __init__(self, num_arms, input_shape=(3, ), learning_rate=0.05):
        self.num_arms = num_arms
        self.model = self.build_model(input_shape, learning_rate)

    def build_model(self, input_shape, learning_rate):
        """Build and compile the network: 120 and 70 ReLU units, then softmax."""
        model = Sequential()
        model.add(Dense(120, activation='relu', input_shape=input_shape, kernel_regularizer=l2(0.015)))
        model.add(Dense(70, activation='relu', kernel_regularizer=l2(0.015)))
        model.add(Dense(self.num_arms, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer=AdamLegacy(learning_rate=learning_rate))
        return model

    def get_arm_probability(self, arm, context):
        """Return 1 if ``arm`` is the arm this expert would pick, else 0."""
        probabilities = self.get_arm_probabilitiess(context)
        if np.argmax(probabilities) == arm:
            return 1
        else:
            return 0

    def get_arm_probabilitiess(self, context):
        """Return the softmax output of the model for a single context."""
        context = context.reshape(1, -1)
        return self.model.predict(context, verbose=0)[0]

    def get_arm_probabilities(self, context):
        """Correctly spelled alias of ``get_arm_probabilitiess``."""
        return self.get_arm_probabilitiess(context)

    def select_arm(self, context):
        """Return the arm with the highest predicted score."""
        probabilities = self.get_arm_probabilitiess(context)

        return np.argmax(probabilities)

    def update(self, X_history, chosen_arm_history, reward_history, batch_size=32, epochs=170):
        """
        Fit the model on the logged interactions.

        Parameters
        ----------
        X_history : sequence of context vectors.
        chosen_arm_history : sequence of iterables of arm indices, one per
            context.
        reward_history : sequence of rewards, one per context.
        batch_size, epochs : forwarded to ``model.fit``.

        Each (context, arm) pair becomes one training sample with a one-hot
        target on the arm. Entries with a zero or non-finite reward are
        skipped, and nothing is fitted when no sample remains.
        """
        X_train = []
        y_train = []

        for X, chosen_arms, reward in zip(X_history, chosen_arm_history, reward_history):
            # Dividing a reward-scaled one-hot vector by its own sum always
            # gives a plain one-hot vector -- except for reward 0, where it
            # gives NaN targets that would corrupt the model. Build the
            # one-hot target directly and drop such entries.
            # NOTE(review): the reward magnitude does not influence training;
            # if that is wanted, pass the rewards as sample weights.
            if reward == 0 or not np.isfinite(reward):
                continue
            for chosen_arm in chosen_arms:
                target = np.zeros(self.num_arms)
                target[chosen_arm] = 1.0
                X_train.append(X)
                y_train.append(target)

        if not X_train:
            # Nothing to learn from; fitting on empty arrays would fail.
            return

        X_train = np.array(X_train)
        y_train = np.array(y_train)

        self.model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0)
         




In [None]:
def similarity(u, v):
    """Cosine similarity of two vectors (1 minus SciPy's cosine distance)."""
    return 1 - cosine(u, v)

class NeighborhoodBasedCollaborativeFiltering:
    """
    Nearest-neighbour expert.

    It stores past (context, arms, reward) triples. For a new context it looks
    at the ``k_nearest`` most similar stored contexts (cosine similarity),
    averages the rewards they obtained per arm and proposes the arm with the
    highest average.
    """

    def __init__(self, n_arms, k_nearest=5):
        self.n_arms = n_arms
        self.k_nearest = k_nearest
        self.contexts = []
        self.chosen_arms = []
        self.rewards = []

    def _neighbour_arm_values(self, context):
        """Mean reward per arm among the nearest stored contexts (0 if unseen)."""
        similarities = [similarity(context, c) for c in self.contexts]
        nearest_indices = np.argsort(similarities)[-self.k_nearest:]

        arm_rewards = np.zeros(self.n_arms)
        arm_counts = np.zeros(self.n_arms)

        for index in nearest_indices:
            chosen_arms_list = self.chosen_arms[index]
            reward = self.rewards[index]

            # A flat list of arms is wrapped so both layouts (flat list and
            # list of lists) can be walked with the same nested loop.
            if chosen_arms_list and not isinstance(chosen_arms_list[0], list):
                chosen_arms_list = [chosen_arms_list]
            for chosen_arms in chosen_arms_list:
                for arm in chosen_arms:
                    arm_rewards[arm] += reward
                    arm_counts[arm] += 1

        # Avoid 0/0 for arms none of the neighbours played.
        arm_counts[arm_counts == 0] = 1
        arm_values = arm_rewards / arm_counts
        arm_values[np.isnan(arm_values)] = 0
        return arm_values

    def select_arm(self, context):
        """Return the proposed arm; a random arm while nothing is stored."""
        if not self.contexts:
            return np.random.choice(self.n_arms)

        return np.argmax(self._neighbour_arm_values(context))

    def update(self, X_history, chosen_arm_history, reward_history):
        """Append a batch of logged interactions to the stored history."""
        self.contexts.extend(X_history)
        self.chosen_arms.extend(chosen_arm_history)
        self.rewards.extend(reward_history)

    def get_arm_probability(self, arm, context):
        """
        Return the probability that ``arm`` is proposed for ``context``.

        While nothing is stored the choice is uniform (``1 / n_arms``).
        Afterwards the expert is deterministic: 1 for the arm that
        ``select_arm`` would return and 0 for every other arm.
        """
        if not self.contexts:
            return 1 / self.n_arms

        # Compare the requested arm with the best arm over ALL arms; taking
        # argmax of the single entry ``arm_values[arm]`` is always 0 and made
        # this method return 0 for every arm.
        best_arm = np.argmax(self._neighbour_arm_values(context))
        return 1 if best_arm == arm else 0
    


In [None]:
def createExperts(num_arms):
    """
    Build the initial list of experts.

    The list contains a single epsilon-greedy expert with epsilon 0.95.
    """
    exploratory_expert = EpsilonGreedy(0.95, num_arms)
    return [exploratory_expert]

In [None]:
def init_simulation(movies, users, perc_train):
    """
    Prepare the variables shared by both simulation phases.

    Parameters
    ----------
    movies : DataFrame with a '|'-separated ``Genres`` column.
    users : DataFrame with a ``UserID`` column.
    perc_train : float
        Fraction of the users assigned to the first (training) subset; the
        remaining users form the second subset.

    Returns
    -------
    tuple
        ``(usersubset1, usersubset2, num_arms, categories, user_movies_dict,
        experts, genre_dict)`` where ``categories`` is the sorted array of
        distinct genres (one arm each), ``genre_dict`` maps a genre to its
        arm index and ``user_movies_dict`` maps every user id to its own
        empty list.
    """
    movie_genres = movies["Genres"].apply(lambda x: x.split("|"))
    categories = np.unique(np.concatenate(movie_genres))
    genre_dict = {category: num for num, category in enumerate(categories)}
    num_arms = len(categories)

    total_users = len(users)
    num_userstrain = int(total_users * perc_train)
    num_usersexp4 = total_users - num_userstrain
    usersubset1, usersubset2 = create_user_subset(users, num_userstrain, num_usersexp4)

    experts = createExperts(num_arms)

    # One independent empty list per user. A dict comprehension does this
    # directly, without going through an object array whose nested-list
    # assignment depends on NumPy's shape inference.
    user_movies_dict = {user_id: [] for user_id in users["UserID"]}
    return usersubset1, usersubset2, num_arms, categories, user_movies_dict, experts, genre_dict

In [None]:
def first_part_simulation(user_movies_dict, num_arms, categories, user_subset, genre_dict, roundstraining):
    """
    Policy training phase.

    An epsilon-greedy expert (from ``createExperts``) interacts with the users
    of ``user_subset``, cycling over them ``roundstraining`` times. Every
    reward that comes from the ratings table is logged and also used to
    update a purely greedy learner (epsilon 0). The log is then used to fit a
    nearest-neighbour expert and a neural-network expert.

    NOTE: ``movies`` and ``ratings`` are read from module-level variables, so
    they must be defined before this function is called.

    Parameters
    ----------
    user_movies_dict : dict mapping user id to already-used movie ids.
    num_arms : number of arms (genres).
    categories : sequence mapping an arm index to its genre name.
    user_subset : DataFrame with the users for this phase.
    genre_dict : dict mapping a genre name to its arm index.
    roundstraining : number of passes over ``user_subset``.

    Returns
    -------
    tuple
        ``(experts, user_movies_dict)`` with ``experts`` ordered as
        [epsilon-greedy, neural network, nearest neighbour, greedy].
    """
    experts = createExperts(num_arms)
    # Keyed by the same users as user_movies_dict; get_reward indexes both
    # dictionaries with the same user id.
    user_moviesrewards_dict = {user_id: [] for user_id in user_movies_dict}

    context_history = []
    chosen_arm_history = []
    reward_history = []

    greedy = EpsilonGreedy(0, num_arms)
    num_users = len(user_subset)
    num_rounds = roundstraining * num_users
    round_index = 0
    while round_index < num_rounds:
        # Cycle through every user, the last one included. The previous
        # counter reset skipped the last user and ran past the end of a
        # single-user subset.
        user = user_subset.iloc[round_index % num_users]
        context = preprocess_user_data(user[["Gender", "Age", "Occupation"]])

        for e in experts:
            chosen_arm = e.select_arm(context)
            chosen_category = categories[chosen_arm]
            bbdd = False
            reward, user_movies_dict, bbdd, categoriesel = get_reward(user, chosen_category, movies, ratings, user_movies_dict, bbdd, user_moviesrewards_dict)

            genres_nums = [genre_dict[genre] for genre in categoriesel]
            # NOTE(review): reward is -1 when no movie could be chosen and is
            # still fed to the expert here -- confirm that is intended.
            e.update(context, int(chosen_arm), reward)
            if bbdd == True:
                # Only rewards backed by a real rating are used for training.
                for genre in genres_nums:
                    greedy.update(context, int(genre), reward)
                context_history.append(context)
                chosen_arm_history.append(genres_nums)
                reward_history.append(reward)
            round_index += 1

    neigh = NeighborhoodBasedCollaborativeFiltering(num_arms)
    neigh.update(context_history, chosen_arm_history, reward_history)
    nn = NeuralNetworkExpert(num_arms)
    nn.update(context_history, chosen_arm_history, reward_history)
    experts.append(nn)
    experts.append(neigh)
    experts.append(greedy)
    return experts, user_movies_dict

In [None]:
def second_part_simulation(user_movies_dict, exp4, user_subset, categories, genre_dict, rounds_per_user=4):
    """
    Second phase of the simulation: the experts are already trained and Exp4
    has to learn which of them to trust.

    The users of ``user_subset`` are visited cyclically for
    ``rounds_per_user * len(user_subset)`` rounds. Exp4 is updated only with
    rewards that come from the ratings table.

    NOTE: ``movies`` and ``ratings`` are read from module-level variables, so
    they must be defined before this function is called.

    Parameters
    ----------
    user_movies_dict : dict mapping user id to already-used movie ids.
    exp4 : object providing ``sampleArmExperts``, ``sampleExpert``,
        ``update`` and ``get_weights``.
    user_subset : DataFrame with the users for this phase.
    categories : sequence mapping an arm index to its genre name.
    genre_dict : dict mapping a genre name to its arm index.
    rounds_per_user : int, default 4
        Number of passes over ``user_subset``.

    Returns
    -------
    tuple
        ``(weights_history, realrewards, fakerewards, cumulative_rewards,
        reward_data, reward_dataFaketoo)``. The first four are NumPy arrays;
        the last two are DataFrames with the columns Gender, Age, Occupation,
        Category and Reward (real-rating rounds and all other rounds,
        respectively).
    """
    # Keyed by the same users as user_movies_dict; get_reward indexes both
    # dictionaries with the same user id.
    user_moviesrewards_dict = {user_id: [] for user_id in user_movies_dict}
    columns = ['Gender', 'Age', 'Occupation', 'Category', 'Reward']

    weights_history = []
    realrewards = []
    fakerewards = []
    cumulative_rewards = [0]
    # Rows are collected in plain lists and turned into DataFrames once at
    # the end; growing a DataFrame with concat inside the loop is quadratic.
    real_records = []
    fake_records = []

    num_users = len(user_subset)
    num_rounds = num_users * rounds_per_user
    for round_index in range(num_rounds):
        # Cycle through every user, the last one included.
        user = user_subset.iloc[round_index % num_users]
        context = preprocess_user_data(user[["Gender", "Age", "Occupation"]])

        exp4.sampleArmExperts(context)
        chosen_arm = int(exp4.sampleExpert())
        chosen_category = categories[chosen_arm]

        bbdd = False
        reward, user_movies_dict, bbdd, categoriesel = get_reward(user, chosen_category, movies, ratings, user_movies_dict, bbdd, user_moviesrewards_dict)
        genres_nums = [genre_dict[genre] for genre in categoriesel]

        if bbdd == True:
            exp4.update(reward, chosen_arm, context)
            # Store a copy so later updates cannot alter the recorded history.
            weights_history.append(np.array(exp4.get_weights()))
            realrewards.append(reward)

            for genre in genres_nums:
                real_records.append({'Gender': user['Gender'],
                                     'Age': user['Age'],
                                     'Occupation': user['Occupation'],
                                     'Category': categories[genre],
                                     'Reward': reward})
        else:
            fakerewards.append(reward)
            # NOTE(review): the cumulative series only accumulates the rewards
            # of this branch -- confirm whether real rewards should count too.
            cumulative_rewards.append(cumulative_rewards[-1] + reward)
            fake_records.append({'Gender': user['Gender'],
                                 'Age': user['Age'],
                                 'Occupation': user['Occupation'],
                                 'Category': chosen_category,
                                 'Reward': reward})

    reward_data = pd.DataFrame(real_records, columns=columns)
    reward_dataFaketoo = pd.DataFrame(fake_records, columns=columns)

    return np.array(weights_history), np.array(realrewards), np.array(fakerewards), np.array(cumulative_rewards), reward_data, reward_dataFaketoo

In [None]:
def run_simulation(movies, ratings, users, perc_train, roundstraining):
    """
    Run the complete Exp4 simulation and draw the result plots.

    Steps: split the users, train the experts on the first subset
    (``first_part_simulation``), let Exp4 combine them on the second subset
    (``second_part_simulation``) and plot expert weights and rewards.

    Parameters
    ----------
    movies, ratings, users : DataFrames as returned by
        ``load_movielens_1m_from_files``.
    perc_train : fraction of users used for the training phase.
    roundstraining : number of passes over the training users.
    """
    context_columns = ['Gender', 'Age', 'Occupation', 'Category']

    def mean_reward_by_context(frame):
        # Cast first so the mean is numeric even if the column has object dtype.
        numeric = frame.assign(Reward=frame['Reward'].astype(float))
        return numeric.groupby(context_columns).mean().reset_index()

    user_subset1, user_subset2, num_arms, categories, user_movies_dict, experts, genre_dict = init_simulation(movies, users, perc_train)
    experts, user_movies_dict = first_part_simulation(user_movies_dict, num_arms, categories, user_subset1, genre_dict, roundstraining)
    eta = 0.05
    gamma = 0
    exp4 = Exp4(experts, eta, gamma, num_arms)
    weights_history, realrewards, fakerewards, cumulative_rewards, reward_data, reward_dataFaketoo = second_part_simulation(user_movies_dict, exp4, user_subset2, categories, genre_dict)
    reward_by_context_category = mean_reward_by_context(reward_data)
    # Aggregate the fake-reward frame itself; it used to be computed from
    # reward_data, which merely duplicated the line above.
    reward_by_context_categoryFaketoo = mean_reward_by_context(reward_dataFaketoo)
    plot_expert_weights(weights_history)
    print("Recompensa media: ", np.mean(realrewards) )
    plot_average_rewards(realrewards, fakerewards)
    plot_reward_histogram(realrewards, fakerewards)
    plot_heatmap(reward_by_context_category)
    plot_scatterplot_age_vs_rewards(reward_by_context_category)
    plot_violin_rewards_by_category(reward_by_context_category)
    plot_stacked_area(reward_by_context_category)
    plot_contour_chart(reward_by_context_category)

    plot_boxplot_rewards_by_gender(reward_by_context_category)
    plot_densityplot_rewards_by_gender(reward_by_context_category)

In [None]:
# Driver cell: load the data set and run the whole simulation.
# NOTE: `movies`, `ratings` and `users` must keep these exact names, because
# first_part_simulation / second_part_simulation read them as globals.
movies, ratings, users =  load_movielens_1m_from_files()
size = 1  # fraction of the users table to use (1 = all users)
perc_train = 0.8  # fraction of the selected users used to train the experts
roundstraining= 4  # passes over the training users

# Keep the first `size` fraction of the users, in file order.
num_users = len(users)
subset_size = int(num_users * size)
subset_users = users.iloc[:subset_size].reset_index(drop=True)

run_simulation(movies, ratings, subset_users, perc_train, roundstraining)

