In [54]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random

# Exploring reward funcion options

## **Creating a test framework**

In [85]:
class TestGenerator:
    def __init__(
                self,
                number_of_users_to_generate:int,
                total_views_average:int,
                total_unique_views_average:int,
                total_prompts_created_average:int,
                total_curator_views_average:int,
                average_curator_score:int = 10 ,
                number_of_curator_views:int = 5,
                reward_function: object = None,
                curator_views_compact:bool = True,
                curator_scores_compact:bool = True,
                total_views_compact:bool = True,
                unique_views_compact:bool = True,
                prompts_compact:bool = True,
                compact_factor:float = 0.2,
                extreme_factor:int = 20,
                extreme_perc:float = 0.1,
                curator_scores_extreme:bool = False,
                total_views_extreme:bool = False,
                unique_views_extreme:bool = False,
                prompts_extreme:bool = False,

                ):
        '''
        number_of_users_to_generate: self explanatory
        total_views_average: (average) total number of views from all blocks created by the user
        total_unique_views_average: (average) number of unique viewers from all the blacks generated by the user
        total_prompts_created_average: (average) number of blocks referencing all the user generated blocks (could be sandbox additions too)
        number_of_curator_views_average: (average) number of curators that referenced the user's blocks to their curation (could be sandbox additions too)
        reward_function: function that takes some the parameters above and calculates a final score
        X__compact: defines if the numbers will be cluster around the averages (above) or will be totally spread
        compact_factor: the percentage where the random values will be generate from: average_Value * +- compact_factor
        extreme_factor: factor by what I multiply when values are not compacted to create the range from 
                        [1, averege * max I want for the metric to go]
        extreme_perc: percentage of users that should get an extreme value
        X__extreme: determines for that variable if a specigic percentage of users should get a very extreme value

        '''
        self.compact_factor = compact_factor
        self.number_of_users_to_genereate = number_of_users_to_generate
        self.total_views_average = total_views_average
        self.total_unique_views_average = total_unique_views_average
        self.total_prompts_created_average = total_prompts_created_average
        self.total_views_compact = total_views_compact
        self.unique_views_compact = unique_views_compact
        self.prompts_compact = prompts_compact
        self.total_curator_views_average = total_curator_views_average
        self.number_of_curator_views = number_of_curator_views
        self.average_curator_score = average_curator_score
        self.reward_function = reward_function
        self.extreme_factor = extreme_factor
        self.curator_views_compact = curator_views_compact
        self.curator_scores_compact = curator_scores_compact
        self.simulated_users = None
        self.extreme_perc = extreme_perc
        self.curator_scores_extreme = curator_scores_extreme
        self.total_views_extreme = total_views_extreme
        self.unique_views_extreme = unique_views_extreme
        self.prompts_extreme = prompts_extreme


    def generate_users(self):
        simulated_users = []
        tviews_ext_counter = 0
        uvies_ext_counter = 0
        prompts_ext_counter = 0
        curator_ext_counter = 0

        for x in range(self.number_of_users_to_genereate):
            user_data = {'id': x}

            #generating total views
            if self.total_views_extreme and tviews_ext_counter < (self.number_of_users_to_genereate * self.extreme_perc):
                user_data['total_views'] = self.generate_total_views() * self.extreme_factor
                tviews_ext_counter += 1
                user_data['is_outlier'] = True
                user_data['is_total_views_outlier'] = True
            else:
                user_data['total_views'] = self.generate_total_views()
                user_data['is_outlier'] = False
                user_data['is_total_views_outlier'] = False
           
            #generating unique views
            if self.unique_views_extreme and uvies_ext_counter < (self.number_of_users_to_genereate * self.extreme_perc):
                user_data['unique_views'] = self.generate_unique_views() * self.extreme_factor
                uvies_ext_counter += 1
                user_data['is_outlier'] = True
                user_data['is_unique_views_outlier'] = True
            else:
                user_data['unique_views'] = self.generate_unique_views()
                user_data['is_outlier'] = False
                user_data['is_unique_views_outlier'] = False

            #generating number of prompts
            if self.prompts_extreme and prompts_ext_counter < (self.number_of_users_to_genereate * self.extreme_perc):
                user_data['prompts_created'] = self.generate_prompts_creation() * self.extreme_factor
                prompts_ext_counter += 1
                user_data['is_outlier'] = True
                user_data['is_prompts_outlier'] = True
            else:
                user_data['prompts_created'] = self.generate_prompts_creation()
                user_data['is_outlier'] = False
                user_data['is_prompts_outlier'] = False

            #generating curator view weighted average
            if self.curator_scores_extreme and curator_ext_counter < (self.number_of_users_to_genereate * self.extreme_perc):
                user_data['curator_views_weighted'] = self.generate_curator_views() * self.extreme_factor
                curator_ext_counter += 1
                user_data['is_outlier'] = True
                user_data['is_curator_views_outlier'] = True
            else:
                user_data['curator_views_weighted'] = self.generate_curator_views()
                user_data['is_outlier'] = False
                user_data['is_curator_views_outlier'] = False

            simulated_users.append(user_data)
        
        self.simulated_users = simulated_users
        
        return simulated_users
    
    def generate_user_with_scores(self, reward_function_weights:list = [0.1, 0.2, 0.3, 0.4]):
        if self.simulated_users == None:
            self.generate_users()
        
        final_data = []
        for user in self.simulated_users:
            score_function_parameters = {
                'parameters_weights' : reward_function_weights,
                **user
            }
            del score_function_parameters["id"]
            del score_function_parameters["is_outlier"]
            del score_function_parameters["is_curator_views_outlier"]
            del score_function_parameters["is_prompts_outlier"]
            del score_function_parameters["is_unique_views_outlier"]
            del score_function_parameters["is_total_views_outlier"]
            

            user_score = self.reward_function(**score_function_parameters)
            user['final_score'] = user_score

            final_data.append(user)

        return final_data
    
    def generate_total_views(self):
        if self.total_views_compact:
            delta = self.total_views_average * self.compact_factor
            return random.randrange(self.total_views_average - delta, self.total_views_average + delta)
        else:
            return  random.randrange( 1 , self.total_views_average * self.extreme_factor)
    
    def generate_unique_views(self):
        if self.unique_views_compact:
            delta = self.total_unique_views_average * self.compact_factor
            return random.randrange(self.total_unique_views_average - delta, self.total_unique_views_average + delta)
        else:
            return  random.randrange( 1 , self.total_unique_views_average * self.extreme_factor)
    
    def generate_prompts_creation(self):
        if self.prompts_compact:
            delta = self.total_prompts_created_average * self.compact_factor
            return random.randrange(self.total_prompts_created_average - delta, self.total_prompts_created_average + delta)
        else:
            return  random.randrange( 1 , self.total_prompts_created_average * self.extreme_factor)
    
    def generate_curator_views(self):
        products = 0
        scores = 0
        for _ in range(self.number_of_curator_views):
            # assigning the curator a score
            if self.curator_scores_compact:
                delta = self.average_curator_score * self.compact_factor
                curator_score = random.randrange(self.average_curator_score - delta, self.average_curator_score + delta)
            else: 
                curator_score = random.randrange( 1 , self.average_curator_score * self.extreme_factor)

            # assigning the number of views for that curator
            if self.curator_views_compact:
                delta = self.total_curator_views_average * self.compact_factor
                curator_view = random.randrange(self.total_curator_views_average - delta, self.total_curator_views_average + delta)
            else: 
                curator_view= random.randrange( 1 , self.total_curator_views_average * self.extreme_factor)
            
            products += curator_score * curator_view
            scores += curator_score
        
        return products / scores # returns weighted average of views by curator score
        

## **Reward Functions**

Rewards functions have always the same parameters:

- **List of weights for each of the variables**
- **total_views** : total number of views for all blocks created by the user
- **unique_views**: total number of unique views generated for all blocks created by the user
- **prompts_cretead**: number of blocks created by the user that were reference from another user (or added to other user sandbox)
- **curator_views_weighted**: weighted average views by score of each curator that viewed the user blocks

### **Linear weighted sum**

In [41]:
def linear_sum(parameters_weights:list,
                total_views:int, 
                unique_views:int,
                prompts_created:int,
                curator_views_weighted:dict,
                ):
    '''
    total_views: total numbers of queries that saw/used that content, 
    unique_views: number of unique users that saw/used the content through the queries,
    prompts_created: number of other user's content created on top of the user content,
    curator_views_weighted: {user_score: total_views}
    * parameters_weights: list of weights for each pameters, range should be [0, 100] and parameter_weights.sum() == 100
                            [ w_total_views, w_unique_view, w_prompts_created, w_views_by_score] in this order
    
    returns: user final score for the epoch
    '''
    # assigning weights
    w_total_views = parameters_weights[0]
    w_unique_view = parameters_weights[1]
    w_prompts_created = parameters_weights[2]
    w_views_by_score = parameters_weights[3]


    final_result = (total_views * w_total_views) \
                    + (unique_views * w_unique_view) \
                    + (prompts_created * w_prompts_created ) + \
                    + (w_views_by_score * curator_views_weighted)

    return final_result

### **Log weighted sum**

In [71]:
def log_sum(parameters_weights:list,
                total_views:int, 
                unique_views:int,
                prompts_created:int,
                curator_views_weighted:dict,
                ):
    '''
    total_views: total numbers of queries that saw/used that content, 
    unique_views: number of unique users that saw/used the content through the queries,
    prompts_created: number of other user's content created on top of the user content,
    curator_views_weighted: {user_score: total_views}
    * parameters_weights: list of weights for each pameters, range should be [0, 100] and parameter_weights.sum() == 100
                            [ w_total_views, w_unique_view, w_prompts_created, w_views_by_score] in this order
    
    returns: user final score for the epoch
    '''
    # assigning weights
    w_total_views = parameters_weights[0]
    w_unique_view = parameters_weights[1]
    w_prompts_created = parameters_weights[2]
    w_views_by_score = parameters_weights[3]


    final_result = np.log(total_views * w_total_views) \
                    + np.log(unique_views * w_unique_view) \
                    + (prompts_created * w_prompts_created ) + \
                    + (w_views_by_score * curator_views_weighted)

    return final_result

## **Simulating**

In [70]:
np.log(2) * .1

0.06931471805599453

To do:

- add function that would add extreme values to a % of users in a specigic paramter
- generate visualizations for the scores

In [58]:
np.log(10)

2.302585092994046

#### Similar users

**Linear weighted**

In [86]:
test_parameters = {
    'number_of_users_to_generate': 10,
    'total_views_average':100,
    'total_unique_views_average':30,
    'total_prompts_created_average':10,
    'total_curator_views_average': 15,
    'average_curator_score': 10 ,
    'number_of_curator_views':  5,
    'reward_function': linear_sum,
    'curator_views_compact':True,
    'curator_scores_compact':True,
    'total_views_compact':True,
    'unique_views_compact':True,
    'prompts_compact':True,
    'compact_factor':0.2,
    'extreme_factor':20,
    'extreme_perc': 0.1,
    'curator_scores_extreme':False,
    'total_views_extreme': True,
    'unique_views_extreme': False,
    'prompts_extreme': False,
}

sim_test = TestGenerator(**test_parameters)

sim_dt = pd.DataFrame(sim_test.generate_user_with_scores())

sim_dt.sort_values('final_score', ascending=False)

Unnamed: 0,id,total_views,is_outlier,is_total_views_outlier,unique_views,is_unique_views_outlier,prompts_created,is_prompts_outlier,curator_views_weighted,is_curator_views_outlier,final_score
0,0,2160,False,True,33,False,9,False,14.142857,False,230.957143
2,2,116,False,False,26,False,9,False,15.769231,False,25.807692
4,4,117,False,False,31,False,8,False,13.369565,False,25.647826
8,8,113,False,False,27,False,8,False,14.285714,False,24.814286
7,7,95,False,False,32,False,9,False,15.16,False,24.664
3,3,105,False,False,28,False,9,False,13.645833,False,24.258333
1,1,104,False,False,28,False,8,False,14.058824,False,24.023529
5,5,89,False,False,29,False,9,False,16.0,False,23.8
9,9,80,False,False,33,False,11,False,14.68,False,23.772
6,6,80,False,False,35,False,8,False,14.638298,False,23.255319


**Log weighted**

In [87]:
test_parameters = {
    'number_of_users_to_generate': 50,
    'total_views_average':100,
    'total_unique_views_average':30,
    'total_prompts_created_average':10,
    'total_curator_views_average': 15,
    'average_curator_score': 10 ,
    'number_of_curator_views':  5,
    'reward_function': log_sum,
    'curator_views_compact':True,
    'curator_scores_compact':True,
    'total_views_compact':True,
    'unique_views_compact':True,
    'prompts_compact':False,
    'compact_factor':0.2,
    'extreme_factor':20,
    'extreme_perc': 0.1,
    'curator_scores_extreme':False,
    'total_views_extreme': True,
    'unique_views_extreme': False,
    'prompts_extreme': False,
}

sim_test = TestGenerator(**test_parameters)

sim_dt = pd.DataFrame(sim_test.generate_user_with_scores())

sim_dt.sort_values('final_score', ascending=False)

Unnamed: 0,id,total_views,is_outlier,is_total_views_outlier,unique_views,is_unique_views_outlier,prompts_created,is_prompts_outlier,curator_views_weighted,is_curator_views_outlier,final_score
36,36,95,False,False,30,False,199,False,15.530612,False,69.955296
18,18,88,False,False,27,False,196,False,14.68,False,68.533151
49,49,108,False,False,32,False,189,False,14.0,False,66.535844
28,28,89,False,False,25,False,184,False,14.862745,False,64.940587
6,6,81,False,False,31,False,184,False,14.125,False,64.766413
3,3,1720,False,True,28,False,170,False,14.755102,False,63.772302
44,44,88,False,False,24,False,177,False,15.26,False,62.947368
32,32,107,False,False,32,False,170,False,15.288889,False,61.342097
27,27,88,False,False,28,False,167,False,15.044444,False,60.015296
14,14,116,False,False,30,False,160,False,14.953488,False,58.22416


#### Testing extremes 

#### Similar user scores

#### Similar user scores

#### Similar user scores