In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random

# Exploring reward function options

## **Creating a test framework**

In [21]:
class TestGenerator:
    '''
    Generates a synthetic population of users (one dict of engagement metrics per
    user) and scores each user with a pluggable reward function, so different
    reward functions can be compared on comparable data.
    '''

    # Keys of a user dict that are forwarded to the reward function as keyword arguments.
    _METRIC_KEYS = ('total_views', 'unique_views', 'prompts_created', 'curator_views_weighted')

    def __init__(
                self,
                number_of_users_to_generate:int,
                total_views_average:int,
                total_unique_views_average:int,
                total_prompts_created_average:int,
                total_curator_views_average:int,
                average_curator_score:int = 10 ,
                number_of_curator_views:int = 5,
                reward_function: object = None,
                curator_views_compact:bool = True,
                curator_scores_compact:bool = True,
                total_views_compact:bool = True,
                unique_views_compact:bool = True,
                prompts_compact:bool = True,
                compact_factor:float = 0.2,
                extreme_factor:int = 20,
                extreme_perc:float = 0.1,
                curator_scores_extreme:bool = False,
                total_views_extreme:bool = False,
                unique_views_extreme:bool = False,
                prompts_extreme:bool = False,

                ):
        '''
        number_of_users_to_generate: how many user dicts generate_users() produces
        total_views_average: (average) total number of views from all blocks created by the user
        total_unique_views_average: (average) number of unique viewers from all the blocks generated by the user
        total_prompts_created_average: (average) number of blocks referencing all the user generated blocks (could be sandbox additions too)
        total_curator_views_average: (average) number of views attributed to a single curator
        average_curator_score: (average) score assigned to a single curator
        number_of_curator_views: how many curators are simulated per user when computing the weighted curator views
        reward_function: callable invoked as
                        reward_function(parameters_weights=..., total_views=..., unique_views=...,
                                        prompts_created=..., curator_views_weighted=...)
                        and expected to return (final_score, [four shares])
        X_compact: if True the value is drawn around its average, otherwise from [1, average * extreme_factor)
        compact_factor: half-width of the compact range as a fraction of the average:
                        [average - average * compact_factor, average + average * compact_factor)
        extreme_factor: multiplier used both for the upper bound of non-compact draws and for inflating outlier values
        extreme_perc: fraction of users that may receive an extreme value for each enabled metric
        X_extreme: if True, the first users (up to number_of_users * extreme_perc) get that metric multiplied by extreme_factor

        '''
        self.compact_factor = compact_factor
        # NOTE: the misspelled attribute name is kept so existing code reading it keeps working.
        self.number_of_users_to_genereate = number_of_users_to_generate
        self.total_views_average = total_views_average
        self.total_unique_views_average = total_unique_views_average
        self.total_prompts_created_average = total_prompts_created_average
        self.total_views_compact = total_views_compact
        self.unique_views_compact = unique_views_compact
        self.prompts_compact = prompts_compact
        self.total_curator_views_average = total_curator_views_average
        self.number_of_curator_views = number_of_curator_views
        self.average_curator_score = average_curator_score
        self.reward_function = reward_function
        self.extreme_factor = extreme_factor
        self.curator_views_compact = curator_views_compact
        self.curator_scores_compact = curator_scores_compact
        self.simulated_users = None
        self.extreme_perc = extreme_perc
        self.curator_scores_extreme = curator_scores_extreme
        self.total_views_extreme = total_views_extreme
        self.unique_views_extreme = unique_views_extreme
        self.prompts_extreme = prompts_extreme


    def _sample_int(self, average, compact):
        '''
        Draws one random integer for a metric.

        compact=True  -> uniform integer in [average - delta, average + delta), delta = average * compact_factor
        compact=False -> uniform integer in [1, average * extreme_factor)

        Bounds are rounded to integers first because random.randrange rejects float
        arguments. If rounding leaves an empty range, the lower bound is returned
        instead of raising.
        '''
        if compact:
            delta = average * self.compact_factor
            low = int(round(average - delta))
            high = int(round(average + delta))
        else:
            low = 1
            high = int(round(average * self.extreme_factor))

        if high <= low:
            return low
        return random.randrange(low, high)

    def generate_users(self):
        '''
        Builds the list of simulated users, stores it in self.simulated_users and returns it.

        Each user is a dict with 'id', the five outlier flags and the four metrics.
        For every metric whose X_extreme switch is on, the value is multiplied by
        extreme_factor while fewer than number_of_users * extreme_perc users have
        received that extreme. A user receives at most one extreme metric, so the
        enabled extremes are spread over consecutive users instead of all landing
        on the first one.
        '''
        # (metric key, outlier flag key, extreme switch, value generator) in evaluation order
        metric_specs = (
            ('total_views', 'is_total_views_outlier',
             self.total_views_extreme, self.generate_total_views),
            ('unique_views', 'is_unique_views_outlier',
             self.unique_views_extreme, self.generate_unique_views),
            ('prompts_created', 'is_prompts_outlier',
             self.prompts_extreme, self.generate_prompts_creation),
            ('curator_views_weighted', 'is_curator_views_outlier',
             self.curator_scores_extreme, self.generate_curator_views),
        )
        max_extremes = self.number_of_users_to_genereate * self.extreme_perc
        extremes_given = {spec[0]: 0 for spec in metric_specs}

        simulated_users = []
        for user_id in range(self.number_of_users_to_genereate):
            # flags first, metrics after: keeps the column order of the resulting DataFrame
            user_data = {'id': user_id, 'is_outlier': False}
            for spec in metric_specs:
                user_data[spec[1]] = False

            for metric, flag_key, extreme_enabled, generate in metric_specs:
                value = generate()
                # 'is_outlier' doubles as the "this user already got an extreme" marker
                if extreme_enabled \
                        and not user_data['is_outlier'] \
                        and extremes_given[metric] < max_extremes:
                    value = value * self.extreme_factor
                    extremes_given[metric] += 1
                    user_data['is_outlier'] = True
                    user_data[flag_key] = True
                user_data[metric] = value

            simulated_users.append(user_data)

        self.simulated_users = simulated_users

        return simulated_users

    def generate_user_with_scores(self, reward_function_weights:list = None):
        '''
        Scores every simulated user with self.reward_function and returns the user dicts.

        reward_function_weights: weights passed to the reward function as
                                 `parameters_weights`; defaults to [0.1, 0.2, 0.3, 0.4].

        Users are generated on first use. The score and the four shares are written
        into the cached user dicts ('final_score', 'total_views_share', 'unique_share',
        'prompts_share', 'curators_share'). Only the four metric values are forwarded
        to the reward function, so the method can be called repeatedly (for example
        with different weights) on the same users.

        raises: TypeError if no reward_function was provided.
        '''
        if self.reward_function is None:
            raise TypeError('reward_function must be a callable, got None')
        if reward_function_weights is None:
            reward_function_weights = [0.1, 0.2, 0.3, 0.4]
        if self.simulated_users is None:
            self.generate_users()

        final_data = []
        for user in self.simulated_users:
            metrics = {key: user[key] for key in self._METRIC_KEYS}

            user_score, score_shares = self.reward_function(
                parameters_weights=reward_function_weights,
                **metrics
            )
            user['final_score'] = user_score
            user['total_views_share'] = score_shares[0]
            user['unique_share'] = score_shares[1]
            user['prompts_share'] = score_shares[2]
            user['curators_share'] = score_shares[3]

            final_data.append(user)

        return final_data

    def generate_total_views(self):
        '''Draws a total-views value (compact or spread, depending on total_views_compact).'''
        return self._sample_int(self.total_views_average, self.total_views_compact)

    def generate_unique_views(self):
        '''Draws a unique-views value (compact or spread, depending on unique_views_compact).'''
        return self._sample_int(self.total_unique_views_average, self.unique_views_compact)

    def generate_prompts_creation(self):
        '''Draws a prompts-created value (compact or spread, depending on prompts_compact).'''
        return self._sample_int(self.total_prompts_created_average, self.prompts_compact)

    def generate_curator_views(self):
        '''
        Simulates number_of_curator_views curators, each with a random score and a
        random number of views, and returns the average of the views weighted by
        the curator scores. Returns 0.0 when the scores sum to zero (e.g. no
        curators are simulated), instead of dividing by zero.
        '''
        weighted_views = 0
        total_score = 0
        for _ in range(self.number_of_curator_views):
            # score first, views second: same draw order for every curator
            curator_score = self._sample_int(self.average_curator_score, self.curator_scores_compact)
            curator_view = self._sample_int(self.total_curator_views_average, self.curator_views_compact)

            weighted_views += curator_score * curator_view
            total_score += curator_score

        if total_score == 0:
            return 0.0
        return weighted_views / total_score # weighted average of views by curator score
        

## **Reward Functions**

Reward functions always take the same parameters:

- **parameters_weights**: list of weights, one for each of the variables below (in the same order)
- **total_views**: total number of views for all blocks created by the user
- **unique_views**: total number of unique views generated for all blocks created by the user
- **prompts_created**: number of blocks created by the user that were referenced by another user (or added to another user's sandbox)
- **curator_views_weighted**: average number of views per curator that viewed the user's blocks, weighted by each curator's score

### **Linear weighted sum**

In [19]:
def linear_sum(parameters_weights:list,
                total_views:int, 
                unique_views:int,
                prompts_created:int,
                curator_views_weighted:float,
                ):
    '''
    Linear weighted sum of the four user metrics.

    parameters_weights: weights for each parameter, in this order:
                        [ w_total_views, w_unique_view, w_prompts_created, w_views_by_score]
    total_views: total number of queries that saw/used that content,
    unique_views: number of unique users that saw/used the content through the queries,
    prompts_created: number of other users' content created on top of the user content,
    curator_views_weighted: curator views averaged by curator score (a single number)

    returns: (final_result, shares)
             final_result: user final score for the epoch
             shares: fraction of final_result contributed by each metric, in the
                     same order as the weights; all 0.0 when final_result is 0
    '''
    # weighted contribution of each metric, in weight order
    contributions = [
        total_views * parameters_weights[0],
        unique_views * parameters_weights[1],
        prompts_created * parameters_weights[2],
        parameters_weights[3] * curator_views_weighted,
    ]

    final_result = sum(contributions)

    try:
        shares = [contribution / final_result for contribution in contributions]
    except ZeroDivisionError:
        # every metric contributed nothing: there is no score to split
        shares = [0.0, 0.0, 0.0, 0.0]

    return final_result, shares

### **Log weighted sum**

In [20]:
def log_sum(parameters_weights:list,
                total_views:int, 
                unique_views:int,
                prompts_created:int,
                curator_views_weighted:float,
                ):
    '''
    Weighted sum where the two view counts enter through a natural logarithm:

        log(total_views * w_total_views) + log(unique_views * w_unique_view)
        + prompts_created * w_prompts_created + w_views_by_score * curator_views_weighted

    parameters_weights: weights for each parameter, in this order:
                        [ w_total_views, w_unique_view, w_prompts_created, w_views_by_score]
    total_views: total number of queries that saw/used that content,
    unique_views: number of unique users that saw/used the content through the queries,
    prompts_created: number of other users' content created on top of the user content,
    curator_views_weighted: curator views averaged by curator score (a single number)

    The logarithm terms are only finite for positive weighted views; zero or
    negative values follow numpy's np.log behaviour (-inf / nan).

    returns: (final_result, shares)
             final_result: user final score for the epoch
             shares: fraction of final_result contributed by each term, in the
                     same order as the weights; all 0.0 when a scalar
                     final_result is exactly 0
    '''
    # contribution of each metric, in weight order (computed once, reused for the shares)
    contributions = [
        np.log(total_views * parameters_weights[0]),
        np.log(unique_views * parameters_weights[1]),
        prompts_created * parameters_weights[2],
        parameters_weights[3] * curator_views_weighted,
    ]

    final_result = contributions[0] + contributions[1] + contributions[2] + contributions[3]

    # numpy floats do not raise on 0/0, they yield nan; report empty shares instead
    if np.ndim(final_result) == 0 and final_result == 0:
        shares = [0.0, 0.0, 0.0, 0.0]
    else:
        shares = [contribution / final_result for contribution in contributions]

    return final_result, shares

## **Simulating**

TO DO:

- Make a panel of 4 scatter plots, one per variable plotted against the final score, with outliers marked
- Create styled dataframe to mark outliers (?)


#### Similar users

**Linear weighted**

In [22]:
# Scenario: 10 users, every metric drawn close to its average, scored with linear_sum.
# Extremes are enabled for total views, unique views and curator views.
# NOTE: no random seed is set in the visible cells, so the table changes on every run.
test_parameters = dict(
    number_of_users_to_generate=10,
    total_views_average=100,
    total_unique_views_average=30,
    total_prompts_created_average=10,
    total_curator_views_average=15,
    average_curator_score=10,
    number_of_curator_views=5,
    reward_function=linear_sum,
    curator_views_compact=True,
    curator_scores_compact=True,
    total_views_compact=True,
    unique_views_compact=True,
    prompts_compact=True,
    compact_factor=0.2,
    extreme_factor=20,
    extreme_perc=0.1,
    curator_scores_extreme=True,
    total_views_extreme=True,
    unique_views_extreme=True,
    prompts_extreme=False,
)

sim_test = TestGenerator(**test_parameters)

# one row per simulated user, including the score and the per-metric shares
sim_dt = pd.DataFrame(sim_test.generate_user_with_scores())

# highest-scoring users first
sim_dt.sort_values(by='final_score', ascending=False)

Unnamed: 0,id,is_outlier,is_total_views_outlier,is_unique_views_outlier,is_prompts_outlier,is_curator_views_outlier,total_views,unique_views,prompts_created,curator_views_weighted,final_score,total_views_share,unique_share,prompts_share,curators_share
0,0,True,True,False,False,False,2100,29,11,14.08,224.732,0.934446,0.025809,0.014684,0.025061
2,2,True,False,False,False,True,81,32,10,299.183673,137.173469,0.059049,0.046656,0.02187,0.872424
1,1,True,False,True,False,False,85,500,8,13.829787,116.431915,0.073004,0.858871,0.020613,0.047512
4,4,False,False,False,False,False,104,35,11,14.674419,26.569767,0.391422,0.263457,0.124201,0.220919
6,6,False,False,False,False,False,110,34,11,13.0,26.3,0.418251,0.258555,0.125475,0.197719
5,5,False,False,False,False,False,103,30,9,14.595745,24.838298,0.414682,0.241562,0.108703,0.235052
9,9,False,False,False,False,False,103,24,11,15.688889,24.675556,0.417417,0.194524,0.133736,0.254323
7,7,False,False,False,False,False,90,27,11,13.82,23.228,0.387463,0.232478,0.14207,0.237989
8,8,False,False,False,False,False,92,24,10,15.22449,23.089796,0.398444,0.207884,0.129928,0.263744
3,3,False,False,False,False,False,82,32,8,13.408163,22.363265,0.366673,0.286184,0.107319,0.239825


**Log weighted**

In [23]:
# Scenario: 15 users scored with log_sum. Curator views are NOT compact here
# (drawn from a wide range), and extremes are enabled for total and unique views only.
# NOTE: no random seed is set in the visible cells, so the table changes on every run.
test_parameters = dict(
    number_of_users_to_generate=15,
    total_views_average=100,
    total_unique_views_average=30,
    total_prompts_created_average=10,
    total_curator_views_average=15,
    average_curator_score=10,
    number_of_curator_views=5,
    reward_function=log_sum,
    curator_views_compact=False,
    curator_scores_compact=True,
    total_views_compact=True,
    unique_views_compact=True,
    prompts_compact=True,
    compact_factor=0.2,
    extreme_factor=10,
    extreme_perc=0.1,
    curator_scores_extreme=False,
    total_views_extreme=True,
    unique_views_extreme=True,
    prompts_extreme=False,
)

sim_test = TestGenerator(**test_parameters)

# one row per simulated user, including the score and the per-metric shares
sim_dt = pd.DataFrame(sim_test.generate_user_with_scores())

# highest-scoring users first
sim_dt.sort_values(by='final_score', ascending=False)

Unnamed: 0,id,is_outlier,is_total_views_outlier,is_unique_views_outlier,is_prompts_outlier,is_curator_views_outlier,total_views,unique_views,prompts_created,curator_views_weighted,final_score,total_views_share,unique_share,prompts_share,curators_share
10,10,False,False,False,False,False,104,27,9,101.137255,47.183107,0.049632,0.035742,0.057224,0.857402
2,2,True,False,True,False,False,114,240,9,90.711538,45.28943,0.053735,0.085477,0.059617,0.801172
4,4,False,False,False,False,False,84,34,8,85.77551,40.755358,0.05222,0.047035,0.058888,0.841857
8,8,False,False,False,False,False,114,29,8,84.188679,40.266943,0.060437,0.043655,0.059602,0.836306
14,14,False,False,False,False,False,119,27,11,78.382979,38.816129,0.063802,0.043446,0.085016,0.807736
3,3,True,False,True,False,False,88,350,11,70.489796,37.919165,0.057352,0.112041,0.087027,0.74358
7,7,False,False,False,False,False,110,26,8,77.428571,37.417982,0.064084,0.044061,0.06414,0.827715
1,1,True,True,False,False,False,920,35,10,64.586957,35.302481,0.128087,0.055121,0.08498,0.731812
0,0,True,True,False,False,False,1000,24,8,65.75,34.873786,0.132052,0.04498,0.06882,0.754148
13,13,False,False,False,False,False,93,27,11,69.022222,34.825302,0.064034,0.048425,0.094759,0.792782


#### Testing extremes 

#### Similar user scores

#### Similar user scores

#### Similar user scores